From 03ae7d0f5617be2098636999a047eb91823938ac Mon Sep 17 00:00:00 2001 From: Jun Yi Date: Fri, 12 Mar 2021 09:50:17 +0800 Subject: [PATCH 01/59] fs: Move @f_count to different cacheline with @f_mode get_file_rcu_many, which is called by __fget_files, has used atomic_try_cmpxchg now and it can reduce the access number of the global variable to improve the performance of atomic instruction compared with atomic_cmpxchg. __fget_files does check the @f_mode with mask variable and will do some atomic operations on @f_count, but both are on the same cacheline. Many CPU cores do file access and it will cause much conflicts on @f_count. If we could make the two members into different cachelines, it shall relax the siutations. We have tested this on ARM64 and X86, the result is as follows: Syscall of unixbench has been run on Huawei Kunpeng920 with this patch: 24 x System Call Overhead 1 System Call Overhead 3160841.4 lps (10.0 s, 1 samples) System Benchmarks Partial Index BASELINE RESULT INDEX System Call Overhead 15000.0 3160841.4 2107.2 ======== System Benchmarks Index Score (Partial Only) 2107.2 Without this patch: 24 x System Call Overhead 1 System Call Overhead 2222456.0 lps (10.0 s, 1 samples) System Benchmarks Partial Index BASELINE RESULT INDEX System Call Overhead 15000.0 2222456.0 1481.6 ======== System Benchmarks Index Score (Partial Only) 1481.6 And on Intel 6248 platform with this patch: 40 CPUs in system; running 24 parallel copies of tests System Call Overhead 4288509.1 lps (10.0 s, 1 samples) System Benchmarks Partial Index BASELINE RESULT INDEX System Call Overhead 15000.0 4288509.1 2859.0 ======== System Benchmarks Index Score (Partial Only) 2859.0 Without this patch: 40 CPUs in system; running 24 parallel copies of tests System Call Overhead 3666313.0 lps (10.0 s, 1 samples) System Benchmarks Partial Index BASELINE RESULT INDEX System Call Overhead 15000.0 3666313.0 2444.2 ======== System Benchmarks Index Score (Partial Only) 2444.2 Change-Id: Id06876291af68c040f5a5dfb34f2c31634e9fb46 Cc: Will Deacon Cc: Mark Rutland Cc: Peter Zijlstra Cc: Alexander Viro Cc: Boqun Feng Signed-off-by: Yuqi Jin Signed-off-by: Shaokun Zhang Signed-off-by: Jun Yi Signed-off-by: Huacai Chen --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index db632747781a..1ecda819e583 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -944,7 +944,6 @@ struct file { */ spinlock_t f_lock; enum rw_hint f_write_hint; - atomic_long_t f_count; unsigned int f_flags; fmode_t f_mode; struct mutex f_pos_lock; @@ -952,6 +951,7 @@ struct file { struct fown_struct f_owner; const struct cred *f_cred; struct file_ra_state f_ra; + atomic_long_t f_count; u64 f_version; #ifdef CONFIG_SECURITY -- Gitee From 0061d307d4fa88023810ed6d388541f5e03a2620 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 31 Dec 2020 15:13:33 +0800 Subject: [PATCH 02/59] ACPI: Add LoongArch support for ACPI_PROCESSOR/ACPI_NUMA We are preparing to add new Loongson (based on LoongArch, not MIPS) support. LoongArch use ACPI other than DT as its boot protocol, so add its support for ACPI_PROCESSOR/ACPI_NUMA. Change-Id: Ia08822b793700a6a30c607da5d8c02fd9b868324 Signed-off-by: Huacai Chen --- drivers/acpi/Kconfig | 4 ++-- drivers/acpi/numa/Kconfig | 2 +- drivers/acpi/numa/srat.c | 2 +- include/linux/acpi.h | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index b5ea34c340cc..b9753969b0db 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -272,9 +272,9 @@ config ACPI_CPPC_LIB config ACPI_PROCESSOR tristate "Processor" - depends on X86 || IA64 || ARM64 + depends on X86 || IA64 || ARM64 || LOONGARCH select ACPI_PROCESSOR_IDLE - select ACPI_CPU_FREQ_PSS if X86 || IA64 + select ACPI_CPU_FREQ_PSS if X86 || IA64 || LOONGARCH default y help This driver adds support for the ACPI Processor package. It is required diff --git a/drivers/acpi/numa/Kconfig b/drivers/acpi/numa/Kconfig index fcf2e556d69d..39b1f34c21df 100644 --- a/drivers/acpi/numa/Kconfig +++ b/drivers/acpi/numa/Kconfig @@ -2,7 +2,7 @@ config ACPI_NUMA bool "NUMA support" depends on NUMA - depends on (X86 || IA64 || ARM64) + depends on (X86 || IA64 || ARM64 || LOONGARCH) default y if IA64 || ARM64 config ACPI_HMAT diff --git a/drivers/acpi/numa/srat.c b/drivers/acpi/numa/srat.c index c9aca749ed66..e6a6b2380a53 100644 --- a/drivers/acpi/numa/srat.c +++ b/drivers/acpi/numa/srat.c @@ -206,7 +206,7 @@ int __init srat_disabled(void) return acpi_numa < 0; } -#if defined(CONFIG_X86) || defined(CONFIG_ARM64) +#if defined(CONFIG_X86) || defined(CONFIG_ARM64) || defined(CONFIG_LOONGARCH) /* * Callback for SLIT parsing. pxm_to_node() returns NUMA_NO_NODE for * I/O localities since SRAT does not list them. I/O localities are diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 1092434230c2..ff235833f31b 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -248,7 +248,7 @@ void acpi_table_print_madt_entry (struct acpi_subtable_header *madt); /* the following numa functions are architecture-dependent */ void acpi_numa_slit_init (struct acpi_table_slit *slit); -#if defined(CONFIG_X86) || defined(CONFIG_IA64) +#if defined(CONFIG_X86) || defined(CONFIG_IA64) || defined(CONFIG_LOONGARCH) void acpi_numa_processor_affinity_init (struct acpi_srat_cpu_affinity *pa); #else static inline void -- Gitee From bcf195b7802441dba2adba2701fb53ab35bb51c1 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sun, 25 Apr 2021 15:31:04 +0800 Subject: [PATCH 03/59] ACPICA: MADT: Add LoongArch APICs support Change-Id: I0e87730f6f7f3ac64e7d1ef9df054d5c34a33158 Signed-off-by: Huacai Chen --- drivers/acpi/tables.c | 10 ++++ include/acpi/actbl2.h | 125 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 134 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c index 9d581045acff..b36d9a22a07a 100644 --- a/drivers/acpi/tables.c +++ b/drivers/acpi/tables.c @@ -207,6 +207,16 @@ void acpi_table_print_madt_entry(struct acpi_subtable_header *header) } break; + case ACPI_MADT_TYPE_CORE_PIC: + { + struct acpi_madt_core_pic *p = + (struct acpi_madt_core_pic *)header; + pr_debug("CORE PIC (processor_id[0x%02x] core_id[0x%02x] %s)\n", + p->processor_id, p->core_id, + (p->flags & ACPI_MADT_ENABLED) ? "enabled" : "disabled"); + } + break; + default: pr_warn("Found unsupported MADT entry (type = 0x%x)\n", header->type); diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h index a1f33b091801..08436be89fe7 100644 --- a/include/acpi/actbl2.h +++ b/include/acpi/actbl2.h @@ -518,7 +518,14 @@ enum acpi_madt_type { ACPI_MADT_TYPE_GENERIC_MSI_FRAME = 13, ACPI_MADT_TYPE_GENERIC_REDISTRIBUTOR = 14, ACPI_MADT_TYPE_GENERIC_TRANSLATOR = 15, - ACPI_MADT_TYPE_RESERVED = 16, /* 16 and greater are reserved */ + ACPI_MADT_TYPE_CORE_PIC = 17, + ACPI_MADT_TYPE_LIO_PIC = 18, + ACPI_MADT_TYPE_HT_PIC = 19, + ACPI_MADT_TYPE_EIO_PIC = 20, + ACPI_MADT_TYPE_MSI_PIC = 21, + ACPI_MADT_TYPE_BIO_PIC = 22, + ACPI_MADT_TYPE_LPC_PIC = 23, + ACPI_MADT_TYPE_RESERVED = 24, /* 24 and greater are reserved */ ACPI_MADT_TYPE_PHYTIUM_2500 = 128 }; @@ -726,6 +733,122 @@ struct acpi_madt_generic_translator { u32 reserved2; }; +/* Values for Version field above */ + +enum acpi_madt_core_pic_version { + ACPI_MADT_CORE_PIC_VERSION_NONE = 0, + ACPI_MADT_CORE_PIC_VERSION_V1 = 1, + ACPI_MADT_CORE_PIC_VERSION_RESERVED = 2 /* 2 and greater are reserved */ +}; + +enum acpi_madt_lio_pic_version { + ACPI_MADT_LIO_PIC_VERSION_NONE = 0, + ACPI_MADT_LIO_PIC_VERSION_V1 = 1, + ACPI_MADT_LIO_PIC_VERSION_RESERVED = 2 /* 2 and greater are reserved */ +}; + +enum acpi_madt_eio_pic_version { + ACPI_MADT_EIO_PIC_VERSION_NONE = 0, + ACPI_MADT_EIO_PIC_VERSION_V1 = 1, + ACPI_MADT_EIO_PIC_VERSION_RESERVED = 2 /* 2 and greater are reserved */ +}; + +enum acpi_madt_ht_pic_version { + ACPI_MADT_HT_PIC_VERSION_NONE = 0, + ACPI_MADT_HT_PIC_VERSION_V1 = 1, + ACPI_MADT_HT_PIC_VERSION_RESERVED = 2 /* 2 and greater are reserved */ +}; + +enum acpi_madt_bio_pic_version { + ACPI_MADT_BIO_PIC_VERSION_NONE = 0, + ACPI_MADT_BIO_PIC_VERSION_V1 = 1, + ACPI_MADT_BIO_PIC_VERSION_RESERVED = 2 /* 2 and greater are reserved */ +}; + +enum acpi_madt_msi_pic_version { + ACPI_MADT_MSI_PIC_VERSION_NONE = 0, + ACPI_MADT_MSI_PIC_VERSION_V1 = 1, + ACPI_MADT_MSI_PIC_VERSION_RESERVED = 2 /* 2 and greater are reserved */ +}; + +enum acpi_madt_lpc_pic_version { + ACPI_MADT_LPC_PIC_VERSION_NONE = 0, + ACPI_MADT_LPC_PIC_VERSION_V1 = 1, + ACPI_MADT_LPC_PIC_VERSION_RESERVED = 2 /* 2 and greater are reserved */ +}; + +/* Core Interrupt Controller */ + +struct acpi_madt_core_pic { + struct acpi_subtable_header header; + u8 version; + u32 processor_id; + u32 core_id; + u32 flags; +}; + +/* Legacy I/O Interrupt Controller */ + +struct acpi_madt_lio_pic { + struct acpi_subtable_header header; + u8 version; + u64 address; + u16 size; + u8 cascade[2]; + u32 cascade_map[2]; +}; + +/* Extend I/O Interrupt Controller */ + +struct acpi_madt_eio_pic { + struct acpi_subtable_header header; + u8 version; + u8 cascade; + u8 node; + u64 node_map; +}; + +/* HT Interrupt Controller */ + +struct acpi_madt_ht_pic { + struct acpi_subtable_header header; + u8 version; + u64 address; + u16 size; + u8 cascade[8]; +}; + +/* Bridge I/O Interrupt Controller */ + +struct acpi_madt_bio_pic { + struct acpi_subtable_header header; + u8 version; + u64 address; + u16 size; + u16 id; + u16 gsi_base; +}; + +/* MSI Interrupt Controller */ + +struct acpi_madt_msi_pic { + struct acpi_subtable_header header; + u8 version; + u64 msg_address; + u32 start; + u32 count; +}; + +/* LPC Interrupt Controller */ + +struct acpi_madt_lpc_pic { + struct acpi_subtable_header header; + u8 version; + u64 address; + u16 size; + u8 cascade; +}; + /* * Common flags fields for MADT subtables */ -- Gitee From 8630031f4e85cba9a9e986b88144c84a0aa70971 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 7 Apr 2021 14:11:06 +0800 Subject: [PATCH 04/59] ACPICA: Events: Support fixed pcie wake event Some chipsets support fixed pcie wake event which is defined in the PM1 block, such as LS7A1000 of Loongson company, so we add code to handle it. Signed-off-by: Jianmin Lv Signed-off-by: Huacai Chen Change-Id: I342b44a6494f3627eb1cb5d83c2f63bc1e65d2c4 --- drivers/acpi/acpica/evevent.c | 12 ++++++++---- drivers/acpi/acpica/hwsleep.c | 12 ++++++++++++ drivers/acpi/acpica/utglobal.c | 4 ++++ include/acpi/actypes.h | 3 ++- 4 files changed, 26 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/acpica/evevent.c b/drivers/acpi/acpica/evevent.c index 9efca54c51ac..50c0e2f1e758 100644 --- a/drivers/acpi/acpica/evevent.c +++ b/drivers/acpi/acpica/evevent.c @@ -140,9 +140,8 @@ static acpi_status acpi_ev_fixed_event_initialize(void) if (acpi_gbl_fixed_event_info[i].enable_register_id != 0xFF) { status = - acpi_write_bit_register(acpi_gbl_fixed_event_info - [i].enable_register_id, - ACPI_DISABLE_EVENT); + acpi_write_bit_register(acpi_gbl_fixed_event_info[i].enable_register_id, + (i == ACPI_EVENT_PCIE_WAKE) ? ACPI_ENABLE_EVENT : ACPI_DISABLE_EVENT); if (ACPI_FAILURE(status)) { return (status); } @@ -185,6 +184,11 @@ u32 acpi_ev_fixed_event_detect(void) return (int_status); } + if (fixed_enable & ACPI_BITMASK_PCIEXP_WAKE_DISABLE) + fixed_enable &= ~ACPI_BITMASK_PCIEXP_WAKE_DISABLE; + else + fixed_enable |= ACPI_BITMASK_PCIEXP_WAKE_DISABLE; + ACPI_DEBUG_PRINT((ACPI_DB_INTERRUPTS, "Fixed Event Block: Enable %08X Status %08X\n", fixed_enable, fixed_status)); @@ -250,7 +254,7 @@ static u32 acpi_ev_fixed_event_dispatch(u32 event) if (!acpi_gbl_fixed_event_handlers[event].handler) { (void)acpi_write_bit_register(acpi_gbl_fixed_event_info[event]. enable_register_id, - ACPI_DISABLE_EVENT); + event == ACPI_EVENT_PCIE_WAKE ? ACPI_ENABLE_EVENT : ACPI_DISABLE_EVENT); ACPI_ERROR((AE_INFO, "No installed handler for fixed event - %s (%u), disabling", diff --git a/drivers/acpi/acpica/hwsleep.c b/drivers/acpi/acpica/hwsleep.c index fcc84d196238..281705b121b8 100644 --- a/drivers/acpi/acpica/hwsleep.c +++ b/drivers/acpi/acpica/hwsleep.c @@ -309,6 +309,18 @@ acpi_status acpi_hw_legacy_wake(u8 sleep_state) [ACPI_EVENT_SLEEP_BUTTON]. status_register_id, ACPI_CLEAR_STATUS); + /* Enable pcie wake event if support */ + if ((acpi_gbl_FADT.flags & ACPI_FADT_PCI_EXPRESS_WAKE)) { + (void) + acpi_write_bit_register(acpi_gbl_fixed_event_info + [ACPI_EVENT_PCIE_WAKE]. + enable_register_id, ACPI_DISABLE_EVENT); + (void) + acpi_write_bit_register(acpi_gbl_fixed_event_info + [ACPI_EVENT_PCIE_WAKE]. + status_register_id, ACPI_CLEAR_STATUS); + } + acpi_hw_execute_sleep_method(METHOD_PATHNAME__SST, ACPI_SST_WORKING); return_ACPI_STATUS(status); } diff --git a/drivers/acpi/acpica/utglobal.c b/drivers/acpi/acpica/utglobal.c index e6dcbdc3fc6e..0dc81b85112c 100644 --- a/drivers/acpi/acpica/utglobal.c +++ b/drivers/acpi/acpica/utglobal.c @@ -186,6 +186,10 @@ struct acpi_fixed_event_info acpi_gbl_fixed_event_info[ACPI_NUM_FIXED_EVENTS] = ACPI_BITREG_RT_CLOCK_ENABLE, ACPI_BITMASK_RT_CLOCK_STATUS, ACPI_BITMASK_RT_CLOCK_ENABLE}, + /* ACPI_EVENT_PCIE_WAKE */ {ACPI_BITREG_PCIEXP_WAKE_STATUS, + ACPI_BITREG_PCIEXP_WAKE_DISABLE, + ACPI_BITMASK_PCIEXP_WAKE_STATUS, + ACPI_BITMASK_PCIEXP_WAKE_DISABLE}, }; #endif /* !ACPI_REDUCED_HARDWARE */ diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index 647cb11d0a0a..cc639699ae44 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -714,7 +714,8 @@ typedef u32 acpi_event_type; #define ACPI_EVENT_POWER_BUTTON 2 #define ACPI_EVENT_SLEEP_BUTTON 3 #define ACPI_EVENT_RTC 4 -#define ACPI_EVENT_MAX 4 +#define ACPI_EVENT_PCIE_WAKE 5 +#define ACPI_EVENT_MAX 5 #define ACPI_NUM_FIXED_EVENTS ACPI_EVENT_MAX + 1 /* -- Gitee From 3c155c71a713137e3023f38a36db6379f5c3b1fa Mon Sep 17 00:00:00 2001 From: lvjianmin Date: Thu, 29 Oct 2020 16:29:11 +0800 Subject: [PATCH 05/59] serial: 8250_pnp: Support configurable clock frequency Loongson boards need configurable clock frequency for serial ports. Change-Id: I0c0a192d7f2526e7b619a7b4f4f3dcf111de0442 Signed-off-by: lvjianmin Signed-off-by: Huacai Chen --- drivers/tty/serial/8250/8250_pnp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/8250/8250_pnp.c b/drivers/tty/serial/8250/8250_pnp.c index de90d681b64c..e51421f97281 100644 --- a/drivers/tty/serial/8250/8250_pnp.c +++ b/drivers/tty/serial/8250/8250_pnp.c @@ -474,7 +474,9 @@ serial_pnp_probe(struct pnp_dev *dev, const struct pnp_device_id *dev_id) uart.port.flags |= UPF_SKIP_TEST | UPF_BOOT_AUTOCONF; if (pnp_irq_flags(dev, 0) & IORESOURCE_IRQ_SHAREABLE) uart.port.flags |= UPF_SHARE_IRQ; - uart.port.uartclk = 1843200; + if (device_property_read_u32(&dev->dev, "clock-frequency", &uart.port.uartclk)) { + uart.port.uartclk = 1843200; + } uart.port.dev = &dev->dev; line = serial8250_register_8250_port(&uart); -- Gitee From b0e602bfd7deab5ef587a3b28ab4502f1f04021b Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 31 Dec 2020 15:13:33 +0800 Subject: [PATCH 06/59] irqchip: Adjust Kconfig for Loongson We are preparing to add new Loongson (based on LoongArch, not MIPS) support. HTVEC will be used by more Loongson processors, so we adjust its description. HTPIC is bound to MIPS-based Loongson, so we add a MIPS dependency, Change-Id: I1c64a95071ab47122ae2fa13b18ae5834698a440 Signed-off-by: Huacai Chen --- drivers/irqchip/Kconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 214d7fd1fdd1..51ea7ca5e4e3 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -572,7 +572,7 @@ config LOONGSON_LIOINTC config LOONGSON_HTPIC bool "Loongson3 HyperTransport PIC Controller" - depends on MACH_LOONGSON64 + depends on (MACH_LOONGSON64 && MIPS) default y select IRQ_DOMAIN select GENERIC_IRQ_CHIP @@ -580,12 +580,12 @@ config LOONGSON_HTPIC Support for the Loongson-3 HyperTransport PIC Controller. config LOONGSON_HTVEC - bool "Loongson3 HyperTransport Interrupt Vector Controller" + bool "Loongson HyperTransport Interrupt Vector Controller" depends on MACH_LOONGSON64 default MACH_LOONGSON64 select IRQ_DOMAIN_HIERARCHY help - Support for the Loongson3 HyperTransport Interrupt Vector Controller. + Support for the Loongson HyperTransport Interrupt Vector Controller. config LOONGSON_PCH_PIC bool "Loongson PCH PIC Controller" -- Gitee From 1c479a12d25eaff589ac800bc17b6cd84b0c580a Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 31 Dec 2020 15:13:33 +0800 Subject: [PATCH 07/59] irqchip/loongson-pch-pic: Add ACPI init support We are preparing to add new Loongson (based on LoongArch, not MIPS) support. LoongArch use ACPI other than DT as its boot protocol, so add ACPI init support. Change-Id: Ic6fc82c5f9f69b853b74a386f8aa435071f34993 Signed-off-by: Huacai Chen --- drivers/irqchip/irq-loongson-pch-pic.c | 158 ++++++++++++++++++++----- 1 file changed, 131 insertions(+), 27 deletions(-) diff --git a/drivers/irqchip/irq-loongson-pch-pic.c b/drivers/irqchip/irq-loongson-pch-pic.c index 90e1ad6e3612..3d9b1c53ebd9 100644 --- a/drivers/irqchip/irq-loongson-pch-pic.c +++ b/drivers/irqchip/irq-loongson-pch-pic.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2020, Jiaxun Yang + * Jianmin Lv + * Huacai Chen * Loongson PCH PIC support */ @@ -15,6 +17,7 @@ #include #include #include +#include /* Registers */ #define PCH_PIC_MASK 0x20 @@ -32,14 +35,23 @@ #define PIC_COUNT (PIC_COUNT_PER_REG * PIC_REG_COUNT) #define PIC_REG_IDX(irq_id) ((irq_id) / PIC_COUNT_PER_REG) #define PIC_REG_BIT(irq_id) ((irq_id) % PIC_COUNT_PER_REG) +#define PCH_PIC_SIZE 0x400 + +static int nr_pics; struct pch_pic { void __iomem *base; struct irq_domain *pic_domain; + struct fwnode_handle *domain_handle; u32 ht_vec_base; raw_spinlock_t pic_lock; + u32 saved_vec_en[PIC_REG_COUNT]; + u32 saved_vec_pol[PIC_REG_COUNT]; + u32 saved_vec_edge[PIC_REG_COUNT]; }; +static struct pch_pic *pch_pic_priv[MAX_IO_PICS]; + static void pch_pic_bitset(struct pch_pic *priv, int offset, int bit) { u32 reg; @@ -137,6 +149,7 @@ static struct irq_chip pch_pic_irq_chip = { .irq_ack = pch_pic_ack_irq, .irq_set_affinity = irq_chip_set_affinity_parent, .irq_set_type = pch_pic_set_type, + .flags = IRQCHIP_SKIP_SET_WAKE, }; static int pch_pic_alloc(struct irq_domain *domain, unsigned int virq, @@ -180,7 +193,7 @@ static void pch_pic_reset(struct pch_pic *priv) int i; for (i = 0; i < PIC_COUNT; i++) { - /* Write vectore ID */ + /* Write vector ID */ writeb(priv->ht_vec_base + i, priv->base + PCH_INT_HTVEC(i)); /* Hardcode route to HT0 Lo */ writeb(1, priv->base + PCH_INT_ROUTE(i)); @@ -198,50 +211,79 @@ static void pch_pic_reset(struct pch_pic *priv) } } -static int pch_pic_of_init(struct device_node *node, - struct device_node *parent) +static int pch_pic_suspend(void) +{ + int i, j; + + for (i = 0; i < nr_pics; i++) { + for (j = 0; j < PIC_REG_COUNT; j++) { + pch_pic_priv[i]->saved_vec_pol[j] = + readl(pch_pic_priv[i]->base + PCH_PIC_POL + 4 * j); + pch_pic_priv[i]->saved_vec_edge[j] = + readl(pch_pic_priv[i]->base + PCH_PIC_EDGE + 4 * j); + pch_pic_priv[i]->saved_vec_en[j] = + readl(pch_pic_priv[i]->base + PCH_PIC_MASK + 4 * j); + } + } + + return 0; +} + +static void pch_pic_resume(void) +{ + int i, j; + + for (i = 0; i < nr_pics; i++) { + pch_pic_reset(pch_pic_priv[i]); + for (j = 0; j < PIC_REG_COUNT; j++) { + writel(pch_pic_priv[i]->saved_vec_pol[j], + pch_pic_priv[i]->base + PCH_PIC_POL + 4 * j); + writel(pch_pic_priv[i]->saved_vec_edge[j], + pch_pic_priv[i]->base + PCH_PIC_EDGE + 4 * j); + writel(pch_pic_priv[i]->saved_vec_en[j], + pch_pic_priv[i]->base + PCH_PIC_MASK + 4 * j); + } + } +} + +static struct syscore_ops pch_pic_syscore_ops = { + .suspend = pch_pic_suspend, + .resume = pch_pic_resume, +}; + +static int pch_pic_init(phys_addr_t addr, unsigned long size, int vec_base, + struct irq_domain *parent_domain, struct fwnode_handle *domain_handle) { + int vec_count; struct pch_pic *priv; - struct irq_domain *parent_domain; - int err; priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; raw_spin_lock_init(&priv->pic_lock); - priv->base = of_iomap(node, 0); - if (!priv->base) { - err = -ENOMEM; + priv->base = ioremap(addr, size); + if (!priv->base) goto free_priv; - } - parent_domain = irq_find_host(parent); - if (!parent_domain) { - pr_err("Failed to find the parent domain\n"); - err = -ENXIO; - goto iounmap_base; - } + priv->domain_handle = domain_handle; - if (of_property_read_u32(node, "loongson,pic-base-vec", - &priv->ht_vec_base)) { - pr_err("Failed to determine pic-base-vec\n"); - err = -EINVAL; - goto iounmap_base; - } + priv->ht_vec_base = vec_base; + vec_count = ((readq(priv->base) >> 48) & 0xff) + 1; priv->pic_domain = irq_domain_create_hierarchy(parent_domain, 0, - PIC_COUNT, - of_node_to_fwnode(node), - &pch_pic_domain_ops, - priv); + vec_count, priv->domain_handle, + &pch_pic_domain_ops, priv); + if (!priv->pic_domain) { pr_err("Failed to create IRQ domain\n"); - err = -ENOMEM; goto iounmap_base; } pch_pic_reset(priv); + pch_pic_priv[nr_pics++] = priv; + + register_syscore_ops(&pch_pic_syscore_ops); return 0; @@ -250,7 +292,69 @@ static int pch_pic_of_init(struct device_node *node, free_priv: kfree(priv); - return err; + return -EINVAL; +} + +#ifdef CONFIG_OF + +static int pch_pic_of_init(struct device_node *node, + struct device_node *parent) +{ + int err, vec_base; + struct resource res; + struct irq_domain *parent_domain; + + if (of_address_to_resource(node, 0, &res)) + return -EINVAL; + + parent_domain = irq_find_host(parent); + if (!parent_domain) { + pr_err("Failed to find the parent domain\n"); + return -ENXIO; + } + + if (of_property_read_u32(node, "loongson,pic-base-vec", &vec_base)) { + pr_err("Failed to determine pic-base-vec\n"); + return -EINVAL; + } + + err = pch_pic_init(res.start, resource_size(&res), vec_base, + parent_domain, of_node_to_fwnode(node)); + if (err < 0) + return err; + + return 0; } IRQCHIP_DECLARE(pch_pic, "loongson,pch-pic-1.0", pch_pic_of_init); + +#endif + +#ifdef CONFIG_ACPI + +struct irq_domain *pch_pic_acpi_init(struct irq_domain *parent, + struct acpi_madt_bio_pic *acpi_pchpic) +{ + int ret, vec_base; + struct fwnode_handle *domain_handle; + + if (!acpi_pchpic) + return NULL; + + vec_base = acpi_pchpic->gsi_base - GSI_MIN_PCH_IRQ; + + domain_handle = irq_domain_alloc_fwnode((phys_addr_t *)acpi_pchpic); + if (!domain_handle) { + pr_err("Unable to allocate domain handle\n"); + return NULL; + } + + ret = pch_pic_init(acpi_pchpic->address, acpi_pchpic->size, + vec_base, parent, domain_handle); + if (ret < 0) + return NULL; + + return irq_find_matching_fwnode(domain_handle, DOMAIN_BUS_ANY); +} + +#endif -- Gitee From e95a78c9f5631631ddbd1a78daf6662c533356ad Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 31 Dec 2020 15:13:33 +0800 Subject: [PATCH 08/59] irqchip/loongson-pch-msi: Add ACPI init support We are preparing to add new Loongson (based on LoongArch, not MIPS) support. LoongArch use ACPI other than DT as its boot protocol, so add ACPI init support. Change-Id: Ic005e38561c0b7eae2e51ee1c9d03a97ff0d06dd Signed-off-by: Huacai Chen --- drivers/irqchip/irq-loongson-pch-msi.c | 130 +++++++++++++++++-------- 1 file changed, 88 insertions(+), 42 deletions(-) diff --git a/drivers/irqchip/irq-loongson-pch-msi.c b/drivers/irqchip/irq-loongson-pch-msi.c index 32562b7e681b..d148ea8e2d68 100644 --- a/drivers/irqchip/irq-loongson-pch-msi.c +++ b/drivers/irqchip/irq-loongson-pch-msi.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2020, Jiaxun Yang + * Jianmin Lv + * Huacai Chen * Loongson PCH MSI support */ @@ -15,14 +17,19 @@ #include #include +static int nr_pics; + struct pch_msi_data { struct mutex msi_map_lock; phys_addr_t doorbell; u32 irq_first; /* The vector number that MSIs starts */ u32 num_irqs; /* The number of vectors for MSIs */ unsigned long *msi_map; + struct fwnode_handle *domain_handle; }; +static struct pch_msi_data *pch_msi_priv[MAX_IO_PICS]; + static void pch_msi_mask_msi_irq(struct irq_data *d) { pci_msi_mask_irq(d); @@ -154,12 +161,14 @@ static const struct irq_domain_ops pch_msi_middle_domain_ops = { }; static int pch_msi_init_domains(struct pch_msi_data *priv, - struct device_node *node, - struct irq_domain *parent) + struct irq_domain *parent, + struct fwnode_handle *domain_handle) { struct irq_domain *middle_domain, *msi_domain; - middle_domain = irq_domain_create_linear(of_node_to_fwnode(node), + priv->domain_handle = domain_handle; + + middle_domain = irq_domain_create_linear(priv->domain_handle, priv->num_irqs, &pch_msi_middle_domain_ops, priv); @@ -171,7 +180,7 @@ static int pch_msi_init_domains(struct pch_msi_data *priv, middle_domain->parent = parent; irq_domain_update_bus_token(middle_domain, DOMAIN_BUS_NEXUS); - msi_domain = pci_msi_create_irq_domain(of_node_to_fwnode(node), + msi_domain = pci_msi_create_irq_domain(priv->domain_handle, &pch_msi_domain_info, middle_domain); if (!msi_domain) { @@ -183,19 +192,11 @@ static int pch_msi_init_domains(struct pch_msi_data *priv, return 0; } -static int pch_msi_init(struct device_node *node, - struct device_node *parent) +static int pch_msi_init(phys_addr_t msg_address, int irq_base, int irq_count, + struct irq_domain *parent_domain, struct fwnode_handle *domain_handle) { - struct pch_msi_data *priv; - struct irq_domain *parent_domain; - struct resource res; int ret; - - parent_domain = irq_find_host(parent); - if (!parent_domain) { - pr_err("Failed to find the parent domain\n"); - return -ENXIO; - } + struct pch_msi_data *priv; priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) @@ -203,48 +204,93 @@ static int pch_msi_init(struct device_node *node, mutex_init(&priv->msi_map_lock); - ret = of_address_to_resource(node, 0, &res); - if (ret) { - pr_err("Failed to allocate resource\n"); - goto err_priv; - } - - priv->doorbell = res.start; - - if (of_property_read_u32(node, "loongson,msi-base-vec", - &priv->irq_first)) { - pr_err("Unable to parse MSI vec base\n"); - ret = -EINVAL; - goto err_priv; - } - - if (of_property_read_u32(node, "loongson,msi-num-vecs", - &priv->num_irqs)) { - pr_err("Unable to parse MSI vec number\n"); - ret = -EINVAL; - goto err_priv; - } + priv->doorbell = msg_address; + priv->irq_first = irq_base; + priv->num_irqs = irq_count; priv->msi_map = bitmap_zalloc(priv->num_irqs, GFP_KERNEL); - if (!priv->msi_map) { - ret = -ENOMEM; + if (!priv->msi_map) goto err_priv; - } pr_debug("Registering %d MSIs, starting at %d\n", priv->num_irqs, priv->irq_first); - ret = pch_msi_init_domains(priv, node, parent_domain); + ret = pch_msi_init_domains(priv, parent_domain, domain_handle); if (ret) goto err_map; + pch_msi_priv[nr_pics++] = priv; + return 0; err_map: kfree(priv->msi_map); err_priv: kfree(priv); - return ret; + + return -EINVAL; +} + +#ifdef CONFIG_OF + +static int pch_msi_of_init(struct device_node *node, struct device_node *parent) +{ + int err; + int irq_base, irq_count; + struct resource res; + struct irq_domain *parent_domain; + + parent_domain = irq_find_host(parent); + if (!parent_domain) { + pr_err("Failed to find the parent domain\n"); + return -ENXIO; + } + + if (of_address_to_resource(node, 0, &res)) { + pr_err("Failed to allocate resource\n"); + return -EINVAL; + } + + if (of_property_read_u32(node, "loongson,msi-base-vec", &irq_base)) { + pr_err("Unable to parse MSI vec base\n"); + return -EINVAL; + } + + if (of_property_read_u32(node, "loongson,msi-num-vecs", &irq_count)) { + pr_err("Unable to parse MSI vec number\n"); + return -EINVAL; + } + + err = pch_msi_init(res.start, irq_base, irq_count, parent_domain, of_node_to_fwnode(node)); + if (err < 0) + return err; + + return 0; +} + +IRQCHIP_DECLARE(pch_msi, "loongson,pch-msi-1.0", pch_msi_of_init); + +#endif + +#ifdef CONFIG_ACPI + +struct irq_domain *pch_msi_acpi_init(struct irq_domain *parent, + struct acpi_madt_msi_pic *acpi_pchmsi) +{ + int ret; + struct fwnode_handle *domain_handle; + + if (!acpi_pchmsi) + return NULL; + + domain_handle = irq_domain_alloc_fwnode((phys_addr_t *)acpi_pchmsi); + + ret = pch_msi_init(acpi_pchmsi->msg_address, acpi_pchmsi->start, + acpi_pchmsi->count, parent, domain_handle); + if (ret < 0) + return NULL; + + return irq_find_matching_fwnode(domain_handle, DOMAIN_BUS_PCI_MSI); } -IRQCHIP_DECLARE(pch_msi, "loongson,pch-msi-1.0", pch_msi_init); +#endif -- Gitee From 27a0eca5efc6df0a520c54999a18be8a06d6130f Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 31 Dec 2020 15:13:33 +0800 Subject: [PATCH 09/59] irqchip/loongson-htvec: Add ACPI init support We are preparing to add new Loongson (based on LoongArch, not MIPS) support. LoongArch use ACPI other than DT as its boot protocol, so add ACPI init support. Change-Id: I2888505dd5d20d5cd0e32db87f246ac4484514e8 Signed-off-by: Huacai Chen --- drivers/irqchip/irq-loongson-htvec.c | 150 +++++++++++++++++++++------ 1 file changed, 116 insertions(+), 34 deletions(-) diff --git a/drivers/irqchip/irq-loongson-htvec.c b/drivers/irqchip/irq-loongson-htvec.c index 6392aafb9a63..7995171ba7e7 100644 --- a/drivers/irqchip/irq-loongson-htvec.c +++ b/drivers/irqchip/irq-loongson-htvec.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2020, Jiaxun Yang + * Jianmin Lv + * Huacai Chen * Loongson HyperTransport Interrupt Vector support */ @@ -16,22 +18,27 @@ #include #include #include +#include /* Registers */ #define HTVEC_EN_OFF 0x20 #define HTVEC_MAX_PARENT_IRQ 8 - #define VEC_COUNT_PER_REG 32 #define VEC_REG_IDX(irq_id) ((irq_id) / VEC_COUNT_PER_REG) #define VEC_REG_BIT(irq_id) ((irq_id) % VEC_COUNT_PER_REG) +#define HTVEC_SIZE 0x400 struct htvec { int num_parents; void __iomem *base; struct irq_domain *htvec_domain; raw_spinlock_t htvec_lock; + struct fwnode_handle *domain_handle; + u32 saved_vec_en[HTVEC_MAX_PARENT_IRQ]; }; +static struct htvec *htvec_priv; + static void htvec_irq_dispatch(struct irq_desc *desc) { int i; @@ -155,64 +162,139 @@ static void htvec_reset(struct htvec *priv) } } -static int htvec_of_init(struct device_node *node, - struct device_node *parent) +static int htvec_suspend(void) +{ + int i; + + for (i = 0; i < htvec_priv->num_parents; i++) { + htvec_priv->saved_vec_en[i] = readl(htvec_priv->base + HTVEC_EN_OFF + 4 * i); + } + return 0; +} + +static void htvec_resume(void) { + int i; + + for (i = 0; i < htvec_priv->num_parents; i++) { + writel(htvec_priv->saved_vec_en[i], htvec_priv->base + HTVEC_EN_OFF + 4 * i); + } +} + +static struct syscore_ops htvec_syscore_ops = { + .suspend = htvec_suspend, + .resume = htvec_resume, +}; + +static int htvec_init(phys_addr_t addr, unsigned long size, + int num_parents, int parent_irq[], struct fwnode_handle *domain_handle) +{ + int i; struct htvec *priv; - int err, parent_irq[8], i; priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; + priv->num_parents = num_parents; + priv->base = ioremap(addr, size); + priv->domain_handle = domain_handle; raw_spin_lock_init(&priv->htvec_lock); - priv->base = of_iomap(node, 0); - if (!priv->base) { - err = -ENOMEM; - goto free_priv; - } - - /* Interrupt may come from any of the 8 interrupt lines */ - for (i = 0; i < HTVEC_MAX_PARENT_IRQ; i++) { - parent_irq[i] = irq_of_parse_and_map(node, i); - if (parent_irq[i] <= 0) - break; - priv->num_parents++; - } - - if (!priv->num_parents) { - pr_err("Failed to get parent irqs\n"); - err = -ENODEV; - goto iounmap_base; - } - - priv->htvec_domain = irq_domain_create_linear(of_node_to_fwnode(node), + /* Setup IRQ domain */ + priv->htvec_domain = irq_domain_create_linear(priv->domain_handle, (VEC_COUNT_PER_REG * priv->num_parents), &htvec_domain_ops, priv); if (!priv->htvec_domain) { - pr_err("Failed to create IRQ domain\n"); - err = -ENOMEM; - goto irq_dispose; + pr_err("loongson-htvec: cannot add IRQ domain\n"); + goto iounmap_base; } htvec_reset(priv); - for (i = 0; i < priv->num_parents; i++) + for (i = 0; i < priv->num_parents; i++) { irq_set_chained_handler_and_data(parent_irq[i], htvec_irq_dispatch, priv); + } + + htvec_priv = priv; + + register_syscore_ops(&htvec_syscore_ops); return 0; -irq_dispose: - for (; i > 0; i--) - irq_dispose_mapping(parent_irq[i - 1]); iounmap_base: iounmap(priv->base); -free_priv: + priv->domain_handle = NULL; kfree(priv); - return err; + return -EINVAL; +} + +#ifdef CONFIG_OF + +static int htvec_of_init(struct device_node *node, + struct device_node *parent) +{ + int i, err; + int parent_irq[8]; + int num_parents = 0; + struct resource res; + + if (of_address_to_resource(node, 0, &res)) + return -EINVAL; + + /* Interrupt may come from any of the 8 interrupt lines */ + for (i = 0; i < HTVEC_MAX_PARENT_IRQ; i++) { + parent_irq[i] = irq_of_parse_and_map(node, i); + if (parent_irq[i] <= 0) + break; + + num_parents++; + } + + err = htvec_init(res.start, resource_size(&res), + num_parents, parent_irq, of_node_to_fwnode(node)); + if (err < 0) + return err; + + return 0; } IRQCHIP_DECLARE(htvec, "loongson,htvec-1.0", htvec_of_init); + +#endif + +#ifdef CONFIG_ACPI + +struct irq_domain *htvec_acpi_init(struct irq_domain *parent, + struct acpi_madt_ht_pic *acpi_htvec) +{ + int i, ret; + int num_parents, parent_irq[8]; + struct fwnode_handle *domain_handle; + + if (!acpi_htvec) + return NULL; + + num_parents = HTVEC_MAX_PARENT_IRQ; + + domain_handle = irq_domain_alloc_fwnode((phys_addr_t *)acpi_htvec); + if (!domain_handle) { + pr_err("Unable to allocate domain handle\n"); + return NULL; + } + + /* Interrupt may come from any of the 8 interrupt lines */ + for (i = 0; i < HTVEC_MAX_PARENT_IRQ; i++) + parent_irq[i] = irq_create_mapping(parent, acpi_htvec->cascade[i]); + + ret = htvec_init(acpi_htvec->address, acpi_htvec->size, + num_parents, parent_irq, domain_handle); + if (ret < 0) + return NULL; + + return irq_find_matching_fwnode(domain_handle, DOMAIN_BUS_ANY); +} + +#endif -- Gitee From a05f3997314fe877a26f75f355e1f0631269a181 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 31 Dec 2020 15:13:33 +0800 Subject: [PATCH 10/59] irqchip/loongson-liointc: Add ACPI init support We are preparing to add new Loongson (based on LoongArch, not MIPS) support. LoongArch use ACPI other than DT as its boot protocol, so add ACPI init support. Change-Id: I51f267a55d8ccd6bc7495ab33a09bf834f4d64ba Signed-off-by: Huacai Chen --- drivers/irqchip/irq-loongson-liointc.c | 149 +++++++++++++++++-------- 1 file changed, 105 insertions(+), 44 deletions(-) diff --git a/drivers/irqchip/irq-loongson-liointc.c b/drivers/irqchip/irq-loongson-liointc.c index 9ed1bc473663..6e6d9fad0821 100644 --- a/drivers/irqchip/irq-loongson-liointc.c +++ b/drivers/irqchip/irq-loongson-liointc.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2020, Jiaxun Yang + * Jianmin Lv * Loongson Local IO Interrupt Controller support */ @@ -19,10 +20,10 @@ #include #define LIOINTC_CHIP_IRQ 32 -#define LIOINTC_NUM_PARENT 4 +#define LIOINTC_NUM_PARENT 4 #define LIOINTC_INTC_CHIP_START 0x20 - +#define LIOINTC_MEM_SIZE 0x80 #define LIOINTC_REG_INTC_STATUS (LIOINTC_INTC_CHIP_START + 0x20) #define LIOINTC_REG_INTC_EN_STATUS (LIOINTC_INTC_CHIP_START + 0x04) #define LIOINTC_REG_INTC_ENABLE (LIOINTC_INTC_CHIP_START + 0x08) @@ -40,6 +41,7 @@ struct liointc_handler_data { }; struct liointc_priv { + struct fwnode_handle *domain_handle; struct irq_chip_generic *gc; struct liointc_handler_data handler[LIOINTC_NUM_PARENT]; u8 map_cache[LIOINTC_CHIP_IRQ]; @@ -51,11 +53,11 @@ static void liointc_chained_handle_irq(struct irq_desc *desc) struct liointc_handler_data *handler = irq_desc_get_handler_data(desc); struct irq_chip *chip = irq_desc_get_chip(desc); struct irq_chip_generic *gc = handler->priv->gc; - u32 pending; + u32 pending, offset = cpu_logical_map(smp_processor_id()) * 8; chained_irq_enter(chip, desc); - pending = readl(gc->reg_base + LIOINTC_REG_INTC_STATUS); + pending = readl(gc->reg_base + LIOINTC_REG_INTC_STATUS + offset); if (!pending) { /* Always blame LPC IRQ if we have that bug */ @@ -140,67 +142,44 @@ static void liointc_resume(struct irq_chip_generic *gc) irq_gc_unlock_irqrestore(gc, flags); } -static const char * const parent_names[] = {"int0", "int1", "int2", "int3"}; +static int parent_irq[LIOINTC_NUM_PARENT]; +static u32 parent_int_map[LIOINTC_NUM_PARENT]; +static const char *const parent_names[] = {"int0", "int1", "int2", "int3"}; -int __init liointc_of_init(struct device_node *node, - struct device_node *parent) +static int liointc_init(phys_addr_t addr, unsigned long size, int revision, + struct fwnode_handle *domain_handle, struct device_node *node) { + int i, err; + void __iomem *base; + struct irq_chip_type *ct; struct irq_chip_generic *gc; struct irq_domain *domain; - struct irq_chip_type *ct; struct liointc_priv *priv; - void __iomem *base; - u32 of_parent_int_map[LIOINTC_NUM_PARENT]; - int parent_irq[LIOINTC_NUM_PARENT]; - bool have_parent = FALSE; - int sz, i, err = 0; priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; - base = of_iomap(node, 0); - if (!base) { - err = -ENODEV; + base = ioremap(addr, size); + if (!base) goto out_free_priv; - } - for (i = 0; i < LIOINTC_NUM_PARENT; i++) { - parent_irq[i] = of_irq_get_byname(node, parent_names[i]); - if (parent_irq[i] > 0) - have_parent = TRUE; - } - if (!have_parent) { - err = -ENODEV; - goto out_iounmap; - } - - sz = of_property_read_variable_u32_array(node, - "loongson,parent_int_map", - &of_parent_int_map[0], - LIOINTC_NUM_PARENT, - LIOINTC_NUM_PARENT); - if (sz < 4) { - pr_err("loongson-liointc: No parent_int_map\n"); - err = -ENODEV; - goto out_iounmap; - } + priv->domain_handle = domain_handle; for (i = 0; i < LIOINTC_NUM_PARENT; i++) - priv->handler[i].parent_int_map = of_parent_int_map[i]; + priv->handler[i].parent_int_map = parent_int_map[i]; /* Setup IRQ domain */ - domain = irq_domain_add_linear(node, 32, + domain = irq_domain_create_linear(domain_handle, LIOINTC_CHIP_IRQ, &irq_generic_chip_ops, priv); if (!domain) { pr_err("loongson-liointc: cannot add IRQ domain\n"); - err = -EINVAL; goto out_iounmap; } - err = irq_alloc_domain_generic_chips(domain, 32, 1, - node->full_name, handle_level_irq, - IRQ_NOPROBE, 0, 0); + err = irq_alloc_domain_generic_chips(domain, LIOINTC_CHIP_IRQ, 1, + (node ? node->full_name : "LIOINTC"), + handle_level_irq, 0, IRQ_NOPROBE, 0); if (err) { pr_err("loongson-liointc: unable to register IRQ domain\n"); goto out_free_domain; @@ -265,8 +244,90 @@ int __init liointc_of_init(struct device_node *node, out_free_priv: kfree(priv); - return err; + return -EINVAL; +} + +#ifdef CONFIG_OF + +static int __init liointc_of_init(struct device_node *node, + struct device_node *parent) +{ + bool have_parent = FALSE; + int sz, i, index, revision, err = 0; + struct resource res; + + if (!of_device_is_compatible(node, "loongson,liointc-2.0")) { + index = 0; + revision = 1; + } else { + index = of_property_match_string(node, "reg-names", "main"); + revision = 2; + } + + if (of_address_to_resource(node, index, &res)) + return -EINVAL; + + for (i = 0; i < LIOINTC_NUM_PARENT; i++) { + parent_irq[i] = of_irq_get_byname(node, parent_names[i]); + if (parent_irq[i] > 0) + have_parent = TRUE; + } + if (!have_parent) + return -ENODEV; + + sz = of_property_read_variable_u32_array(node, + "loongson,parent_int_map", + &parent_int_map[0], + LIOINTC_NUM_PARENT, + LIOINTC_NUM_PARENT); + if (sz < 4) { + pr_err("loongson-liointc: No parent_int_map\n"); + return -ENODEV; + } + + err = liointc_init(res.start, resource_size(&res), + revision, of_node_to_fwnode(node), node); + if (err < 0) + return err; + + return 0; } IRQCHIP_DECLARE(loongson_liointc_1_0, "loongson,liointc-1.0", liointc_of_init); IRQCHIP_DECLARE(loongson_liointc_1_0a, "loongson,liointc-1.0a", liointc_of_init); + +#endif + +#ifdef CONFIG_ACPI + +struct irq_domain *liointc_acpi_init(struct irq_domain *parent, + struct acpi_madt_lio_pic *acpi_liointc) +{ + int ret; + struct fwnode_handle *domain_handle; + + if (!acpi_liointc) + return NULL; + + parent_int_map[0] = acpi_liointc->cascade_map[0]; + parent_int_map[1] = acpi_liointc->cascade_map[1]; + + parent_irq[0] = irq_create_mapping(parent, acpi_liointc->cascade[0]); + if (!cpu_has_extioi) + parent_irq[1] = irq_create_mapping(parent, acpi_liointc->cascade[1]); + + domain_handle = irq_domain_alloc_fwnode((phys_addr_t *)acpi_liointc); + if (!domain_handle) { + pr_err("Unable to allocate domain handle\n"); + return NULL; + } + + ret = liointc_init(acpi_liointc->address, acpi_liointc->size, + 1, domain_handle, NULL); + if (ret < 0) + return NULL; + + return irq_find_matching_fwnode(domain_handle, DOMAIN_BUS_ANY); +} + +#endif -- Gitee From 1974fcbf4aa99d49a9ec258390965ac07640a802 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 31 Dec 2020 15:13:33 +0800 Subject: [PATCH 11/59] irqchip: Add LoongArch CPU interrupt controller support We are preparing to add new Loongson (based on LoongArch, not MIPS) support. This patch add LoongArch CPU interrupt controller support. Change-Id: Ie3c76519d679d80fff0d258f749d1d3659254286 Signed-off-by: Huacai Chen --- drivers/irqchip/Kconfig | 6 ++ drivers/irqchip/Makefile | 1 + drivers/irqchip/irq-loongarch-cpu.c | 92 +++++++++++++++++++++++++++++ 3 files changed, 99 insertions(+) create mode 100644 drivers/irqchip/irq-loongarch-cpu.c diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 51ea7ca5e4e3..d1311760ca48 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -561,6 +561,12 @@ config EXYNOS_IRQ_COMBINER Say yes here to add support for the IRQ combiner devices embedded in Samsung Exynos chips. +config IRQ_LOONGARCH_CPU + bool + select GENERIC_IRQ_CHIP + select IRQ_DOMAIN + select GENERIC_IRQ_EFFECTIVE_AFF_MASK + config LOONGSON_LIOINTC bool "Loongson Local I/O Interrupt Controller" depends on MACH_LOONGSON64 diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile index 4c78b0f64e6c..65b9dfe75bd8 100644 --- a/drivers/irqchip/Makefile +++ b/drivers/irqchip/Makefile @@ -109,6 +109,7 @@ obj-$(CONFIG_LS1X_IRQ) += irq-ls1x.o obj-$(CONFIG_TI_SCI_INTR_IRQCHIP) += irq-ti-sci-intr.o obj-$(CONFIG_TI_SCI_INTA_IRQCHIP) += irq-ti-sci-inta.o obj-$(CONFIG_TI_PRUSS_INTC) += irq-pruss-intc.o +obj-$(CONFIG_IRQ_LOONGARCH_CPU) += irq-loongarch-cpu.o obj-$(CONFIG_LOONGSON_LIOINTC) += irq-loongson-liointc.o obj-$(CONFIG_LOONGSON_HTPIC) += irq-loongson-htpic.o obj-$(CONFIG_LOONGSON_HTVEC) += irq-loongson-htvec.o diff --git a/drivers/irqchip/irq-loongarch-cpu.c b/drivers/irqchip/irq-loongarch-cpu.c new file mode 100644 index 000000000000..ea4841ffc470 --- /dev/null +++ b/drivers/irqchip/irq-loongarch-cpu.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Loongson Technologies, Inc. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +static struct irq_domain *irq_domain; + +static void mask_loongarch_irq(struct irq_data *d) +{ + clear_csr_ecfg(ECFGF(d->hwirq)); +} + +static void unmask_loongarch_irq(struct irq_data *d) +{ + set_csr_ecfg(ECFGF(d->hwirq)); +} + +static struct irq_chip cpu_irq_controller = { + .name = "LoongArch", + .irq_mask = mask_loongarch_irq, + .irq_unmask = unmask_loongarch_irq, +}; + +static void handle_cpu_irq(struct pt_regs *regs) +{ + int hwirq; + unsigned int estat = read_csr_estat() & CSR_ESTAT_IS; + + while ((hwirq = ffs(estat))) { + estat &= ~BIT(hwirq - 1); + handle_domain_irq(irq_domain, hwirq - 1, regs); + } +} + +int get_ipi_irq(void) +{ + return irq_create_mapping(irq_domain, EXCCODE_IPI - EXCCODE_INT_START); +} + +int get_pmc_irq(void) +{ + return irq_create_mapping(irq_domain, EXCCODE_PMC - EXCCODE_INT_START); +} + +int get_timer_irq(void) +{ + return irq_create_mapping(irq_domain, EXCCODE_TIMER - EXCCODE_INT_START); +} + +static int loongarch_cpu_intc_map(struct irq_domain *d, unsigned int irq, + irq_hw_number_t hwirq) +{ + irq_set_noprobe(irq); + irq_set_chip_and_handler(irq, &cpu_irq_controller, handle_percpu_irq); + + return 0; +} + +static const struct irq_domain_ops loongarch_cpu_intc_irq_domain_ops = { + .map = loongarch_cpu_intc_map, + .xlate = irq_domain_xlate_onecell, +}; + +struct irq_domain * __init loongarch_cpu_irq_init(void) +{ + struct fwnode_handle *domain_handle; + + /* Mask interrupts. */ + clear_csr_ecfg(ECFG0_IM); + clear_csr_estat(ESTATF_IP); + + domain_handle = irq_domain_alloc_fwnode(NULL); + irq_domain = irq_domain_create_linear(domain_handle, EXCCODE_INT_NUM, + &loongarch_cpu_intc_irq_domain_ops, NULL); + + if (!irq_domain) + panic("Failed to add irqdomain for LoongArch CPU"); + + set_handle_irq(&handle_cpu_irq); + + return irq_domain; +} -- Gitee From 98116393850facf6f97e0afcfc6e3a9a4d50a0b3 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 31 Dec 2020 15:13:33 +0800 Subject: [PATCH 12/59] irqchip: Add Loongson Extended I/O interrupt controller support We are preparing to add new Loongson (based on LoongArch, not MIPS) support. This patch add Loongson Extended I/O CPU interrupt controller support. Change-Id: I512fab2851355158431591221b6f703507d5255d Signed-off-by: Huacai Chen --- drivers/irqchip/Kconfig | 10 + drivers/irqchip/Makefile | 1 + drivers/irqchip/irq-loongson-eiointc.c | 371 +++++++++++++++++++++++++ include/linux/cpuhotplug.h | 1 + 4 files changed, 383 insertions(+) create mode 100644 drivers/irqchip/irq-loongson-eiointc.c diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index d1311760ca48..b1643e1fe6c0 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -576,6 +576,16 @@ config LOONGSON_LIOINTC help Support for the Loongson Local I/O Interrupt Controller. +config LOONGSON_EIOINTC + bool "Loongson Extend I/O Interrupt Controller" + depends on LOONGARCH + depends on MACH_LOONGSON64 + default MACH_LOONGSON64 + select IRQ_DOMAIN_HIERARCHY + select GENERIC_IRQ_CHIP + help + Support for the Loongson3 Extend I/O Interrupt Vector Controller. + config LOONGSON_HTPIC bool "Loongson3 HyperTransport PIC Controller" depends on (MACH_LOONGSON64 && MIPS) diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile index 65b9dfe75bd8..b8e1d3e2cbfb 100644 --- a/drivers/irqchip/Makefile +++ b/drivers/irqchip/Makefile @@ -111,6 +111,7 @@ obj-$(CONFIG_TI_SCI_INTA_IRQCHIP) += irq-ti-sci-inta.o obj-$(CONFIG_TI_PRUSS_INTC) += irq-pruss-intc.o obj-$(CONFIG_IRQ_LOONGARCH_CPU) += irq-loongarch-cpu.o obj-$(CONFIG_LOONGSON_LIOINTC) += irq-loongson-liointc.o +obj-$(CONFIG_LOONGSON_EIOINTC) += irq-loongson-eiointc.o obj-$(CONFIG_LOONGSON_HTPIC) += irq-loongson-htpic.o obj-$(CONFIG_LOONGSON_HTVEC) += irq-loongson-htvec.o obj-$(CONFIG_LOONGSON_PCH_PIC) += irq-loongson-pch-pic.o diff --git a/drivers/irqchip/irq-loongson-eiointc.c b/drivers/irqchip/irq-loongson-eiointc.c new file mode 100644 index 000000000000..7740ec5b24fc --- /dev/null +++ b/drivers/irqchip/irq-loongson-eiointc.c @@ -0,0 +1,371 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020, Jianmin Lv + * Loongson Extend I/O Interrupt Vector support + */ + +#define pr_fmt(fmt) "eiointc: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define EIOINTC_REG_NODEMAP 0x14a0 +#define EIOINTC_REG_IPMAP 0x14c0 +#define EIOINTC_REG_ENABLE 0x1600 +#define EIOINTC_REG_BOUNCE 0x1680 +#define EIOINTC_REG_ISR 0x1800 +#define EIOINTC_REG_ROUTE 0x1c00 + +#define VEC_REG_COUNT 4 +#define VEC_COUNT_PER_REG 64 +#define VEC_COUNT (VEC_REG_COUNT * VEC_COUNT_PER_REG) +#define VEC_REG_IDX(irq_id) ((irq_id) / VEC_COUNT_PER_REG) +#define VEC_REG_BIT(irq_id) ((irq_id) % VEC_COUNT_PER_REG) +#define EIOINTC_ALL_ENABLE 0xffffffff + +#define MAX_EIO_NODES (NR_CPUS / CORES_PER_EIO_NODE) + +static int nr_pics; + +struct eiointc_priv { + u32 node; + nodemask_t node_map; + cpumask_t cpuspan_map; + struct fwnode_handle *domain_handle; + struct irq_domain *eiointc_domain; +}; + +static struct eiointc_priv *eiointc_priv[MAX_IO_PICS]; + +int eiointc_get_node(int id) +{ + return eiointc_priv[id]->node; +} + +static int cpu_to_eio_node(int cpu) +{ + return cpu_logical_map(cpu) / CORES_PER_EIO_NODE; +} + +static void eiointc_set_irq_route(int pos, unsigned int cpu, unsigned int mnode, nodemask_t *node_map) +{ + int i, node, cpu_node, route_node; + unsigned char coremap[MAX_EIO_NODES]; + uint32_t pos_off, data, data_byte, data_mask; + + pos_off = pos & ~3; + data_byte = pos & 3; + data_mask = ~BIT_MASK(data_byte) & 0xf; + + memset(coremap, 0, sizeof(unsigned char) * MAX_EIO_NODES); + + /* Calculate node and coremap of target irq */ + cpu_node = cpu_logical_map(cpu) / CORES_PER_EIO_NODE; + coremap[cpu_node] |= BIT(cpu_logical_map(cpu) % CORES_PER_EIO_NODE); + + for_each_online_cpu(i) { + node = cpu_to_eio_node(i); + if (!node_isset(node, *node_map)) + continue; + + /* EIO node 0 is in charge of inter-node interrupt dispatch */ + route_node = (node == mnode) ? cpu_node : node; + data = ((coremap[node] | (route_node << 4)) << (data_byte * 8)); + csr_any_send(EIOINTC_REG_ROUTE + pos_off, data, data_mask, node * CORES_PER_EIO_NODE); + } +} + +static DEFINE_RAW_SPINLOCK(affinity_lock); + +static int eiointc_set_irq_affinity(struct irq_data *d, const struct cpumask *affinity, bool force) +{ + unsigned int cpu; + unsigned long flags; + uint32_t vector, regaddr; + struct cpumask intersect_affinity; + struct eiointc_priv *priv = d->domain->host_data; + + if (!IS_ENABLED(CONFIG_SMP)) + return -EPERM; + + raw_spin_lock_irqsave(&affinity_lock, flags); + + cpumask_and(&intersect_affinity, affinity, cpu_online_mask); + cpumask_and(&intersect_affinity, &intersect_affinity, &priv->cpuspan_map); + + if (cpumask_empty(&intersect_affinity)) { + raw_spin_unlock_irqrestore(&affinity_lock, flags); + return -EINVAL; + } + cpu = cpumask_first(&intersect_affinity); + + if (!d->parent_data) + vector = d->hwirq; + else + vector = d->parent_data->hwirq; + + regaddr = EIOINTC_REG_ENABLE + ((vector >> 5) << 2); + + /* Mask target vector */ + csr_any_send(regaddr, EIOINTC_ALL_ENABLE & (~BIT(vector & 0x1F)), 0x0, 0); + /* Set route for target vector */ + eiointc_set_irq_route(vector, cpu, priv->node, &priv->node_map); + /* Unmask target vector */ + csr_any_send(regaddr, EIOINTC_ALL_ENABLE, 0x0, 0); + + irq_data_update_effective_affinity(d, cpumask_of(cpu)); + + raw_spin_unlock_irqrestore(&affinity_lock, flags); + + return IRQ_SET_MASK_OK; +} + +static int eiointc_index(int node) +{ + int i; + + for (i = 0; i < nr_pics; i++) { + if (node_isset(node, eiointc_priv[i]->node_map)) + return i; + } + + return -1; +} + +static int eiointc_router_init(unsigned int cpu) +{ + int i, bit; + uint32_t data; + uint32_t node = cpu_to_eio_node(cpu); + uint32_t index = eiointc_index(node); + + if (index < 0) { + pr_err("Error: invalid nodemap!\n"); + return -1; + } + + if ((cpu_logical_map(cpu) % CORES_PER_EIO_NODE) == 0) { + eiointc_enable(); + + for (i = 0; i < VEC_COUNT / 32; i++) { + data = (((1 << (i * 2 + 1)) << 16) | (1 << (i * 2))); + iocsr_writel(data, EIOINTC_REG_NODEMAP + i * 4); + } + + for (i = 0; i < VEC_COUNT / 32 / 4; i++) { + bit = BIT(1 + index); /* Route to IP[1 + index] */ + data = bit | (bit << 8) | (bit << 16) | (bit << 24); + iocsr_writel(data, EIOINTC_REG_IPMAP + i * 4); + } + + for (i = 0; i < VEC_COUNT / 4; i++) { + /* Route to Node-0 Core-0 */ + if (index == 0) + bit = BIT(cpu_logical_map(0)); + else + bit = (eiointc_priv[index]->node << 4) | 1; + + data = bit | (bit << 8) | (bit << 16) | (bit << 24); + iocsr_writel(data, EIOINTC_REG_ROUTE + i * 4); + } + + for (i = 0; i < VEC_COUNT / 32; i++) { + data = 0xffffffff; + iocsr_writel(data, EIOINTC_REG_ENABLE + i * 4); + iocsr_writel(data, EIOINTC_REG_BOUNCE + i * 4); + } + } + + return 0; +} + +static void eiointc_irq_dispatch(struct irq_desc *desc) +{ + int i; + u64 pending; + bool handled = false; + struct irq_chip *chip = irq_desc_get_chip(desc); + struct eiointc_priv *priv = irq_desc_get_handler_data(desc); + + chained_irq_enter(chip, desc); + + for (i = 0; i < VEC_REG_COUNT; i++) { + pending = iocsr_readq(EIOINTC_REG_ISR + (i << 3)); + iocsr_writeq(pending, EIOINTC_REG_ISR + (i << 3)); + while (pending) { + int bit = __ffs(pending); + int virq = irq_linear_revmap(priv->eiointc_domain, bit + VEC_COUNT_PER_REG * i); + + generic_handle_irq(virq); + pending &= ~BIT(bit); + handled = true; + } + } + + if (!handled) + spurious_interrupt(); + + chained_irq_exit(chip, desc); +} + +static void eiointc_ack_irq(struct irq_data *d) +{ + if (d->parent_data) + irq_chip_ack_parent(d); +} + +static void eiointc_mask_irq(struct irq_data *d) +{ + if (d->parent_data) + irq_chip_mask_parent(d); +} + +static void eiointc_unmask_irq(struct irq_data *d) +{ + if (d->parent_data) + irq_chip_unmask_parent(d); +} + +static struct irq_chip eiointc_irq_chip = { + .name = "EIOINTC", + .irq_ack = eiointc_ack_irq, + .irq_mask = eiointc_mask_irq, + .irq_unmask = eiointc_unmask_irq, + .irq_set_affinity = eiointc_set_irq_affinity, +}; + +static int eiointc_domain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) +{ + int ret; + unsigned int i, type; + unsigned long hwirq = 0; + struct eiointc *priv = domain->host_data; + + ret = irq_domain_translate_onecell(domain, arg, &hwirq, &type); + if (ret) + return ret; + + for (i = 0; i < nr_irqs; i++) { + irq_domain_set_info(domain, virq + i, hwirq + i, &eiointc_irq_chip, + priv, handle_edge_irq, NULL, NULL); + } + + return 0; +} + +static void eiointc_domain_free(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs) +{ + int i; + + for (i = 0; i < nr_irqs; i++) { + struct irq_data *d = irq_domain_get_irq_data(domain, virq + i); + + irq_set_handler(virq + i, NULL); + irq_domain_reset_irq_data(d); + } +} + +static const struct irq_domain_ops eiointc_domain_ops = { + .translate = irq_domain_translate_onecell, + .alloc = eiointc_domain_alloc, + .free = eiointc_domain_free, +}; + +static int eiointc_suspend(void) +{ + return 0; +} + +static void eiointc_resume(void) +{ + int i, j; + struct irq_desc *desc; + struct irq_data *irq_data; + + eiointc_router_init(0); + + for (i = 0; i < nr_pics; i++) { + for (j = 0; j < VEC_COUNT; j++) { + desc = irq_to_desc(irq_find_mapping(eiointc_priv[i]->eiointc_domain, j)); + if (desc && desc->handle_irq && desc->handle_irq != handle_bad_irq) { + irq_data = &desc->irq_data; + eiointc_set_irq_affinity(irq_data, irq_data->common->affinity, 0); + } + } + } +} + +static struct syscore_ops eiointc_syscore_ops = { + .suspend = eiointc_suspend, + .resume = eiointc_resume, +}; + +struct irq_domain *eiointc_acpi_init(struct irq_domain *parent, + struct acpi_madt_eio_pic *acpi_eiointc) +{ + int i, parent_irq; + unsigned long node_map; + struct eiointc_priv *priv; + + if (!acpi_eiointc) + return NULL; + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) + return NULL; + + priv->domain_handle = irq_domain_alloc_fwnode((phys_addr_t *)acpi_eiointc); + if (!priv->domain_handle) { + pr_err("Unable to allocate domain handle\n"); + goto out_free_priv; + } + + priv->node = acpi_eiointc->node; + node_map = acpi_eiointc->node_map ? : -1ULL; + + for_each_possible_cpu(i) { + if (node_map & (1ULL << cpu_to_eio_node(i))) { + node_set(cpu_to_eio_node(i), priv->node_map); + cpumask_or(&priv->cpuspan_map, &priv->cpuspan_map, cpumask_of(i)); + } + } + + /* Setup IRQ domain */ + priv->eiointc_domain = irq_domain_create_linear(priv->domain_handle, VEC_COUNT, + &eiointc_domain_ops, priv); + if (!priv->eiointc_domain) { + pr_err("loongson-eiointc: cannot add IRQ domain\n"); + goto out_free_priv; + } + + eiointc_priv[nr_pics++] = priv; + + eiointc_router_init(0); + + parent_irq = irq_create_mapping(parent, acpi_eiointc->cascade); + irq_set_chained_handler_and_data(parent_irq, eiointc_irq_dispatch, priv); + + register_syscore_ops(&eiointc_syscore_ops); + cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_LOONGARCH_STARTING, + "irqchip/loongarch/intc:starting", + eiointc_router_init, NULL); + + return irq_find_matching_fwnode(priv->domain_handle, DOMAIN_BUS_ANY); + +out_free_priv: + priv->domain_handle = NULL; + kfree(priv); + + return NULL; +} diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index b98b9eb7d5f8..d71e1c144aad 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -105,6 +105,7 @@ enum cpuhp_state { CPUHP_AP_IRQ_BCM2836_STARTING, CPUHP_AP_IRQ_MIPS_GIC_STARTING, CPUHP_AP_IRQ_RISCV_STARTING, + CPUHP_AP_IRQ_LOONGARCH_STARTING, CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING, CPUHP_AP_ARM_MVEBU_COHERENCY, CPUHP_AP_MICROCODE_LOADER, -- Gitee From 033d7b7cf9e943b0b047a172f1e0c910e7703574 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 31 Dec 2020 15:13:33 +0800 Subject: [PATCH 13/59] irqchip: Add Loongson PCH LPC controller support We are preparing to add new Loongson (based on LoongArch, not MIPS) support. This patch add Loongson PCH LPC interrupt controller support. Change-Id: I23d655d0439d4eba891d2f07fee4c6471378693a Signed-off-by: Huacai Chen --- drivers/irqchip/Kconfig | 8 + drivers/irqchip/Makefile | 1 + drivers/irqchip/irq-loongson-pch-lpc.c | 224 +++++++++++++++++++++++++ 3 files changed, 233 insertions(+) create mode 100644 drivers/irqchip/irq-loongson-pch-lpc.c diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index b1643e1fe6c0..666a941f0fe8 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -622,6 +622,14 @@ config LOONGSON_PCH_MSI help Support for the Loongson PCH MSI Controller. +config LOONGSON_PCH_LPC + bool "Loongson PCH LPC Controller" + depends on MACH_LOONGSON64 || COMPILE_TEST + default (MACH_LOONGSON64 && LOONGARCH) + select IRQ_DOMAIN_HIERARCHY + help + Support for the Loongson PCH LPC Controller. + config MST_IRQ bool "MStar Interrupt Controller" depends on ARCH_MEDIATEK || ARCH_MSTARV7 || COMPILE_TEST diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile index b8e1d3e2cbfb..cb21d27574f4 100644 --- a/drivers/irqchip/Makefile +++ b/drivers/irqchip/Makefile @@ -116,5 +116,6 @@ obj-$(CONFIG_LOONGSON_HTPIC) += irq-loongson-htpic.o obj-$(CONFIG_LOONGSON_HTVEC) += irq-loongson-htvec.o obj-$(CONFIG_LOONGSON_PCH_PIC) += irq-loongson-pch-pic.o obj-$(CONFIG_LOONGSON_PCH_MSI) += irq-loongson-pch-msi.o +obj-$(CONFIG_LOONGSON_PCH_LPC) += irq-loongson-pch-lpc.o obj-$(CONFIG_MST_IRQ) += irq-mst-intc.o obj-$(CONFIG_SL28CPLD_INTC) += irq-sl28cpld.o diff --git a/drivers/irqchip/irq-loongson-pch-lpc.c b/drivers/irqchip/irq-loongson-pch-lpc.c new file mode 100644 index 000000000000..e3c8d22bcfee --- /dev/null +++ b/drivers/irqchip/irq-loongson-pch-lpc.c @@ -0,0 +1,224 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020, Jianmin Lv + * Loongson LPC support + */ + +#define pr_fmt(fmt) "lpc: " fmt + +#include +#include +#include +#include +#include +#include +#include + +/* Registers */ +#define LPC_INT_CTL 0x00 +#define LPC_INT_ENA 0x04 +#define LPC_INT_STS 0x08 +#define LPC_INT_CLR 0x0c +#define LPC_INT_POL 0x10 +#define LPC_COUNT 16 + +struct pch_lpc { + void __iomem *base; + struct irq_domain *lpc_domain; + struct fwnode_handle *domain_handle; + raw_spinlock_t lpc_lock; + u32 saved_reg_ctl; + u32 saved_reg_ena; + u32 saved_reg_pol; +}; + +static struct pch_lpc *pch_lpc_priv; + +static void ack_lpc_irq(struct irq_data *d) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&pch_lpc_priv->lpc_lock, flags); + writel(0x1 << d->irq, pch_lpc_priv->base + LPC_INT_CLR); + raw_spin_unlock_irqrestore(&pch_lpc_priv->lpc_lock, flags); +} +static void mask_lpc_irq(struct irq_data *d) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&pch_lpc_priv->lpc_lock, flags); + writel(readl(pch_lpc_priv->base + LPC_INT_ENA) & (~(0x1 << (d->irq))), + pch_lpc_priv->base + LPC_INT_ENA); + raw_spin_unlock_irqrestore(&pch_lpc_priv->lpc_lock, flags); +} + +static void mask_ack_lpc_irq(struct irq_data *d) +{ +} + +static void unmask_lpc_irq(struct irq_data *d) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&pch_lpc_priv->lpc_lock, flags); + writel(readl(pch_lpc_priv->base + LPC_INT_ENA) | (0x1 << (d->irq)), + pch_lpc_priv->base + LPC_INT_ENA); + raw_spin_unlock_irqrestore(&pch_lpc_priv->lpc_lock, flags); +} + +static int lpc_set_type(struct irq_data *d, unsigned int type) +{ + u32 val; + u32 mask = 0x1 << (d->hwirq); + + if (!(type & IRQ_TYPE_LEVEL_MASK)) + return 0; + + val = readl(pch_lpc_priv->base + LPC_INT_POL); + + if (type == IRQ_TYPE_LEVEL_HIGH) + val |= mask; + else + val &= ~mask; + + writel(val, pch_lpc_priv->base + LPC_INT_POL); + + return 0; +} + +static struct irq_chip pch_lpc_irq_chip = { + .name = "PCH LPC", + .irq_mask = mask_lpc_irq, + .irq_unmask = unmask_lpc_irq, + .irq_ack = ack_lpc_irq, + .irq_mask_ack = mask_ack_lpc_irq, + .irq_eoi = unmask_lpc_irq, + .irq_set_type = lpc_set_type, + .flags = IRQCHIP_SKIP_SET_WAKE, +}; + +static void lpc_irq_dispatch(struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + u32 pending; + + chained_irq_enter(chip, desc); + + pending = readl(pch_lpc_priv->base + LPC_INT_ENA); + pending &= readl(pch_lpc_priv->base + LPC_INT_STS); + if (!pending) + spurious_interrupt(); + + while (pending) { + int bit = __ffs(pending); + + generic_handle_irq(bit); + pending &= ~BIT(bit); + } + chained_irq_exit(chip, desc); +} + +static int pch_lpc_map(struct irq_domain *d, unsigned int irq, + irq_hw_number_t hw) +{ + irq_set_chip_and_handler(irq, &pch_lpc_irq_chip, handle_level_irq); + return 0; +} + +static const struct irq_domain_ops pch_lpc_domain_ops = { + .map = pch_lpc_map, + .translate = irq_domain_translate_twocell, +}; + +static void pch_lpc_reset(struct pch_lpc *priv) +{ + /* Enable the LPC interrupt, bit31: en bit30: edge */ + writel(0x80000000, priv->base + LPC_INT_CTL); + writel(0, priv->base + LPC_INT_ENA); + /* Clear all 18-bit interrpt bit */ + writel(0x3ffff, priv->base + LPC_INT_CLR); +} + +static int pch_lpc_disabled(struct pch_lpc *priv) +{ + return (readl(priv->base + LPC_INT_ENA) == 0xffffffff) && + (readl(priv->base + LPC_INT_STS) == 0xffffffff); +} + +static int pch_lpc_suspend(void) +{ + pch_lpc_priv->saved_reg_ctl = readl(pch_lpc_priv->base + LPC_INT_CTL); + pch_lpc_priv->saved_reg_ena = readl(pch_lpc_priv->base + LPC_INT_ENA); + pch_lpc_priv->saved_reg_pol = readl(pch_lpc_priv->base + LPC_INT_POL); + return 0; +} + +static void pch_lpc_resume(void) +{ + writel(pch_lpc_priv->saved_reg_ctl, pch_lpc_priv->base + LPC_INT_CTL); + writel(pch_lpc_priv->saved_reg_ena, pch_lpc_priv->base + LPC_INT_ENA); + writel(pch_lpc_priv->saved_reg_pol, pch_lpc_priv->base + LPC_INT_POL); +} + +static struct syscore_ops pch_lpc_syscore_ops = { + .suspend = pch_lpc_suspend, + .resume = pch_lpc_resume, +}; + +struct irq_domain *pch_lpc_acpi_init(struct irq_domain *parent, + struct acpi_madt_lpc_pic *acpi_pchlpc) +{ + int parent_irq; + struct pch_lpc *priv; + struct irq_fwspec fwspec; + + if (!acpi_pchlpc) + return NULL; + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) + return NULL; + + raw_spin_lock_init(&priv->lpc_lock); + + priv->base = ioremap(acpi_pchlpc->address, acpi_pchlpc->size); + if (!priv->base) + goto free_priv; + + if (pch_lpc_disabled(priv)) { + pr_err("Failed to get LPC status\n"); + goto iounmap_base; + } + + priv->domain_handle = irq_domain_alloc_fwnode((phys_addr_t *)acpi_pchlpc); + if (!priv->domain_handle) { + pr_err("Unable to allocate domain handle\n"); + goto iounmap_base; + } + priv->lpc_domain = irq_domain_add_legacy(NULL, LPC_COUNT, 0, 0, + &pch_lpc_domain_ops, priv); + if (!priv->lpc_domain) { + pr_err("Failed to create IRQ domain\n"); + goto iounmap_base; + } + pch_lpc_reset(priv); + + fwspec.fwnode = parent->fwnode; + fwspec.param[0] = acpi_pchlpc->cascade; + fwspec.param[1] = IRQ_TYPE_LEVEL_HIGH; + fwspec.param_count = 2; + parent_irq = irq_create_fwspec_mapping(&fwspec); + irq_set_chained_handler_and_data(parent_irq, lpc_irq_dispatch, priv); + pch_lpc_priv = priv; + + register_syscore_ops(&pch_lpc_syscore_ops); + + return irq_find_matching_fwnode(priv->domain_handle, DOMAIN_BUS_ANY); + +iounmap_base: + iounmap(priv->base); +free_priv: + kfree(priv); + + return NULL; +} -- Gitee From 7e181bbe8b117ddb9c5fb58a9ebd7fff514bc977 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 5 Sep 2020 15:14:50 +0800 Subject: [PATCH 14/59] PCI: loongson: Use correct pci config access operations LS2K/LS7A support 8/16/32-bits pci config access operations, so we can safely use pci_generic_config_read()/pci_generic_config_write() instead of pci_generic_config_read32()/pci_generic_config_write32(). Change-Id: I2fbdce2305563f0f7b38a9c6a4e2775bb8dcc436 Signed-off-by: Huacai Chen --- drivers/pci/controller/pci-loongson.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/pci/controller/pci-loongson.c b/drivers/pci/controller/pci-loongson.c index 48169b1e3817..7140bdd04d35 100644 --- a/drivers/pci/controller/pci-loongson.c +++ b/drivers/pci/controller/pci-loongson.c @@ -159,8 +159,15 @@ static int loongson_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) return val; } -/* H/w only accept 32-bit PCI operations */ +/* LS2K/LS7A accept 8/16/32-bit PCI operations */ static struct pci_ops loongson_pci_ops = { + .map_bus = pci_loongson_map_bus, + .read = pci_generic_config_read, + .write = pci_generic_config_write, +}; + +/* RS780/SR5690 only accept 32-bit PCI operations */ +static struct pci_ops loongson_pci_ops32 = { .map_bus = pci_loongson_map_bus, .read = pci_generic_config_read32, .write = pci_generic_config_write32, @@ -218,8 +225,11 @@ static int loongson_pci_probe(struct platform_device *pdev) } bridge->sysdata = priv; - bridge->ops = &loongson_pci_ops; bridge->map_irq = loongson_map_irq; + if (!of_device_is_compatible(node, "loongson,rs780e-pci")) + bridge->ops = &loongson_pci_ops; + else + bridge->ops = &loongson_pci_ops32; return pci_host_probe(bridge); } -- Gitee From d3c111333c8a931e9e2b2168b48464ecce0e3d6f Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 5 Sep 2020 15:14:50 +0800 Subject: [PATCH 15/59] PCI/portdrv: Don't disable pci device during shutdown Use separate remove()/shutdown() callback, and don't disable pci device during shutdown. This can avoid some poweroff/reboot failures. The poweroff/reboot failures can easily reproduce on Loongson platforms. I think this is not a Loongson-specific problem, instead, is a problem related to some specific PCI hosts. On some x86 platforms, radeon/amdgpu devices can cause the same problem, and commit faefba95c9e8ca3a523831c2e ("drm/amdgpu: just suspend the hw on pci shutdown") can resolve it. As Tiezhu said, this occasionally shutdown or reboot failure is due to clear PCI_COMMAND_MASTER on the device in do_pci_disable_device(). drivers/pci/pci.c static void do_pci_disable_device(struct pci_dev *dev) { u16 pci_command; pci_read_config_word(dev, PCI_COMMAND, &pci_command); if (pci_command & PCI_COMMAND_MASTER) { pci_command &= ~PCI_COMMAND_MASTER; pci_write_config_word(dev, PCI_COMMAND, pci_command); } pcibios_disable_device(dev); } When remove "pci_command &= ~PCI_COMMAND_MASTER;", it can work well when shutdown or reboot. This may implies that there are DMA activities on the device while shutdown. Radeon driver is more difficult than amdgpu due to its confusing symbol names, and I have maintained an out-of-tree patch for a long time [1]. Recently, we found more and more devices can cause the same problem, and it is very difficult to modify all problematic drivers as radeon/amdgpu does (the .shutdown callback should make sure there is no DMA activity). So, I think modify the PCIe port driver is a simple and effective way. And as early discussed, kexec can still work after this patch. [1] https://github.com/chenhuacai/linux/commit/6612f9c1fc290d42a14618ce9a7d03014d8ebb1a Change-Id: Id4a9c657f0fa7630d4bb890a365935c96d6d42a3 Signed-off-by: Huacai Chen Signed-off-by: Tiezhu Yang --- drivers/pci/pcie/portdrv.h | 2 +- drivers/pci/pcie/portdrv_core.c | 6 ++++-- drivers/pci/pcie/portdrv_pci.c | 15 +++++++++++++-- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h index 2ff5724b8f13..358d7281f6e8 100644 --- a/drivers/pci/pcie/portdrv.h +++ b/drivers/pci/pcie/portdrv.h @@ -117,7 +117,7 @@ int pcie_port_device_resume(struct device *dev); int pcie_port_device_runtime_suspend(struct device *dev); int pcie_port_device_runtime_resume(struct device *dev); #endif -void pcie_port_device_remove(struct pci_dev *dev); +void pcie_port_device_remove(struct pci_dev *dev, bool disable); int __must_check pcie_port_bus_register(void); void pcie_port_bus_unregister(void); diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index 3779b264dbec..682c1cce5965 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -492,11 +492,13 @@ EXPORT_SYMBOL_GPL(pcie_port_find_device); * Remove PCI Express port service devices associated with given port and * disable MSI-X or MSI for the port. */ -void pcie_port_device_remove(struct pci_dev *dev) +void pcie_port_device_remove(struct pci_dev *dev, bool disable) { device_for_each_child(&dev->dev, NULL, remove_iter); pci_free_irq_vectors(dev); - pci_disable_device(dev); + + if (disable) + pci_disable_device(dev); } /** diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c index d4559cf88f79..d06149d8d7f8 100644 --- a/drivers/pci/pcie/portdrv_pci.c +++ b/drivers/pci/pcie/portdrv_pci.c @@ -142,7 +142,18 @@ static void pcie_portdrv_remove(struct pci_dev *dev) pm_runtime_dont_use_autosuspend(&dev->dev); } - pcie_port_device_remove(dev); + pcie_port_device_remove(dev, true); +} + +static void pcie_portdrv_shutdown(struct pci_dev *dev) +{ + if (pci_bridge_d3_possible(dev)) { + pm_runtime_forbid(&dev->dev); + pm_runtime_get_noresume(&dev->dev); + pm_runtime_dont_use_autosuspend(&dev->dev); + } + + pcie_port_device_remove(dev, false); } static pci_ers_result_t pcie_portdrv_error_detected(struct pci_dev *dev, @@ -211,7 +222,7 @@ static struct pci_driver pcie_portdriver = { .probe = pcie_portdrv_probe, .remove = pcie_portdrv_remove, - .shutdown = pcie_portdrv_remove, + .shutdown = pcie_portdrv_shutdown, .err_handler = &pcie_portdrv_err_handler, -- Gitee From 8e2f00f82af8b75ebefcbc2ea8b0a99deba4c049 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 16 Mar 2021 07:49:49 +0800 Subject: [PATCH 16/59] PCI: Move loongson pci quirks to quirks.c Loongson PCH (LS7A chipset) will be used by both MIPS-based and LoongArch-based Loongson processors. MIPS-based Loongson uses FDT but LoongArch-base Loongson uses ACPI, but the driver in drivers/ pci/controller/pci-loongson.c is FDT-only. So move the quirks to quirks.c where can be shared by all architectures. Change-Id: I75c33a3a43f09500513cd48de87711bb0d6d6641 Signed-off-by: Huacai Chen --- drivers/pci/controller/pci-loongson.c | 69 --------------------------- drivers/pci/quirks.c | 69 +++++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 69 deletions(-) diff --git a/drivers/pci/controller/pci-loongson.c b/drivers/pci/controller/pci-loongson.c index 7140bdd04d35..4f6755b343b4 100644 --- a/drivers/pci/controller/pci-loongson.c +++ b/drivers/pci/controller/pci-loongson.c @@ -12,15 +12,6 @@ #include "../pci.h" -/* Device IDs */ -#define DEV_PCIE_PORT_0 0x7a09 -#define DEV_PCIE_PORT_1 0x7a19 -#define DEV_PCIE_PORT_2 0x7a29 - -#define DEV_LS2K_APB 0x7a02 -#define DEV_LS7A_CONF 0x7a10 -#define DEV_LS7A_LPC 0x7a0c - #define FLAG_CFG0 BIT(0) #define FLAG_CFG1 BIT(1) #define FLAG_DEV_FIX BIT(2) @@ -32,66 +23,6 @@ struct loongson_pci { u32 flags; }; -/* Fixup wrong class code in PCIe bridges */ -static void bridge_class_quirk(struct pci_dev *dev) -{ - dev->class = PCI_CLASS_BRIDGE_PCI << 8; -} -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON, - DEV_PCIE_PORT_0, bridge_class_quirk); -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON, - DEV_PCIE_PORT_1, bridge_class_quirk); -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON, - DEV_PCIE_PORT_2, bridge_class_quirk); - -static void system_bus_quirk(struct pci_dev *pdev) -{ - /* - * The address space consumed by these devices is outside the - * resources of the host bridge. - */ - pdev->mmio_always_on = 1; - pdev->non_compliant_bars = 1; -} -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON, - DEV_LS2K_APB, system_bus_quirk); -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON, - DEV_LS7A_CONF, system_bus_quirk); -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON, - DEV_LS7A_LPC, system_bus_quirk); - -static void loongson_mrrs_quirk(struct pci_dev *dev) -{ - struct pci_bus *bus = dev->bus; - struct pci_dev *bridge; - static const struct pci_device_id bridge_devids[] = { - { PCI_VDEVICE(LOONGSON, DEV_PCIE_PORT_0) }, - { PCI_VDEVICE(LOONGSON, DEV_PCIE_PORT_1) }, - { PCI_VDEVICE(LOONGSON, DEV_PCIE_PORT_2) }, - { 0, }, - }; - - /* look for the matching bridge */ - while (!pci_is_root_bus(bus)) { - bridge = bus->self; - bus = bus->parent; - /* - * Some Loongson PCIe ports have a h/w limitation of - * 256 bytes maximum read request size. They can't handle - * anything larger than this. So force this limit on - * any devices attached under these ports. - */ - if (pci_match_id(bridge_devids, bridge)) { - if (pcie_get_readrq(dev) > 256) { - pci_info(dev, "limiting MRRS to 256\n"); - pcie_set_readrq(dev, 256); - } - break; - } - } -} -DECLARE_PCI_FIXUP_ENABLE(PCI_ANY_ID, PCI_ANY_ID, loongson_mrrs_quirk); - static void __iomem *cfg1_map(struct loongson_pci *priv, int bus, unsigned int devfn, int where) { diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index ae8782e3fae5..b6e0a4bc0842 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -206,6 +206,75 @@ static void quirk_mmio_always_on(struct pci_dev *dev) DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_BRIDGE_HOST, 8, quirk_mmio_always_on); +/* Loongson-related quirks */ +#define DEV_PCIE_PORT_0 0x7a09 +#define DEV_PCIE_PORT_1 0x7a19 +#define DEV_PCIE_PORT_2 0x7a29 + +#define DEV_LS2K_APB 0x7a02 +#define DEV_LS7A_CONF 0x7a10 +#define DEV_LS7A_LPC 0x7a0c + +/* Fixup wrong class code in PCIe bridges */ +static void loongson_bridge_class_quirk(struct pci_dev *dev) +{ + dev->class = PCI_CLASS_BRIDGE_PCI << 8; +} +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON, + DEV_PCIE_PORT_0, loongson_bridge_class_quirk); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON, + DEV_PCIE_PORT_1, loongson_bridge_class_quirk); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON, + DEV_PCIE_PORT_2, loongson_bridge_class_quirk); + +static void loongson_system_bus_quirk(struct pci_dev *pdev) +{ + /* + * The address space consumed by these devices is outside the + * resources of the host bridge. + */ + pdev->mmio_always_on = 1; + pdev->non_compliant_bars = 1; +} +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON, + DEV_LS2K_APB, loongson_system_bus_quirk); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON, + DEV_LS7A_CONF, loongson_system_bus_quirk); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON, + DEV_LS7A_LPC, loongson_system_bus_quirk); + +static void loongson_mrrs_quirk(struct pci_dev *dev) +{ + struct pci_bus *bus = dev->bus; + struct pci_dev *bridge; + static const struct pci_device_id bridge_devids[] = { + { PCI_VDEVICE(LOONGSON, DEV_PCIE_PORT_0) }, + { PCI_VDEVICE(LOONGSON, DEV_PCIE_PORT_1) }, + { PCI_VDEVICE(LOONGSON, DEV_PCIE_PORT_2) }, + { 0, }, + }; + + /* look for the matching bridge */ + while (!pci_is_root_bus(bus)) { + bridge = bus->self; + bus = bus->parent; + /* + * Some Loongson PCIe ports have a h/w limitation of + * 256 bytes maximum read request size. They can't handle + * anything larger than this. So force this limit on + * any devices attached under these ports. + */ + if (pci_match_id(bridge_devids, bridge)) { + if (pcie_get_readrq(dev) > 256) { + pci_info(dev, "limiting MRRS to 256\n"); + pcie_set_readrq(dev, 256); + } + break; + } + } +} +DECLARE_PCI_FIXUP_ENABLE(PCI_ANY_ID, PCI_ANY_ID, loongson_mrrs_quirk); + /* * The Mellanox Tavor device gives false positive parity errors. Mark this * device with a broken_parity_status to allow PCI scanning code to "skip" -- Gitee From b304f249b03381457e15b14f5f3d9fbe84deecb2 Mon Sep 17 00:00:00 2001 From: lvjianmin Date: Thu, 29 Oct 2020 16:29:11 +0800 Subject: [PATCH 17/59] PCI: Improve mrrs quirk for LS7A In new revision of LS7A, some pcie ports support larger value than 256, but their mrrs values are not dectectable. And moreover, the current loongson_mrrs_quirk() cannot avoid devices increasing its mrrs after pci_enable_device(). So the only possible way is configure mrrs of all devices in BIOS, and add a pci dev flag (PCI_DEV_FLAGS_NO_INCREASE_MRRS) to stop the increasing mrrs operations. Change-Id: Ie957c3c9ab1fa3ad59343014de19b1ff9a12dddc Signed-off-by: lvjianmin Signed-off-by: Huacai Chen --- drivers/pci/pci.c | 8 ++++++++ drivers/pci/quirks.c | 8 +++++++- include/linux/pci.h | 2 ++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 666fcc4c37a0..baa9d341b480 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -5748,6 +5749,13 @@ int pcie_set_readrq(struct pci_dev *dev, int rq) v = (ffs(rq) - 8) << 12; + /* MRRS should be restored during resume. */ + if (pm_suspend_target_state == PM_SUSPEND_ON && + dev->dev_flags & PCI_DEV_FLAGS_NO_INCREASE_MRRS) { + if (rq > pcie_get_readrq(dev)) + return -EINVAL; + } + ret = pcie_capability_clear_and_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_READRQ, v); diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index b6e0a4bc0842..ff347cfc83b9 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -264,7 +264,13 @@ static void loongson_mrrs_quirk(struct pci_dev *dev) * anything larger than this. So force this limit on * any devices attached under these ports. */ - if (pci_match_id(bridge_devids, bridge)) { + if (bridge && pci_match_id(bridge_devids, bridge)) { + dev->dev_flags |= PCI_DEV_FLAGS_NO_INCREASE_MRRS; + + if (pcie_bus_config == PCIE_BUS_DEFAULT || + pcie_bus_config == PCIE_BUS_TUNE_OFF) + break; + if (pcie_get_readrq(dev) > 256) { pci_info(dev, "limiting MRRS to 256\n"); pcie_set_readrq(dev, 256); diff --git a/include/linux/pci.h b/include/linux/pci.h index ff6236a3dd6b..3ddc4f2053dc 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -230,6 +230,8 @@ enum pci_dev_flags { PCI_DEV_FLAGS_NO_RELAXED_ORDERING = (__force pci_dev_flags_t) (1 << 11), /* Device does honor MSI masking despite saying otherwise */ PCI_DEV_FLAGS_HAS_MSI_MASKING = (__force pci_dev_flags_t) (1 << 12), + /* Don't increase BIOS's MRRS configuration */ + PCI_DEV_FLAGS_NO_INCREASE_MRRS = (__force pci_dev_flags_t) (1 << 13), }; enum pci_irq_reroute_variant { -- Gitee From 8e508969940e278ee0b3474f02d7370037086e3f Mon Sep 17 00:00:00 2001 From: lvjianmin Date: Thu, 29 Oct 2020 16:29:11 +0800 Subject: [PATCH 18/59] PCI: Add quirk for multifunction devices of LS7A In LS7A, multifunction device use same pci PIN and different irq for different function, so fix it for standard pci PIN usage. Change-Id: Ice33beca53dba3ca23e4658b7c2e6e568d52a07b Signed-off-by: lvjianmin Signed-off-by: Huacai Chen --- drivers/pci/quirks.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index ff347cfc83b9..e8055afb9796 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -243,6 +243,31 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON, DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON, DEV_LS7A_LPC, loongson_system_bus_quirk); +static void loongson_pci_pin_quirk(struct pci_dev *pdev) +{ + pdev->pin = 1 + (PCI_FUNC(pdev->devfn) & 3); +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, + DEV_PCIE_PORT_0, loongson_pci_pin_quirk); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, + DEV_PCIE_PORT_1, loongson_pci_pin_quirk); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, + DEV_PCIE_PORT_2, loongson_pci_pin_quirk); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, + PCI_DEVICE_ID_LOONGSON_AHCI, loongson_pci_pin_quirk); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, + PCI_DEVICE_ID_LOONGSON_EHCI, loongson_pci_pin_quirk); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, + PCI_DEVICE_ID_LOONGSON_OHCI, loongson_pci_pin_quirk); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, + PCI_DEVICE_ID_LOONGSON_DC1, loongson_pci_pin_quirk); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, + PCI_DEVICE_ID_LOONGSON_DC2, loongson_pci_pin_quirk); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, + PCI_DEVICE_ID_LOONGSON_GPU, loongson_pci_pin_quirk); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, + PCI_DEVICE_ID_LOONGSON_GMAC, loongson_pci_pin_quirk); + static void loongson_mrrs_quirk(struct pci_dev *dev) { struct pci_bus *bus = dev->bus; -- Gitee From f3271dc7764af365c9920e00bb682132632cbe8d Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sun, 25 Apr 2021 15:57:56 +0800 Subject: [PATCH 19/59] PCI: Support ASpeed VGA cards behind a misbehaving bridge According to PCI-to-PCI bridge spec, bit 3 of Bridge Control Register is VGA Enable bit which modifies the response by the bridge to VGA compatible addresses. The Bridge Control register provides extensions to the Command register that are specific to a bridge. The Bridge Control register provides many of the same controls for the secondary interface that are provided by the Command register for the primary interface. There are some bits that affect the operation of both interfaces of the bridge. If the VGA Enable bit is set, the bridge will decode and forward the following accesses on the primary interface to the secondary interface. Forwarding of these accesses is qualified by the I/O Enable and Memory Enable bits in the Command register, and VGA Enable bit modifies the response by the bridge to VGA compatible addresses. when 0: do not forward VGA compatible memory and I/O addresses from the primary to secondary interface (addresses defined below) unless they are enabled for forwarding by the defined I/O and memory address ranges when 1: forward VGA compatible memory and I/O addresses (addresses defined below) from the primary interface to the secondary interface (if the I/O Enable and Memory Enable bits are set) independent of the I/O and memory address ranges and independent of the ISA Enable bit * memory accesses in the range 000A 0000h to 000B FFFFh * I/O addresses in the first 64 KB of the I/O address space (AD[31:16] are 0000h) where AD[9:: 0] are in the ranges 3B0h to 3BBh and 3C0h to 3DFh (inclusive of ISA address aliases - AD[15::10] are not decoded) If the VGA Enable bit is set, forwarding of these accesses is independent of the I/O address range and memory address ranges defined by the I/O Base and Limit registers, the Memory Base and Limit registers, and the Prefetchable Memory Base and Limit registers of the bridge. Forwarding of these accesses is also independent of the settings of the ISA Enable bit (in the Bridge Control register) or VGA Palette Snoop bits (in the Command register), The AST2500 hardward does not set the VGA Enable bit on its bridge control register, which causes vgaarb subsystem don't think the VGA card behind this bridge as a valid boot vga device. So we provide a quirk to fix Xorg auto-detection. See similar bug: https://patchwork.kernel.org/project/linux-pci/patch/20170619023528.11532-1-dja@axtens.net/ Change-Id: I18b5a7f232d718658cbeafbb7ac36539de3e76f3 Signed-off-by: Huacai Chen Signed-off-by: Jingfeng Sui --- drivers/pci/quirks.c | 45 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index e8055afb9796..e124b7040ca9 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -29,6 +29,7 @@ #include #include #include +#include #include /* isa_dma_bridge_buggy */ #include "pci.h" @@ -306,6 +307,50 @@ static void loongson_mrrs_quirk(struct pci_dev *dev) } DECLARE_PCI_FIXUP_ENABLE(PCI_ANY_ID, PCI_ANY_ID, loongson_mrrs_quirk); +static void aspeed_fixup_vgaarb(struct pci_dev *pdev) +{ + struct pci_dev *bridge; + struct pci_bus *bus; + struct pci_dev *vdevp = NULL; + u16 config; + + bus = pdev->bus; + bridge = bus->self; + + /* Is VGA routed to us? */ + if (bridge && (pci_is_bridge(bridge))) { + pci_read_config_word(bridge, PCI_BRIDGE_CONTROL, &config); + + /* Yes, this bridge is PCI bridge-to-bridge spec compliant, + * just return! + */ + if (config & PCI_BRIDGE_CTL_VGA) + return; + + dev_warn(&pdev->dev, "VGA bridge control is not enabled\n"); + } + + /* Just return if the system already have a default device */ + if (vga_default_device()) + return; + + /* No default vga device */ + while ((vdevp = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, vdevp))) { + if (vdevp->vendor != 0x1a03) { + /* Have other vga devcie in the system, do nothing */ + dev_info(&pdev->dev, "Another boot vga device: 0x%x:0x%x\n", + vdevp->vendor, vdevp->device); + return; + } + } + + vga_set_default_device(pdev); + + dev_info(&pdev->dev, "Boot vga device set as 0x%x:0x%x\n", + pdev->vendor, pdev->device); +} +DECLARE_PCI_FIXUP_CLASS_FINAL(0x1a03, 0x2000, PCI_CLASS_DISPLAY_VGA, 8, aspeed_fixup_vgaarb); + /* * The Mellanox Tavor device gives false positive parity errors. Mark this * device with a broken_parity_status to allow PCI scanning code to "skip" -- Gitee From 316ccfdd738025a422e4fe992338ff92b2f5b20a Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 31 Dec 2020 15:13:33 +0800 Subject: [PATCH 20/59] LoongArch: Add elf-related definitions for LoongArch Add elf-related definitions for LoongArch, including: EM_LOONGARCH, KEXEC_ARCH_LOONGARCH, AUDIT_ARCH_LOONGARCH32, AUDIT_ARCH_LOONGARCH64 and NT_LOONGARCH_*. Change-Id: Ib144489b25c096113eaffe339a2abba4565f17b1 Signed-off-by: Huacai Chen --- include/uapi/linux/audit.h | 2 ++ include/uapi/linux/elf-em.h | 1 + include/uapi/linux/elf.h | 5 +++++ include/uapi/linux/kexec.h | 1 + scripts/sorttable.c | 5 +++++ 5 files changed, 14 insertions(+) diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index 320b8645f6fd..0d5a923e617e 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -435,6 +435,8 @@ enum { #define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) #define AUDIT_ARCH_XTENSA (EM_XTENSA) #define AUDIT_ARCH_SW64 (EM_SW64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) +#define AUDIT_ARCH_LOONGARCH32 (EM_LOONGARCH|__AUDIT_ARCH_LE) +#define AUDIT_ARCH_LOONGARCH64 (EM_LOONGARCH|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) #define AUDIT_PERM_EXEC 1 #define AUDIT_PERM_WRITE 2 diff --git a/include/uapi/linux/elf-em.h b/include/uapi/linux/elf-em.h index 9da7992ebffe..32458706a403 100644 --- a/include/uapi/linux/elf-em.h +++ b/include/uapi/linux/elf-em.h @@ -51,6 +51,7 @@ #define EM_RISCV 243 /* RISC-V */ #define EM_BPF 247 /* Linux BPF - in-kernel virtual machine */ #define EM_CSKY 252 /* C-SKY */ +#define EM_LOONGARCH 258 /* LoongArch */ #define EM_FRV 0x5441 /* Fujitsu FR-V */ /* diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index 30f68b42eeb5..eb13d74ef436 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -431,6 +431,11 @@ typedef struct elf64_shdr { #define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers */ #define NT_MIPS_FP_MODE 0x801 /* MIPS floating-point mode */ #define NT_MIPS_MSA 0x802 /* MIPS SIMD registers */ +#define NT_LOONGARCH_CPUCFG 0xa00 /* LoongArch CPU config registers */ +#define NT_LOONGARCH_CSR 0xa01 /* LoongArch control and status registers */ +#define NT_LOONGARCH_LSX 0xa02 /* LoongArch Loongson SIMD Extension registers */ +#define NT_LOONGARCH_LASX 0xa03 /* LoongArch Loongson Advanced SIMD Extension registers */ +#define NT_LOONGARCH_LBT 0xa04 /* LoongArch Loongson Binary Translation registers */ /* Note types with note name "GNU" */ #define NT_GNU_PROPERTY_TYPE_0 5 diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h index 52b38feeb01a..84e94f2e3a18 100644 --- a/include/uapi/linux/kexec.h +++ b/include/uapi/linux/kexec.h @@ -44,6 +44,7 @@ #define KEXEC_ARCH_MIPS ( 8 << 16) #define KEXEC_ARCH_AARCH64 (183 << 16) #define KEXEC_ARCH_SW64 (0x9916UL << 16) +#define KEXEC_ARCH_LOONGARCH (258 << 16) /* The artificial cap on the number of segments passed to kexec_load. */ #define KEXEC_SEGMENT_MAX 16 diff --git a/scripts/sorttable.c b/scripts/sorttable.c index ac93e033b7cb..3edef1c6aaa0 100644 --- a/scripts/sorttable.c +++ b/scripts/sorttable.c @@ -54,6 +54,10 @@ #define EM_ARCV2 195 #endif +#ifndef EM_LOONGARCH +#define EM_LOONGARCH 258 +#endif + static uint32_t (*r)(const uint32_t *); static uint16_t (*r2)(const uint16_t *); static uint64_t (*r8)(const uint64_t *); @@ -347,6 +351,7 @@ static int do_file(char const *const fname, void *addr) break; case EM_ARCOMPACT: case EM_ARCV2: + case EM_LOONGARCH: case EM_MICROBLAZE: case EM_MIPS: case EM_XTENSA: -- Gitee From 859780e4273d737d30773233961c399a3c4f9a91 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 6 Feb 2021 17:18:08 +0800 Subject: [PATCH 21/59] LoongArch: Add writecombine support for drm Change-Id: If4d66acade515001e82d3f8bab52afaecbd035dd Signed-off-by: Huacai Chen --- drivers/gpu/drm/drm_vm.c | 4 ++-- drivers/gpu/drm/ttm/ttm_bo_util.c | 2 +- include/drm/drm_cache.h | 8 ++++++++ 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_vm.c b/drivers/gpu/drm/drm_vm.c index 1a6369633789..2517c5a01d36 100644 --- a/drivers/gpu/drm/drm_vm.c +++ b/drivers/gpu/drm/drm_vm.c @@ -74,7 +74,7 @@ static pgprot_t drm_io_prot(struct drm_local_map *map, tmp = pgprot_decrypted(tmp); #if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) || \ - defined(__mips__) + defined(__mips__) || defined(__loongarch__) if (map->type == _DRM_REGISTERS && !(map->flags & _DRM_WRITE_COMBINING)) tmp = pgprot_noncached(tmp); else @@ -397,7 +397,7 @@ static void drm_vm_open_locked(struct drm_device *dev, struct drm_vma_entry *vma_entry; DRM_DEBUG("0x%08lx,0x%08lx\n", - vma->vm_start, vma->vm_end - vma->vm_start); + vma->vm_start, vma->vm_end - vma->vm_start); vma_entry = kmalloc(sizeof(*vma_entry), GFP_KERNEL); if (vma_entry) { diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index fb2a25f8408f..d964bc639f9a 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -397,7 +397,7 @@ pgprot_t ttm_io_prot(uint32_t caching_flags, pgprot_t tmp) tmp = pgprot_noncached(tmp); #endif #if defined(__ia64__) || defined(__arm__) || defined(__aarch64__) || \ - defined(__powerpc__) || defined(__mips__) + defined(__powerpc__) || defined(__mips__) || defined(__loongarch__) if (caching_flags & TTM_PL_FLAG_WC) tmp = pgprot_writecombine(tmp); else diff --git a/include/drm/drm_cache.h b/include/drm/drm_cache.h index e9ad4863d915..9d1c3c8da570 100644 --- a/include/drm/drm_cache.h +++ b/include/drm/drm_cache.h @@ -65,6 +65,14 @@ static inline bool drm_arch_can_wc_memory(void) * optimization entirely for ARM and arm64. */ return false; +#elif defined(CONFIG_LOONGARCH) + /* + * LoongArch maintains cache coherency in hardware, but its WUC attribute + * (Weak-ordered UnCached, which is similar to WC) is out of the scope of + * cache coherency machanism. This means WUC can only used for write-only + * memory regions. + */ + return false; #else return true; #endif -- Gitee From 419dc47073d9e50ce3ad12041e2ede445b525780 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 31 Dec 2020 15:13:33 +0800 Subject: [PATCH 22/59] LoongArch: Add basic booting support Change-Id: I24e9d9092f6fc6a73910140acb5a5aedb0d327c2 Signed-off-by: Huacai Chen --- arch/loongarch/.gitignore | 9 + arch/loongarch/Kbuild | 21 + arch/loongarch/Kbuild.platforms | 6 + arch/loongarch/Kconfig | 435 +++++ arch/loongarch/Kconfig.debug | 0 arch/loongarch/Makefile | 123 ++ arch/loongarch/include/asm/Kbuild | 30 + arch/loongarch/include/asm/acenv.h | 20 + arch/loongarch/include/asm/acpi.h | 42 + arch/loongarch/include/asm/addrspace.h | 110 ++ arch/loongarch/include/asm/asm-offsets.h | 5 + arch/loongarch/include/asm/asm-prototypes.h | 7 + arch/loongarch/include/asm/asm.h | 190 ++ arch/loongarch/include/asm/asmmacro-64.h | 44 + arch/loongarch/include/asm/asmmacro.h | 277 +++ arch/loongarch/include/asm/atomic.h | 368 ++++ arch/loongarch/include/asm/barrier.h | 53 + arch/loongarch/include/asm/bitops.h | 34 + arch/loongarch/include/asm/bitrev.h | 38 + arch/loongarch/include/asm/bootinfo.h | 40 + arch/loongarch/include/asm/branch.h | 25 + arch/loongarch/include/asm/bug.h | 23 + arch/loongarch/include/asm/cache.h | 13 + arch/loongarch/include/asm/cacheflush.h | 99 ++ arch/loongarch/include/asm/cacheops.h | 37 + arch/loongarch/include/asm/clocksource.h | 12 + arch/loongarch/include/asm/cmpxchg.h | 135 ++ arch/loongarch/include/asm/compiler.h | 19 + arch/loongarch/include/asm/cpu-features.h | 69 + arch/loongarch/include/asm/cpu-info.h | 136 ++ arch/loongarch/include/asm/cpu.h | 127 ++ arch/loongarch/include/asm/cpufeature.h | 24 + arch/loongarch/include/asm/delay.h | 34 + arch/loongarch/include/asm/dma-direct.h | 16 + arch/loongarch/include/asm/dma.h | 18 + arch/loongarch/include/asm/dmi.h | 24 + arch/loongarch/include/asm/efi.h | 36 + arch/loongarch/include/asm/elf.h | 302 ++++ arch/loongarch/include/asm/entry-common.h | 13 + arch/loongarch/include/asm/exec.h | 19 + arch/loongarch/include/asm/fb.h | 24 + arch/loongarch/include/asm/fixmap.h | 19 + arch/loongarch/include/asm/fpregdef.h | 53 + arch/loongarch/include/asm/fpu.h | 136 ++ arch/loongarch/include/asm/futex.h | 107 ++ arch/loongarch/include/asm/fw.h | 18 + arch/loongarch/include/asm/hardirq.h | 24 + arch/loongarch/include/asm/hugetlb.h | 79 + arch/loongarch/include/asm/hw_irq.h | 22 + arch/loongarch/include/asm/idle.h | 9 + arch/loongarch/include/asm/inst.h | 255 +++ arch/loongarch/include/asm/io.h | 129 ++ arch/loongarch/include/asm/irq.h | 55 + arch/loongarch/include/asm/irq_regs.h | 33 + arch/loongarch/include/asm/irqflags.h | 78 + arch/loongarch/include/asm/kdebug.h | 23 + arch/loongarch/include/asm/kmap_types.h | 7 + arch/loongarch/include/asm/linkage.h | 36 + arch/loongarch/include/asm/local.h | 142 ++ arch/loongarch/include/asm/loongarchregs.h | 1579 +++++++++++++++++ .../include/asm/mach-loongson64/boot_param.h | 107 ++ .../include/asm/mach-loongson64/irq.h | 83 + .../include/asm/mach-loongson64/loongson.h | 160 ++ arch/loongarch/include/asm/mmu.h | 16 + arch/loongarch/include/asm/mmu_context.h | 152 ++ arch/loongarch/include/asm/module.h | 15 + arch/loongarch/include/asm/page.h | 129 ++ arch/loongarch/include/asm/pci.h | 55 + arch/loongarch/include/asm/percpu.h | 20 + arch/loongarch/include/asm/perf_event.h | 16 + arch/loongarch/include/asm/pgalloc.h | 107 ++ arch/loongarch/include/asm/pgtable-64.h | 263 +++ arch/loongarch/include/asm/pgtable-bits.h | 139 ++ arch/loongarch/include/asm/pgtable.h | 338 ++++ arch/loongarch/include/asm/prefetch.h | 33 + arch/loongarch/include/asm/processor.h | 209 +++ arch/loongarch/include/asm/ptrace.h | 154 ++ arch/loongarch/include/asm/reboot.h | 10 + arch/loongarch/include/asm/regdef.h | 47 + arch/loongarch/include/asm/serial.h | 11 + arch/loongarch/include/asm/setup.h | 21 + arch/loongarch/include/asm/shmparam.h | 13 + arch/loongarch/include/asm/sparsemem.h | 23 + arch/loongarch/include/asm/spinlock.h | 18 + arch/loongarch/include/asm/spinlock_types.h | 15 + arch/loongarch/include/asm/stackframe.h | 212 +++ arch/loongarch/include/asm/stacktrace.h | 74 + arch/loongarch/include/asm/string.h | 21 + arch/loongarch/include/asm/switch_to.h | 37 + arch/loongarch/include/asm/syscall.h | 68 + arch/loongarch/include/asm/thread_info.h | 124 ++ arch/loongarch/include/asm/time.h | 55 + arch/loongarch/include/asm/timex.h | 32 + arch/loongarch/include/asm/tlb.h | 216 +++ arch/loongarch/include/asm/tlbflush.h | 38 + arch/loongarch/include/asm/topology.h | 15 + arch/loongarch/include/asm/types.h | 39 + arch/loongarch/include/asm/uaccess.h | 432 +++++ arch/loongarch/include/asm/unistd.h | 10 + arch/loongarch/include/asm/vdso.h | 43 + arch/loongarch/include/asm/vdso/clocksource.h | 8 + .../loongarch/include/asm/vdso/gettimeofday.h | 104 ++ arch/loongarch/include/asm/vdso/processor.h | 14 + arch/loongarch/include/asm/vdso/vdso.h | 31 + arch/loongarch/include/asm/vdso/vsyscall.h | 27 + arch/loongarch/include/asm/vermagic.h | 19 + arch/loongarch/include/asm/vmalloc.h | 4 + arch/loongarch/include/uapi/asm/Kbuild | 2 + arch/loongarch/include/uapi/asm/auxvec.h | 16 + arch/loongarch/include/uapi/asm/bitfield.h | 17 + arch/loongarch/include/uapi/asm/bitsperlong.h | 9 + arch/loongarch/include/uapi/asm/break.h | 27 + arch/loongarch/include/uapi/asm/byteorder.h | 12 + arch/loongarch/include/uapi/asm/hwcap.h | 20 + arch/loongarch/include/uapi/asm/inst.h | 470 +++++ arch/loongarch/include/uapi/asm/ptrace.h | 48 + arch/loongarch/include/uapi/asm/reg.h | 62 + arch/loongarch/include/uapi/asm/sigcontext.h | 39 + arch/loongarch/include/uapi/asm/signal.h | 13 + arch/loongarch/include/uapi/asm/swab.h | 54 + arch/loongarch/include/uapi/asm/ucontext.h | 30 + arch/loongarch/include/uapi/asm/unistd.h | 6 + arch/loongarch/kernel/Makefile | 22 + arch/loongarch/kernel/access-helper.h | 13 + arch/loongarch/kernel/acpi.c | 529 ++++++ arch/loongarch/kernel/asm-offsets.c | 255 +++ arch/loongarch/kernel/cacheinfo.c | 126 ++ arch/loongarch/kernel/cmdline.c | 30 + arch/loongarch/kernel/cmpxchg.c | 100 ++ arch/loongarch/kernel/cpu-probe.c | 305 ++++ arch/loongarch/kernel/efi.c | 213 +++ arch/loongarch/kernel/elf.c | 35 + arch/loongarch/kernel/entry.S | 89 + arch/loongarch/kernel/fpu.S | 267 +++ arch/loongarch/kernel/genex.S | 89 + arch/loongarch/kernel/head.S | 72 + arch/loongarch/kernel/idle.c | 16 + arch/loongarch/kernel/io.c | 98 + arch/loongarch/kernel/irq.c | 63 + arch/loongarch/kernel/module.c | 658 +++++++ arch/loongarch/kernel/proc.c | 122 ++ arch/loongarch/kernel/process.c | 261 +++ arch/loongarch/kernel/ptrace.c | 411 +++++ arch/loongarch/kernel/reset.c | 51 + arch/loongarch/kernel/setup.c | 392 ++++ arch/loongarch/kernel/signal-common.h | 37 + arch/loongarch/kernel/signal.c | 460 +++++ arch/loongarch/kernel/switch.S | 40 + arch/loongarch/kernel/syscall.c | 65 + arch/loongarch/kernel/time.c | 225 +++ arch/loongarch/kernel/topology.c | 13 + arch/loongarch/kernel/traps.c | 721 ++++++++ arch/loongarch/kernel/unaligned.c | 416 +++++ arch/loongarch/kernel/vdso.c | 138 ++ arch/loongarch/kernel/vmlinux.lds.S | 112 ++ arch/loongarch/lib/Makefile | 7 + arch/loongarch/lib/clear_user.S | 43 + arch/loongarch/lib/copy_user.S | 47 + arch/loongarch/lib/delay.c | 43 + arch/loongarch/lib/dump_tlb.c | 107 ++ arch/loongarch/lib/memcpy.S | 32 + arch/loongarch/lib/memmove.S | 45 + arch/loongarch/lib/memset.S | 30 + arch/loongarch/lib/strncpy_user.S | 48 + arch/loongarch/lib/strnlen_user.S | 44 + arch/loongarch/loongson64/Makefile | 5 + arch/loongarch/loongson64/Platform | 13 + arch/loongarch/loongson64/env.c | 209 +++ arch/loongarch/loongson64/init.c | 133 ++ arch/loongarch/loongson64/irq.c | 88 + arch/loongarch/loongson64/mem.c | 85 + arch/loongarch/loongson64/reset.c | 54 + arch/loongarch/loongson64/rtc.c | 36 + arch/loongarch/loongson64/setup.c | 41 + arch/loongarch/mm/Makefile | 10 + arch/loongarch/mm/cache.c | 151 ++ arch/loongarch/mm/extable.c | 22 + arch/loongarch/mm/fault.c | 261 +++ arch/loongarch/mm/hugetlbpage.c | 87 + arch/loongarch/mm/init.c | 177 ++ arch/loongarch/mm/ioremap.c | 31 + arch/loongarch/mm/maccess.c | 10 + arch/loongarch/mm/mmap.c | 125 ++ arch/loongarch/mm/page.S | 84 + arch/loongarch/mm/pgtable-64.c | 117 ++ arch/loongarch/mm/pgtable.c | 24 + arch/loongarch/mm/tlb.c | 285 +++ arch/loongarch/mm/tlbex.S | 478 +++++ arch/loongarch/pci/Makefile | 7 + arch/loongarch/pci/acpi.c | 319 ++++ arch/loongarch/pci/pci.c | 95 + arch/loongarch/vdso/Makefile | 96 + arch/loongarch/vdso/elf.S | 19 + arch/loongarch/vdso/gen_vdso_offsets.sh | 13 + arch/loongarch/vdso/sigreturn.S | 29 + arch/loongarch/vdso/vdso.S | 22 + arch/loongarch/vdso/vdso.lds.S | 72 + arch/loongarch/vdso/vgettimeofday.c | 27 + drivers/acpi/pci_mcfg.c | 13 + drivers/pci/controller/Kconfig | 2 +- drivers/pci/controller/pci-loongson.c | 130 +- include/linux/pci-ecam.h | 1 + include/linux/pci_ids.h | 11 + include/uapi/asm-generic/signal.h | 4 + scripts/subarch.include | 2 +- 205 files changed, 20713 insertions(+), 28 deletions(-) create mode 100644 arch/loongarch/.gitignore create mode 100644 arch/loongarch/Kbuild create mode 100644 arch/loongarch/Kbuild.platforms create mode 100644 arch/loongarch/Kconfig create mode 100644 arch/loongarch/Kconfig.debug create mode 100644 arch/loongarch/Makefile create mode 100644 arch/loongarch/include/asm/Kbuild create mode 100644 arch/loongarch/include/asm/acenv.h create mode 100644 arch/loongarch/include/asm/acpi.h create mode 100644 arch/loongarch/include/asm/addrspace.h create mode 100644 arch/loongarch/include/asm/asm-offsets.h create mode 100644 arch/loongarch/include/asm/asm-prototypes.h create mode 100644 arch/loongarch/include/asm/asm.h create mode 100644 arch/loongarch/include/asm/asmmacro-64.h create mode 100644 arch/loongarch/include/asm/asmmacro.h create mode 100644 arch/loongarch/include/asm/atomic.h create mode 100644 arch/loongarch/include/asm/barrier.h create mode 100644 arch/loongarch/include/asm/bitops.h create mode 100644 arch/loongarch/include/asm/bitrev.h create mode 100644 arch/loongarch/include/asm/bootinfo.h create mode 100644 arch/loongarch/include/asm/branch.h create mode 100644 arch/loongarch/include/asm/bug.h create mode 100644 arch/loongarch/include/asm/cache.h create mode 100644 arch/loongarch/include/asm/cacheflush.h create mode 100644 arch/loongarch/include/asm/cacheops.h create mode 100644 arch/loongarch/include/asm/clocksource.h create mode 100644 arch/loongarch/include/asm/cmpxchg.h create mode 100644 arch/loongarch/include/asm/compiler.h create mode 100644 arch/loongarch/include/asm/cpu-features.h create mode 100644 arch/loongarch/include/asm/cpu-info.h create mode 100644 arch/loongarch/include/asm/cpu.h create mode 100644 arch/loongarch/include/asm/cpufeature.h create mode 100644 arch/loongarch/include/asm/delay.h create mode 100644 arch/loongarch/include/asm/dma-direct.h create mode 100644 arch/loongarch/include/asm/dma.h create mode 100644 arch/loongarch/include/asm/dmi.h create mode 100644 arch/loongarch/include/asm/efi.h create mode 100644 arch/loongarch/include/asm/elf.h create mode 100644 arch/loongarch/include/asm/entry-common.h create mode 100644 arch/loongarch/include/asm/exec.h create mode 100644 arch/loongarch/include/asm/fb.h create mode 100644 arch/loongarch/include/asm/fixmap.h create mode 100644 arch/loongarch/include/asm/fpregdef.h create mode 100644 arch/loongarch/include/asm/fpu.h create mode 100644 arch/loongarch/include/asm/futex.h create mode 100644 arch/loongarch/include/asm/fw.h create mode 100644 arch/loongarch/include/asm/hardirq.h create mode 100644 arch/loongarch/include/asm/hugetlb.h create mode 100644 arch/loongarch/include/asm/hw_irq.h create mode 100644 arch/loongarch/include/asm/idle.h create mode 100644 arch/loongarch/include/asm/inst.h create mode 100644 arch/loongarch/include/asm/io.h create mode 100644 arch/loongarch/include/asm/irq.h create mode 100644 arch/loongarch/include/asm/irq_regs.h create mode 100644 arch/loongarch/include/asm/irqflags.h create mode 100644 arch/loongarch/include/asm/kdebug.h create mode 100644 arch/loongarch/include/asm/kmap_types.h create mode 100644 arch/loongarch/include/asm/linkage.h create mode 100644 arch/loongarch/include/asm/local.h create mode 100644 arch/loongarch/include/asm/loongarchregs.h create mode 100644 arch/loongarch/include/asm/mach-loongson64/boot_param.h create mode 100644 arch/loongarch/include/asm/mach-loongson64/irq.h create mode 100644 arch/loongarch/include/asm/mach-loongson64/loongson.h create mode 100644 arch/loongarch/include/asm/mmu.h create mode 100644 arch/loongarch/include/asm/mmu_context.h create mode 100644 arch/loongarch/include/asm/module.h create mode 100644 arch/loongarch/include/asm/page.h create mode 100644 arch/loongarch/include/asm/pci.h create mode 100644 arch/loongarch/include/asm/percpu.h create mode 100644 arch/loongarch/include/asm/perf_event.h create mode 100644 arch/loongarch/include/asm/pgalloc.h create mode 100644 arch/loongarch/include/asm/pgtable-64.h create mode 100644 arch/loongarch/include/asm/pgtable-bits.h create mode 100644 arch/loongarch/include/asm/pgtable.h create mode 100644 arch/loongarch/include/asm/prefetch.h create mode 100644 arch/loongarch/include/asm/processor.h create mode 100644 arch/loongarch/include/asm/ptrace.h create mode 100644 arch/loongarch/include/asm/reboot.h create mode 100644 arch/loongarch/include/asm/regdef.h create mode 100644 arch/loongarch/include/asm/serial.h create mode 100644 arch/loongarch/include/asm/setup.h create mode 100644 arch/loongarch/include/asm/shmparam.h create mode 100644 arch/loongarch/include/asm/sparsemem.h create mode 100644 arch/loongarch/include/asm/spinlock.h create mode 100644 arch/loongarch/include/asm/spinlock_types.h create mode 100644 arch/loongarch/include/asm/stackframe.h create mode 100644 arch/loongarch/include/asm/stacktrace.h create mode 100644 arch/loongarch/include/asm/string.h create mode 100644 arch/loongarch/include/asm/switch_to.h create mode 100644 arch/loongarch/include/asm/syscall.h create mode 100644 arch/loongarch/include/asm/thread_info.h create mode 100644 arch/loongarch/include/asm/time.h create mode 100644 arch/loongarch/include/asm/timex.h create mode 100644 arch/loongarch/include/asm/tlb.h create mode 100644 arch/loongarch/include/asm/tlbflush.h create mode 100644 arch/loongarch/include/asm/topology.h create mode 100644 arch/loongarch/include/asm/types.h create mode 100644 arch/loongarch/include/asm/uaccess.h create mode 100644 arch/loongarch/include/asm/unistd.h create mode 100644 arch/loongarch/include/asm/vdso.h create mode 100644 arch/loongarch/include/asm/vdso/clocksource.h create mode 100644 arch/loongarch/include/asm/vdso/gettimeofday.h create mode 100644 arch/loongarch/include/asm/vdso/processor.h create mode 100644 arch/loongarch/include/asm/vdso/vdso.h create mode 100644 arch/loongarch/include/asm/vdso/vsyscall.h create mode 100644 arch/loongarch/include/asm/vermagic.h create mode 100644 arch/loongarch/include/asm/vmalloc.h create mode 100644 arch/loongarch/include/uapi/asm/Kbuild create mode 100644 arch/loongarch/include/uapi/asm/auxvec.h create mode 100644 arch/loongarch/include/uapi/asm/bitfield.h create mode 100644 arch/loongarch/include/uapi/asm/bitsperlong.h create mode 100644 arch/loongarch/include/uapi/asm/break.h create mode 100644 arch/loongarch/include/uapi/asm/byteorder.h create mode 100644 arch/loongarch/include/uapi/asm/hwcap.h create mode 100644 arch/loongarch/include/uapi/asm/inst.h create mode 100644 arch/loongarch/include/uapi/asm/ptrace.h create mode 100644 arch/loongarch/include/uapi/asm/reg.h create mode 100644 arch/loongarch/include/uapi/asm/sigcontext.h create mode 100644 arch/loongarch/include/uapi/asm/signal.h create mode 100644 arch/loongarch/include/uapi/asm/swab.h create mode 100644 arch/loongarch/include/uapi/asm/ucontext.h create mode 100644 arch/loongarch/include/uapi/asm/unistd.h create mode 100644 arch/loongarch/kernel/Makefile create mode 100644 arch/loongarch/kernel/access-helper.h create mode 100644 arch/loongarch/kernel/acpi.c create mode 100644 arch/loongarch/kernel/asm-offsets.c create mode 100644 arch/loongarch/kernel/cacheinfo.c create mode 100644 arch/loongarch/kernel/cmdline.c create mode 100644 arch/loongarch/kernel/cmpxchg.c create mode 100644 arch/loongarch/kernel/cpu-probe.c create mode 100644 arch/loongarch/kernel/efi.c create mode 100644 arch/loongarch/kernel/elf.c create mode 100644 arch/loongarch/kernel/entry.S create mode 100644 arch/loongarch/kernel/fpu.S create mode 100644 arch/loongarch/kernel/genex.S create mode 100644 arch/loongarch/kernel/head.S create mode 100644 arch/loongarch/kernel/idle.c create mode 100644 arch/loongarch/kernel/io.c create mode 100644 arch/loongarch/kernel/irq.c create mode 100644 arch/loongarch/kernel/module.c create mode 100644 arch/loongarch/kernel/proc.c create mode 100644 arch/loongarch/kernel/process.c create mode 100644 arch/loongarch/kernel/ptrace.c create mode 100644 arch/loongarch/kernel/reset.c create mode 100644 arch/loongarch/kernel/setup.c create mode 100644 arch/loongarch/kernel/signal-common.h create mode 100644 arch/loongarch/kernel/signal.c create mode 100644 arch/loongarch/kernel/switch.S create mode 100644 arch/loongarch/kernel/syscall.c create mode 100644 arch/loongarch/kernel/time.c create mode 100644 arch/loongarch/kernel/topology.c create mode 100644 arch/loongarch/kernel/traps.c create mode 100644 arch/loongarch/kernel/unaligned.c create mode 100644 arch/loongarch/kernel/vdso.c create mode 100644 arch/loongarch/kernel/vmlinux.lds.S create mode 100644 arch/loongarch/lib/Makefile create mode 100644 arch/loongarch/lib/clear_user.S create mode 100644 arch/loongarch/lib/copy_user.S create mode 100644 arch/loongarch/lib/delay.c create mode 100644 arch/loongarch/lib/dump_tlb.c create mode 100644 arch/loongarch/lib/memcpy.S create mode 100644 arch/loongarch/lib/memmove.S create mode 100644 arch/loongarch/lib/memset.S create mode 100644 arch/loongarch/lib/strncpy_user.S create mode 100644 arch/loongarch/lib/strnlen_user.S create mode 100644 arch/loongarch/loongson64/Makefile create mode 100644 arch/loongarch/loongson64/Platform create mode 100644 arch/loongarch/loongson64/env.c create mode 100644 arch/loongarch/loongson64/init.c create mode 100644 arch/loongarch/loongson64/irq.c create mode 100644 arch/loongarch/loongson64/mem.c create mode 100644 arch/loongarch/loongson64/reset.c create mode 100644 arch/loongarch/loongson64/rtc.c create mode 100644 arch/loongarch/loongson64/setup.c create mode 100644 arch/loongarch/mm/Makefile create mode 100644 arch/loongarch/mm/cache.c create mode 100644 arch/loongarch/mm/extable.c create mode 100644 arch/loongarch/mm/fault.c create mode 100644 arch/loongarch/mm/hugetlbpage.c create mode 100644 arch/loongarch/mm/init.c create mode 100644 arch/loongarch/mm/ioremap.c create mode 100644 arch/loongarch/mm/maccess.c create mode 100644 arch/loongarch/mm/mmap.c create mode 100644 arch/loongarch/mm/page.S create mode 100644 arch/loongarch/mm/pgtable-64.c create mode 100644 arch/loongarch/mm/pgtable.c create mode 100644 arch/loongarch/mm/tlb.c create mode 100644 arch/loongarch/mm/tlbex.S create mode 100644 arch/loongarch/pci/Makefile create mode 100644 arch/loongarch/pci/acpi.c create mode 100644 arch/loongarch/pci/pci.c create mode 100644 arch/loongarch/vdso/Makefile create mode 100644 arch/loongarch/vdso/elf.S create mode 100755 arch/loongarch/vdso/gen_vdso_offsets.sh create mode 100644 arch/loongarch/vdso/sigreturn.S create mode 100644 arch/loongarch/vdso/vdso.S create mode 100644 arch/loongarch/vdso/vdso.lds.S create mode 100644 arch/loongarch/vdso/vgettimeofday.c diff --git a/arch/loongarch/.gitignore b/arch/loongarch/.gitignore new file mode 100644 index 000000000000..e9c2dedc1c5e --- /dev/null +++ b/arch/loongarch/.gitignore @@ -0,0 +1,9 @@ +*.lds +*.raw +calc_vmlinuz_load_addr +elf-entry +relocs +vmlinux.* +vmlinuz.* + +!kernel/vmlinux.lds.S diff --git a/arch/loongarch/Kbuild b/arch/loongarch/Kbuild new file mode 100644 index 000000000000..e997ba117bd9 --- /dev/null +++ b/arch/loongarch/Kbuild @@ -0,0 +1,21 @@ +# Fail on warnings - also for files referenced in subdirs +# -Werror can be disabled for specific files using: +# CFLAGS_ := -Wno-error +ifeq ($(W),) +subdir-ccflags-y := -Werror +endif + +# platform specific definitions +include arch/loongarch/Kbuild.platforms +obj-y := $(platform-y) + +# make clean traverses $(obj-) without having included .config, so +# everything ends up here +obj- := $(platform-) + +# LoongArch object files +# The object files are linked as core-y files would be linked + +obj-y += kernel/ +obj-y += mm/ +obj-y += vdso/ diff --git a/arch/loongarch/Kbuild.platforms b/arch/loongarch/Kbuild.platforms new file mode 100644 index 000000000000..ad390d5c00f2 --- /dev/null +++ b/arch/loongarch/Kbuild.platforms @@ -0,0 +1,6 @@ +# All platforms listed in alphabetic order + +platforms += loongson64 + +# include the platform specific files +include $(patsubst %, $(srctree)/arch/loongarch/%/Platform, $(platforms)) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig new file mode 100644 index 000000000000..0d64f21d09b3 --- /dev/null +++ b/arch/loongarch/Kconfig @@ -0,0 +1,435 @@ +# SPDX-License-Identifier: GPL-2.0 +config LOONGARCH + bool + default y + select ACPI_MCFG if ACPI + select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI + select ARCH_BINFMT_ELF_STATE + select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI + select ARCH_HAS_PTE_SPECIAL if !32BIT + select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST + select ARCH_INLINE_READ_LOCK if !PREEMPTION + select ARCH_INLINE_READ_LOCK_BH if !PREEMPTION + select ARCH_INLINE_READ_LOCK_IRQ if !PREEMPTION + select ARCH_INLINE_READ_LOCK_IRQSAVE if !PREEMPTION + select ARCH_INLINE_READ_UNLOCK if !PREEMPTION + select ARCH_INLINE_READ_UNLOCK_BH if !PREEMPTION + select ARCH_INLINE_READ_UNLOCK_IRQ if !PREEMPTION + select ARCH_INLINE_READ_UNLOCK_IRQRESTORE if !PREEMPTION + select ARCH_INLINE_WRITE_LOCK if !PREEMPTION + select ARCH_INLINE_WRITE_LOCK_BH if !PREEMPTION + select ARCH_INLINE_WRITE_LOCK_IRQ if !PREEMPTION + select ARCH_INLINE_WRITE_LOCK_IRQSAVE if !PREEMPTION + select ARCH_INLINE_WRITE_UNLOCK if !PREEMPTION + select ARCH_INLINE_WRITE_UNLOCK_BH if !PREEMPTION + select ARCH_INLINE_WRITE_UNLOCK_IRQ if !PREEMPTION + select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE if !PREEMPTION + select ARCH_INLINE_SPIN_TRYLOCK if !PREEMPTION + select ARCH_INLINE_SPIN_TRYLOCK_BH if !PREEMPTION + select ARCH_INLINE_SPIN_LOCK if !PREEMPTION + select ARCH_INLINE_SPIN_LOCK_BH if !PREEMPTION + select ARCH_INLINE_SPIN_LOCK_IRQ if !PREEMPTION + select ARCH_INLINE_SPIN_LOCK_IRQSAVE if !PREEMPTION + select ARCH_INLINE_SPIN_UNLOCK if !PREEMPTION + select ARCH_INLINE_SPIN_UNLOCK_BH if !PREEMPTION + select ARCH_INLINE_SPIN_UNLOCK_IRQ if !PREEMPTION + select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE if !PREEMPTION + select ARCH_SUPPORTS_ACPI + select ARCH_USE_BUILTIN_BSWAP + select ARCH_USE_CMPXCHG_LOCKREF if 64BIT + select ARCH_USE_QUEUED_RWLOCKS + select ARCH_USE_QUEUED_SPINLOCKS + select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT + select BUILDTIME_TABLE_SORT + select COMMON_CLK + select GENERIC_ATOMIC64 if !64BIT + select GENERIC_CLOCKEVENTS + select GENERIC_CMOS_UPDATE + select GENERIC_CPU_AUTOPROBE + select GENERIC_ENTRY + select GENERIC_FIND_FIRST_BIT + select GENERIC_GETTIMEOFDAY + select GENERIC_IRQ_MULTI_HANDLER + select GENERIC_IRQ_PROBE + select GENERIC_IRQ_SHOW + select GENERIC_LIB_ASHLDI3 + select GENERIC_LIB_ASHRDI3 + select GENERIC_LIB_CMPDI2 + select GENERIC_LIB_LSHRDI3 + select GENERIC_LIB_UCMPDI2 + select GENERIC_PCI_IOMAP + select GENERIC_SCHED_CLOCK + select GENERIC_TIME_VSYSCALL + select HANDLE_DOMAIN_IRQ + select HAVE_ARCH_AUDITSYSCALL + select HAVE_ARCH_COMPILER_H + select HAVE_ARCH_MMAP_RND_BITS if MMU + select HAVE_ARCH_SECCOMP_FILTER + select HAVE_ARCH_TRACEHOOK + select HAVE_ARCH_TRANSPARENT_HUGEPAGE + select HAVE_ASM_MODVERSIONS + select HAVE_CONTEXT_TRACKING + select HAVE_COPY_THREAD_TLS + select HAVE_DEBUG_KMEMLEAK + select HAVE_DEBUG_STACKOVERFLOW + select HAVE_DMA_CONTIGUOUS + select HAVE_EXIT_THREAD + select HAVE_FAST_GUP + select HAVE_FUTEX_CMPXCHG if FUTEX + select HAVE_GENERIC_VDSO + select HAVE_IOREMAP_PROT + select HAVE_IRQ_EXIT_ON_IRQ_STACK + select HAVE_IRQ_TIME_ACCOUNTING + select HAVE_MEMBLOCK + select HAVE_MEMBLOCK_NODE_MAP + select HAVE_MOD_ARCH_SPECIFIC + select HAVE_NMI + select HAVE_PERF_EVENTS + select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_RSEQ + select HAVE_SYSCALL_TRACEPOINTS + select HAVE_TIF_NOHZ + select HAVE_VIRT_CPU_ACCOUNTING_GEN if 64BIT + select IRQ_FORCED_THREADING + select MODULES_USE_ELF_RELA if MODULES && 64BIT + select MODULES_USE_ELF_REL if MODULES + select PCI_DOMAINS_GENERIC if PCI + select PCI_ECAM if ACPI + select PCI_MSI_ARCH_FALLBACKS + select PERF_USE_VMALLOC + select RTC_LIB + select SYSCTL_EXCEPTION_TRACE + +menu "Machine selection" + +choice + prompt "System type" + default MACH_LOONGSON64 + +config MACH_LOONGSON64 + bool "Loongson 64-bit family of machines" + select ARCH_SPARSEMEM_ENABLE + select ARCH_MIGHT_HAVE_PC_PARPORT + select ARCH_MIGHT_HAVE_PC_SERIO + select HAVE_PCI + select PCI + select IRQ_LOONGARCH_CPU + select NR_CPUS_DEFAULT_4 + select SPARSE_IRQ + select SYS_HAS_CPU_LOONGSON64 + select SYS_SUPPORTS_64BIT_KERNEL + select ZONE_DMA32 + help + This enables the support of Loongson 64-bit family of machines. These + machines are based on new Loongson-3 processors (Old Loongson is MIPS + compatible, while new Loongson is based on LoongArch ISA). + +endchoice + +endmenu + +config GENERIC_HWEIGHT + bool + default y + +config GENERIC_CALIBRATE_DELAY + bool + default y + +config SCHED_OMIT_FRAME_POINTER + bool + default y + +config GENERIC_CSUM + def_bool y + +config SYS_SUPPORTS_HUGETLBFS + def_bool y + +config L1_CACHE_SHIFT + int + default "6" + +menu "CPU selection" + +choice + prompt "CPU type" + default CPU_LOONGSON64 + +config CPU_LOONGSON64 + bool "Loongson 64-bit CPU" + depends on SYS_HAS_CPU_LOONGSON64 + select CPU_SUPPORTS_64BIT_KERNEL + select GPIOLIB + select SWIOTLB + select ARCH_SUPPORTS_ATOMIC_RMW + help + The Loongson 64-bit processor implements the LoongArch64 (the 64-bit + version of LoongArch) instruction set. + +endchoice + +config SYS_HAS_CPU_LOONGSON64 + bool + +endmenu + +config SYS_SUPPORTS_32BIT_KERNEL + bool +config SYS_SUPPORTS_64BIT_KERNEL + bool +config CPU_SUPPORTS_32BIT_KERNEL + bool +config CPU_SUPPORTS_64BIT_KERNEL + bool + +menu "Kernel type" + +choice + prompt "Kernel code model" + help + You should only select this option if you have a workload that + actually benefits from 64-bit processing or if your machine has + large memory. You will only be presented a single option in this + menu if your system does not support both 32-bit and 64-bit kernels. + +config 32BIT + bool "32-bit kernel" + depends on CPU_SUPPORTS_32BIT_KERNEL && SYS_SUPPORTS_32BIT_KERNEL + help + Select this option if you want to build a 32-bit kernel. + +config 64BIT + bool "64-bit kernel" + depends on CPU_SUPPORTS_64BIT_KERNEL && SYS_SUPPORTS_64BIT_KERNEL + help + Select this option if you want to build a 64-bit kernel. + +endchoice + +config PAGE_SIZE_4KB + bool + +config PAGE_SIZE_16KB + bool + +config PAGE_SIZE_64KB + bool + +config PGTABLE_2LEVEL + bool + +config PGTABLE_3LEVEL + bool + +config PGTABLE_4LEVEL + bool + +config PGTABLE_LEVELS + int + default 2 if PGTABLE_2LEVEL + default 3 if PGTABLE_3LEVEL + default 4 if PGTABLE_4LEVEL + +choice + prompt "Page Table Layout" + default 16KB_2LEVEL if 32BIT + default 16KB_3LEVEL if 64BIT + help + Allows choosing the page table layout, which is a combination + of page size and page table levels. The virtual memory address + space bits are determined by the page table layout. + +config 4KB_3LEVEL + bool "4KB with 3 levels" + select PAGE_SIZE_4KB + select PGTABLE_3LEVEL + help + This option selects 4KB page size with 3 level page tables, which + support a maximum 39 bits of application virtual memory. + +config 4KB_4LEVEL + bool "4KB with 4 levels" + select PAGE_SIZE_4KB + select PGTABLE_4LEVEL + help + This option selects 4KB page size with 4 level page tables, which + support a maximum 48 bits of application virtual memory. + +config 16KB_2LEVEL + bool "16KB with 2 levels" + select PAGE_SIZE_16KB + select PGTABLE_2LEVEL + help + This option selects 16KB page size with 2 level page tables, which + support a maximum 36 bits of application virtual memory. + +config 16KB_3LEVEL + bool "16KB with 3 levels" + select PAGE_SIZE_16KB + select PGTABLE_3LEVEL + help + This option selects 16KB page size with 3 level page tables, which + support a maximum 47 bits of application virtual memory. + +config 64KB_2LEVEL + bool "64KB with 2 levels" + select PAGE_SIZE_64KB + select PGTABLE_2LEVEL + help + This option selects 64KB page size with 2 level page tables, which + support a maximum 42 bits of application virtual memory. + +config 64KB_3LEVEL + bool "64KB with 3 levels" + select PAGE_SIZE_64KB + select PGTABLE_3LEVEL + help + This option selects 64KB page size with 3 level page tables, which + support a maximum 55 bits of application virtual memory. + +endchoice + +config FORCE_MAX_ZONEORDER + int "Maximum zone order" + range 14 64 if PAGE_SIZE_64KB + default "14" if PAGE_SIZE_64KB + range 12 64 if PAGE_SIZE_16KB + default "12" if PAGE_SIZE_16KB + range 11 64 + default "11" + help + The kernel memory allocator divides physically contiguous memory + blocks into "zones", where each zone is a power of two number of + pages. This option selects the largest power of two that the kernel + keeps in the memory allocator. If you need to allocate very large + blocks of physically contiguous memory, then you may need to + increase this value. + + This config option is actually maximum order plus one. For example, + a value of 11 means that the largest free memory block is 2^10 pages. + + The page size is not necessarily 4KB. Keep this in mind + when choosing a value for this option. + +config CPU_HAS_PREFETCH + bool + default y + +config CPU_HAS_FPU + bool + default y + +config ARCH_SELECT_MEMORY_MODEL + def_bool y + +config ARCH_FLATMEM_ENABLE + def_bool y + +config ARCH_SPARSEMEM_ENABLE + def_bool y + help + Say Y to support efficient handling of sparse physical memory, + for architectures which are either NUMA (Non-Uniform Memory Access) + or have huge holes in the physical address space for other reasons. + See for more. + +config DMI + bool "Enable DMI scanning" + select DMI_SCAN_MACHINE_NON_EFI_FALLBACK + default y + help + Enabled scanning of DMI to identify machine quirks. Say Y + here unless you have verified that your setup is not + affected by entries in the DMI blacklist. Required by PNP + BIOS code. + +config EFI + bool "EFI runtime service support" + select UCS2_STRING + select EFI_RUNTIME_WRAPPERS + help + This enables the kernel to use EFI runtime services that are + available (such as the EFI variable services). + + This option is only useful on systems that have EFI firmware. + In addition, you should use the latest ELILO loader available + at in order to take advantage + of EFI runtime services. However, even with this option, the + resultant kernel should continue to boot on existing non-EFI + platforms. + +source "kernel/Kconfig.hz" + +config SECCOMP + bool "Enable seccomp to safely compute untrusted bytecode" + depends on PROC_FS + default y + help + This kernel feature is useful for number crunching applications + that may need to compute untrusted bytecode during their + execution. By using pipes or other transports made available to + the process as file descriptors supporting the read/write + syscalls, it's possible to isolate those applications in + their own address space using seccomp. Once seccomp is + enabled via /proc//seccomp, it cannot be disabled + and the task is only allowed to execute a few safe syscalls + defined by each seccomp mode. + + If unsure, say Y. Only embedded should say N here. + +endmenu + +config ARCH_ENABLE_MEMORY_HOTPLUG + def_bool y + depends on LOONGARCH + +config ARCH_ENABLE_MEMORY_HOTREMOVE + def_bool y + depends on MEMORY_HOTPLUG + +config ARCH_ENABLE_THP_MIGRATION + def_bool y + depends on TRANSPARENT_HUGEPAGE + +config ARCH_MEMORY_PROBE + def_bool y + depends on MEMORY_HOTPLUG + +config LOCKDEP_SUPPORT + bool + default y + +config TRACE_IRQFLAGS_SUPPORT + bool + default y + +config MMU + bool + default y + +config ARCH_MMAP_RND_BITS_MIN + default 12 if 64BIT + default 8 + +config ARCH_MMAP_RND_BITS_MAX + default 18 if 64BIT + default 15 + +config ZONE_DMA + bool + +config ZONE_DMA32 + bool + +menu "Bus options" + +endmenu + +menu "Power management options" + +source "drivers/acpi/Kconfig" + +endmenu + +source "drivers/firmware/Kconfig" diff --git a/arch/loongarch/Kconfig.debug b/arch/loongarch/Kconfig.debug new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile new file mode 100644 index 000000000000..a6e609d392ff --- /dev/null +++ b/arch/loongarch/Makefile @@ -0,0 +1,123 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Author: Huacai Chen +# Copyright (C) 2020 Loongson Technology Corporation Limited + +# +# Select the object file format to substitute into the linker script. +# +64bit-tool-archpref = loongarch64 +32bit-bfd = elf32-loongarch +64bit-bfd = elf64-loongarch +32bit-emul = elf32loongarch +64bit-emul = elf64loongarch + +ifdef CONFIG_64BIT +tool-archpref = $(64bit-tool-archpref) +UTS_MACHINE := loongarch64 +endif + +ifneq ($(SUBARCH),$(ARCH)) + ifeq ($(CROSS_COMPILE),) + CROSS_COMPILE := $(call cc-cross-prefix, $(tool-archpref)-linux- $(tool-archpref)-linux-gnu- $(tool-archpref)-unknown-linux-gnu-) + endif +endif + +cflags-y += $(call cc-option, -mno-check-zero-division) + +ifdef CONFIG_64BIT +ld-emul = $(64bit-emul) +cflags-y += -mabi=lp64 +endif + +all-y := vmlinux + +# +# GCC uses -G0 -mabicalls -fpic as default. We don't want PIC in the kernel +# code since it only slows down the whole thing. At some point we might make +# use of global pointer optimizations but their use of $r2 conflicts with +# the current pointer optimization. +# +cflags-y += -G0 -pipe +cflags-y += -msoft-float +LDFLAGS_vmlinux += -G0 -static -n -nostdlib +KBUILD_AFLAGS_KERNEL += -Wa,-mla-global-with-pcrel +KBUILD_CFLAGS_KERNEL += -Wa,-mla-global-with-pcrel +KBUILD_AFLAGS_MODULE += -Wa,-mla-global-with-abs +KBUILD_CFLAGS_MODULE += -fno-plt -Wa,-mla-global-with-abs,-mla-local-with-abs + +cflags-y += -ffreestanding + +# +# Board-dependent options and extra files +# +include arch/loongarch/Kbuild.platforms + +ifdef CONFIG_PHYSICAL_START +load-y = $(CONFIG_PHYSICAL_START) +endif + +drivers-$(CONFIG_PCI) += arch/loongarch/pci/ + +KBUILD_AFLAGS += $(cflags-y) +KBUILD_CFLAGS += $(cflags-y) +KBUILD_CPPFLAGS += -DVMLINUX_LOAD_ADDRESS=$(load-y) + +bootvars-y = VMLINUX_LOAD_ADDRESS=$(load-y) PLATFORM="$(platform-y)" + +ifdef CONFIG_64BIT +bootvars-y += ADDR_BITS=64 +endif + +# This is required to get dwarf unwinding tables into .debug_frame +# instead of .eh_frame so we don't discard them. +KBUILD_CFLAGS += -fno-asynchronous-unwind-tables +KBUILD_CFLAGS += $(call cc-option,-mstrict-align) + +KBUILD_LDFLAGS += -m $(ld-emul) + +ifdef CONFIG_LOONGARCH +CHECKFLAGS += $(shell $(CC) $(KBUILD_CFLAGS) -dM -E -x c /dev/null | \ + egrep -vw '__GNUC_(MINOR_|PATCHLEVEL_)?_' | \ + sed -e "s/^\#define /-D'/" -e "s/ /'='/" -e "s/$$/'/" -e 's/\$$/&&/g') +endif + +head-y := arch/loongarch/kernel/head.o + +libs-y += arch/loongarch/lib/ + +# See arch/loongarch/Kbuild for content of core part of the kernel +core-y += arch/loongarch/ + +ifeq ($(KBUILD_EXTMOD),) +prepare: vdso_prepare +vdso_prepare: prepare0 + $(Q)$(MAKE) $(build)=arch/loongarch/vdso include/generated/vdso-offsets.h +endif + +PHONY += vdso_install +vdso_install: + $(Q)$(MAKE) $(build)=arch/loongarch/vdso $@ + +# boot image targets (arch/loongarch/boot/) +boot-y := vmlinux.bin + +all: $(all-y) + +# boot +$(boot-y): vmlinux FORCE + $(Q)$(MAKE) $(build)=arch/loongarch/boot VMLINUX=vmlinux \ + $(bootvars-y) arch/loongarch/boot/$@ + +CLEAN_FILES += vmlinux + +install: + $(Q)install -D -m 755 vmlinux $(INSTALL_PATH)/vmlinux-$(KERNELRELEASE) + $(Q)install -D -m 644 .config $(INSTALL_PATH)/config-$(KERNELRELEASE) + $(Q)install -D -m 644 System.map $(INSTALL_PATH)/System.map-$(KERNELRELEASE) + +define archhelp + echo ' install - install kernel into $(INSTALL_PATH)' + echo ' vmlinux.bin - Raw binary boot image' + echo +endef diff --git a/arch/loongarch/include/asm/Kbuild b/arch/loongarch/include/asm/Kbuild new file mode 100644 index 000000000000..0c311f3098eb --- /dev/null +++ b/arch/loongarch/include/asm/Kbuild @@ -0,0 +1,30 @@ +# SPDX-License-Identifier: GPL-2.0 +generic-y += dma-contiguous.h +generic-y += export.h +generic-y += mcs_spinlock.h +generic-y += parport.h +generic-y += early_ioremap.h +generic-y += qrwlock.h +generic-y += qspinlock.h +generic-y += rwsem.h +generic-y += segment.h +generic-y += user.h +generic-y += stat.h +generic-y += fcntl.h +generic-y += ioctl.h +generic-y += ioctls.h +generic-y += mman.h +generic-y += msgbuf.h +generic-y += sembuf.h +generic-y += shmbuf.h +generic-y += statfs.h +generic-y += socket.h +generic-y += sockios.h +generic-y += termios.h +generic-y += termbits.h +generic-y += poll.h +generic-y += param.h +generic-y += posix_types.h +generic-y += resource.h +generic-y += seccomp.h +generic-y += kvm_para.h diff --git a/arch/loongarch/include/asm/acenv.h b/arch/loongarch/include/asm/acenv.h new file mode 100644 index 000000000000..daff39155fac --- /dev/null +++ b/arch/loongarch/include/asm/acenv.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * LoongArch specific ACPICA environments and implementation + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + * Author: lvjianmin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _ASM_LOONGARCH_ACENV_H +#define _ASM_LOONGARCH_ACENV_H + +/* The head file is required by ACPI core, but we have nothing to fill + * it now, update it later when needed. + */ + +#endif /* _ASM_LOONGARCH_ACENV_H */ diff --git a/arch/loongarch/include/asm/acpi.h b/arch/loongarch/include/asm/acpi.h new file mode 100644 index 000000000000..978ac2419d70 --- /dev/null +++ b/arch/loongarch/include/asm/acpi.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + * Author: Jianmin Lv + * Huacai Chen + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _ASM_LOONGARCH_ACPI_H +#define _ASM_LOONGARCH_ACPI_H + +#ifdef CONFIG_ACPI +extern int acpi_strict; +extern int acpi_disabled; +extern int acpi_pci_disabled; +extern int acpi_noirq; + +#define acpi_os_ioremap acpi_os_ioremap +void __init __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size); + +static inline void disable_acpi(void) +{ + acpi_disabled = 1; + acpi_pci_disabled = 1; + acpi_noirq = 1; +} + +static inline bool acpi_has_cpu_in_madt(void) +{ + return true; +} + +extern struct list_head acpi_wakeup_device_list; + +#endif /* !CONFIG_ACPI */ + +#define ACPI_TABLE_UPGRADE_MAX_PHYS ARCH_LOW_ADDRESS_LIMIT + +#endif /* _ASM_LOONGARCH_ACPI_H */ diff --git a/arch/loongarch/include/asm/addrspace.h b/arch/loongarch/include/asm/addrspace.h new file mode 100644 index 000000000000..f17e6c1cbc0c --- /dev/null +++ b/arch/loongarch/include/asm/addrspace.h @@ -0,0 +1,110 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#ifndef _ASM_ADDRSPACE_H +#define _ASM_ADDRSPACE_H + +#include + +#include + +/* + * This gives the physical RAM offset. + */ +#ifndef __ASSEMBLY__ +#ifndef PHYS_OFFSET +#define PHYS_OFFSET _AC(0, UL) +#endif +extern unsigned long vm_map_base; +#endif /* __ASSEMBLY__ */ + +#ifndef IO_BASE +#define IO_BASE CSR_DMW0_BASE +#endif + +#ifndef CAC_BASE +#define CAC_BASE CSR_DMW1_BASE +#endif + +#ifndef UNCAC_BASE +#define UNCAC_BASE CSR_DMW0_BASE +#endif + +#define DMW_PABITS 48 +#define TO_PHYS_MASK ((1ULL << DMW_PABITS) - 1) + +/* + * Memory above this physical address will be considered highmem. + */ +#ifndef HIGHMEM_START +#define HIGHMEM_START (_AC(1, UL) << _AC(DMW_PABITS, UL)) +#endif + +#define TO_PHYS(x) (((x) & TO_PHYS_MASK)) +#define TO_CAC(x) (CAC_BASE | ((x) & TO_PHYS_MASK)) +#define TO_UNCAC(x) (UNCAC_BASE | ((x) & TO_PHYS_MASK)) + +/* + * This handles the memory map. + */ +#ifndef PAGE_OFFSET +#define PAGE_OFFSET (CAC_BASE + PHYS_OFFSET) +#endif + +#ifndef FIXADDR_TOP +#define FIXADDR_TOP ((unsigned long)(long)(int)0xfffe0000) +#endif + +/* + * Configure language + */ +#ifdef __ASSEMBLY__ +#define _ATYPE_ +#define _ATYPE32_ +#define _ATYPE64_ +#define _CONST64_(x) x +#else +#define _ATYPE_ __PTRDIFF_TYPE__ +#define _ATYPE32_ int +#define _ATYPE64_ __s64 +#ifdef CONFIG_64BIT +#define _CONST64_(x) x ## L +#else +#define _CONST64_(x) x ## LL +#endif +#endif + +/* + * 32/64-bit LoongArch address spaces + */ +#ifdef __ASSEMBLY__ +#define _ACAST32_ +#define _ACAST64_ +#else +#define _ACAST32_ ((_ATYPE_)(_ATYPE32_)) /* widen if necessary */ +#define _ACAST64_ (_ATYPE64_) /* do _not_ narrow */ +#endif + +#ifdef CONFIG_32BIT + +#define UVRANGE 0x00000000 +#define KPRANGE0 0x80000000 +#define KPRANGE1 0xa0000000 +#define KVRANGE 0xc0000000 + +#else + +#define XUVRANGE _CONST64_(0x0000000000000000) +#define XSPRANGE _CONST64_(0x4000000000000000) +#define XKPRANGE _CONST64_(0x8000000000000000) +#define XKVRANGE _CONST64_(0xc000000000000000) + +#endif + +/* + * Returns the physical address of a KPRANGEx / XKPRANGE address + */ +#define PHYSADDR(a) ((_ACAST64_(a)) & TO_PHYS_MASK) + +#endif /* _ASM_ADDRSPACE_H */ diff --git a/arch/loongarch/include/asm/asm-offsets.h b/arch/loongarch/include/asm/asm-offsets.h new file mode 100644 index 000000000000..b49215b280fe --- /dev/null +++ b/arch/loongarch/include/asm/asm-offsets.h @@ -0,0 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#include diff --git a/arch/loongarch/include/asm/asm-prototypes.h b/arch/loongarch/include/asm/asm-prototypes.h new file mode 100644 index 000000000000..a63d09f8ee86 --- /dev/null +++ b/arch/loongarch/include/asm/asm-prototypes.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include +#include +#include +#include +#include +#include diff --git a/arch/loongarch/include/asm/asm.h b/arch/loongarch/include/asm/asm.h new file mode 100644 index 000000000000..d0ead65fa7fe --- /dev/null +++ b/arch/loongarch/include/asm/asm.h @@ -0,0 +1,190 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Some useful macros for LoongArch assembler code + * + * Copyright (C) 2020-2021 Loongson Technology Corporation Limited + * + * Derived from MIPS: + * Copyright (C) 1995, 1996, 1997, 1999, 2001 by Ralf Baechle + * Copyright (C) 1999 by Silicon Graphics, Inc. + * Copyright (C) 2001 MIPS Technologies, Inc. + * Copyright (C) 2002 Maciej W. Rozycki + */ +#ifndef __ASM_ASM_H +#define __ASM_ASM_H + +/* LoongArch pref instruction. */ +#ifdef CONFIG_CPU_HAS_PREFETCH + +#define PREF(hint, addr, offs) \ + preld hint, addr, offs; \ + +#define PREFX(hint, addr, index) \ + preldx hint, addr, index; \ + +#else /* !CONFIG_CPU_HAS_PREFETCH */ + +#define PREF(hint, addr, offs) +#define PREFX(hint, addr, index) + +#endif /* !CONFIG_CPU_HAS_PREFETCH */ + +/* + * Stack alignment + */ +#define ALSZ 0xf +#define ALMASK ~ALSZ + +/* + * Macros to handle different pointer/register sizes for 32/64-bit code + */ + +/* + * Size of a register + */ +#ifndef __loongarch64 +#define SZREG 4 +#else +#define SZREG 8 +#endif + +/* + * Use the following macros in assemblercode to load/store registers, + * pointers etc. + */ +#if (SZREG == 4) +#define REG_L ld.w +#define REG_S st.w +#define REG_ADDU add.w +#define REG_SUBU sub.w +#else /* SZREG == 8 */ +#define REG_L ld.d +#define REG_S st.d +#define REG_ADDU add.d +#define REG_SUBU sub.d +#endif + +/* + * How to add/sub/load/store/shift C int variables. + */ +#if (_LOONGARCH_SZINT == 32) +#define INT_ADDU add.w +#define INT_ADDIU addi.w +#define INT_SUBU sub.w +#define INT_L ld.w +#define INT_S st.w +#define INT_SLL slli.w +#define INT_SLLV sll.w +#define INT_SRL srli.w +#define INT_SRLV srl.w +#define INT_SRA srai.w +#define INT_SRAV sra.w +#endif + +#if (_LOONGARCH_SZINT == 64) +#define INT_ADDU add.d +#define INT_ADDIU addi.d +#define INT_SUBU sub.d +#define INT_L ld.d +#define INT_S st.d +#define INT_SLL slli.d +#define INT_SLLV sll.d +#define INT_SRL srli.d +#define INT_SRLV srl.d +#define INT_SRA sra.w +#define INT_SRAV sra.d +#endif + +/* + * How to add/sub/load/store/shift C long variables. + */ +#if (_LOONGARCH_SZLONG == 32) +#define LONG_ADDU add.w +#define LONG_ADDIU addi.w +#define LONG_SUBU sub.w +#define LONG_L ld.w +#define LONG_S st.w +#define LONG_SP swp +#define LONG_SLL slli.w +#define LONG_SLLV sll.w +#define LONG_SRL srli.w +#define LONG_SRLV srl.w +#define LONG_SRA srai.w +#define LONG_SRAV sra.w + +#ifdef __ASSEMBLY__ +#define LONG .word +#endif +#define LONGSIZE 4 +#define LONGMASK 3 +#define LONGLOG 2 +#endif + +#if (_LOONGARCH_SZLONG == 64) +#define LONG_ADDU add.d +#define LONG_ADDIU addi.d +#define LONG_SUBU sub.d +#define LONG_L ld.d +#define LONG_S st.d +#define LONG_SP sdp +#define LONG_SLL slli.d +#define LONG_SLLV sll.d +#define LONG_SRL srli.d +#define LONG_SRLV srl.d +#define LONG_SRA sra.w +#define LONG_SRAV sra.d + +#ifdef __ASSEMBLY__ +#define LONG .dword +#endif +#define LONGSIZE 8 +#define LONGMASK 7 +#define LONGLOG 3 +#endif + +/* + * How to add/sub/load/store/shift pointers. + */ +#if (_LOONGARCH_SZPTR == 32) +#define PTR_ADDU add.w +#define PTR_ADDIU addi.w +#define PTR_SUBU sub.w +#define PTR_L ld.w +#define PTR_S st.w +#define PTR_LI li.w +#define PTR_SLL slli.w +#define PTR_SLLV sll.w +#define PTR_SRL srli.w +#define PTR_SRLV srl.w +#define PTR_SRA srai.w +#define PTR_SRAV sra.w + +#define PTR_SCALESHIFT 2 + +#define PTR .word +#define PTRSIZE 4 +#define PTRLOG 2 +#endif + +#if (_LOONGARCH_SZPTR == 64) +#define PTR_ADDU add.d +#define PTR_ADDIU addi.d +#define PTR_SUBU sub.d +#define PTR_L ld.d +#define PTR_S st.d +#define PTR_LI li.d +#define PTR_SLL slli.d +#define PTR_SLLV sll.d +#define PTR_SRL srli.d +#define PTR_SRLV srl.d +#define PTR_SRA srai.d +#define PTR_SRAV sra.d + +#define PTR_SCALESHIFT 3 + +#define PTR .dword +#define PTRSIZE 8 +#define PTRLOG 3 +#endif + +#endif /* __ASM_ASM_H */ diff --git a/arch/loongarch/include/asm/asmmacro-64.h b/arch/loongarch/include/asm/asmmacro-64.h new file mode 100644 index 000000000000..90843d1002d9 --- /dev/null +++ b/arch/loongarch/include/asm/asmmacro-64.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * asmmacro.h: Assembler macros to make things easier to read. + * + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#ifndef _ASM_ASMMACRO_64_H +#define _ASM_ASMMACRO_64_H + +#include +#include +#include +#include + + .macro cpu_save_nonscratch thread + stptr.d s0, \thread, THREAD_REG23 + stptr.d s1, \thread, THREAD_REG24 + stptr.d s2, \thread, THREAD_REG25 + stptr.d s3, \thread, THREAD_REG26 + stptr.d s4, \thread, THREAD_REG27 + stptr.d s5, \thread, THREAD_REG28 + stptr.d s6, \thread, THREAD_REG29 + stptr.d s7, \thread, THREAD_REG30 + stptr.d s8, \thread, THREAD_REG31 + stptr.d sp, \thread, THREAD_REG03 + stptr.d fp, \thread, THREAD_REG22 + .endm + + .macro cpu_restore_nonscratch thread + ldptr.d s0, \thread, THREAD_REG23 + ldptr.d s1, \thread, THREAD_REG24 + ldptr.d s2, \thread, THREAD_REG25 + ldptr.d s3, \thread, THREAD_REG26 + ldptr.d s4, \thread, THREAD_REG27 + ldptr.d s5, \thread, THREAD_REG28 + ldptr.d s6, \thread, THREAD_REG29 + ldptr.d s7, \thread, THREAD_REG30 + ldptr.d s8, \thread, THREAD_REG31 + ldptr.d sp, \thread, THREAD_REG03 + ldptr.d fp, \thread, THREAD_REG22 + ldptr.d ra, \thread, THREAD_REG01 + .endm + +#endif /* _ASM_ASMMACRO_64_H */ diff --git a/arch/loongarch/include/asm/asmmacro.h b/arch/loongarch/include/asm/asmmacro.h new file mode 100644 index 000000000000..a88d05a271f4 --- /dev/null +++ b/arch/loongarch/include/asm/asmmacro.h @@ -0,0 +1,277 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_ASMMACRO_H +#define _ASM_ASMMACRO_H + +#include + +#ifdef CONFIG_32BIT +#include +#endif +#ifdef CONFIG_64BIT +#include +#endif + +#undef v0 +#undef v1 + + .macro parse_v var val + \var = \val + .endm + + .macro parse_r var r + \var = -1 + .ifc \r, $r0 + \var = 0 + .endif + .ifc \r, $r1 + \var = 1 + .endif + .ifc \r, $r2 + \var = 2 + .endif + .ifc \r, $r3 + \var = 3 + .endif + .ifc \r, $r4 + \var = 4 + .endif + .ifc \r, $r5 + \var = 5 + .endif + .ifc \r, $r6 + \var = 6 + .endif + .ifc \r, $r7 + \var = 7 + .endif + .ifc \r, $r8 + \var = 8 + .endif + .ifc \r, $r9 + \var = 9 + .endif + .ifc \r, $r10 + \var = 10 + .endif + .ifc \r, $r11 + \var = 11 + .endif + .ifc \r, $r12 + \var = 12 + .endif + .ifc \r, $r13 + \var = 13 + .endif + .ifc \r, $r14 + \var = 14 + .endif + .ifc \r, $r15 + \var = 15 + .endif + .ifc \r, $r16 + \var = 16 + .endif + .ifc \r, $r17 + \var = 17 + .endif + .ifc \r, $r18 + \var = 18 + .endif + .ifc \r, $r19 + \var = 19 + .endif + .ifc \r, $r20 + \var = 20 + .endif + .ifc \r, $r21 + \var = 21 + .endif + .ifc \r, $r22 + \var = 22 + .endif + .ifc \r, $r23 + \var = 23 + .endif + .ifc \r, $r24 + \var = 24 + .endif + .ifc \r, $r25 + \var = 25 + .endif + .ifc \r, $r26 + \var = 26 + .endif + .ifc \r, $r27 + \var = 27 + .endif + .ifc \r, $r28 + \var = 28 + .endif + .ifc \r, $r29 + \var = 29 + .endif + .ifc \r, $r30 + \var = 30 + .endif + .ifc \r, $r31 + \var = 31 + .endif + .iflt \var + .error "Unable to parse register name \r" + .endif + .endm + + .macro fpu_save_csr thread tmp + movfcsr2gr \tmp, fcsr0 + stptr.w \tmp, \thread, THREAD_FCSR + .endm + + .macro fpu_restore_csr thread tmp + ldptr.w \tmp, \thread, THREAD_FCSR + movgr2fcsr fcsr0, \tmp + .endm + + .macro fpu_save_cc thread tmp0 tmp1 + movcf2gr \tmp0, $fcc0 + move \tmp1, \tmp0 + movcf2gr \tmp0, $fcc1 + bstrins.d \tmp1, \tmp0, 15, 8 + movcf2gr \tmp0, $fcc2 + bstrins.d \tmp1, \tmp0, 23, 16 + movcf2gr \tmp0, $fcc3 + bstrins.d \tmp1, \tmp0, 31, 24 + movcf2gr \tmp0, $fcc4 + bstrins.d \tmp1, \tmp0, 39, 32 + movcf2gr \tmp0, $fcc5 + bstrins.d \tmp1, \tmp0, 47, 40 + movcf2gr \tmp0, $fcc6 + bstrins.d \tmp1, \tmp0, 55, 48 + movcf2gr \tmp0, $fcc7 + bstrins.d \tmp1, \tmp0, 63, 56 + stptr.d \tmp1, \thread, THREAD_FCC + .endm + + .macro fpu_restore_cc thread tmp0 tmp1 + ldptr.d \tmp0, \thread, THREAD_FCC + bstrpick.d \tmp1, \tmp0, 7, 0 + movgr2cf $fcc0, \tmp1 + bstrpick.d \tmp1, \tmp0, 15, 8 + movgr2cf $fcc1, \tmp1 + bstrpick.d \tmp1, \tmp0, 23, 16 + movgr2cf $fcc2, \tmp1 + bstrpick.d \tmp1, \tmp0, 31, 24 + movgr2cf $fcc3, \tmp1 + bstrpick.d \tmp1, \tmp0, 39, 32 + movgr2cf $fcc4, \tmp1 + bstrpick.d \tmp1, \tmp0, 47, 40 + movgr2cf $fcc5, \tmp1 + bstrpick.d \tmp1, \tmp0, 55, 48 + movgr2cf $fcc6, \tmp1 + bstrpick.d \tmp1, \tmp0, 63, 56 + movgr2cf $fcc7, \tmp1 + .endm + + .macro fpu_save_double thread tmp + li.w \tmp, THREAD_FPR0 + PTR_ADDU \tmp, \tmp, \thread + fst.d $f0, \tmp, THREAD_FPR0 - THREAD_FPR0 + fst.d $f1, \tmp, THREAD_FPR1 - THREAD_FPR0 + fst.d $f2, \tmp, THREAD_FPR2 - THREAD_FPR0 + fst.d $f3, \tmp, THREAD_FPR3 - THREAD_FPR0 + fst.d $f4, \tmp, THREAD_FPR4 - THREAD_FPR0 + fst.d $f5, \tmp, THREAD_FPR5 - THREAD_FPR0 + fst.d $f6, \tmp, THREAD_FPR6 - THREAD_FPR0 + fst.d $f7, \tmp, THREAD_FPR7 - THREAD_FPR0 + fst.d $f8, \tmp, THREAD_FPR8 - THREAD_FPR0 + fst.d $f9, \tmp, THREAD_FPR9 - THREAD_FPR0 + fst.d $f10, \tmp, THREAD_FPR10 - THREAD_FPR0 + fst.d $f11, \tmp, THREAD_FPR11 - THREAD_FPR0 + fst.d $f12, \tmp, THREAD_FPR12 - THREAD_FPR0 + fst.d $f13, \tmp, THREAD_FPR13 - THREAD_FPR0 + fst.d $f14, \tmp, THREAD_FPR14 - THREAD_FPR0 + fst.d $f15, \tmp, THREAD_FPR15 - THREAD_FPR0 + fst.d $f16, \tmp, THREAD_FPR16 - THREAD_FPR0 + fst.d $f17, \tmp, THREAD_FPR17 - THREAD_FPR0 + fst.d $f18, \tmp, THREAD_FPR18 - THREAD_FPR0 + fst.d $f19, \tmp, THREAD_FPR19 - THREAD_FPR0 + fst.d $f20, \tmp, THREAD_FPR20 - THREAD_FPR0 + fst.d $f21, \tmp, THREAD_FPR21 - THREAD_FPR0 + fst.d $f22, \tmp, THREAD_FPR22 - THREAD_FPR0 + fst.d $f23, \tmp, THREAD_FPR23 - THREAD_FPR0 + fst.d $f24, \tmp, THREAD_FPR24 - THREAD_FPR0 + fst.d $f25, \tmp, THREAD_FPR25 - THREAD_FPR0 + fst.d $f26, \tmp, THREAD_FPR26 - THREAD_FPR0 + fst.d $f27, \tmp, THREAD_FPR27 - THREAD_FPR0 + fst.d $f28, \tmp, THREAD_FPR28 - THREAD_FPR0 + fst.d $f29, \tmp, THREAD_FPR29 - THREAD_FPR0 + fst.d $f30, \tmp, THREAD_FPR30 - THREAD_FPR0 + fst.d $f31, \tmp, THREAD_FPR31 - THREAD_FPR0 + .endm + + .macro fpu_restore_double thread tmp + li.w \tmp, THREAD_FPR0 + PTR_ADDU \tmp, \tmp, \thread + fld.d $f0, \tmp, THREAD_FPR0 - THREAD_FPR0 + fld.d $f1, \tmp, THREAD_FPR1 - THREAD_FPR0 + fld.d $f2, \tmp, THREAD_FPR2 - THREAD_FPR0 + fld.d $f3, \tmp, THREAD_FPR3 - THREAD_FPR0 + fld.d $f4, \tmp, THREAD_FPR4 - THREAD_FPR0 + fld.d $f5, \tmp, THREAD_FPR5 - THREAD_FPR0 + fld.d $f6, \tmp, THREAD_FPR6 - THREAD_FPR0 + fld.d $f7, \tmp, THREAD_FPR7 - THREAD_FPR0 + fld.d $f8, \tmp, THREAD_FPR8 - THREAD_FPR0 + fld.d $f9, \tmp, THREAD_FPR9 - THREAD_FPR0 + fld.d $f10, \tmp, THREAD_FPR10 - THREAD_FPR0 + fld.d $f11, \tmp, THREAD_FPR11 - THREAD_FPR0 + fld.d $f12, \tmp, THREAD_FPR12 - THREAD_FPR0 + fld.d $f13, \tmp, THREAD_FPR13 - THREAD_FPR0 + fld.d $f14, \tmp, THREAD_FPR14 - THREAD_FPR0 + fld.d $f15, \tmp, THREAD_FPR15 - THREAD_FPR0 + fld.d $f16, \tmp, THREAD_FPR16 - THREAD_FPR0 + fld.d $f17, \tmp, THREAD_FPR17 - THREAD_FPR0 + fld.d $f18, \tmp, THREAD_FPR18 - THREAD_FPR0 + fld.d $f19, \tmp, THREAD_FPR19 - THREAD_FPR0 + fld.d $f20, \tmp, THREAD_FPR20 - THREAD_FPR0 + fld.d $f21, \tmp, THREAD_FPR21 - THREAD_FPR0 + fld.d $f22, \tmp, THREAD_FPR22 - THREAD_FPR0 + fld.d $f23, \tmp, THREAD_FPR23 - THREAD_FPR0 + fld.d $f24, \tmp, THREAD_FPR24 - THREAD_FPR0 + fld.d $f25, \tmp, THREAD_FPR25 - THREAD_FPR0 + fld.d $f26, \tmp, THREAD_FPR26 - THREAD_FPR0 + fld.d $f27, \tmp, THREAD_FPR27 - THREAD_FPR0 + fld.d $f28, \tmp, THREAD_FPR28 - THREAD_FPR0 + fld.d $f29, \tmp, THREAD_FPR29 - THREAD_FPR0 + fld.d $f30, \tmp, THREAD_FPR30 - THREAD_FPR0 + fld.d $f31, \tmp, THREAD_FPR31 - THREAD_FPR0 + .endm + +.macro jr dst + jirl zero, \dst, 0 +.endm + +.macro jalr dst + jirl ra, \dst, 0 +.endm + +.macro not dst src + nor \dst, \src, zero +.endm + +.macro bgt r0 r1 label + blt \r1, \r0, \label +.endm + +.macro bltz r0 label + blt \r0, zero, \label +.endm + +.macro bgez r0 label + bge \r0, zero, \label +.endm + +#define v0 $r4 +#define v1 $r5 +#endif /* _ASM_ASMMACRO_H */ diff --git a/arch/loongarch/include/asm/atomic.h b/arch/loongarch/include/asm/atomic.h new file mode 100644 index 000000000000..d2a4f29f9a5e --- /dev/null +++ b/arch/loongarch/include/asm/atomic.h @@ -0,0 +1,368 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Atomic operations that C can't guarantee us. Useful for + * resource counting etc.. + * + * But use these as seldom as possible since they are much more slower + * than regular operations. + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_ATOMIC_H +#define _ASM_ATOMIC_H + +#include +#include +#include +#include +#include + +#if _LOONGARCH_SZLONG == 32 +#define __LL "ll.w " +#define __SC "sc.w " +#define __AMADD "amadd.w " +#define __AMAND_SYNC "amand_db.w " +#define __AMOR_SYNC "amor_db.w " +#define __AMXOR_SYNC "amxor_db.w " +#elif _LOONGARCH_SZLONG == 64 +#define __LL "ll.d " +#define __SC "sc.d " +#define __AMADD "amadd.d " +#define __AMAND_SYNC "amand_db.d " +#define __AMOR_SYNC "amor_db.d " +#define __AMXOR_SYNC "amxor_db.d " +#endif + +#define ATOMIC_INIT(i) { (i) } + +/* + * atomic_read - read atomic variable + * @v: pointer of type atomic_t + * + * Atomically reads the value of @v. + */ +#define atomic_read(v) READ_ONCE((v)->counter) + +/* + * atomic_set - set atomic variable + * @v: pointer of type atomic_t + * @i: required value + * + * Atomically sets the value of @v to @i. + */ +#define atomic_set(v, i) WRITE_ONCE((v)->counter, (i)) + +#define ATOMIC_OP(op, I, asm_op) \ +static inline void atomic_##op(int i, atomic_t *v) \ +{ \ + __asm__ __volatile__( \ + "am"#asm_op"_db.w" " $zero, %1, %0 \n" \ + : "+ZB" (v->counter) \ + : "r" (I) \ + : "memory"); \ +} + +#define ATOMIC_OP_RETURN(op, I, asm_op, c_op) \ +static inline int atomic_##op##_return_relaxed(int i, atomic_t *v) \ +{ \ + int result; \ + \ + __asm__ __volatile__( \ + "am"#asm_op"_db.w" " %1, %2, %0 \n" \ + : "+ZB" (v->counter), "=&r" (result) \ + : "r" (I) \ + : "memory"); \ + \ + return result c_op I; \ +} + +#define ATOMIC_FETCH_OP(op, I, asm_op) \ +static inline int atomic_fetch_##op##_relaxed(int i, atomic_t *v) \ +{ \ + int result; \ + \ + __asm__ __volatile__( \ + "am"#asm_op"_db.w" " %1, %2, %0 \n" \ + : "+ZB" (v->counter), "=&r" (result) \ + : "r" (I) \ + : "memory"); \ + \ + return result; \ +} + +#define ATOMIC_OPS(op, I, asm_op, c_op) \ + ATOMIC_OP(op, I, asm_op) \ + ATOMIC_OP_RETURN(op, I, asm_op, c_op) \ + ATOMIC_FETCH_OP(op, I, asm_op) + +ATOMIC_OPS(add, i, add, +) +ATOMIC_OPS(sub, -i, add, +) + +#define atomic_add_return_relaxed atomic_add_return_relaxed +#define atomic_sub_return_relaxed atomic_sub_return_relaxed +#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed +#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed + +#undef ATOMIC_OPS + +#define ATOMIC_OPS(op, I, asm_op) \ + ATOMIC_OP(op, I, asm_op) \ + ATOMIC_FETCH_OP(op, I, asm_op) + +ATOMIC_OPS(and, i, and) +ATOMIC_OPS(or, i, or) +ATOMIC_OPS(xor, i, xor) + +#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed +#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed +#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed + +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP +#undef ATOMIC_OP_RETURN +#undef ATOMIC_OP + +static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u) +{ + int prev, rc; + + __asm__ __volatile__ ( + "0: ll.w %[p], %[c]\n" + " beq %[p], %[u], 1f\n" + " add.w %[rc], %[p], %[a]\n" + " sc.w %[rc], %[c]\n" + " beqz %[rc], 0b\n" + " b 2f\n" + "1:\n" + __WEAK_LLSC_MB + "2:\n" + : [p]"=&r" (prev), [rc]"=&r" (rc), + [c]"=ZB" (v->counter) + : [a]"r" (a), [u]"r" (u) + : "memory"); + + return prev; +} +#define atomic_fetch_add_unless atomic_fetch_add_unless + +/* + * atomic_sub_if_positive - conditionally subtract integer from atomic variable + * @i: integer value to subtract + * @v: pointer of type atomic_t + * + * Atomically test @v and subtract @i if @v is greater or equal than @i. + * The function returns the old value of @v minus @i. + */ +static inline int atomic_sub_if_positive(int i, atomic_t *v) +{ + int result; + int temp; + + if (__builtin_constant_p(i)) { + __asm__ __volatile__( + "1: ll.w %1, %2 # atomic_sub_if_positive\n" + " addi.w %0, %1, %3 \n" + " or %1, %0, $zero \n" + " blt %0, $zero, 2f \n" + " sc.w %1, %2 \n" + " beq $zero, %1, 1b \n" + "2: \n" + : "=&r" (result), "=&r" (temp), + "+" GCC_OFF_SMALL_ASM() (v->counter) + : "I" (-i)); + } else { + __asm__ __volatile__( + "1: ll.w %1, %2 # atomic_sub_if_positive\n" + " sub.w %0, %1, %3 \n" + " or %1, %0, $zero \n" + " blt %0, $zero, 2f \n" + " sc.w %1, %2 \n" + " beq $zero, %1, 1b \n" + "2: \n" + : "=&r" (result), "=&r" (temp), + "+" GCC_OFF_SMALL_ASM() (v->counter) + : "r" (i)); + } + + return result; +} + +#define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n))) +#define atomic_xchg(v, new) (xchg(&((v)->counter), (new))) + +/* + * atomic_dec_if_positive - decrement by 1 if old value positive + * @v: pointer of type atomic_t + */ +#define atomic_dec_if_positive(v) atomic_sub_if_positive(1, v) + +#ifdef CONFIG_64BIT + +#define ATOMIC64_INIT(i) { (i) } + +/* + * atomic64_read - read atomic variable + * @v: pointer of type atomic64_t + * + */ +#define atomic64_read(v) READ_ONCE((v)->counter) + +/* + * atomic64_set - set atomic variable + * @v: pointer of type atomic64_t + * @i: required value + */ +#define atomic64_set(v, i) WRITE_ONCE((v)->counter, (i)) + +#define ATOMIC64_OP(op, I, asm_op) \ +static inline void atomic64_##op(long i, atomic64_t *v) \ +{ \ + __asm__ __volatile__( \ + "am"#asm_op"_db.d " " $zero, %1, %0 \n" \ + : "+ZB" (v->counter) \ + : "r" (I) \ + : "memory"); \ +} + +#define ATOMIC64_OP_RETURN(op, I, asm_op, c_op) \ +static inline long atomic64_##op##_return_relaxed(long i, atomic64_t *v) \ +{ \ + long result; \ + __asm__ __volatile__( \ + "am"#asm_op"_db.d " " %1, %2, %0 \n" \ + : "+ZB" (v->counter), "=&r" (result) \ + : "r" (I) \ + : "memory"); \ + \ + return result c_op I; \ +} + +#define ATOMIC64_FETCH_OP(op, I, asm_op) \ +static inline long atomic64_fetch_##op##_relaxed(long i, atomic64_t *v) \ +{ \ + long result; \ + \ + __asm__ __volatile__( \ + "am"#asm_op"_db.d " " %1, %2, %0 \n" \ + : "+ZB" (v->counter), "=&r" (result) \ + : "r" (I) \ + : "memory"); \ + \ + return result; \ +} + +#define ATOMIC64_OPS(op, I, asm_op, c_op) \ + ATOMIC64_OP(op, I, asm_op) \ + ATOMIC64_OP_RETURN(op, I, asm_op, c_op) \ + ATOMIC64_FETCH_OP(op, I, asm_op) + +ATOMIC64_OPS(add, i, add, +) +ATOMIC64_OPS(sub, -i, add, +) + +#define atomic64_add_return_relaxed atomic64_add_return_relaxed +#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed +#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed +#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed + +#undef ATOMIC64_OPS + +#define ATOMIC64_OPS(op, I, asm_op) \ + ATOMIC64_OP(op, I, asm_op) \ + ATOMIC64_FETCH_OP(op, I, asm_op) + +ATOMIC64_OPS(and, i, and) +ATOMIC64_OPS(or, i, or) +ATOMIC64_OPS(xor, i, xor) + +#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed +#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed +#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed + +#undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP +#undef ATOMIC64_OP_RETURN +#undef ATOMIC64_OP + +static inline long atomic64_fetch_add_unless(atomic64_t *v, long a, long u) +{ + long prev, rc; + + __asm__ __volatile__ ( + "0: ll.d %[p], %[c]\n" + " beq %[p], %[u], 1f\n" + " add.d %[rc], %[p], %[a]\n" + " sc.d %[rc], %[c]\n" + " beqz %[rc], 0b\n" + " b 2f\n" + "1:\n" + __WEAK_LLSC_MB + "2:\n" + : [p]"=&r" (prev), [rc]"=&r" (rc), + [c] "=ZB" (v->counter) + : [a]"r" (a), [u]"r" (u) + : "memory"); + + return prev; +} +#define atomic64_fetch_add_unless atomic64_fetch_add_unless + +/* + * atomic64_sub_if_positive - conditionally subtract integer from atomic + * variable + * @i: integer value to subtract + * @v: pointer of type atomic64_t + * + * Atomically test @v and subtract @i if @v is greater or equal than @i. + * The function returns the old value of @v minus @i. + */ +static inline long atomic64_sub_if_positive(long i, atomic64_t *v) +{ + long result; + long temp; + + if (__builtin_constant_p(i)) { + __asm__ __volatile__( + "1: ll.d %1, %2 # atomic64_sub_if_positive \n" + " addi.d %0, %1, %3 \n" + " or %1, %0, $zero \n" + " blt %0, $zero, 2f \n" + " sc.d %1, %2 \n" + " beq %1, $zero, 1b \n" + "2: \n" + : "=&r" (result), "=&r" (temp), + "+" GCC_OFF_SMALL_ASM() (v->counter) + : "I" (-i)); + } else { + __asm__ __volatile__( + "1: ll.d %1, %2 # atomic64_sub_if_positive \n" + " sub.d %0, %1, %3 \n" + " or %1, %0, $zero \n" + " blt %0, $zero, 2f \n" + " sc.d %1, %2 \n" + " beq %1, $zero, 1b \n" + "2: \n" + : "=&r" (result), "=&r" (temp), + "+" GCC_OFF_SMALL_ASM() (v->counter) + : "r" (i)); + } + + return result; +} + +#define atomic64_cmpxchg(v, o, n) \ + ((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n))) +#define atomic64_xchg(v, new) (xchg(&((v)->counter), (new))) + +/* + * atomic64_dec_if_positive - decrement by 1 if old value positive + * @v: pointer of type atomic64_t + */ +#define atomic64_dec_if_positive(v) atomic64_sub_if_positive(1, v) + +#endif /* CONFIG_64BIT */ + +#endif /* _ASM_ATOMIC_H */ diff --git a/arch/loongarch/include/asm/barrier.h b/arch/loongarch/include/asm/barrier.h new file mode 100644 index 000000000000..3ed771ed63a8 --- /dev/null +++ b/arch/loongarch/include/asm/barrier.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef __ASM_BARRIER_H +#define __ASM_BARRIER_H + +#include + +#define __sync() __asm__ __volatile__("dbar 0" : : : "memory") + +#define fast_wmb() __sync() +#define fast_rmb() __sync() +#define fast_mb() __sync() +#define fast_iob() __sync() +#define wbflush() __sync() + +#define wmb() fast_wmb() +#define rmb() fast_rmb() +#define mb() fast_mb() +#define iob() fast_iob() + +/** + * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise + * @index: array element index + * @size: number of elements in array + * + * Returns: + * 0 - (@index < @size) + */ +#define array_index_mask_nospec array_index_mask_nospec +static inline unsigned long array_index_mask_nospec(unsigned long index, + unsigned long size) +{ + unsigned long mask; + + __asm__ __volatile__( + "sltu %0, %1, %2\n\t" +#if (_LOONGARCH_SZLONG == 32) + "sub.w %0, $r0, %0\n\t" +#elif (_LOONGARCH_SZLONG == 64) + "sub.d %0, $r0, %0\n\t" +#endif + : "=r" (mask) + : "r" (index), "r" (size) + :); + + return mask; +} + +#include + +#endif /* __ASM_BARRIER_H */ diff --git a/arch/loongarch/include/asm/bitops.h b/arch/loongarch/include/asm/bitops.h new file mode 100644 index 000000000000..c7384870267b --- /dev/null +++ b/arch/loongarch/include/asm/bitops.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_BITOPS_H +#define _ASM_BITOPS_H + +#include + +#ifndef _LINUX_BITOPS_H +#error only can be included directly +#endif + +#include + +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include + +#endif /* _ASM_BITOPS_H */ diff --git a/arch/loongarch/include/asm/bitrev.h b/arch/loongarch/include/asm/bitrev.h new file mode 100644 index 000000000000..38147173f2ad --- /dev/null +++ b/arch/loongarch/include/asm/bitrev.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef __LOONGARCH_ASM_BITREV_H__ +#define __LOONGARCH_ASM_BITREV_H__ + +#include + +static __always_inline __attribute_const__ u32 __arch_bitrev32(u32 x) +{ + u32 ret; + + asm("bitrev.4b %0, %1" : "=r"(ret) : "r"(__swab32(x))); + return ret; +} + +static __always_inline __attribute_const__ u16 __arch_bitrev16(u16 x) +{ + u16 ret; + + asm("bitrev.4b %0, %1" : "=r"(ret) : "r"(__swab16(x))); + return ret; +} + +static __always_inline __attribute_const__ u8 __arch_bitrev8(u8 x) +{ + u8 ret; + + asm("bitrev.4b %0, %1" : "=r"(ret) : "r"(x)); + return ret; +} + +#endif /* __LOONGARCH_ASM_BITREV_H__ */ diff --git a/arch/loongarch/include/asm/bootinfo.h b/arch/loongarch/include/asm/bootinfo.h new file mode 100644 index 000000000000..669d45d958be --- /dev/null +++ b/arch/loongarch/include/asm/bootinfo.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of this archive + * for more details. + * + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#ifndef _ASM_BOOTINFO_H +#define _ASM_BOOTINFO_H + +#include +#include + +const char *get_system_type(void); + +extern void early_memblock_init(void); +extern void detect_memory_region(phys_addr_t start, phys_addr_t sz_min, phys_addr_t sz_max); + +extern void early_init(void); +extern void platform_init(void); + +/* + * Initial kernel command line, usually setup by fw_init_cmdline() + */ +extern char arcs_cmdline[COMMAND_LINE_SIZE]; + +/* + * Registers a0, a1, a2 and a3 as passed to the kernel entry by firmware + */ +extern unsigned long fw_arg0, fw_arg1, fw_arg2, fw_arg3; + +extern unsigned long initrd_start, initrd_end; + +/* + * Platform memory detection hook called by setup_arch + */ +extern void plat_mem_setup(void); + +#endif /* _ASM_BOOTINFO_H */ diff --git a/arch/loongarch/include/asm/branch.h b/arch/loongarch/include/asm/branch.h new file mode 100644 index 000000000000..830279859f4c --- /dev/null +++ b/arch/loongarch/include/asm/branch.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#ifndef _ASM_BRANCH_H +#define _ASM_BRANCH_H + +#include + +static inline unsigned long exception_era(struct pt_regs *regs) +{ + return regs->csr_era; +} + +static inline int compute_return_era(struct pt_regs *regs) +{ + regs->csr_era += 4; + return 0; +} + +#endif /* _ASM_BRANCH_H */ diff --git a/arch/loongarch/include/asm/bug.h b/arch/loongarch/include/asm/bug.h new file mode 100644 index 000000000000..bda49108a76d --- /dev/null +++ b/arch/loongarch/include/asm/bug.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_BUG_H +#define __ASM_BUG_H + +#include + +#ifdef CONFIG_BUG + +#include + +static inline void __noreturn BUG(void) +{ + __asm__ __volatile__("break %0" : : "i" (BRK_BUG)); + unreachable(); +} + +#define HAVE_ARCH_BUG + +#endif + +#include + +#endif /* __ASM_BUG_H */ diff --git a/arch/loongarch/include/asm/cache.h b/arch/loongarch/include/asm/cache.h new file mode 100644 index 000000000000..62eb6271af42 --- /dev/null +++ b/arch/loongarch/include/asm/cache.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_CACHE_H +#define _ASM_CACHE_H + +#define L1_CACHE_SHIFT CONFIG_L1_CACHE_SHIFT +#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) + +#define __read_mostly __attribute__((__section__(".data..read_mostly"))) + +#endif /* _ASM_CACHE_H */ diff --git a/arch/loongarch/include/asm/cacheflush.h b/arch/loongarch/include/asm/cacheflush.h new file mode 100644 index 000000000000..9876f527e31e --- /dev/null +++ b/arch/loongarch/include/asm/cacheflush.h @@ -0,0 +1,99 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_CACHEFLUSH_H +#define _ASM_CACHEFLUSH_H + +#include +#include +#include + + /* + * This flag is used to indicate that the page pointed to by a pte + * is dirty and requires cleaning before returning it to the user. + */ +#define PG_dcache_dirty PG_arch_1 + +#define Page_dcache_dirty(page) \ + test_bit(PG_dcache_dirty, &(page)->flags) +#define SetPageDcacheDirty(page) \ + set_bit(PG_dcache_dirty, &(page)->flags) +#define ClearPageDcacheDirty(page) \ + clear_bit(PG_dcache_dirty, &(page)->flags) + +extern void local_flush_icache_range(unsigned long start, unsigned long end); + +#define flush_icache_range local_flush_icache_range +#define flush_icache_user_range local_flush_icache_range + +extern void copy_to_user_page(struct vm_area_struct *vma, + struct page *page, unsigned long vaddr, void *dst, const void *src, + unsigned long len); + +extern void copy_from_user_page(struct vm_area_struct *vma, + struct page *page, unsigned long vaddr, void *dst, const void *src, + unsigned long len); + +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 + +#define flush_cache_all() do { } while (0) +#define flush_cache_mm(mm) do { } while (0) +#define flush_cache_dup_mm(mm) do { } while (0) +#define flush_cache_range(vma, start, end) do { } while (0) +#define flush_cache_page(vma, vmaddr, pfn) do { } while (0) +#define flush_cache_vmap(start, end) do { } while (0) +#define flush_cache_vunmap(start, end) do { } while (0) +#define flush_icache_page(vma, page) do { } while (0) +#define flush_icache_user_page(vma, page, addr, len) do { } while (0) +#define flush_dcache_page(page) do { } while (0) +#define flush_dcache_mmap_lock(mapping) do { } while (0) +#define flush_dcache_mmap_unlock(mapping) do { } while (0) + +#define cache_op(op, addr) \ + __asm__ __volatile__( \ + " cacop %0, %1 \n" \ + : \ + : "i" (op), "ZC" (*(unsigned char *)(addr))) + +static inline void flush_icache_line_indexed(unsigned long addr) +{ + cache_op(Index_Invalidate_I, addr); +} + +static inline void flush_dcache_line_indexed(unsigned long addr) +{ + cache_op(Index_Writeback_Inv_D, addr); +} + +static inline void flush_vcache_line_indexed(unsigned long addr) +{ + cache_op(Index_Writeback_Inv_V, addr); +} + +static inline void flush_scache_line_indexed(unsigned long addr) +{ + cache_op(Index_Writeback_Inv_S, addr); +} + +static inline void flush_icache_line(unsigned long addr) +{ + cache_op(Hit_Invalidate_I, addr); +} + +static inline void flush_dcache_line(unsigned long addr) +{ + cache_op(Hit_Writeback_Inv_D, addr); +} + +static inline void flush_vcache_line(unsigned long addr) +{ + cache_op(Hit_Writeback_Inv_V, addr); +} + +static inline void flush_scache_line(unsigned long addr) +{ + cache_op(Hit_Writeback_Inv_S, addr); +} + +#endif /* _ASM_CACHEFLUSH_H */ diff --git a/arch/loongarch/include/asm/cacheops.h b/arch/loongarch/include/asm/cacheops.h new file mode 100644 index 000000000000..8d7649a918fe --- /dev/null +++ b/arch/loongarch/include/asm/cacheops.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + * + * Cache operations for the cache instruction. + */ +#ifndef __ASM_CACHEOPS_H +#define __ASM_CACHEOPS_H + +/* + * Most cache ops are split into a 2 bit field identifying the cache, and a 3 + * bit field identifying the cache operation. + */ +#define CacheOp_Cache 0x03 +#define CacheOp_Op 0x1c + +#define Cache_I 0x00 +#define Cache_D 0x01 +#define Cache_V 0x02 +#define Cache_S 0x03 + +#define Index_Invalidate 0x08 +#define Index_Writeback_Inv 0x08 +#define Hit_Invalidate 0x10 +#define Hit_Writeback_Inv 0x10 +#define CacheOp_User_Defined 0x18 + +#define Index_Invalidate_I (Cache_I | Index_Invalidate) +#define Index_Writeback_Inv_D (Cache_D | Index_Writeback_Inv) +#define Index_Writeback_Inv_V (Cache_V | Index_Writeback_Inv) +#define Index_Writeback_Inv_S (Cache_S | Index_Writeback_Inv) +#define Hit_Invalidate_I (Cache_I | Hit_Invalidate) +#define Hit_Writeback_Inv_D (Cache_D | Hit_Writeback_Inv) +#define Hit_Writeback_Inv_V (Cache_V | Hit_Writeback_Inv) +#define Hit_Writeback_Inv_S (Cache_S | Hit_Writeback_Inv) + +#endif /* __ASM_CACHEOPS_H */ diff --git a/arch/loongarch/include/asm/clocksource.h b/arch/loongarch/include/asm/clocksource.h new file mode 100644 index 000000000000..803f0b42b28c --- /dev/null +++ b/arch/loongarch/include/asm/clocksource.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Author: Huacai Chen + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ + +#ifndef __ASM_CLOCKSOURCE_H +#define __ASM_CLOCKSOURCE_H + +#include + +#endif /* __ASM_CLOCKSOURCE_H */ diff --git a/arch/loongarch/include/asm/cmpxchg.h b/arch/loongarch/include/asm/cmpxchg.h new file mode 100644 index 000000000000..7233176d751b --- /dev/null +++ b/arch/loongarch/include/asm/cmpxchg.h @@ -0,0 +1,135 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef __ASM_CMPXCHG_H +#define __ASM_CMPXCHG_H + +#include + +#define __xchg_asm(amswap_db, m, val) \ +({ \ + __typeof(val) __ret; \ + \ + __asm__ __volatile__ ( \ + " "amswap_db" %1, %z2, %0 \n" \ + : "+ZB" (*m), "=&r" (__ret) \ + : "Jr" (val) \ + : "memory"); \ + \ + __ret; \ +}) + +extern unsigned long __xchg_small(volatile void *ptr, unsigned long x, + unsigned int size); + +static inline unsigned long __xchg(volatile void *ptr, unsigned long x, + int size) +{ + switch (size) { + case 1: + case 2: + return __xchg_small(ptr, x, size); + + case 4: + return __xchg_asm("amswap_db.w", (volatile u32 *)ptr, (u32)x); + + case 8: + return __xchg_asm("amswap_db.d", (volatile u64 *)ptr, (u64)x); + + default: + BUILD_BUG(); + } + + return 0; +} + +#define xchg(ptr, x) \ +({ \ + __typeof__(*(ptr)) __res; \ + \ + __res = (__typeof__(*(ptr))) \ + __xchg((ptr), (unsigned long)(x), sizeof(*(ptr))); \ + \ + __res; \ +}) + +#define __cmpxchg_asm(ld, st, m, old, new) \ +({ \ + __typeof(old) __ret; \ + \ + __asm__ __volatile__( \ + "1: " ld " %0, %2 # __cmpxchg_asm \n" \ + " bne %0, %z3, 2f \n" \ + " or $t0, %z4, $zero \n" \ + " " st " $t0, %1 \n" \ + " beq $zero, $t0, 1b \n" \ + "2: \n" \ + : "=&r" (__ret), "=ZB"(*m) \ + : "ZB"(*m), "Jr" (old), "Jr" (new) \ + : "t0", "memory"); \ + \ + __ret; \ +}) + +extern unsigned long __cmpxchg_small(volatile void *ptr, unsigned long old, + unsigned long new, unsigned int size); + +static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, + unsigned long new, unsigned int size) +{ + switch (size) { + case 1: + case 2: + return __cmpxchg_small(ptr, old, new, size); + + case 4: + return __cmpxchg_asm("ll.w", "sc.w", (volatile u32 *)ptr, + (u32)old, new); + + case 8: + return __cmpxchg_asm("ll.d", "sc.d", (volatile u64 *)ptr, + (u64)old, new); + + default: + BUILD_BUG(); + } + + return 0; +} + +#define cmpxchg_local(ptr, old, new) \ + ((__typeof__(*(ptr))) \ + __cmpxchg((ptr), \ + (unsigned long)(__typeof__(*(ptr)))(old), \ + (unsigned long)(__typeof__(*(ptr)))(new), \ + sizeof(*(ptr)))) + +#define cmpxchg(ptr, old, new) \ +({ \ + __typeof__(*(ptr)) __res; \ + \ + __res = cmpxchg_local((ptr), (old), (new)); \ + \ + __res; \ +}) + +#ifdef CONFIG_64BIT +#define cmpxchg64_local(ptr, o, n) \ + ({ \ + BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ + cmpxchg_local((ptr), (o), (n)); \ + }) + +#define cmpxchg64(ptr, o, n) \ + ({ \ + BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ + cmpxchg((ptr), (o), (n)); \ + }) +#else +#include +#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n)) +#define cmpxchg64(ptr, o, n) cmpxchg64_local((ptr), (o), (n)) +#endif + +#endif /* __ASM_CMPXCHG_H */ diff --git a/arch/loongarch/include/asm/compiler.h b/arch/loongarch/include/asm/compiler.h new file mode 100644 index 000000000000..2dc4d84b2a32 --- /dev/null +++ b/arch/loongarch/include/asm/compiler.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Co., Ltd. + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#ifndef _ASM_COMPILER_H +#define _ASM_COMPILER_H + +#define GCC_OFF_SMALL_ASM() "ZC" + +#define LOONGARCH_ISA_LEVEL "loongarch" +#define LOONGARCH_ISA_ARCH_LEVEL "arch=loongarch" +#define LOONGARCH_ISA_LEVEL_RAW loongarch +#define LOONGARCH_ISA_ARCH_LEVEL_RAW LOONGARCH_ISA_LEVEL_RAW + +#endif /* _ASM_COMPILER_H */ diff --git a/arch/loongarch/include/asm/cpu-features.h b/arch/loongarch/include/asm/cpu-features.h new file mode 100644 index 000000000000..dca5c43851f2 --- /dev/null +++ b/arch/loongarch/include/asm/cpu-features.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef __ASM_CPU_FEATURES_H +#define __ASM_CPU_FEATURES_H + +#include +#include + +#define cpu_opt(opt) (cpu_data[0].options & (opt)) +#define cpu_has(feat) (cpu_data[0].options & BIT_ULL(feat)) + +#define cpu_has_loongarch (cpu_has_loongarch32 | cpu_has_loongarch64) +#define cpu_has_loongarch32 (cpu_data[0].isa_level & LOONGARCH_CPU_ISA_32BIT) +#define cpu_has_loongarch64 (cpu_data[0].isa_level & LOONGARCH_CPU_ISA_64BIT) + +#define cpu_icache_line_size() cpu_data[0].icache.linesz +#define cpu_dcache_line_size() cpu_data[0].dcache.linesz +#define cpu_vcache_line_size() cpu_data[0].vcache.linesz +#define cpu_scache_line_size() cpu_data[0].scache.linesz + +#ifdef CONFIG_32BIT +# define cpu_has_64bits (cpu_data[0].isa_level & LOONGARCH_CPU_ISA_64BIT) +# define cpu_vabits 31 +# define cpu_pabits 31 +#endif + +#ifdef CONFIG_64BIT +# define cpu_has_64bits 1 +# define cpu_vabits cpu_data[0].vabits +# define cpu_pabits cpu_data[0].pabits +# define __NEED_ADDRBITS_PROBE +#endif + +/* + * SMP assumption: Options of CPU 0 are a superset of all processors. + * This is true for all known LoongArch systems. + */ +#define cpu_has_cpucfg cpu_opt(LOONGARCH_CPU_CPUCFG) +#define cpu_has_lam cpu_opt(LOONGARCH_CPU_LAM) +#define cpu_has_ual cpu_opt(LOONGARCH_CPU_UAL) +#define cpu_has_fpu cpu_opt(LOONGARCH_CPU_FPU) +#define cpu_has_lsx cpu_opt(LOONGARCH_CPU_LSX) +#define cpu_has_lasx cpu_opt(LOONGARCH_CPU_LASX) +#define cpu_has_complex cpu_opt(LOONGARCH_CPU_COMPLEX) +#define cpu_has_crypto cpu_opt(LOONGARCH_CPU_CRYPTO) +#define cpu_has_lvz cpu_opt(LOONGARCH_CPU_LVZ) +#define cpu_has_lbt_x86 cpu_opt(LOONGARCH_CPU_LBT_X86) +#define cpu_has_lbt_arm cpu_opt(LOONGARCH_CPU_LBT_ARM) +#define cpu_has_lbt_mips cpu_opt(LOONGARCH_CPU_LBT_MIPS) +#define cpu_has_lbt (cpu_has_lbt_x86|cpu_has_lbt_arm|cpu_has_lbt_mips) +#define cpu_has_csr cpu_opt(LOONGARCH_CPU_CSR) +#define cpu_has_tlb cpu_opt(LOONGARCH_CPU_TLB) +#define cpu_has_watch cpu_opt(LOONGARCH_CPU_WATCH) +#define cpu_has_vint cpu_opt(LOONGARCH_CPU_VINT) +#define cpu_has_csripi cpu_opt(LOONGARCH_CPU_CSRIPI) +#define cpu_has_extioi cpu_opt(LOONGARCH_CPU_EXTIOI) +#define cpu_has_prefetch cpu_opt(LOONGARCH_CPU_PREFETCH) +#define cpu_has_pmp cpu_opt(LOONGARCH_CPU_PMP) +#define cpu_has_perf cpu_opt(LOONGARCH_CPU_PMP) +#define cpu_has_scalefreq cpu_opt(LOONGARCH_CPU_SCALEFREQ) +#define cpu_has_flatmode cpu_opt(LOONGARCH_CPU_FLATMODE) +#define cpu_has_eiodecode cpu_opt(LOONGARCH_CPU_EIODECODE) +#define cpu_has_guestid cpu_opt(LOONGARCH_CPU_GUESTID) +#define cpu_has_hypervisor cpu_opt(LOONGARCH_CPU_HYPERVISOR) + + +#endif /* __ASM_CPU_FEATURES_H */ diff --git a/arch/loongarch/include/asm/cpu-info.h b/arch/loongarch/include/asm/cpu-info.h new file mode 100644 index 000000000000..96ba457a085f --- /dev/null +++ b/arch/loongarch/include/asm/cpu-info.h @@ -0,0 +1,136 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef __ASM_CPU_INFO_H +#define __ASM_CPU_INFO_H + +#include +#include + +#include + +/* + * Descriptor for a cache + */ +struct cache_desc { + unsigned int waysize; /* Bytes per way */ + unsigned short sets; /* Number of lines per set */ + unsigned char ways; /* Number of ways */ + unsigned char linesz; /* Size of line in bytes */ + unsigned char waybit; /* Bits to select in a cache set */ + unsigned char flags; /* Flags describing cache properties */ +}; + +struct cpuinfo_loongarch { + u64 asid_cache; + unsigned long asid_mask; + + /* + * Capability and feature descriptor structure for LoongArch CPU + */ + unsigned long ases; + unsigned long long options; + unsigned int processor_id; + unsigned int fpu_vers; + unsigned int fpu_csr0; + unsigned int fpu_mask; + unsigned int cputype; + int isa_level; + int tlbsize; + int tlbsizemtlb; + int tlbsizestlbsets; + int tlbsizestlbways; + struct cache_desc icache; /* Primary I-cache */ + struct cache_desc dcache; /* Primary D or combined I/D cache */ + struct cache_desc vcache; /* Victim cache, between pcache and scache */ + struct cache_desc scache; /* Secondary cache */ + struct cache_desc tcache; /* Tertiary/split secondary cache */ + int package;/* physical package number */ + unsigned int globalnumber; + int vabits; /* Virtual Address size in bits */ + int pabits; /* Physical Address size in bits */ + void *data; /* Additional data */ + unsigned int watch_dreg_count; /* Number data breakpoints */ + unsigned int watch_ireg_count; /* Number instruction breakpoints */ + unsigned int watch_reg_use_cnt; /* min(NUM_WATCH_REGS, watch_dreg_count + watch_ireg_count), Usable by ptrace */ + unsigned int kscratch_mask; /* Usable KScratch mask. */ +} __attribute__((aligned(SMP_CACHE_BYTES))); + +extern struct cpuinfo_loongarch cpu_data[]; +#define boot_cpu_data cpu_data[0] +#define current_cpu_data cpu_data[smp_processor_id()] +#define raw_current_cpu_data cpu_data[raw_smp_processor_id()] + +extern void cpu_probe(void); + +extern const char *__cpu_family[]; +extern const char *__cpu_full_name[]; +#define cpu_family_string() __cpu_family[raw_smp_processor_id()] +#define cpu_full_name_string() __cpu_full_name[raw_smp_processor_id()] + +struct seq_file; +struct notifier_block; + +extern int register_proc_cpuinfo_notifier(struct notifier_block *nb); +extern int proc_cpuinfo_notifier_call_chain(unsigned long val, void *v); + +#define proc_cpuinfo_notifier(fn, pri) \ +({ \ + static struct notifier_block fn##_nb = { \ + .notifier_call = fn, \ + .priority = pri \ + }; \ + \ + register_proc_cpuinfo_notifier(&fn##_nb); \ +}) + +struct proc_cpuinfo_notifier_args { + struct seq_file *m; + unsigned long n; +}; + +static inline unsigned int cpu_cluster(struct cpuinfo_loongarch *cpuinfo) +{ + return (cpuinfo->globalnumber & LOONGARCH_GLOBALNUMBER_CLUSTER) >> + LOONGARCH_GLOBALNUMBER_CLUSTER_SHF; +} + +static inline unsigned int cpu_core(struct cpuinfo_loongarch *cpuinfo) +{ + return (cpuinfo->globalnumber & LOONGARCH_GLOBALNUMBER_CORE) >> + LOONGARCH_GLOBALNUMBER_CORE_SHF; +} + +extern void cpu_set_cluster(struct cpuinfo_loongarch *cpuinfo, unsigned int cluster); +extern void cpu_set_core(struct cpuinfo_loongarch *cpuinfo, unsigned int core); + +static inline bool cpus_are_siblings(int cpua, int cpub) +{ + struct cpuinfo_loongarch *infoa = &cpu_data[cpua]; + struct cpuinfo_loongarch *infob = &cpu_data[cpub]; + unsigned int gnuma, gnumb; + + if (infoa->package != infob->package) + return false; + + gnuma = infoa->globalnumber & ~LOONGARCH_GLOBALNUMBER_VP; + gnumb = infob->globalnumber & ~LOONGARCH_GLOBALNUMBER_VP; + if (gnuma != gnumb) + return false; + + return true; +} + +static inline unsigned long cpu_asid_mask(struct cpuinfo_loongarch *cpuinfo) +{ + return cpuinfo->asid_mask; +} + +static inline void set_cpu_asid_mask(struct cpuinfo_loongarch *cpuinfo, + unsigned long asid_mask) +{ + cpuinfo->asid_mask = asid_mask; +} + +#endif /* __ASM_CPU_INFO_H */ diff --git a/arch/loongarch/include/asm/cpu.h b/arch/loongarch/include/asm/cpu.h new file mode 100644 index 000000000000..48e14283d834 --- /dev/null +++ b/arch/loongarch/include/asm/cpu.h @@ -0,0 +1,127 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * cpu.h: Values of the PRId register used to match up + * various LoongArch cpu types. + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_CPU_H +#define _ASM_CPU_H + +/* + As of the LoongArch specs from Loongson Technology, the PRId register + (CPUCFG.00) is defined in this (backwards compatible) way: + + +----------------+----------------+----------------+----------------+ + | Reserved | Company ID | Processor ID | Revision | + +----------------+----------------+----------------+----------------+ + 31 24 23 16 15 8 7 0 + +*/ + +/* + * Assigned Company values for bits 23:16 of the PRId register. + */ + +#define PRID_COMP_MASK 0xff0000 + +#define PRID_COMP_LOONGSON 0x140000 + +/* + * Assigned Processor ID (implementation) values for bits 15:8 of the PRId + * register. In order to detect a certain CPU type exactly eventually + * additional registers may need to be examined. + */ + +#define PRID_IMP_MASK 0xff00 + +#define PRID_IMP_LOONGSON_32 0x4200 /* Loongson 32bit */ +#define PRID_IMP_LOONGSON_64R 0x6100 /* Reduced Loongson 64bit */ +#define PRID_IMP_LOONGSON_64C 0x6300 /* Classic Loongson 64bit */ +#define PRID_IMP_LOONGSON_64G 0xc000 /* Generic Loongson 64bit */ +#define PRID_IMP_UNKNOWN 0xff00 + +/* + * Particular Revision values for bits 7:0 of the PRId register. + */ + +#define PRID_REV_MASK 0x00ff + +#if !defined(__ASSEMBLY__) + +enum cpu_type_enum { + CPU_UNKNOWN, + CPU_LOONGSON32, + CPU_LOONGSON64, + CPU_LAST +}; + +#endif /* !__ASSEMBLY */ + +/* + * ISA Level encodings + * + */ + +#define LOONGARCH_CPU_ISA_LA32R 0x00000001 +#define LOONGARCH_CPU_ISA_LA32S 0x00000002 +#define LOONGARCH_CPU_ISA_LA64 0x00000004 + +#define LOONGARCH_CPU_ISA_32BIT (LOONGARCH_CPU_ISA_LA32R | LOONGARCH_CPU_ISA_LA32S) +#define LOONGARCH_CPU_ISA_64BIT LOONGARCH_CPU_ISA_LA64 + +/* + * CPU Option encodings + */ +#define CPU_FEATURE_CPUCFG 0 /* CPU has CPUCFG */ +#define CPU_FEATURE_LAM 1 /* CPU has Atomic instructions */ +#define CPU_FEATURE_UAL 2 /* CPU has Unaligned Access support */ +#define CPU_FEATURE_FPU 3 /* CPU has FPU */ +#define CPU_FEATURE_LSX 4 /* CPU has 128bit SIMD instructions */ +#define CPU_FEATURE_LASX 5 /* CPU has 256bit SIMD instructions */ +#define CPU_FEATURE_COMPLEX 6 /* CPU has Complex instructions */ +#define CPU_FEATURE_CRYPTO 7 /* CPU has Crypto instructions */ +#define CPU_FEATURE_LVZ 8 /* CPU has Virtualization extension */ +#define CPU_FEATURE_LBT_X86 9 /* CPU has X86 Binary Translation */ +#define CPU_FEATURE_LBT_ARM 10 /* CPU has ARM Binary Translation */ +#define CPU_FEATURE_LBT_MIPS 11 /* CPU has MIPS Binary Translation */ +#define CPU_FEATURE_TLB 12 /* CPU has TLB */ +#define CPU_FEATURE_CSR 13 /* CPU has CSR feature */ +#define CPU_FEATURE_WATCH 14 /* CPU has watchpoint registers */ +#define CPU_FEATURE_VINT 15 /* CPU has vectored interrupts */ +#define CPU_FEATURE_CSRIPI 16 /* CPU has CSR-IPI */ +#define CPU_FEATURE_EXTIOI 17 /* CPU has EXT-IOI */ +#define CPU_FEATURE_PREFETCH 18 /* CPU has prefetch instructions */ +#define CPU_FEATURE_PMP 19 /* CPU has perfermance counter */ +#define CPU_FEATURE_SCALEFREQ 20 /* CPU support scale cpufreq */ +#define CPU_FEATURE_FLATMODE 21 /* CPU has flatmode */ +#define CPU_FEATURE_EIODECODE 22 /* CPU has extioi int pin decode mode */ +#define CPU_FEATURE_GUESTID 23 /* CPU has GuestID feature */ +#define CPU_FEATURE_HYPERVISOR 24 /* CPU has hypervisor (run in VM) */ + +#define LOONGARCH_CPU_CPUCFG BIT_ULL(CPU_FEATURE_CPUCFG) +#define LOONGARCH_CPU_LAM BIT_ULL(CPU_FEATURE_LAM) +#define LOONGARCH_CPU_UAL BIT_ULL(CPU_FEATURE_UAL) +#define LOONGARCH_CPU_FPU BIT_ULL(CPU_FEATURE_FPU) +#define LOONGARCH_CPU_LSX BIT_ULL(CPU_FEATURE_LSX) +#define LOONGARCH_CPU_LASX BIT_ULL(CPU_FEATURE_LASX) +#define LOONGARCH_CPU_COMPLEX BIT_ULL(CPU_FEATURE_COMPLEX) +#define LOONGARCH_CPU_CRYPTO BIT_ULL(CPU_FEATURE_CRYPTO) +#define LOONGARCH_CPU_LVZ BIT_ULL(CPU_FEATURE_LVZ) +#define LOONGARCH_CPU_LBT_X86 BIT_ULL(CPU_FEATURE_LBT_X86) +#define LOONGARCH_CPU_LBT_ARM BIT_ULL(CPU_FEATURE_LBT_ARM) +#define LOONGARCH_CPU_LBT_MIPS BIT_ULL(CPU_FEATURE_LBT_MIPS) +#define LOONGARCH_CPU_TLB BIT_ULL(CPU_FEATURE_TLB) +#define LOONGARCH_CPU_CSR BIT_ULL(CPU_FEATURE_CSR) +#define LOONGARCH_CPU_WATCH BIT_ULL(CPU_FEATURE_WATCH) +#define LOONGARCH_CPU_VINT BIT_ULL(CPU_FEATURE_VINT) +#define LOONGARCH_CPU_CSRIPI BIT_ULL(CPU_FEATURE_CSRIPI) +#define LOONGARCH_CPU_EXTIOI BIT_ULL(CPU_FEATURE_EXTIOI) +#define LOONGARCH_CPU_PREFETCH BIT_ULL(CPU_FEATURE_PREFETCH) +#define LOONGARCH_CPU_PMP BIT_ULL(CPU_FEATURE_PMP) +#define LOONGARCH_CPU_SCALEFREQ BIT_ULL(CPU_FEATURE_SCALEFREQ) +#define LOONGARCH_CPU_FLATMODE BIT_ULL(CPU_FEATURE_FLATMODE) +#define LOONGARCH_CPU_EIODECODE BIT_ULL(CPU_FEATURE_EIODECODE) +#define LOONGARCH_CPU_GUESTID BIT_ULL(CPU_FEATURE_GUESTID) +#define LOONGARCH_CPU_HYPERVISOR BIT_ULL(CPU_FEATURE_HYPERVISOR) +#endif /* _ASM_CPU_H */ diff --git a/arch/loongarch/include/asm/cpufeature.h b/arch/loongarch/include/asm/cpufeature.h new file mode 100644 index 000000000000..8cf9cc9cd072 --- /dev/null +++ b/arch/loongarch/include/asm/cpufeature.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * CPU feature definitions for module loading, used by + * module_cpu_feature_match(), see uapi/asm/hwcap.h for LoongArch CPU features. + * + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ + +#ifndef __ASM_CPUFEATURE_H +#define __ASM_CPUFEATURE_H + +#include +#include + +#define MAX_CPU_FEATURES (8 * sizeof(elf_hwcap)) + +#define cpu_feature(x) ilog2(HWCAP_ ## x) + +static inline bool cpu_have_feature(unsigned int num) +{ + return elf_hwcap & (1UL << num); +} + +#endif /* __ASM_CPUFEATURE_H */ diff --git a/arch/loongarch/include/asm/delay.h b/arch/loongarch/include/asm/delay.h new file mode 100644 index 000000000000..1a287a615224 --- /dev/null +++ b/arch/loongarch/include/asm/delay.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1994 by Waldorf Electronics + * Copyright (C) 1995 - 2000, 01, 03 by Ralf Baechle + * Copyright (C) 1999, 2000 Silicon Graphics, Inc. + * Copyright (C) 2007 Maciej W. Rozycki + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#ifndef _ASM_DELAY_H +#define _ASM_DELAY_H + +#include + +extern void __delay(unsigned long loops); +extern void __ndelay(unsigned long ns); +extern void __udelay(unsigned long us); + +#define ndelay(ns) __ndelay(ns) +#define udelay(us) __udelay(us) + +/* make sure "usecs *= ..." in udelay do not overflow. */ +#if HZ >= 1000 +#define MAX_UDELAY_MS 1 +#elif HZ <= 200 +#define MAX_UDELAY_MS 5 +#else +#define MAX_UDELAY_MS (1000 / HZ) +#endif + +#endif /* _ASM_DELAY_H */ diff --git a/arch/loongarch/include/asm/dma-direct.h b/arch/loongarch/include/asm/dma-direct.h new file mode 100644 index 000000000000..7056788e9c1e --- /dev/null +++ b/arch/loongarch/include/asm/dma-direct.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + * + */ +#ifndef _LOONGARCH_DMA_DIRECT_H +#define _LOONGARCH_DMA_DIRECT_H + +dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr); +phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr); + +#endif /* _LOONGARCH_DMA_DIRECT_H */ diff --git a/arch/loongarch/include/asm/dma.h b/arch/loongarch/include/asm/dma.h new file mode 100644 index 000000000000..7e5b3e7d462b --- /dev/null +++ b/arch/loongarch/include/asm/dma.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2020 Loongson Technology Co., Ltd. + * + */ +#ifndef __ASM_DMA_H +#define __ASM_DMA_H + +#define MAX_DMA_ADDRESS PAGE_OFFSET +#define MAX_DMA32_PFN (1UL << (32 - PAGE_SHIFT)) + +extern int isa_dma_bridge_buggy; + +#endif diff --git a/arch/loongarch/include/asm/dmi.h b/arch/loongarch/include/asm/dmi.h new file mode 100644 index 000000000000..61ed0b2b0f2d --- /dev/null +++ b/arch/loongarch/include/asm/dmi.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#ifndef _ASM_DMI_H +#define _ASM_DMI_H + +#include +#include + +#define dmi_early_remap(x, l) dmi_remap(x, l) +#define dmi_early_unmap(x, l) dmi_unmap(x) +#define dmi_alloc(l) memblock_alloc(l, PAGE_SIZE) + +static inline void *dmi_remap(u64 phys_addr, unsigned long size) +{ + return ((void *)TO_CAC(phys_addr)); +} + +static inline void dmi_unmap(void *addr) +{ +} + +#endif /* _ASM_DMI_H */ diff --git a/arch/loongarch/include/asm/efi.h b/arch/loongarch/include/asm/efi.h new file mode 100644 index 000000000000..330fbafe9ddc --- /dev/null +++ b/arch/loongarch/include/asm/efi.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#ifndef _ASM_LOONGARCH_EFI_H +#define _ASM_LOONGARCH_EFI_H + +#include + +extern void __init efi_init(void); +extern void __init efi_runtime_init(void); + +#define ARCH_EFI_IRQ_FLAGS_MASK 0x00000001 /*bit0: CP0 Status.IE*/ + +static inline void efifb_setup_from_dmi(struct screen_info *si, const char *opt) +{ +} + +#define arch_efi_call_virt_setup() \ +({ \ +}) + +#define arch_efi_call_virt(p, f, args...) \ +({ \ + efi_##f##_t * __f; \ + __f = p->f; \ + __f(args); \ +}) + +#define arch_efi_call_virt_teardown() \ +({ \ +}) + +#define EFI_ALLOC_ALIGN SZ_16K + +#endif /* _ASM_LOONGARCH_EFI_H */ diff --git a/arch/loongarch/include/asm/elf.h b/arch/loongarch/include/asm/elf.h new file mode 100644 index 000000000000..0d2de3adcf58 --- /dev/null +++ b/arch/loongarch/include/asm/elf.h @@ -0,0 +1,302 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Much of this is taken from binutils and GNU libc ... + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#ifndef _ASM_ELF_H +#define _ASM_ELF_H + +#include +#include +#include + +#include + +#include +#include + +/* ELF header e_flags defines. */ + +/* The ABI of a file. */ +#define EF_LARCH_ABI_LP32 0x00000001 /* LP32 ABI. */ +#define EF_LARCH_ABI_LP64 0x00000003 /* LP64 ABI */ +#define EF_LARCH_ABI 0x00000003 + +/* LoongArch relocation types used by the dynamic linker */ +#define R_LARCH_NONE 0 +#define R_LARCH_32 1 +#define R_LARCH_64 2 +#define R_LARCH_RELATIVE 3 +#define R_LARCH_COPY 4 +#define R_LARCH_JUMP_SLOT 5 +#define R_LARCH_TLS_DTPMOD32 6 +#define R_LARCH_TLS_DTPMOD64 7 +#define R_LARCH_TLS_DTPREL32 8 +#define R_LARCH_TLS_DTPREL64 9 +#define R_LARCH_TLS_TPREL32 10 +#define R_LARCH_TLS_TPREL64 11 +#define R_LARCH_IRELATIVE 12 +#define R_LARCH_MARK_LA 20 +#define R_LARCH_MARK_PCREL 21 +#define R_LARCH_SOP_PUSH_PCREL 22 +#define R_LARCH_SOP_PUSH_ABSOLUTE 23 +#define R_LARCH_SOP_PUSH_DUP 24 +#define R_LARCH_SOP_PUSH_GPREL 25 +#define R_LARCH_SOP_PUSH_TLS_TPREL 26 +#define R_LARCH_SOP_PUSH_TLS_GOT 27 +#define R_LARCH_SOP_PUSH_TLS_GD 28 +#define R_LARCH_SOP_PUSH_PLT_PCREL 29 +#define R_LARCH_SOP_ASSERT 30 +#define R_LARCH_SOP_NOT 31 +#define R_LARCH_SOP_SUB 32 +#define R_LARCH_SOP_SL 33 +#define R_LARCH_SOP_SR 34 +#define R_LARCH_SOP_ADD 35 +#define R_LARCH_SOP_AND 36 +#define R_LARCH_SOP_IF_ELSE 37 +#define R_LARCH_SOP_POP_32_S_10_5 38 +#define R_LARCH_SOP_POP_32_U_10_12 39 +#define R_LARCH_SOP_POP_32_S_10_12 40 +#define R_LARCH_SOP_POP_32_S_10_16 41 +#define R_LARCH_SOP_POP_32_S_10_16_S2 42 +#define R_LARCH_SOP_POP_32_S_5_20 43 +#define R_LARCH_SOP_POP_32_S_0_5_10_16_S2 44 +#define R_LARCH_SOP_POP_32_S_0_10_10_16_S2 45 +#define R_LARCH_SOP_POP_32_U 46 +#define R_LARCH_ADD8 47 +#define R_LARCH_ADD16 48 +#define R_LARCH_ADD24 49 +#define R_LARCH_ADD32 50 +#define R_LARCH_ADD64 51 +#define R_LARCH_SUB8 52 +#define R_LARCH_SUB16 53 +#define R_LARCH_SUB24 54 +#define R_LARCH_SUB32 55 +#define R_LARCH_SUB64 56 +#define R_LARCH_GNU_VTINHERIT 57 +#define R_LARCH_GNU_VTENTRY 58 + +#ifndef ELF_ARCH + +/* ELF register definitions */ + +/* + * General purpose have the following registers: + * Register Number + * GPRs 32 + * ORIG_A0 1 + * ERA 1 + * BADVADDR 1 + * CRMD 1 + * PRMD 1 + * EUEN 1 + * ECFG 1 + * ESTAT 1 + * Reserved 5 + */ +#define ELF_NGREG 45 + +/* + * Floating point have the following registers: + * Register Number + * FPR 32 + * FCC 1 + * FCSR 1 + */ +#define ELF_NFPREG 34 + +typedef unsigned long elf_greg_t; +typedef elf_greg_t elf_gregset_t[ELF_NGREG]; + +typedef double elf_fpreg_t; +typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG]; + +void loongarch_dump_regs64(u64 *uregs, const struct pt_regs *regs); + +#ifdef CONFIG_32BIT +/* + * This is used to ensure we don't load something for the wrong architecture. + */ +#define elf_check_arch elf32_check_arch + +/* + * These are used to set parameters in the core dumps. + */ +#define ELF_CLASS ELFCLASS32 + +#define ELF_CORE_COPY_REGS(dest, regs) \ + loongarch_dump_regs32((u32 *)&(dest), (regs)); + +#endif /* CONFIG_32BIT */ + +#ifdef CONFIG_64BIT +/* + * This is used to ensure we don't load something for the wrong architecture. + */ +#define elf_check_arch elf64_check_arch + +/* + * These are used to set parameters in the core dumps. + */ +#define ELF_CLASS ELFCLASS64 + +#define ELF_CORE_COPY_REGS(dest, regs) \ + loongarch_dump_regs64((u64 *)&(dest), (regs)); + +#endif /* CONFIG_64BIT */ + +/* + * These are used to set parameters in the core dumps. + */ +#define ELF_DATA ELFDATA2LSB +#define ELF_ARCH EM_LOONGARCH + +#endif /* !defined(ELF_ARCH) */ + +#define loongarch_elf_check_machine(x) ((x)->e_machine == EM_LOONGARCH) + +#define vmcore_elf32_check_arch loongarch_elf_check_machine +#define vmcore_elf64_check_arch loongarch_elf_check_machine + +/* + * Return non-zero if HDR identifies an 32bit ELF binary. + */ +#define elf32_check_arch(hdr) \ +({ \ + int __res = 1; \ + struct elfhdr *__h = (hdr); \ + \ + if (!loongarch_elf_check_machine(__h)) \ + __res = 0; \ + if (__h->e_ident[EI_CLASS] != ELFCLASS32) \ + __res = 0; \ + \ + __res; \ +}) + +/* + * Return non-zero if HDR identifies an 64bit ELF binary. + */ +#define elf64_check_arch(hdr) \ +({ \ + int __res = 1; \ + struct elfhdr *__h = (hdr); \ + \ + if (!loongarch_elf_check_machine(__h)) \ + __res = 0; \ + if (__h->e_ident[EI_CLASS] != ELFCLASS64) \ + __res = 0; \ + \ + __res; \ +}) + +#ifdef CONFIG_32BIT + +#define SET_PERSONALITY2(ex, state) \ +do { \ + current->thread.vdso = &vdso_info; \ + \ + loongarch_set_personality_fcsr(state); \ + \ + if (personality(current->personality) != PER_LINUX) \ + set_personality(PER_LINUX); \ +} while (0) + +#endif /* CONFIG_32BIT */ + +#ifdef CONFIG_64BIT + +#define SET_PERSONALITY2(ex, state) \ +do { \ + unsigned int p; \ + \ + clear_thread_flag(TIF_32BIT_REGS); \ + clear_thread_flag(TIF_32BIT_ADDR); \ + \ + current->thread.vdso = &vdso_info; \ + loongarch_set_personality_fcsr(state); \ + \ + p = personality(current->personality); \ + if (p != PER_LINUX32 && p != PER_LINUX) \ + set_personality(PER_LINUX); \ +} while (0) + +#endif /* CONFIG_64BIT */ + +#define CORE_DUMP_USE_REGSET +#define ELF_EXEC_PAGESIZE PAGE_SIZE + +/* This yields a mask that user programs can use to figure out what + instruction set this cpu supports. This could be done in userspace, + but it's not easy, and we've already done it here. */ + +#define ELF_HWCAP (elf_hwcap) +extern unsigned int elf_hwcap; +#include + +/* + * This yields a string that ld.so will use to load implementation + * specific libraries for optimization. This is more specific in + * intent than poking at uname or /proc/cpuinfo. + */ + +#define ELF_PLATFORM __elf_platform +extern const char *__elf_platform; + +#define ELF_PLAT_INIT(_r, load_addr) do { \ + _r->regs[1] = _r->regs[2] = _r->regs[3] = _r->regs[4] = 0; \ + _r->regs[5] = _r->regs[6] = _r->regs[7] = _r->regs[8] = 0; \ + _r->regs[9] = _r->regs[10] = _r->regs[11] = _r->regs[12] = 0; \ + _r->regs[13] = _r->regs[14] = _r->regs[15] = _r->regs[16] = 0; \ + _r->regs[17] = _r->regs[18] = _r->regs[19] = _r->regs[20] = 0; \ + _r->regs[21] = _r->regs[22] = _r->regs[23] = _r->regs[24] = 0; \ + _r->regs[25] = _r->regs[26] = _r->regs[27] = _r->regs[28] = 0; \ + _r->regs[29] = _r->regs[30] = _r->regs[31] = 0; \ +} while (0) + +/* This is the location that an ET_DYN program is loaded if exec'ed. Typical + use of this is to invoke "./ld.so someprog" to test out a new version of + the loader. We need to make sure that it is out of the way of the program + that it will "exec", and that there is sufficient room for the brk. */ + +#define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) + +/* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */ +#define ARCH_DLINFO \ +do { \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, \ + (unsigned long)current->mm->context.vdso); \ +} while (0) + +#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 +struct linux_binprm; +extern int arch_setup_additional_pages(struct linux_binprm *bprm, + int uses_interp); + +struct arch_elf_state { + int fp_abi; + int interp_fp_abi; +}; + +#define LOONGARCH_ABI_FP_ANY (0) + +#define INIT_ARCH_ELF_STATE { \ + .fp_abi = LOONGARCH_ABI_FP_ANY, \ + .interp_fp_abi = LOONGARCH_ABI_FP_ANY, \ +} + +#define elf_read_implies_exec(ex, exec_stk) (exec_stk == EXSTACK_DEFAULT) + +extern int arch_elf_pt_proc(void *ehdr, void *phdr, struct file *elf, + bool is_interp, struct arch_elf_state *state); + +extern int arch_check_elf(void *ehdr, bool has_interpreter, void *interp_ehdr, + struct arch_elf_state *state); + +extern void loongarch_set_personality_fcsr(struct arch_elf_state *state); + +#endif /* _ASM_ELF_H */ diff --git a/arch/loongarch/include/asm/entry-common.h b/arch/loongarch/include/asm/entry-common.h new file mode 100644 index 000000000000..0fe2a098ded9 --- /dev/null +++ b/arch/loongarch/include/asm/entry-common.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef ARCH_LOONGARCH_ENTRY_COMMON_H +#define ARCH_LOONGARCH_ENTRY_COMMON_H + +#include +#include + +static inline bool on_thread_stack(void) +{ + return !(((unsigned long)(current->stack) ^ current_stack_pointer) & ~(THREAD_SIZE - 1)); +} + +#endif diff --git a/arch/loongarch/include/asm/exec.h b/arch/loongarch/include/asm/exec.h new file mode 100644 index 000000000000..371d5fb327a8 --- /dev/null +++ b/arch/loongarch/include/asm/exec.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1994, 95, 96, 97, 98, 99, 2003, 06 by Ralf Baechle + * Copyright (C) 1996 by Paul M. Antoine + * Copyright (C) 1999 Silicon Graphics + * Kevin D. Kissell, kevink@mips.org and Carsten Langgaard, carstenl@mips.com + * Copyright (C) 2000 MIPS Technologies, Inc. + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#ifndef _ASM_EXEC_H +#define _ASM_EXEC_H + +extern unsigned long arch_align_stack(unsigned long sp); + +#endif /* _ASM_EXEC_H */ diff --git a/arch/loongarch/include/asm/fb.h b/arch/loongarch/include/asm/fb.h new file mode 100644 index 000000000000..5cd96907d913 --- /dev/null +++ b/arch/loongarch/include/asm/fb.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file is released under the GPLv2 + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_FB_H_ +#define _ASM_FB_H_ + +#include +#include +#include + +static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma, + unsigned long off) +{ + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); +} + +static inline int fb_is_primary_device(struct fb_info *info) +{ + return 0; +} + +#endif /* _ASM_FB_H_ */ diff --git a/arch/loongarch/include/asm/fixmap.h b/arch/loongarch/include/asm/fixmap.h new file mode 100644 index 000000000000..cd810ee0d251 --- /dev/null +++ b/arch/loongarch/include/asm/fixmap.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fixmap.h: compile-time virtual memory allocation + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ + +#ifndef _ASM_FIXMAP_H +#define _ASM_FIXMAP_H + +#include + +#define NR_FIX_BTMAPS 64 + +#endif diff --git a/arch/loongarch/include/asm/fpregdef.h b/arch/loongarch/include/asm/fpregdef.h new file mode 100644 index 000000000000..97ad5f458613 --- /dev/null +++ b/arch/loongarch/include/asm/fpregdef.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Definitions for the FPU register names + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_FPREGDEF_H +#define _ASM_FPREGDEF_H + +#define fv0 $f0 /* return value */ +#define fv1 $f2 +#define fa0 $f12 /* argument registers */ +#define fa1 $f13 +#define fa2 $f14 +#define fa3 $f15 +#define fa4 $f16 +#define fa5 $f17 +#define fa6 $f18 +#define fa7 $f19 +#define ft0 $f4 /* caller saved */ +#define ft1 $f5 +#define ft2 $f6 +#define ft3 $f7 +#define ft4 $f8 +#define ft5 $f9 +#define ft6 $f10 +#define ft7 $f11 +#define ft8 $f20 +#define ft9 $f21 +#define ft10 $f22 +#define ft11 $f23 +#define ft12 $f1 +#define ft13 $f3 +#define fs0 $f24 /* callee saved */ +#define fs1 $f25 +#define fs2 $f26 +#define fs3 $f27 +#define fs4 $f28 +#define fs5 $f29 +#define fs6 $f30 +#define fs7 $f31 + +#define fcsr0 $r0 +#define fcsr1 $r1 +#define fcsr2 $r2 +#define fcsr3 $r3 +#define vcsr16 $r16 + +#endif /* _ASM_FPREGDEF_H */ diff --git a/arch/loongarch/include/asm/fpu.h b/arch/loongarch/include/asm/fpu.h new file mode 100644 index 000000000000..7c71fc56602f --- /dev/null +++ b/arch/loongarch/include/asm/fpu.h @@ -0,0 +1,136 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Author: Huacai Chen + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_FPU_H +#define _ASM_FPU_H + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +struct sigcontext; + +extern void _init_fpu(unsigned int); +extern void _save_fp(struct loongarch_fpu *); +extern void _restore_fp(struct loongarch_fpu *); + +/* + * Mask the FCSR Cause bits according to the Enable bits, observing + * that Unimplemented is always enabled. + */ +static inline unsigned long mask_fcsr_x(unsigned long fcsr) +{ + return fcsr & ((fcsr & FPU_CSR_ALL_E) << + (ffs(FPU_CSR_ALL_X) - ffs(FPU_CSR_ALL_E))); +} + +static inline int is_fp_enabled(void) +{ + return (csr_readl(LOONGARCH_CSR_EUEN) & CSR_EUEN_FPEN) ? + 1 : 0; +} + +#define enable_fpu() \ +do { \ + set_csr_euen(CSR_EUEN_FPEN); \ +} while (0) + +#define disable_fpu() \ +do { \ + clear_csr_euen(CSR_EUEN_FPEN); \ +} while (0) + +#define clear_fpu_owner() clear_thread_flag(TIF_USEDFPU) + +static inline int is_fpu_owner(void) +{ + return test_thread_flag(TIF_USEDFPU); +} + +static inline void __own_fpu(void) +{ + enable_fpu(); + set_thread_flag(TIF_USEDFPU); + KSTK_EUEN(current) |= CSR_EUEN_FPEN; +} + +static inline void own_fpu_inatomic(int restore) +{ + if (cpu_has_fpu && !is_fpu_owner()) { + __own_fpu(); + if (restore) + _restore_fp(¤t->thread.fpu); + } +} + +static inline void own_fpu(int restore) +{ + preempt_disable(); + own_fpu_inatomic(restore); + preempt_enable(); +} + +static inline void lose_fpu_inatomic(int save, struct task_struct *tsk) +{ + if (is_fpu_owner()) { + if (save) + _save_fp(&tsk->thread.fpu); + disable_fpu(); + clear_tsk_thread_flag(tsk, TIF_USEDFPU); + } + KSTK_EUEN(tsk) &= ~(CSR_EUEN_FPEN | CSR_EUEN_LSXEN | CSR_EUEN_LASXEN); +} + +static inline void lose_fpu(int save) +{ + preempt_disable(); + lose_fpu_inatomic(save, current); + preempt_enable(); +} + +static inline void init_fpu(void) +{ + unsigned int fcsr = current->thread.fpu.fcsr; + + __own_fpu(); + _init_fpu(fcsr); + set_used_math(); +} + +static inline void save_fp(struct task_struct *tsk) +{ + if (cpu_has_fpu) + _save_fp(&tsk->thread.fpu); +} + +static inline void restore_fp(struct task_struct *tsk) +{ + if (cpu_has_fpu) + _restore_fp(&tsk->thread.fpu); +} + +static inline union fpureg *get_fpu_regs(struct task_struct *tsk) +{ + if (tsk == current) { + preempt_disable(); + if (is_fpu_owner()) + _save_fp(¤t->thread.fpu); + preempt_enable(); + } + + return tsk->thread.fpu.fpr; +} + +#endif /* _ASM_FPU_H */ diff --git a/arch/loongarch/include/asm/futex.h b/arch/loongarch/include/asm/futex.h new file mode 100644 index 000000000000..fa0ccf9ea024 --- /dev/null +++ b/arch/loongarch/include/asm/futex.h @@ -0,0 +1,107 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_FUTEX_H +#define _ASM_FUTEX_H + +#include +#include +#include +#include +#include + +#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \ +{ \ + __asm__ __volatile__( \ + "1: ll.w %1, %4 # __futex_atomic_op\n" \ + " " insn " \n" \ + "2: sc.w $t0, %2 \n" \ + " beq $t0, $zero, 1b \n" \ + "3: \n" \ + " .section .fixup,\"ax\" \n" \ + "4: li.w %0, %6 \n" \ + " b 3b \n" \ + " .previous \n" \ + " .section __ex_table,\"a\" \n" \ + " "__UA_ADDR "\t1b, 4b \n" \ + " "__UA_ADDR "\t2b, 4b \n" \ + " .previous \n" \ + : "=r" (ret), "=&r" (oldval), \ + "=ZC" (*uaddr) \ + : "0" (0), "ZC" (*uaddr), "Jr" (oparg), \ + "i" (-EFAULT) \ + : "memory", "t0"); \ +} + +static inline int +arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr) +{ + int oldval = 0, ret = 0; + + pagefault_disable(); + + switch (op) { + case FUTEX_OP_SET: + __futex_atomic_op("move $t0, %z5", ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_ADD: + __futex_atomic_op("add.w $t0, %1, %z5", ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_OR: + __futex_atomic_op("or $t0, %1, %z5", ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_ANDN: + __futex_atomic_op("and $t0, %1, %z5", ret, oldval, uaddr, ~oparg); + break; + case FUTEX_OP_XOR: + __futex_atomic_op("xor $t0, %1, %z5", ret, oldval, uaddr, oparg); + break; + default: + ret = -ENOSYS; + } + + pagefault_enable(); + + if (!ret) + *oval = oldval; + + return ret; +} + +static inline int +futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newval) +{ + int ret = 0; + u32 val = 0; + + if (!access_ok(uaddr, sizeof(u32))) + return -EFAULT; + + __asm__ __volatile__( + "# futex_atomic_cmpxchg_inatomic \n" + "1: ll.w %1, %3 \n" + " bne %1, %z4, 3f \n" + " or $t0, %z5, $zero \n" + "2: sc.w $t0, %2 \n" + " beq $zero, $t0, 1b \n" + "3: \n" + " .section .fixup,\"ax\" \n" + "4: li.d %0, %6 \n" + " b 3b \n" + " .previous \n" + " .section __ex_table,\"a\" \n" + " "__UA_ADDR "\t1b, 4b \n" + " "__UA_ADDR "\t2b, 4b \n" + " .previous \n" + : "+r" (ret), "=&r" (val), "=" GCC_OFF_SMALL_ASM() (*uaddr) + : GCC_OFF_SMALL_ASM() (*uaddr), "Jr" (oldval), "Jr" (newval), + "i" (-EFAULT) + : "memory", "t0"); + + *uval = val; + + return ret; +} + +#endif /* _ASM_FUTEX_H */ diff --git a/arch/loongarch/include/asm/fw.h b/arch/loongarch/include/asm/fw.h new file mode 100644 index 000000000000..87b01b0fc76b --- /dev/null +++ b/arch/loongarch/include/asm/fw.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#ifndef __ASM_FW_H_ +#define __ASM_FW_H_ + +#include + +extern int fw_argc; +extern long *_fw_argv, *_fw_envp; + +#define fw_argv(index) ((char *)TO_CAC((long)_fw_argv[(index)])) +#define fw_envp(index) ((char *)TO_CAC((long)_fw_envp[(index)])) + +extern void fw_init_cmdline(void); + +#endif /* __ASM_FW_H_ */ diff --git a/arch/loongarch/include/asm/hardirq.h b/arch/loongarch/include/asm/hardirq.h new file mode 100644 index 000000000000..5effe88fce3e --- /dev/null +++ b/arch/loongarch/include/asm/hardirq.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_HARDIRQ_H +#define _ASM_HARDIRQ_H + +#include +#include +#include + +extern void ack_bad_irq(unsigned int irq); +#define ack_bad_irq ack_bad_irq + +#define NR_IPI 2 + +typedef struct { + unsigned int ipi_irqs[NR_IPI]; + unsigned int __softirq_pending; +} ____cacheline_aligned irq_cpustat_t; + +#include /* Standard mappings for irq_cpustat_t above */ + +#endif /* _ASM_HARDIRQ_H */ diff --git a/arch/loongarch/include/asm/hugetlb.h b/arch/loongarch/include/asm/hugetlb.h new file mode 100644 index 000000000000..8a25f471a17c --- /dev/null +++ b/arch/loongarch/include/asm/hugetlb.h @@ -0,0 +1,79 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ + +#ifndef __ASM_HUGETLB_H +#define __ASM_HUGETLB_H + +#include + +uint64_t pmd_to_entrylo(unsigned long pmd_val); + +#define __HAVE_ARCH_PREPARE_HUGEPAGE_RANGE +static inline int prepare_hugepage_range(struct file *file, + unsigned long addr, + unsigned long len) +{ + unsigned long task_size = STACK_TOP; + struct hstate *h = hstate_file(file); + + if (len & ~huge_page_mask(h)) + return -EINVAL; + if (addr & ~huge_page_mask(h)) + return -EINVAL; + if (len > task_size) + return -ENOMEM; + if (task_size - len < addr) + return -EINVAL; + return 0; +} + +#define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR +static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + pte_t clear; + pte_t pte = *ptep; + + pte_val(clear) = (unsigned long)invalid_pte_table; + set_pte_at(mm, addr, ptep, clear); + return pte; +} + +#define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH +static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ + flush_tlb_page(vma, addr & huge_page_mask(hstate_vma(vma))); +} + +#define __HAVE_ARCH_HUGE_PTE_NONE +static inline int huge_pte_none(pte_t pte) +{ + unsigned long val = pte_val(pte) & ~_PAGE_GLOBAL; + return !val || (val == (unsigned long)invalid_pte_table); +} + +#define __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS +static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, + pte_t *ptep, pte_t pte, + int dirty) +{ + int changed = !pte_same(*ptep, pte); + + if (changed) { + set_pte_at(vma->vm_mm, addr, ptep, pte); + /* + * There could be some standard sized pages in there, + * get them all. + */ + flush_tlb_range(vma, addr, addr + HPAGE_SIZE); + } + return changed; +} + +#include + +#endif /* __ASM_HUGETLB_H */ diff --git a/arch/loongarch/include/asm/hw_irq.h b/arch/loongarch/include/asm/hw_irq.h new file mode 100644 index 000000000000..1633972591a0 --- /dev/null +++ b/arch/loongarch/include/asm/hw_irq.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2000, 2001, 2002 by Ralf Baechle + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef __ASM_HW_IRQ_H +#define __ASM_HW_IRQ_H + +#include + +extern atomic_t irq_err_count; + +/* + * interrupt-retrigger: NOP for now. This may not be appropriate for all + * machines, we'll see ... + */ + +#endif /* __ASM_HW_IRQ_H */ diff --git a/arch/loongarch/include/asm/idle.h b/arch/loongarch/include/asm/idle.h new file mode 100644 index 000000000000..f7f2b7dbf958 --- /dev/null +++ b/arch/loongarch/include/asm/idle.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_IDLE_H +#define __ASM_IDLE_H + +#include + +extern asmlinkage void __arch_cpu_idle(void); + +#endif /* __ASM_IDLE_H */ diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h new file mode 100644 index 000000000000..d027c4ab1531 --- /dev/null +++ b/arch/loongarch/include/asm/inst.h @@ -0,0 +1,255 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Format of an instruction in memory. + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#ifndef _ASM_INST_H +#define _ASM_INST_H + +#include +#include + +/* HACHACHAHCAHC ... */ + +/* In case some other massaging is needed, keep LOONGARCHInst as wrapper */ + +#define LOONGARCHInst(x) x + +#define I_OPCODE_SFT 26 +#define LOONGARCHInst_OPCODE(x) (LOONGARCHInst(x) >> I_OPCODE_SFT) + +#define I_JTARGET_SFT 0 +#define LOONGARCHInst_JTARGET(x) (LOONGARCHInst(x) & 0x03ffffff) + +#define I_RS_SFT 21 +#define LOONGARCHInst_RS(x) ((LOONGARCHInst(x) & 0x03e00000) >> I_RS_SFT) + +#define I_RT_SFT 16 +#define LOONGARCHInst_RT(x) ((LOONGARCHInst(x) & 0x001f0000) >> I_RT_SFT) + +#define I_IMM_SFT 0 +#define LOONGARCHInst_SIMM(x) ((int)((short)(LOONGARCHInst(x) & 0xffff))) +#define LOONGARCHInst_UIMM(x) (LOONGARCHInst(x) & 0xffff) + +#define I_CACHEOP_SFT 18 +#define LOONGARCHInst_CACHEOP(x) ((LOONGARCHInst(x) & 0x001c0000) >> I_CACHEOP_SFT) + +#define I_CACHESEL_SFT 16 +#define LOONGARCHInst_CACHESEL(x) ((LOONGARCHInst(x) & 0x00030000) >> I_CACHESEL_SFT) + +#define I_RD_SFT 11 +#define LOONGARCHInst_RD(x) ((LOONGARCHInst(x) & 0x0000f800) >> I_RD_SFT) + +#define I_RE_SFT 6 +#define LOONGARCHInst_RE(x) ((LOONGARCHInst(x) & 0x000007c0) >> I_RE_SFT) + +#define I_FUNC_SFT 0 +#define LOONGARCHInst_FUNC(x) (LOONGARCHInst(x) & 0x0000003f) + +#define I_FFMT_SFT 21 +#define LOONGARCHInst_FFMT(x) ((LOONGARCHInst(x) & 0x01e00000) >> I_FFMT_SFT) + +#define I_FT_SFT 16 +#define LOONGARCHInst_FT(x) ((LOONGARCHInst(x) & 0x001f0000) >> I_FT_SFT) + +#define I_FS_SFT 11 +#define LOONGARCHInst_FS(x) ((LOONGARCHInst(x) & 0x0000f800) >> I_FS_SFT) + +#define I_FD_SFT 6 +#define LOONGARCHInst_FD(x) ((LOONGARCHInst(x) & 0x000007c0) >> I_FD_SFT) + +#define I_FR_SFT 21 +#define LOONGARCHInst_FR(x) ((LOONGARCHInst(x) & 0x03e00000) >> I_FR_SFT) + +#define I_FMA_FUNC_SFT 2 +#define LOONGARCHInst_FMA_FUNC(x) ((LOONGARCHInst(x) & 0x0000003c) >> I_FMA_FUNC_SFT) + +#define I_FMA_FFMT_SFT 0 +#define LOONGARCHInst_FMA_FFMT(x) (LOONGARCHInst(x) & 0x00000003) + +typedef unsigned int loongarch_instruction; + +/* Recode table from 16-bit register notation to 32-bit GPR. Do NOT export!!! */ +extern const int reg16to32[]; + +#define STR(x) __STR(x) +#define __STR(x) #x + +#define _LoadHW(addr, value, res) \ +do { \ + __asm__ __volatile__ ( \ + "1:\tld.b\t%0, %2, 1\n" \ + "2:\tld.bu\t$r19, %2, 0\n\t" \ + "slli.w\t%0, %0, 0x8\n\t" \ + "or\t%0, %0, $r19\n\t" \ + "li.w\t%1, 0\n" \ + "3:\n\t" \ + ".section\t.fixup,\"ax\"\n\t" \ + "4:\tli.w\t%1, %3\n\t" \ + "b\t3b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 4b\n\t" \ + STR(PTR)"\t2b, 4b\n\t" \ + ".previous" \ + : "=&r" (value), "=r" (res) \ + : "r" (addr), "i" (-EFAULT) \ + : "$r19"); \ +} while (0) + +#define _LoadW(addr, value, res) \ +do { \ + __asm__ __volatile__ ( \ + "1:\tldl.w\t%0, %2, 3\n" \ + "2:\tldr.w\t%0, %2, 0\n\t" \ + "li.w\t%1, 0\n" \ + "3:\n\t" \ + ".section\t.fixup,\"ax\"\n\t" \ + "4:\tli.w\t%1, %3\n\t" \ + "b\t3b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 4b\n\t" \ + STR(PTR)"\t2b, 4b\n\t" \ + ".previous" \ + : "=&r" (value), "=r" (res) \ + : "r" (addr), "i" (-EFAULT)); \ +} while (0) + +#define _LoadHWU(addr, value, res) \ +do { \ + __asm__ __volatile__ ( \ + "1:\tld.bu\t%0, %2, 1\n" \ + "2:\tld.bu\t$r19, %2, 0\n\t" \ + "slli.w\t%0, %0, 0x8\n\t" \ + "or\t%0, %0, $r19\n\t" \ + "li.w\t%1, 0\n" \ + "3:\n\t" \ + ".section\t.fixup,\"ax\"\n\t" \ + "4:\tli.w\t%1, %3\n\t" \ + "b\t3b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 4b\n\t" \ + STR(PTR)"\t2b, 4b\n\t" \ + ".previous" \ + : "=&r" (value), "=r" (res) \ + : "r" (addr), "i" (-EFAULT) \ + : "$r19"); \ +} while (0) + +#define _LoadWU(addr, value, res) \ +do { \ + __asm__ __volatile__ ( \ + "1:\tldl.w\t%0, %2, 3\n" \ + "2:\tldr.w\t%0, %2, 0\n\t" \ + "slli.d\t%0, %0, 32\n\t" \ + "srli.d\t%0, %0, 32\n\t" \ + "li.w\t%1, 0\n" \ + "3:\n\t" \ + "\t.section\t.fixup,\"ax\"\n\t" \ + "4:\tli.w\t%1, %3\n\t" \ + "b\t3b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 4b\n\t" \ + STR(PTR)"\t2b, 4b\n\t" \ + ".previous" \ + : "=&r" (value), "=r" (res) \ + : "r" (addr), "i" (-EFAULT)); \ +} while (0) + +#define _LoadDW(addr, value, res) \ +do { \ + __asm__ __volatile__ ( \ + "1:\tldl.d\t%0, %2, 7\n" \ + "2:\tldr.d\t%0, %2, 0\n\t" \ + "li.w\t%1, 0\n" \ + "3:\n\t" \ + "\t.section\t.fixup,\"ax\"\n\t" \ + "4:\tli.w\t%1, %3\n\t" \ + "b\t3b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 4b\n\t" \ + STR(PTR)"\t2b, 4b\n\t" \ + ".previous" \ + : "=&r" (value), "=r" (res) \ + : "r" (addr), "i" (-EFAULT)); \ +} while (0) + +#define _StoreHW(addr, value, res) \ +do { \ + __asm__ __volatile__ ( \ + "1:\tst.b\t%1, %2, 0\n" \ + "srli.w\t$r19,%1, 0x8\n" \ + "2:\tst.b\t$r19, %2, 1\n" \ + "li.w\t%0, 0\n" \ + "3:\n\t" \ + ".section\t.fixup,\"ax\"\n\t" \ + "4:\tli.w\t%0, %3\n\t" \ + "b\t3b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 4b\n\t" \ + STR(PTR)"\t2b, 4b\n\t" \ + ".previous" \ + : "=r" (res) \ + : "r" (value), "r" (addr), "i" (-EFAULT) \ + : "$r19"); \ +} while (0) + +#define _StoreW(addr, value, res) \ +do { \ + __asm__ __volatile__ ( \ + "1:\tstl.w\t%1, %2, 3\n" \ + "2:\tstr.w\t%1, %2, 0\n\t" \ + "li.w\t%0, 0\n" \ + "3:\n\t" \ + ".section\t.fixup,\"ax\"\n\t" \ + "4:\tli.w\t%0, %3\n\t" \ + "b\t3b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 4b\n\t" \ + STR(PTR)"\t2b, 4b\n\t" \ + ".previous" \ + : "=r" (res) \ + : "r" (value), "r" (addr), "i" (-EFAULT)); \ +} while (0) + +#define _StoreDW(addr, value, res) \ +do { \ + __asm__ __volatile__ ( \ + "1:\tstl.d\t%1, %2, 7\n" \ + "2:\tstr.d\t%1, %2, 0\n\t" \ + "li.w\t%0, 0\n" \ + "3:\n\t" \ + ".section\t.fixup,\"ax\"\n\t" \ + "4:\tli.w\t%0, %3\n\t" \ + "b\t3b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 4b\n\t" \ + STR(PTR)"\t2b, 4b\n\t" \ + ".previous" \ + : "=r" (res) \ + : "r" (value), "r" (addr), "i" (-EFAULT)); \ +} while (0) + +#define LoadHWU(addr, value, res) _LoadHWU(addr, value, res) +#define LoadWU(addr, value, res) _LoadWU(addr, value, res) +#define LoadHW(addr, value, res) _LoadHW(addr, value, res) +#define LoadW(addr, value, res) _LoadW(addr, value, res) +#define LoadDW(addr, value, res) _LoadDW(addr, value, res) + +#define StoreHW(addr, value, res) _StoreHW(addr, value, res) +#define StoreW(addr, value, res) _StoreW(addr, value, res) +#define StoreDW(addr, value, res) _StoreDW(addr, value, res) + +#endif /* _ASM_INST_H */ diff --git a/arch/loongarch/include/asm/io.h b/arch/loongarch/include/asm/io.h new file mode 100644 index 000000000000..a8a3b935c35b --- /dev/null +++ b/arch/loongarch/include/asm/io.h @@ -0,0 +1,129 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_IO_H +#define _ASM_IO_H + +#define ARCH_HAS_IOREMAP_WC + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +/* + * On LoongArch, I/O ports mappring is following: + * + * | .... | + * |-----------------------| + * | pci io ports(16K~32M) | + * |-----------------------| + * | isa io ports(0 ~16K) | + * PCI_IOBASE ->|-----------------------| + * | .... | + */ +#define PCI_IOBASE ((void __iomem *)(vm_map_base + (2 * PAGE_SIZE))) +#define PCI_IOSIZE SZ_32M +#define ISA_IOSIZE SZ_16K +#define IO_SPACE_LIMIT (PCI_IOSIZE - 1) + +/* + * Change "struct page" to physical address. + */ +#define page_to_phys(page) ((phys_addr_t)page_to_pfn(page) << PAGE_SHIFT) + +extern void __init __iomem *early_ioremap(u64 phys_addr, unsigned long size); +extern void __init early_iounmap(void __iomem *addr, unsigned long size); + +#define early_memremap early_ioremap +#define early_memunmap early_iounmap + +static inline void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size, + unsigned long prot_val) +{ + if (prot_val == _CACHE_CC) + return (void __iomem *)(unsigned long)(CAC_BASE + offset); + else + return (void __iomem *)(unsigned long)(UNCAC_BASE + offset); +} + +/* + * ioremap - map bus memory into CPU space + * @offset: bus address of the memory + * @size: size of the resource to map + * + * ioremap performs a platform specific sequence of operations to + * make bus memory CPU accessible via the readb/readw/readl/writeb/ + * writew/writel functions and the other mmio helpers. The returned + * address is not guaranteed to be usable directly as a virtual + * address. + */ +#define ioremap(offset, size) \ + ioremap_prot((offset), (size), _CACHE_SUC) + +/* + * ioremap_wc - map bus memory into CPU space + * @offset: bus address of the memory + * @size: size of the resource to map + * + * ioremap_wc performs a platform specific sequence of operations to + * make bus memory CPU accessible via the readb/readw/readl/writeb/ + * writew/writel functions and the other mmio helpers. The returned + * address is not guaranteed to be usable directly as a virtual + * address. + * + * This version of ioremap ensures that the memory is marked uncachable + * but accelerated by means of write-combining feature. It is specifically + * useful for PCIe prefetchable windows, which may vastly improve a + * communications performance. If it was determined on boot stage, what + * CPU CCA doesn't support WUC, the method shall fall-back to the + * _CACHE_SUC option (see cpu_probe() method). + */ +#define ioremap_wc(offset, size) \ + ioremap_prot((offset), (size), _CACHE_WUC) + +/* + * ioremap_cache - map bus memory into CPU space + * @offset: bus address of the memory + * @size: size of the resource to map + * + * ioremap_cache performs a platform specific sequence of operations to + * make bus memory CPU accessible via the readb/readw/readl/writeb/ + * writew/writel functions and the other mmio helpers. The returned + * address is not guaranteed to be usable directly as a virtual + * address. + * + * This version of ioremap ensures that the memory is marked cachable by + * the CPU. Also enables full write-combining. Useful for some + * memory-like regions on I/O busses. + */ +#define ioremap_cache(offset, size) \ + ioremap_prot((offset), (size), _CACHE_CC) + +static inline void iounmap(const volatile void __iomem *addr) +{ +} + +#define mmiowb() asm volatile ("dbar 0" ::: "memory") + +/* + * String version of I/O memory access operations. + */ +extern void __memset_io(volatile void __iomem *dst, int c, size_t count); +extern void __memcpy_toio(volatile void __iomem *to, const void *from, size_t count); +extern void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t count); +#define memset_io(c, v, l) __memset_io((c), (v), (l)) +#define memcpy_fromio(a, c, l) __memcpy_fromio((a), (c), (l)) +#define memcpy_toio(c, a, l) __memcpy_toio((c), (a), (l)) + +#include + +#endif /* _ASM_IO_H */ diff --git a/arch/loongarch/include/asm/irq.h b/arch/loongarch/include/asm/irq.h new file mode 100644 index 000000000000..c718aea9fe81 --- /dev/null +++ b/arch/loongarch/include/asm/irq.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_IRQ_H +#define _ASM_IRQ_H + +#include +#include +#include + +#define IRQ_STACK_SIZE THREAD_SIZE +#define IRQ_STACK_START (IRQ_STACK_SIZE - 16) + +DECLARE_PER_CPU(unsigned long, irq_stack); + +/* + * The highest address on the IRQ stack contains a dummy frame put down in + * genex.S (except_vec_vi_handler) which is structured as follows: + * + * top ------------ + * | task sp | <- irq_stack[cpu] + IRQ_STACK_START + * ------------ + * | | <- First frame of IRQ context + * ------------ + * + * task sp holds a copy of the task stack pointer where the struct pt_regs + * from exception entry can be found. + */ + +static inline bool on_irq_stack(int cpu, unsigned long sp) +{ + unsigned long low = per_cpu(irq_stack, cpu); + unsigned long high = low + IRQ_STACK_SIZE; + + return (low <= sp && sp <= high); +} + +struct irq_data; +struct device_node; + +int get_ipi_irq(void); +int get_pmc_irq(void); +int get_timer_irq(void); +void arch_init_irq(void); +void spurious_interrupt(void); +struct irq_domain *loongarch_cpu_irq_init(void); + +#define NR_IRQS_LEGACY 16 + +bool arch_trigger_cpumask_backtrace(const struct cpumask *mask, + bool exclude_self); +#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace + +#endif /* _ASM_IRQ_H */ diff --git a/arch/loongarch/include/asm/irq_regs.h b/arch/loongarch/include/asm/irq_regs.h new file mode 100644 index 000000000000..92e72213944e --- /dev/null +++ b/arch/loongarch/include/asm/irq_regs.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Copyright (C) 2006 Ralf Baechle (ralf@linux-mips.org) + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef __ASM_IRQ_REGS_H +#define __ASM_IRQ_REGS_H + +#define ARCH_HAS_OWN_IRQ_REGS + +#include + +static inline struct pt_regs *get_irq_regs(void) +{ + return current_thread_info()->regs; +} + +static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs) +{ + struct pt_regs *old_regs; + + old_regs = get_irq_regs(); + current_thread_info()->regs = new_regs; + + return old_regs; +} + +#endif /* __ASM_IRQ_REGS_H */ diff --git a/arch/loongarch/include/asm/irqflags.h b/arch/loongarch/include/asm/irqflags.h new file mode 100644 index 000000000000..8811bdf884e7 --- /dev/null +++ b/arch/loongarch/include/asm/irqflags.h @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_IRQFLAGS_H +#define _ASM_IRQFLAGS_H + +#ifndef __ASSEMBLY__ + +#include +#include +#include +#include + +static inline void arch_local_irq_enable(void) +{ + u32 flags = CSR_CRMD_IE; + __asm__ __volatile__( + "csrxchg %[val], %[mask], %[reg]\n\t" + : [val] "+r" (flags) + : [mask] "r" (CSR_CRMD_IE), [reg] "i" (LOONGARCH_CSR_CRMD) + : "memory"); +} + +static inline void arch_local_irq_disable(void) +{ + u32 flags = 0; + __asm__ __volatile__( + "csrxchg %[val], %[mask], %[reg]\n\t" + : [val] "+r" (flags) + : [mask] "r" (CSR_CRMD_IE), [reg] "i" (LOONGARCH_CSR_CRMD) + : "memory"); +} + +static inline unsigned long arch_local_irq_save(void) +{ + u32 flags = 0; + __asm__ __volatile__( + "csrxchg %[val], %[mask], %[reg]\n\t" + : [val] "+r" (flags) + : [mask] "r" (CSR_CRMD_IE), [reg] "i" (LOONGARCH_CSR_CRMD) + : "memory"); + return flags; +} + +static inline void arch_local_irq_restore(unsigned long flags) +{ + __asm__ __volatile__( + "csrxchg %[val], %[mask], %[reg]\n\t" + : [val] "+r" (flags) + : [mask] "r" (CSR_CRMD_IE), [reg] "i" (LOONGARCH_CSR_CRMD) + : "memory"); +} + +static inline unsigned long arch_local_save_flags(void) +{ + u32 flags; + __asm__ __volatile__( + "csrrd %[val], %[reg]\n\t" + : [val] "=r" (flags) + : [reg] "i" (LOONGARCH_CSR_CRMD) + : "memory"); + return flags; +} + +static inline int arch_irqs_disabled_flags(unsigned long flags) +{ + return !(flags & CSR_CRMD_IE); +} + +static inline int arch_irqs_disabled(void) +{ + return arch_irqs_disabled_flags(arch_local_save_flags()); +} + +#endif /* #ifndef __ASSEMBLY__ */ + +#endif /* _ASM_IRQFLAGS_H */ diff --git a/arch/loongarch/include/asm/kdebug.h b/arch/loongarch/include/asm/kdebug.h new file mode 100644 index 000000000000..64cdaead21a8 --- /dev/null +++ b/arch/loongarch/include/asm/kdebug.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_LOONGARCH_KDEBUG_H +#define _ASM_LOONGARCH_KDEBUG_H + +#include + +enum die_val { + DIE_OOPS = 1, + DIE_RI, + DIE_FP, + DIE_SIMD, + DIE_TRAP, + DIE_PAGE_FAULT, + DIE_BREAK, + DIE_SSTEPBP, + DIE_UPROBE, + DIE_UPROBE_XOL, +}; + +#endif /* _ASM_LOONGARCH_KDEBUG_H */ diff --git a/arch/loongarch/include/asm/kmap_types.h b/arch/loongarch/include/asm/kmap_types.h new file mode 100644 index 000000000000..5c2173103727 --- /dev/null +++ b/arch/loongarch/include/asm/kmap_types.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_KMAP_TYPES_H +#define _ASM_KMAP_TYPES_H + +#include + +#endif diff --git a/arch/loongarch/include/asm/linkage.h b/arch/loongarch/include/asm/linkage.h new file mode 100644 index 000000000000..283b3389b561 --- /dev/null +++ b/arch/loongarch/include/asm/linkage.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_LINKAGE_H +#define __ASM_LINKAGE_H + +#define __ALIGN .align 2 +#define __ALIGN_STR ".align 2" + +#define SYM_FUNC_START(name) \ + SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) \ + .cfi_startproc; + +#define SYM_FUNC_START_NOALIGN(name) \ + SYM_START(name, SYM_L_GLOBAL, SYM_A_NONE) \ + .cfi_startproc; + +#define SYM_FUNC_START_LOCAL(name) \ + SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN) \ + .cfi_startproc; + +#define SYM_FUNC_START_LOCAL_NOALIGN(name) \ + SYM_START(name, SYM_L_LOCAL, SYM_A_NONE) \ + .cfi_startproc; + +#define SYM_FUNC_START_WEAK(name) \ + SYM_START(name, SYM_L_WEAK, SYM_A_ALIGN) \ + .cfi_startproc; + +#define SYM_FUNC_START_WEAK_NOALIGN(name) \ + SYM_START(name, SYM_L_WEAK, SYM_A_NONE) \ + .cfi_startproc; + +#define SYM_FUNC_END(name) \ + .cfi_endproc; \ + SYM_END(name, SYM_T_FUNC) + +#endif diff --git a/arch/loongarch/include/asm/local.h b/arch/loongarch/include/asm/local.h new file mode 100644 index 000000000000..fa1585346c4b --- /dev/null +++ b/arch/loongarch/include/asm/local.h @@ -0,0 +1,142 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ARCH_LOONGARCH_LOCAL_H +#define _ARCH_LOONGARCH_LOCAL_H + +#include +#include +#include +#include +#include + +typedef struct { + atomic_long_t a; +} local_t; + +#define LOCAL_INIT(i) { ATOMIC_LONG_INIT(i) } + +#define local_read(l) atomic_long_read(&(l)->a) +#define local_set(l, i) atomic_long_set(&(l)->a, (i)) + +#define local_add(i, l) atomic_long_add((i), (&(l)->a)) +#define local_sub(i, l) atomic_long_sub((i), (&(l)->a)) +#define local_inc(l) atomic_long_inc(&(l)->a) +#define local_dec(l) atomic_long_dec(&(l)->a) + +/* + * Same as above, but return the result value + */ +static inline long local_add_return(long i, local_t *l) +{ + unsigned long result; + + __asm__ __volatile__( + " " __AMADD " %1, %2, %0 \n" + : "+ZB" (l->a.counter), "=&r" (result) + : "r" (i) + : "memory"); + result = result + i; + + return result; +} + +static inline long local_sub_return(long i, local_t *l) +{ + unsigned long result; + + __asm__ __volatile__( + " " __AMADD "%1, %2, %0 \n" + : "+ZB" (l->a.counter), "=&r" (result) + : "r" (-i) + : "memory"); + + result = result - i; + + return result; +} + +#define local_cmpxchg(l, o, n) \ + ((long)cmpxchg_local(&((l)->a.counter), (o), (n))) +#define local_xchg(l, n) (atomic_long_xchg((&(l)->a), (n))) + +/** + * local_add_unless - add unless the number is a given value + * @l: pointer of type local_t + * @a: the amount to add to l... + * @u: ...unless l is equal to u. + * + * Atomically adds @a to @l, so long as it was not @u. + * Returns non-zero if @l was not @u, and zero otherwise. + */ +#define local_add_unless(l, a, u) \ +({ \ + long c, old; \ + c = local_read(l); \ + while (c != (u) && (old = local_cmpxchg((l), c, c + (a))) != c) \ + c = old; \ + c != (u); \ +}) +#define local_inc_not_zero(l) local_add_unless((l), 1, 0) + +#define local_dec_return(l) local_sub_return(1, (l)) +#define local_inc_return(l) local_add_return(1, (l)) + +/* + * local_sub_and_test - subtract value from variable and test result + * @i: integer value to subtract + * @l: pointer of type local_t + * + * Atomically subtracts @i from @l and returns + * true if the result is zero, or false for all + * other cases. + */ +#define local_sub_and_test(i, l) (local_sub_return((i), (l)) == 0) + +/* + * local_inc_and_test - increment and test + * @l: pointer of type local_t + * + * Atomically increments @l by 1 + * and returns true if the result is zero, or false for all + * other cases. + */ +#define local_inc_and_test(l) (local_inc_return(l) == 0) + +/* + * local_dec_and_test - decrement by 1 and test + * @l: pointer of type local_t + * + * Atomically decrements @l by 1 and + * returns true if the result is 0, or false for all other + * cases. + */ +#define local_dec_and_test(l) (local_sub_return(1, (l)) == 0) + +/* + * local_add_negative - add and test if negative + * @l: pointer of type local_t + * @i: integer value to add + * + * Atomically adds @i to @l and returns true + * if the result is negative, or false when + * result is greater than or equal to zero. + */ +#define local_add_negative(i, l) (local_add_return(i, (l)) < 0) + +/* Use these for per-cpu local_t variables: on some archs they are + * much more efficient than these naive implementations. Note they take + * a variable, not an address. + */ + +#define __local_inc(l) ((l)->a.counter++) +#define __local_dec(l) ((l)->a.counter++) +#define __local_add(i, l) ((l)->a.counter += (i)) +#define __local_sub(i, l) ((l)->a.counter -= (i)) + +#endif /* _ARCH_LOONGARCH_LOCAL_H */ diff --git a/arch/loongarch/include/asm/loongarchregs.h b/arch/loongarch/include/asm/loongarchregs.h new file mode 100644 index 000000000000..1985f5f854b2 --- /dev/null +++ b/arch/loongarch/include/asm/loongarchregs.h @@ -0,0 +1,1579 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#ifndef _ASM_LOONGARCHREGS_H +#define _ASM_LOONGARCHREGS_H + +#include +#include +#include + +#ifndef __ASSEMBLY__ +#include + +/* + * parse_r var, r - Helper assembler macro for parsing register names. + * + * This converts the register name in $n form provided in \r to the + * corresponding register number, which is assigned to the variable \var. It is + * needed to allow explicit encoding of instructions in inline assembly where + * registers are chosen by the compiler in $n form, allowing us to avoid using + * fixed register numbers. + * + * It also allows newer instructions (not implemented by the assembler) to be + * transparently implemented using assembler macros, instead of needing separate + * cases depending on toolchain support. + * + * Simple usage example: + * __asm__ __volatile__("parse_r __rt, %0\n\t" + * "# di %0\n\t" + * ".word (0x41606000 | (__rt << 16))" + * : "=r" (status); + */ + +/* Match an individual register number and assign to \var */ +#define _IFC_REG(n) \ + ".ifc \\r, $r" #n "\n\t" \ + "\\var = " #n "\n\t" \ + ".endif\n\t" + +__asm__(".macro parse_r var r\n\t" + "\\var = -1\n\t" + _IFC_REG(0) _IFC_REG(1) _IFC_REG(2) _IFC_REG(3) + _IFC_REG(4) _IFC_REG(5) _IFC_REG(6) _IFC_REG(7) + _IFC_REG(8) _IFC_REG(9) _IFC_REG(10) _IFC_REG(11) + _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15) + _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19) + _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23) + _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27) + _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31) + ".iflt \\var\n\t" + ".error \"Unable to parse register name \\r\"\n\t" + ".endif\n\t" + ".endm"); + +#undef _IFC_REG + +/* CPUCFG */ +static inline u32 read_cpucfg(u32 reg) +{ + return __cpucfg(reg); +} + +#endif /* !__ASSEMBLY__ */ + +/* LoongArch Registers */ +#define REG_RA 0x1 +#define REG_TP 0x2 +#define REG_SP 0x3 +#define REG_A0 0x4 +#define REG_A1 0x5 +#define REG_A2 0x6 +#define REG_A3 0x7 +#define REG_A4 0x8 +#define REG_A5 0x9 +#define REG_A6 0xa +#define REG_A7 0xb +#define REG_V0 REG_A0 +#define REG_V1 REG_A1 +#define REG_T0 0xc +#define REG_T1 0xd +#define REG_T2 0xe +#define REG_T3 0xf +#define REG_T4 0x10 +#define REG_T5 0x11 +#define REG_T6 0x12 +#define REG_T7 0x13 +#define REG_T8 0x14 +#define REG_U0 0x15 +#define REG_FP 0x16 +#define REG_S0 0x17 +#define REG_S1 0x18 +#define REG_S2 0x19 +#define REG_S3 0x1a +#define REG_S4 0x1b +#define REG_S5 0x1c +#define REG_S6 0x1d +#define REG_S7 0x1e +#define REG_S8 0x1f + +/* Bit Domains for CPUCFG registers */ +#define LOONGARCH_CPUCFG0 0x0 +#define CPUCFG0_PRID GENMASK(31, 0) + +#define LOONGARCH_CPUCFG1 0x1 +#define CPUCFG1_ISGR32 BIT(0) +#define CPUCFG1_ISGR64 BIT(1) +#define CPUCFG1_PAGING BIT(2) +#define CPUCFG1_IOCSR BIT(3) +#define CPUCFG1_PABITS GENMASK(11, 4) +#define CPUCFG1_VABITS GENMASK(19, 12) +#define CPUCFG1_UAL BIT(20) +#define CPUCFG1_RI BIT(21) +#define CPUCFG1_EP BIT(22) +#define CPUCFG1_RPLV BIT(23) +#define CPUCFG1_HUGEPG BIT(24) +#define CPUCFG1_IOCSRBRD BIT(25) +#define CPUCFG1_MSGINT BIT(26) + +#define LOONGARCH_CPUCFG2 0x2 +#define CPUCFG2_FP BIT(0) +#define CPUCFG2_FPSP BIT(1) +#define CPUCFG2_FPDP BIT(2) +#define CPUCFG2_FPVERS GENMASK(5, 3) +#define CPUCFG2_LSX BIT(6) +#define CPUCFG2_LASX BIT(7) +#define CPUCFG2_COMPLEX BIT(8) +#define CPUCFG2_CRYPTO BIT(9) +#define CPUCFG2_LVZP BIT(10) +#define CPUCFG2_LVZVER GENMASK(13, 11) +#define CPUCFG2_LLFTP BIT(14) +#define CPUCFG2_LLFTPREV GENMASK(17, 15) +#define CPUCFG2_X86BT BIT(18) +#define CPUCFG2_ARMBT BIT(19) +#define CPUCFG2_MIPSBT BIT(20) +#define CPUCFG2_LSPW BIT(21) +#define CPUCFG2_LAM BIT(22) + +#define LOONGARCH_CPUCFG3 0x3 +#define CPUCFG3_CCDMA BIT(0) +#define CPUCFG3_SFB BIT(1) +#define CPUCFG3_UCACC BIT(2) +#define CPUCFG3_LLEXC BIT(3) +#define CPUCFG3_SCDLY BIT(4) +#define CPUCFG3_LLDBAR BIT(5) +#define CPUCFG3_ITLBT BIT(6) +#define CPUCFG3_ICACHET BIT(7) +#define CPUCFG3_SPW_LVL GENMASK(10, 8) +#define CPUCFG3_SPW_HG_HF BIT(11) +#define CPUCFG3_RVA BIT(12) +#define CPUCFG3_RVAMAX GENMASK(16, 13) + +#define LOONGARCH_CPUCFG4 0x4 +#define CPUCFG4_CCFREQ GENMASK(31, 0) + +#define LOONGARCH_CPUCFG5 0x5 +#define CPUCFG5_CCMUL GENMASK(15, 0) +#define CPUCFG5_CCDIV GENMASK(31, 16) + +#define LOONGARCH_CPUCFG6 0x6 +#define CPUCFG6_PMP BIT(0) +#define CPUCFG6_PAMVER GENMASK(3, 1) +#define CPUCFG6_PMNUM GENMASK(7, 4) +#define CPUCFG6_PMBITS GENMASK(13, 8) +#define CPUCFG6_UPM BIT(14) + +#define LOONGARCH_CPUCFG16 0x10 +#define CPUCFG16_L1_IUPRE BIT(0) +#define CPUCFG16_L1_IUUNIFY BIT(1) +#define CPUCFG16_L1_DPRE BIT(2) +#define CPUCFG16_L2_IUPRE BIT(3) +#define CPUCFG16_L2_IUUNIFY BIT(4) +#define CPUCFG16_L2_IUPRIV BIT(5) +#define CPUCFG16_L2_IUINCL BIT(6) +#define CPUCFG16_L2_DPRE BIT(7) +#define CPUCFG16_L2_DPRIV BIT(8) +#define CPUCFG16_L2_DINCL BIT(9) +#define CPUCFG16_L3_IUPRE BIT(10) +#define CPUCFG16_L3_IUUNIFY BIT(11) +#define CPUCFG16_L3_IUPRIV BIT(12) +#define CPUCFG16_L3_IUINCL BIT(13) +#define CPUCFG16_L3_DPRE BIT(14) +#define CPUCFG16_L3_DPRIV BIT(15) +#define CPUCFG16_L3_DINCL BIT(16) + +#define LOONGARCH_CPUCFG17 0x11 +#define CPUCFG17_L1I_WAYS_M GENMASK(15, 0) +#define CPUCFG17_L1I_SETS_M GENMASK(23, 16) +#define CPUCFG17_L1I_SIZE_M GENMASK(30, 24) +#define CPUCFG17_L1I_WAYS 0 +#define CPUCFG17_L1I_SETS 16 +#define CPUCFG17_L1I_SIZE 24 + +#define LOONGARCH_CPUCFG18 0x12 +#define CPUCFG18_L1D_WAYS_M GENMASK(15, 0) +#define CPUCFG18_L1D_SETS_M GENMASK(23, 16) +#define CPUCFG18_L1D_SIZE_M GENMASK(30, 24) +#define CPUCFG18_L1D_WAYS 0 +#define CPUCFG18_L1D_SETS 16 +#define CPUCFG18_L1D_SIZE 24 + +#define LOONGARCH_CPUCFG19 0x13 +#define CPUCFG19_L2_WAYS_M GENMASK(15, 0) +#define CPUCFG19_L2_SETS_M GENMASK(23, 16) +#define CPUCFG19_L2_SIZE_M GENMASK(30, 24) +#define CPUCFG19_L2_WAYS 0 +#define CPUCFG19_L2_SETS 16 +#define CPUCFG19_L2_SIZE 24 + +#define LOONGARCH_CPUCFG20 0x14 +#define CPUCFG20_L3_WAYS_M GENMASK(15, 0) +#define CPUCFG20_L3_SETS_M GENMASK(23, 16) +#define CPUCFG20_L3_SIZE_M GENMASK(30, 24) +#define CPUCFG20_L3_WAYS 0 +#define CPUCFG20_L3_SETS 16 +#define CPUCFG20_L3_SIZE 24 + +#define LOONGARCH_CPUCFG48 0x30 +#define CPUCFG48_MCSR_LCK BIT(0) +#define CPUCFG48_NAP_EN BIT(1) +#define CPUCFG48_VFPU_CG BIT(2) +#define CPUCFG48_RAM_CG BIT(3) + +#ifndef __ASSEMBLY__ + +/* CSR */ +static inline u32 csr_readl(u32 reg) +{ + return __csrrd(reg); +} + +static inline u64 csr_readq(u32 reg) +{ + return __dcsrrd(reg); +} + +static inline void csr_writel(u32 val, u32 reg) +{ + __csrwr(val, reg); +} + +static inline void csr_writeq(u64 val, u32 reg) +{ + __dcsrwr(val, reg); +} + +static inline u32 csr_xchgl(u32 val, u32 mask, u32 reg) +{ + return __csrxchg(val, mask, reg); +} + +static inline u64 csr_xchgq(u64 val, u64 mask, u32 reg) +{ + return __dcsrxchg(val, mask, reg); +} + +/* IOCSR */ +static inline u32 iocsr_readl(u32 reg) +{ + return __iocsrrd_w(reg); +} + +static inline u64 iocsr_readq(u32 reg) +{ + return __iocsrrd_d(reg); +} + +static inline void iocsr_writel(u32 val, u32 reg) +{ + __iocsrwr_w(val, reg); +} + +static inline void iocsr_writeq(u64 val, u32 reg) +{ + __iocsrwr_d(val, reg); +} + +#endif /* !__ASSEMBLY__ */ + +/* CSR register number */ + +/* Basic CSR registers */ +#define LOONGARCH_CSR_CRMD 0x0 /* Current mode info */ +#define CSR_CRMD_WE_SHIFT 9 +#define CSR_CRMD_WE (_ULCAST_(0x1) << CSR_CRMD_WE_SHIFT) +#define CSR_CRMD_DACM_SHIFT 7 +#define CSR_CRMD_DACM_WIDTH 2 +#define CSR_CRMD_DACM (_ULCAST_(0x3) << CSR_CRMD_DACM_SHIFT) +#define CSR_CRMD_DACF_SHIFT 5 +#define CSR_CRMD_DACF_WIDTH 2 +#define CSR_CRMD_DACF (_ULCAST_(0x3) << CSR_CRMD_DACF_SHIFT) +#define CSR_CRMD_PG_SHIFT 4 +#define CSR_CRMD_PG (_ULCAST_(0x1) << CSR_CRMD_PG_SHIFT) +#define CSR_CRMD_DA_SHIFT 3 +#define CSR_CRMD_DA (_ULCAST_(0x1) << CSR_CRMD_DA_SHIFT) +#define CSR_CRMD_IE_SHIFT 2 +#define CSR_CRMD_IE (_ULCAST_(0x1) << CSR_CRMD_IE_SHIFT) +#define CSR_CRMD_PLV_SHIFT 0 +#define CSR_CRMD_PLV_WIDTH 2 +#define CSR_CRMD_PLV (_ULCAST_(0x3) << CSR_CRMD_PLV_SHIFT) + +#define PLV_KERN 0 +#define PLV_USER 3 +#define PLV_MASK 0x3 + +#define LOONGARCH_CSR_PRMD 0x1 /* Prev-exception mode info */ +#define CSR_PRMD_PWE_SHIFT 3 +#define CSR_PRMD_PWE (_ULCAST_(0x1) << CSR_PRMD_PWE_SHIFT) +#define CSR_PRMD_PIE_SHIFT 2 +#define CSR_PRMD_PIE (_ULCAST_(0x1) << CSR_PRMD_PIE_SHIFT) +#define CSR_PRMD_PPLV_SHIFT 0 +#define CSR_PRMD_PPLV_WIDTH 2 +#define CSR_PRMD_PPLV (_ULCAST_(0x3) << CSR_PRMD_PPLV_SHIFT) + +#define LOONGARCH_CSR_EUEN 0x2 /* Extended unit enable */ +#define CSR_EUEN_LBTEN_SHIFT 3 +#define CSR_EUEN_LBTEN (_ULCAST_(0x1) << CSR_EUEN_LBTEN_SHIFT) +#define CSR_EUEN_LASXEN_SHIFT 2 +#define CSR_EUEN_LASXEN (_ULCAST_(0x1) << CSR_EUEN_LASXEN_SHIFT) +#define CSR_EUEN_LSXEN_SHIFT 1 +#define CSR_EUEN_LSXEN (_ULCAST_(0x1) << CSR_EUEN_LSXEN_SHIFT) +#define CSR_EUEN_FPEN_SHIFT 0 +#define CSR_EUEN_FPEN (_ULCAST_(0x1) << CSR_EUEN_FPEN_SHIFT) + +#define LOONGARCH_CSR_MISC 0x3 /* Misc config */ + +#define LOONGARCH_CSR_ECFG 0x4 /* Exception config */ +#define CSR_ECFG_VS_SHIFT 16 +#define CSR_ECFG_VS_WIDTH 3 +#define CSR_ECFG_VS (_ULCAST_(0x7) << CSR_ECFG_VS_SHIFT) +#define CSR_ECFG_IM_SHIFT 0 +#define CSR_ECFG_IM_WIDTH 13 +#define CSR_ECFG_IM (_ULCAST_(0x1fff) << CSR_ECFG_IM_SHIFT) + +#define LOONGARCH_CSR_ESTAT 0x5 /* Exception status */ +#define CSR_ESTAT_ESUBCODE_SHIFT 22 +#define CSR_ESTAT_ESUBCODE_WIDTH 9 +#define CSR_ESTAT_ESUBCODE (_ULCAST_(0x1ff) << CSR_ESTAT_ESUBCODE_SHIFT) +#define CSR_ESTAT_EXC_SHIFT 16 +#define CSR_ESTAT_EXC_WIDTH 6 +#define CSR_ESTAT_EXC (_ULCAST_(0x3f) << CSR_ESTAT_EXC_SHIFT) +#define CSR_ESTAT_IS_SHIFT 0 +#define CSR_ESTAT_IS_WIDTH 15 +#define CSR_ESTAT_IS (_ULCAST_(0x7fff) << CSR_ESTAT_IS_SHIFT) + +#define LOONGARCH_CSR_ERA 0x6 /* ERA */ + +#define LOONGARCH_CSR_BADV 0x7 /* Bad virtual address */ + +#define LOONGARCH_CSR_BADI 0x8 /* Bad instruction */ + +#define LOONGARCH_CSR_EENTRY 0xc /* Exception entry */ + +/* TLB related CSR registers */ +#define LOONGARCH_CSR_TLBIDX 0x10 /* TLB Index, EHINV, PageSize, NP */ +#define CSR_TLBIDX_EHINV_SHIFT 31 +#define CSR_TLBIDX_EHINV (_ULCAST_(1) << CSR_TLBIDX_EHINV_SHIFT) +#define CSR_TLBIDX_PS_SHIFT 24 +#define CSR_TLBIDX_PS_WIDTH 6 +#define CSR_TLBIDX_PS (_ULCAST_(0x3f) << CSR_TLBIDX_PS_SHIFT) +#define CSR_TLBIDX_IDX_SHIFT 0 +#define CSR_TLBIDX_IDX_WIDTH 12 +#define CSR_TLBIDX_IDX (_ULCAST_(0xfff) << CSR_TLBIDX_IDX_SHIFT) +#define CSR_TLBIDX_SIZEM 0x3f000000 +#define CSR_TLBIDX_SIZE CSR_TLBIDX_PS_SHIFT +#define CSR_TLBIDX_IDXM 0xfff +#define CSR_INVALID_ENTRY(e) (CSR_TLBIDX_EHINV | e) + +#define LOONGARCH_CSR_TLBEHI 0x11 /* TLB EntryHi */ + +#define LOONGARCH_CSR_TLBELO0 0x12 /* TLB EntryLo0 */ +#define CSR_TLBLO0_RPLV_SHIFT 63 +#define CSR_TLBLO0_RPLV (_ULCAST_(0x1) << CSR_TLBLO0_RPLV_SHIFT) +#define CSR_TLBLO0_NX_SHIFT 62 +#define CSR_TLBLO0_NX (_ULCAST_(0x1) << CSR_TLBLO0_NX_SHIFT) +#define CSR_TLBLO0_NR_SHIFT 61 +#define CSR_TLBLO0_NR (_ULCAST_(0x1) << CSR_TLBLO0_NR_SHIFT) +#define CSR_TLBLO0_PFN_SHIFT 12 +#define CSR_TLBLO0_PFN_WIDTH 36 +#define CSR_TLBLO0_PFN (_ULCAST_(0xfffffffff) << CSR_TLBLO0_PFN_SHIFT) +#define CSR_TLBLO0_GLOBAL_SHIFT 6 +#define CSR_TLBLO0_GLOBAL (_ULCAST_(0x1) << CSR_TLBLO0_GLOBAL_SHIFT) +#define CSR_TLBLO0_CCA_SHIFT 4 +#define CSR_TLBLO0_CCA_WIDTH 2 +#define CSR_TLBLO0_CCA (_ULCAST_(0x3) << CSR_TLBLO0_CCA_SHIFT) +#define CSR_TLBLO0_PLV_SHIFT 2 +#define CSR_TLBLO0_PLV_WIDTH 2 +#define CSR_TLBLO0_PLV (_ULCAST_(0x3) << CSR_TLBLO0_PLV_SHIFT) +#define CSR_TLBLO0_WE_SHIFT 1 +#define CSR_TLBLO0_WE (_ULCAST_(0x1) << CSR_TLBLO0_WE_SHIFT) +#define CSR_TLBLO0_V_SHIFT 0 +#define CSR_TLBLO0_V (_ULCAST_(0x1) << CSR_TLBLO0_V_SHIFT) + +#define LOONGARCH_CSR_TLBELO1 0x13 /* TLB EntryLo1 */ +#define CSR_TLBLO1_RPLV_SHIFT 63 +#define CSR_TLBLO1_RPLV (_ULCAST_(0x1) << CSR_TLBLO1_RPLV_SHIFT) +#define CSR_TLBLO1_NX_SHIFT 62 +#define CSR_TLBLO1_NX (_ULCAST_(0x1) << CSR_TLBLO1_NX_SHIFT) +#define CSR_TLBLO1_NR_SHIFT 61 +#define CSR_TLBLO1_NR (_ULCAST_(0x1) << CSR_TLBLO1_NR_SHIFT) +#define CSR_TLBLO1_PFN_SHIFT 12 +#define CSR_TLBLO1_PFN_WIDTH 36 +#define CSR_TLBLO1_PFN (_ULCAST_(0xfffffffff) << CSR_TLBLO1_PFN_SHIFT) +#define CSR_TLBLO1_GLOBAL_SHIFT 6 +#define CSR_TLBLO1_GLOBAL (_ULCAST_(0x1) << CSR_TLBLO1_GLOBAL_SHIFT) +#define CSR_TLBLO1_CCA_SHIFT 4 +#define CSR_TLBLO1_CCA_WIDTH 2 +#define CSR_TLBLO1_CCA (_ULCAST_(0x3) << CSR_TLBLO1_CCA_SHIFT) +#define CSR_TLBLO1_PLV_SHIFT 2 +#define CSR_TLBLO1_PLV_WIDTH 2 +#define CSR_TLBLO1_PLV (_ULCAST_(0x3) << CSR_TLBLO1_PLV_SHIFT) +#define CSR_TLBLO1_WE_SHIFT 1 +#define CSR_TLBLO1_WE (_ULCAST_(0x1) << CSR_TLBLO1_WE_SHIFT) +#define CSR_TLBLO1_V_SHIFT 0 +#define CSR_TLBLO1_V (_ULCAST_(0x1) << CSR_TLBLO1_V_SHIFT) + +#define LOONGARCH_CSR_GTLBC 0x15 /* Guest TLB control */ +#define CSR_GTLBC_RID_SHIFT 16 +#define CSR_GTLBC_RID_WIDTH 8 +#define CSR_GTLBC_RID (_ULCAST_(0xff) << CSR_GTLBC_RID_SHIFT) +#define CSR_GTLBC_TOTI_SHIFT 13 +#define CSR_GTLBC_TOTI (_ULCAST_(0x1) << CSR_GTLBC_TOTI_SHIFT) +#define CSR_GTLBC_USERID_SHIFT 12 +#define CSR_GTLBC_USERID (_ULCAST_(0x1) << CSR_GTLBC_USERID_SHIFT) +#define CSR_GTLBC_GMTLBSZ_SHIFT 0 +#define CSR_GTLBC_GMTLBSZ_WIDTH 6 +#define CSR_GTLBC_GMTLBSZ (_ULCAST_(0x3f) << CSR_GTLBC_GMTLBSZ_SHIFT) + +#define LOONGARCH_CSR_TRGP 0x16 /* TLBR read guest info */ +#define CSR_TRGP_RID_SHIFT 16 +#define CSR_TRGP_RID_WIDTH 8 +#define CSR_TRGP_RID (_ULCAST_(0xff) << CSR_TRGP_RID_SHIFT) +#define CSR_TRGP_GTLB_SHIFT 0 +#define CSR_TRGP_GTLB (1 << CSR_TRGP_GTLB_SHIFT) + +#define LOONGARCH_CSR_ASID 0x18 /* ASID */ +#define CSR_ASID_BIT_SHIFT 16 /* ASIDBits */ +#define CSR_ASID_BIT_WIDTH 8 +#define CSR_ASID_BIT (_ULCAST_(0xff) << CSR_ASID_BIT_SHIFT) +#define CSR_ASID_ASID_SHIFT 0 +#define CSR_ASID_ASID_WIDTH 10 +#define CSR_ASID_ASID (_ULCAST_(0x3ff) << CSR_ASID_ASID_SHIFT) + +#define LOONGARCH_CSR_PGDL 0x19 /* Page table base address when VA[47] = 0 */ + +#define LOONGARCH_CSR_PGDH 0x1a /* Page table base address when VA[47] = 1 */ + +#define LOONGARCH_CSR_PGD 0x1b /* Page table base */ + +#define LOONGARCH_CSR_PWCTL0 0x1c /* PWCtl0 */ +#define CSR_PWCTL0_PTEW_SHIFT 30 +#define CSR_PWCTL0_PTEW_WIDTH 2 +#define CSR_PWCTL0_PTEW (_ULCAST_(0x3) << CSR_PWCTL0_PTEW_SHIFT) +#define CSR_PWCTL0_DIR1WIDTH_SHIFT 25 +#define CSR_PWCTL0_DIR1WIDTH_WIDTH 5 +#define CSR_PWCTL0_DIR1WIDTH (_ULCAST_(0x1f) << CSR_PWCTL0_DIR1WIDTH_SHIFT) +#define CSR_PWCTL0_DIR1BASE_SHIFT 20 +#define CSR_PWCTL0_DIR1BASE_WIDTH 5 +#define CSR_PWCTL0_DIR1BASE (_ULCAST_(0x1f) << CSR_PWCTL0_DIR1BASE_SHIFT) +#define CSR_PWCTL0_DIR0WIDTH_SHIFT 15 +#define CSR_PWCTL0_DIR0WIDTH_WIDTH 5 +#define CSR_PWCTL0_DIR0WIDTH (_ULCAST_(0x1f) << CSR_PWCTL0_DIR0WIDTH_SHIFT) +#define CSR_PWCTL0_DIR0BASE_SHIFT 10 +#define CSR_PWCTL0_DIR0BASE_WIDTH 5 +#define CSR_PWCTL0_DIR0BASE (_ULCAST_(0x1f) << CSR_PWCTL0_DIR0BASE_SHIFT) +#define CSR_PWCTL0_PTWIDTH_SHIFT 5 +#define CSR_PWCTL0_PTWIDTH_WIDTH 5 +#define CSR_PWCTL0_PTWIDTH (_ULCAST_(0x1f) << CSR_PWCTL0_PTWIDTH_SHIFT) +#define CSR_PWCTL0_PTBASE_SHIFT 0 +#define CSR_PWCTL0_PTBASE_WIDTH 5 +#define CSR_PWCTL0_PTBASE (_ULCAST_(0x1f) << CSR_PWCTL0_PTBASE_SHIFT) + +#define LOONGARCH_CSR_PWCTL1 0x1d /* PWCtl1 */ +#define CSR_PWCTL1_DIR3WIDTH_SHIFT 18 +#define CSR_PWCTL1_DIR3WIDTH_WIDTH 5 +#define CSR_PWCTL1_DIR3WIDTH (_ULCAST_(0x1f) << CSR_PWCTL1_DIR3WIDTH_SHIFT) +#define CSR_PWCTL1_DIR3BASE_SHIFT 12 +#define CSR_PWCTL1_DIR3BASE_WIDTH 5 +#define CSR_PWCTL1_DIR3BASE (_ULCAST_(0x1f) << CSR_PWCTL0_DIR3BASE_SHIFT) +#define CSR_PWCTL1_DIR2WIDTH_SHIFT 6 +#define CSR_PWCTL1_DIR2WIDTH_WIDTH 5 +#define CSR_PWCTL1_DIR2WIDTH (_ULCAST_(0x1f) << CSR_PWCTL1_DIR2WIDTH_SHIFT) +#define CSR_PWCTL1_DIR2BASE_SHIFT 0 +#define CSR_PWCTL1_DIR2BASE_WIDTH 5 +#define CSR_PWCTL1_DIR2BASE (_ULCAST_(0x1f) << CSR_PWCTL0_DIR2BASE_SHIFT) + +#define LOONGARCH_CSR_STLBPGSIZE 0x1e +#define CSR_STLBPGSIZE_PS_WIDTH 6 +#define CSR_STLBPGSIZE_PS (_ULCAST_(0x3f)) + +#define LOONGARCH_CSR_RVACFG 0x1f +#define CSR_RVACFG_RDVA_WIDTH 4 +#define CSR_RVACFG_RDVA (_ULCAST_(0xf)) + +/* Config CSR registers */ +#define LOONGARCH_CSR_CPUID 0x20 /* CPU core id */ +#define CSR_CPUID_COREID_WIDTH 9 +#define CSR_CPUID_COREID _ULCAST_(0x1ff) + +#define LOONGARCH_CSR_PRCFG1 0x21 /* Config1 */ +#define CSR_CONF1_VSMAX_SHIFT 12 +#define CSR_CONF1_VSMAX_WIDTH 3 +#define CSR_CONF1_VSMAX (_ULCAST_(7) << CSR_CONF1_VSMAX_SHIFT) +#define CSR_CONF1_TMRBITS_SHIFT 4 +#define CSR_CONF1_TMRBITS_WIDTH 8 +#define CSR_CONF1_TMRBITS (_ULCAST_(0xff) << CSR_CONF1_TMRBITS_SHIFT) +#define CSR_CONF1_KSNUM_WIDTH 4 +#define CSR_CONF1_KSNUM _ULCAST_(0xf) + +#define LOONGARCH_CSR_PRCFG2 0x22 /* Config2 */ +#define CSR_CONF2_PGMASK_SUPP 0x3ffff000 + +#define LOONGARCH_CSR_PRCFG3 0x23 /* Config3 */ +#define CSR_CONF3_STLBIDX_SHIFT 20 +#define CSR_CONF3_STLBIDX_WIDTH 6 +#define CSR_CONF3_STLBIDX (_ULCAST_(0x3f) << CSR_CONF3_STLBIDX_SHIFT) +#define CSR_CONF3_STLBWAYS_SHIFT 12 +#define CSR_CONF3_STLBWAYS_WIDTH 8 +#define CSR_CONF3_STLBWAYS (_ULCAST_(0xff) << CSR_CONF3_STLBWAYS_SHIFT) +#define CSR_CONF3_MTLBSIZE_SHIFT 4 +#define CSR_CONF3_MTLBSIZE_WIDTH 8 +#define CSR_CONF3_MTLBSIZE (_ULCAST_(0xff) << CSR_CONF3_MTLBSIZE_SHIFT) +#define CSR_CONF3_TLBTYPE_SHIFT 0 +#define CSR_CONF3_TLBTYPE_WIDTH 4 +#define CSR_CONF3_TLBTYPE (_ULCAST_(0xf) << CSR_CONF3_TLBTYPE_SHIFT) + +/* Kscratch registers */ +#define LOONGARCH_CSR_KS0 0x30 +#define LOONGARCH_CSR_KS1 0x31 +#define LOONGARCH_CSR_KS2 0x32 +#define LOONGARCH_CSR_KS3 0x33 +#define LOONGARCH_CSR_KS4 0x34 +#define LOONGARCH_CSR_KS5 0x35 +#define LOONGARCH_CSR_KS6 0x36 +#define LOONGARCH_CSR_KS7 0x37 +#define LOONGARCH_CSR_KS8 0x38 + +/* Exception allocated KS0, KS1 and KS2 statically */ +#define EXCEPTION_KS0 LOONGARCH_CSR_KS0 +#define EXCEPTION_KS1 LOONGARCH_CSR_KS1 +#define EXCEPTION_KS2 LOONGARCH_CSR_KS2 +#define EXC_KSCRATCH_MASK (1 << 0 | 1 << 1 | 1 << 2) + +/* Percpu-data base allocated KS3 statically */ +#define PERCPU_BASE_KS LOONGARCH_CSR_KS3 +#define PERCPU_KSCRATCH_MASK (1 << 3) + +/* KVM allocated KS4 and KS5 statically */ +#define KVM_VCPU_KS LOONGARCH_CSR_KS4 +#define KVM_TEMP_KS LOONGARCH_CSR_KS5 +#define KVM_KSCRATCH_MASK (1 << 4 | 1 << 5) + +/* Timer registers */ +#define LOONGARCH_CSR_TMID 0x40 /* Timer ID */ + +#define LOONGARCH_CSR_TCFG 0x41 /* Timer config */ +#define CSR_TCFG_VAL_SHIFT 2 +#define CSR_TCFG_VAL_WIDTH 48 +#define CSR_TCFG_VAL (_ULCAST_(0x3fffffffffff) << CSR_TCFG_VAL_SHIFT) +#define CSR_TCFG_PERIOD_SHIFT 1 +#define CSR_TCFG_PERIOD (_ULCAST_(0x1) << CSR_TCFG_PERIOD_SHIFT) +#define CSR_TCFG_EN (_ULCAST_(0x1)) + +#define LOONGARCH_CSR_TVAL 0x42 /* Timer value */ + +#define LOONGARCH_CSR_CNTC 0x43 /* Timer offset */ + +#define LOONGARCH_CSR_TINTCLR 0x44 /* Timer interrupt clear */ +#define CSR_TINTCLR_TI_SHIFT 0 +#define CSR_TINTCLR_TI (1 << CSR_TINTCLR_TI_SHIFT) + +/* Guest registers */ +#define LOONGARCH_CSR_GSTAT 0x50 /* Guest status */ +#define CSR_GSTAT_GID_SHIFT 16 +#define CSR_GSTAT_GID_WIDTH 8 +#define CSR_GSTAT_GID (_ULCAST_(0xff) << CSR_GSTAT_GID_SHIFT) +#define CSR_GSTAT_GIDBIT_SHIFT 4 +#define CSR_GSTAT_GIDBIT_WIDTH 6 +#define CSR_GSTAT_GIDBIT (_ULCAST_(0x3f) << CSR_GSTAT_GIDBIT_SHIFT) +#define CSR_GSTAT_PVM_SHIFT 1 +#define CSR_GSTAT_PVM (_ULCAST_(0x1) << CSR_GSTAT_PVM_SHIFT) +#define CSR_GSTAT_VM_SHIFT 0 +#define CSR_GSTAT_VM (_ULCAST_(0x1) << CSR_GSTAT_VM_SHIFT) + +#define LOONGARCH_CSR_GCFG 0x51 /* Guest config */ +#define CSR_GCFG_GPERF_SHIFT 24 +#define CSR_GCFG_GPERF_WIDTH 3 +#define CSR_GCFG_GPERF (_ULCAST_(0x7) << CSR_GCFG_GPERF_SHIFT) +#define CSR_GCFG_GCI_SHIFT 20 +#define CSR_GCFG_GCI_WIDTH 2 +#define CSR_GCFG_GCI (_ULCAST_(0x3) << CSR_GCFG_GCI_SHIFT) +#define CSR_GCFG_GCI_ALL (_ULCAST_(0x0) << CSR_GCFG_GCI_SHIFT) +#define CSR_GCFG_GCI_HIT (_ULCAST_(0x1) << CSR_GCFG_GCI_SHIFT) +#define CSR_GCFG_GCI_SECURE (_ULCAST_(0x2) << CSR_GCFG_GCI_SHIFT) +#define CSR_GCFG_GCIP_SHIFT 16 +#define CSR_GCFG_GCIP (_ULCAST_(0xf) << CSR_GCFG_GCIP_SHIFT) +#define CSR_GCFG_GCIP_ALL (_ULCAST_(0x1) << CSR_GCFG_GCIP_SHIFT) +#define CSR_GCFG_GCIP_HIT (_ULCAST_(0x1) << (CSR_GCFG_GCIP_SHIFT + 1)) +#define CSR_GCFG_GCIP_SECURE (_ULCAST_(0x1) << (CSR_GCFG_GCIP_SHIFT + 2)) +#define CSR_GCFG_TORU_SHIFT 15 +#define CSR_GCFG_TORU (_ULCAST_(0x1) << CSR_GCFG_TORU_SHIFT) +#define CSR_GCFG_TORUP_SHIFT 14 +#define CSR_GCFG_TORUP (_ULCAST_(0x1) << CSR_GCFG_TORUP_SHIFT) +#define CSR_GCFG_TOP_SHIFT 13 +#define CSR_GCFG_TOP (_ULCAST_(0x1) << CSR_GCFG_TOP_SHIFT) +#define CSR_GCFG_TOPP_SHIFT 12 +#define CSR_GCFG_TOPP (_ULCAST_(0x1) << CSR_GCFG_TOPP_SHIFT) +#define CSR_GCFG_TOE_SHIFT 11 +#define CSR_GCFG_TOE (_ULCAST_(0x1) << CSR_GCFG_TOE_SHIFT) +#define CSR_GCFG_TOEP_SHIFT 10 +#define CSR_GCFG_TOEP (_ULCAST_(0x1) << CSR_GCFG_TOEP_SHIFT) +#define CSR_GCFG_TIT_SHIFT 9 +#define CSR_GCFG_TIT (_ULCAST_(0x1) << CSR_GCFG_TIT_SHIFT) +#define CSR_GCFG_TITP_SHIFT 8 +#define CSR_GCFG_TITP (_ULCAST_(0x1) << CSR_GCFG_TITP_SHIFT) +#define CSR_GCFG_SIT_SHIFT 7 +#define CSR_GCFG_SIT (_ULCAST_(0x1) << CSR_GCFG_SIT_SHIFT) +#define CSR_GCFG_SITP_SHIFT 6 +#define CSR_GCFG_SITP (_ULCAST_(0x1) << CSR_GCFG_SITP_SHIFT) +#define CSR_GCFG_MATC_SHITF 4 +#define CSR_GCFG_MATC_WIDTH 2 +#define CSR_GCFG_MATC_MASK (_ULCAST_(0x3) << CSR_GCFG_MATC_SHITF) +#define CSR_GCFG_MATC_GUEST (_ULCAST_(0x0) << CSR_GCFG_MATC_SHITF) +#define CSR_GCFG_MATC_ROOT (_ULCAST_(0x1) << CSR_GCFG_MATC_SHITF) +#define CSR_GCFG_MATC_NEST (_ULCAST_(0x2) << CSR_GCFG_MATC_SHITF) + +#define LOONGARCH_CSR_GINTC 0x52 /* Guest interrupt control */ +#define CSR_GINTC_HC_SHIFT 16 +#define CSR_GINTC_HC_WIDTH 8 +#define CSR_GINTC_HC (_ULCAST_(0xff) << CSR_GINTC_HC_SHIFT) +#define CSR_GINTC_PIP_SHIFT 8 +#define CSR_GINTC_PIP_WIDTH 8 +#define CSR_GINTC_PIP (_ULCAST_(0xff) << CSR_GINTC_PIP_SHIFT) +#define CSR_GINTC_VIP_SHIFT 0 +#define CSR_GINTC_VIP_WIDTH 8 +#define CSR_GINTC_VIP (_ULCAST_(0xff)) + +#define LOONGARCH_CSR_GCNTC 0x53 /* Guest timer offset */ + +/* LLBCTL register */ +#define LOONGARCH_CSR_LLBCTL 0x60 /* LLBit control */ +#define CSR_LLBCTL_ROLLB_SHIFT 0 +#define CSR_LLBCTL_ROLLB (_ULCAST_(1) << CSR_LLBCTL_ROLLB_SHIFT) +#define CSR_LLBCTL_WCLLB_SHIFT 1 +#define CSR_LLBCTL_WCLLB (_ULCAST_(1) << CSR_LLBCTL_WCLLB_SHIFT) +#define CSR_LLBCTL_KLO_SHIFT 2 +#define CSR_LLBCTL_KLO (_ULCAST_(1) << CSR_LLBCTL_KLO_SHIFT) + +/* Implement dependent */ +#define LOONGARCH_CSR_IMPCTL1 0x80 /* Loongson config1 */ +#define CSR_MISPEC_SHIFT 20 +#define CSR_MISPEC_WIDTH 8 +#define CSR_MISPEC (_ULCAST_(0xff) << CSR_MISPEC_SHIFT) +#define CSR_SSEN_SHIFT 18 +#define CSR_SSEN (_ULCAST_(1) << CSR_SSEN_SHIFT) +#define CSR_SCRAND_SHIFT 17 +#define CSR_SCRAND (_ULCAST_(1) << CSR_SCRAND_SHIFT) +#define CSR_LLEXCL_SHIFT 16 +#define CSR_LLEXCL (_ULCAST_(1) << CSR_LLEXCL_SHIFT) +#define CSR_DISVC_SHIFT 15 +#define CSR_DISVC (_ULCAST_(1) << CSR_DISVC_SHIFT) +#define CSR_VCLRU_SHIFT 14 +#define CSR_VCLRU (_ULCAST_(1) << CSR_VCLRU_SHIFT) +#define CSR_DCLRU_SHIFT 13 +#define CSR_DCLRU (_ULCAST_(1) << CSR_DCLRU_SHIFT) +#define CSR_FASTLDQ_SHIFT 12 +#define CSR_FASTLDQ (_ULCAST_(1) << CSR_FASTLDQ_SHIFT) +#define CSR_USERCAC_SHIFT 11 +#define CSR_USERCAC (_ULCAST_(1) << CSR_USERCAC_SHIFT) +#define CSR_ANTI_MISPEC_SHIFT 10 +#define CSR_ANTI_MISPEC (_ULCAST_(1) << CSR_ANTI_MISPEC_SHIFT) +#define CSR_AUTO_FLUSHSFB_SHIFT 9 +#define CSR_AUTO_FLUSHSFB (_ULCAST_(1) << CSR_AUTO_FLUSHSFB_SHIFT) +#define CSR_STFILL_SHIFT 8 +#define CSR_STFILL (_ULCAST_(1) << CSR_STFILL_SHIFT) +#define CSR_LIFEP_SHIFT 7 +#define CSR_LIFEP (_ULCAST_(1) << CSR_LIFEP_SHIFT) +#define CSR_LLSYNC_SHIFT 6 +#define CSR_LLSYNC (_ULCAST_(1) << CSR_LLSYNC_SHIFT) +#define CSR_BRBTDIS_SHIFT 5 +#define CSR_BRBTDIS (_ULCAST_(1) << CSR_BRBTDIS_SHIFT) +#define CSR_RASDIS_SHIFT 4 +#define CSR_RASDIS (_ULCAST_(1) << CSR_RASDIS_SHIFT) +#define CSR_STPRE_SHIFT 2 +#define CSR_STPRE_WIDTH 2 +#define CSR_STPRE (_ULCAST_(3) << CSR_STPRE_SHIFT) +#define CSR_INSTPRE_SHIFT 1 +#define CSR_INSTPRE (_ULCAST_(1) << CSR_INSTPRE_SHIFT) +#define CSR_DATAPRE_SHIFT 0 +#define CSR_DATAPRE (_ULCAST_(1) << CSR_DATAPRE_SHIFT) + +#define LOONGARCH_CSR_IMPCTL2 0x81 /* Loongson config2 */ +#define CSR_FLUSH_MTLB_SHIFT 0 +#define CSR_FLUSH_MTLB (_ULCAST_(1) << CSR_FLUSH_MTLB_SHIFT) +#define CSR_FLUSH_STLB_SHIFT 1 +#define CSR_FLUSH_STLB (_ULCAST_(1) << CSR_FLUSH_STLB_SHIFT) +#define CSR_FLUSH_DTLB_SHIFT 2 +#define CSR_FLUSH_DTLB (_ULCAST_(1) << CSR_FLUSH_DTLB_SHIFT) +#define CSR_FLUSH_ITLB_SHIFT 3 +#define CSR_FLUSH_ITLB (_ULCAST_(1) << CSR_FLUSH_ITLB_SHIFT) +#define CSR_FLUSH_BTAC_SHIFT 4 +#define CSR_FLUSH_BTAC (_ULCAST_(1) << CSR_FLUSH_BTAC_SHIFT) + +#define LOONGARCH_CSR_GNMI 0x82 + +/* TLB Refill registers */ +#define LOONGARCH_CSR_TLBRENTRY 0x88 /* TLB refill exception entry */ +#define LOONGARCH_CSR_TLBRBADV 0x89 /* TLB refill badvaddr */ +#define LOONGARCH_CSR_TLBRERA 0x8a /* TLB refill ERA */ +#define LOONGARCH_CSR_TLBRSAVE 0x8b /* KScratch for TLB refill exception */ +#define LOONGARCH_CSR_TLBRELO0 0x8c /* TLB refill entrylo0 */ +#define LOONGARCH_CSR_TLBRELO1 0x8d /* TLB refill entrylo1 */ +#define LOONGARCH_CSR_TLBREHI 0x8e /* TLB refill entryhi */ +#define CSR_TLBREHI_PS_SHIFT 0 +#define CSR_TLBREHI_PS (_ULCAST_(0x3f) << CSR_TLBREHI_PS_SHIFT) +#define LOONGARCH_CSR_TLBRPRMD 0x8f /* TLB refill mode info */ + +/* Machine Error registers */ +#define LOONGARCH_CSR_MERRCTL 0x90 /* MERRCTL */ +#define LOONGARCH_CSR_MERRINFO1 0x91 /* MError info1 */ +#define LOONGARCH_CSR_MERRINFO2 0x92 /* MError info2 */ +#define LOONGARCH_CSR_MERRENTRY 0x93 /* MError exception entry */ +#define LOONGARCH_CSR_MERRERA 0x94 /* MError exception ERA */ +#define LOONGARCH_CSR_MERRSAVE 0x95 /* KScratch for machine error exception */ + +#define LOONGARCH_CSR_CTAG 0x98 /* TagLo + TagHi */ + +#define LOONGARCH_CSR_PRID 0xc0 + +/* Shadow MCSR : 0xc0 ~ 0xff */ +#define LOONGARCH_CSR_MCSR0 0xc0 /* CPUCFG0 and CPUCFG1 */ +#define MCSR0_INT_IMPL_SHIFT 58 +#define MCSR0_INT_IMPL 0 +#define MCSR0_IOCSR_BRD_SHIFT 57 +#define MCSR0_IOCSR_BRD (_ULCAST_(1) << MCSR0_IOCSR_BRD_SHIFT) +#define MCSR0_HUGEPG_SHIFT 56 +#define MCSR0_HUGEPG (_ULCAST_(1) << MCSR0_HUGEPG_SHIFT) +#define MCSR0_RPLMTLB_SHIFT 55 +#define MCSR0_RPLMTLB (_ULCAST_(1) << MCSR0_RPLMTLB_SHIFT) +#define MCSR0_EP_SHIFT 54 +#define MCSR0_EP (_ULCAST_(1) << MCSR0_EP_SHIFT) +#define MCSR0_RI_SHIFT 53 +#define MCSR0_RI (_ULCAST_(1) << MCSR0_RI_SHIFT) +#define MCSR0_UAL_SHIFT 52 +#define MCSR0_UAL (_ULCAST_(1) << MCSR0_UAL_SHIFT) +#define MCSR0_VABIT_SHIFT 44 +#define MCSR0_VABIT_WIDTH 8 +#define MCSR0_VABIT (_ULCAST_(0xff) << MCSR0_VABIT_SHIFT) +#define VABIT_DEFAULT 0x2f +#define MCSR0_PABIT_SHIFT 36 +#define MCSR0_PABIT_WIDTH 8 +#define MCSR0_PABIT (_ULCAST_(0xff) << MCSR0_PABIT_SHIFT) +#define PABIT_DEFAULT 0x2f +#define MCSR0_IOCSR_SHIFT 35 +#define MCSR0_IOCSR (_ULCAST_(1) << MCSR0_IOCSR_SHIFT) +#define MCSR0_PAGING_SHIFT 34 +#define MCSR0_PAGING (_ULCAST_(1) << MCSR0_PAGING_SHIFT) +#define MCSR0_GR64_SHIFT 33 +#define MCSR0_GR64 (_ULCAST_(1) << MCSR0_GR64_SHIFT) +#define GR64_DEFAULT 1 +#define MCSR0_GR32_SHIFT 32 +#define MCSR0_GR32 (_ULCAST_(1) << MCSR0_GR32_SHIFT) +#define GR32_DEFAULT 0 +#define MCSR0_PRID_WIDTH 32 +#define MCSR0_PRID 0x14C010 + +#define LOONGARCH_CSR_MCSR1 0xc1 /* CPUCFG2 and CPUCFG3 */ +#define MCSR1_HPFOLD_SHIFT 43 +#define MCSR1_HPFOLD (_ULCAST_(1) << MCSR1_HPFOLD_SHIFT) +#define MCSR1_SPW_LVL_SHIFT 40 +#define MCSR1_SPW_LVL_WIDTH 3 +#define MCSR1_SPW_LVL (_ULCAST_(7) << MCSR1_SPW_LVL_SHIFT) +#define MCSR1_ICACHET_SHIFT 39 +#define MCSR1_ICACHET (_ULCAST_(1) << MCSR1_ICACHET_SHIFT) +#define MCSR1_ITLBT_SHIFT 38 +#define MCSR1_ITLBT (_ULCAST_(1) << MCSR1_ITLBT_SHIFT) +#define MCSR1_LLDBAR_SHIFT 37 +#define MCSR1_LLDBAR (_ULCAST_(1) << MCSR1_LLDBAR_SHIFT) +#define MCSR1_SCDLY_SHIFT 36 +#define MCSR1_SCDLY (_ULCAST_(1) << MCSR1_SCDLY_SHIFT) +#define MCSR1_LLEXC_SHIFT 35 +#define MCSR1_LLEXC (_ULCAST_(1) << MCSR1_LLEXC_SHIFT) +#define MCSR1_UCACC_SHIFT 34 +#define MCSR1_UCACC (_ULCAST_(1) << MCSR1_UCACC_SHIFT) +#define MCSR1_SFB_SHIFT 33 +#define MCSR1_SFB (_ULCAST_(1) << MCSR1_SFB_SHIFT) +#define MCSR1_CCDMA_SHIFT 32 +#define MCSR1_CCDMA (_ULCAST_(1) << MCSR1_CCDMA_SHIFT) +#define MCSR1_LAMO_SHIFT 22 +#define MCSR1_LAMO (_ULCAST_(1) << MCSR1_LAMO_SHIFT) +#define MCSR1_LSPW_SHIFT 21 +#define MCSR1_LSPW (_ULCAST_(1) << MCSR1_LSPW_SHIFT) +#define MCSR1_MIPSBT_SHIFT 20 +#define MCSR1_MIPSBT (_ULCAST_(1) << MCSR1_MIPSBT_SHIFT) +#define MCSR1_ARMBT_SHIFT 19 +#define MCSR1_ARMBT (_ULCAST_(1) << MCSR1_ARMBT_SHIFT) +#define MCSR1_X86BT_SHIFT 18 +#define MCSR1_X86BT (_ULCAST_(1) << MCSR1_X86BT_SHIFT) +#define MCSR1_LLFTPVERS_SHIFT 15 +#define MCSR1_LLFTPVERS_WIDTH 3 +#define MCSR1_LLFTPVERS (_ULCAST_(7) << MCSR1_LLFTPVERS_SHIFT) +#define MCSR1_LLFTP_SHIFT 14 +#define MCSR1_LLFTP (_ULCAST_(1) << MCSR1_LLFTP_SHIFT) +#define MCSR1_VZVERS_SHIFT 11 +#define MCSR1_VZVERS_WIDTH 3 +#define MCSR1_VZVERS (_ULCAST_(7) << MCSR1_VZVERS_SHIFT) +#define MCSR1_VZ_SHIFT 10 +#define MCSR1_VZ (_ULCAST_(1) << MCSR1_VZ_SHIFT) +#define MCSR1_CRYPTO_SHIFT 9 +#define MCSR1_CRYPTO (_ULCAST_(1) << MCSR1_CRYPTO_SHIFT) +#define MCSR1_COMPLEX_SHIFT 8 +#define MCSR1_COMPLEX (_ULCAST_(1) << MCSR1_COMPLEX_SHIFT) +#define MCSR1_LASX_SHIFT 7 +#define MCSR1_LASX (_ULCAST_(1) << MCSR1_LASX_SHIFT) +#define MCSR1_LSX_SHIFT 6 +#define MCSR1_LSX (_ULCAST_(1) << MCSR1_LSX_SHIFT) +#define MCSR1_FPVERS_SHIFT 3 +#define MCSR1_FPVERS_WIDTH 3 +#define MCSR1_FPVERS (_ULCAST_(7) << MCSR1_FPVERS_SHIFT) +#define MCSR1_FPDP_SHIFT 2 +#define MCSR1_FPDP (_ULCAST_(1) << MCSR1_FPDP_SHIFT) +#define MCSR1_FPSP_SHIFT 1 +#define MCSR1_FPSP (_ULCAST_(1) << MCSR1_FPSP_SHIFT) +#define MCSR1_FP_SHIFT 0 +#define MCSR1_FP (_ULCAST_(1) << MCSR1_FP_SHIFT) + +#define LOONGARCH_CSR_MCSR2 0xc2 /* CPUCFG4 and CPUCFG5 */ +#define MCSR2_CCDIV_SHIFT 48 +#define MCSR2_CCDIV_WIDTH 16 +#define MCSR2_CCDIV (_ULCAST_(0xffff) << MCSR2_CCDIV_SHIFT) +#define MCSR2_CCMUL_SHIFT 32 +#define MCSR2_CCMUL_WIDTH 16 +#define MCSR2_CCMUL (_ULCAST_(0xffff) << MCSR2_CCMUL_SHIFT) +#define MCSR2_CCFREQ_WIDTH 32 +#define MCSR2_CCFREQ (_ULCAST_(0xffffffff)) +#define CCFREQ_DEFAULT 0x5f5e100 /* 100MHz */ + +#define LOONGARCH_CSR_MCSR3 0xc3 /* CPUCFG6 */ +#define MCSR3_UPM_SHIFT 14 +#define MCSR3_UPM (_ULCAST_(1) << MCSR3_UPM_SHIFT) +#define MCSR3_PMBITS_SHIFT 8 +#define MCSR3_PMBITS_WIDTH 6 +#define MCSR3_PMBITS (_ULCAST_(0x3f) << MCSR3_PMBITS_SHIFT) +#define PMBITS_DEFAULT 0x40 +#define MCSR3_PMNUM_SHIFT 4 +#define MCSR3_PMNUM_WIDTH 4 +#define MCSR3_PMNUM (_ULCAST_(0xf) << MCSR3_PMNUM_SHIFT) +#define MCSR3_PAMVER_SHIFT 1 +#define MCSR3_PAMVER_WIDTH 3 +#define MCSR3_PAMVER (_ULCAST_(0x7) << MCSR3_PAMVER_SHIFT) +#define MCSR3_PMP_SHIFT 0 +#define MCSR3_PMP (_ULCAST_(1) << MCSR3_PMP_SHIFT) + +#define LOONGARCH_CSR_MCSR8 0xc8 /* CPUCFG16 and CPUCFG17 */ +#define MCSR8_L1I_SIZE_SHIFT 56 +#define MCSR8_L1I_SIZE_WIDTH 7 +#define MCSR8_L1I_SIZE (_ULCAST_(0x7f) << MCSR8_L1I_SIZE_SHIFT) +#define MCSR8_L1I_IDX_SHIFT 48 +#define MCSR8_L1I_IDX_WIDTH 8 +#define MCSR8_L1I_IDX (_ULCAST_(0xff) << MCSR8_L1I_IDX_SHIFT) +#define MCSR8_L1I_WAY_SHIFT 32 +#define MCSR8_L1I_WAY_WIDTH 16 +#define MCSR8_L1I_WAY (_ULCAST_(0xffff) << MCSR8_L1I_WAY_SHIFT) +#define MCSR8_L3DINCL_SHIFT 16 +#define MCSR8_L3DINCL (_ULCAST_(1) << MCSR8_L3DINCL_SHIFT) +#define MCSR8_L3DPRIV_SHIFT 15 +#define MCSR8_L3DPRIV (_ULCAST_(1) << MCSR8_L3DPRIV_SHIFT) +#define MCSR8_L3DPRE_SHIFT 14 +#define MCSR8_L3DPRE (_ULCAST_(1) << MCSR8_L3DPRE_SHIFT) +#define MCSR8_L3IUINCL_SHIFT 13 +#define MCSR8_L3IUINCL (_ULCAST_(1) << MCSR8_L3IUINCL_SHIFT) +#define MCSR8_L3IUPRIV_SHIFT 12 +#define MCSR8_L3IUPRIV (_ULCAST_(1) << MCSR8_L3IUPRIV_SHIFT) +#define MCSR8_L3IUUNIFY_SHIFT 11 +#define MCSR8_L3IUUNIFY (_ULCAST_(1) << MCSR8_L3IUUNIFY_SHIFT) +#define MCSR8_L3IUPRE_SHIFT 10 +#define MCSR8_L3IUPRE (_ULCAST_(1) << MCSR8_L3IUPRE_SHIFT) +#define MCSR8_L2DINCL_SHIFT 9 +#define MCSR8_L2DINCL (_ULCAST_(1) << MCSR8_L2DINCL_SHIFT) +#define MCSR8_L2DPRIV_SHIFT 8 +#define MCSR8_L2DPRIV (_ULCAST_(1) << MCSR8_L2DPRIV_SHIFT) +#define MCSR8_L2DPRE_SHIFT 7 +#define MCSR8_L2DPRE (_ULCAST_(1) << MCSR8_L2DPRE_SHIFT) +#define MCSR8_L2IUINCL_SHIFT 6 +#define MCSR8_L2IUINCL (_ULCAST_(1) << MCSR8_L2IUINCL_SHIFT) +#define MCSR8_L2IUPRIV_SHIFT 5 +#define MCSR8_L2IUPRIV (_ULCAST_(1) << MCSR8_L2IUPRIV_SHIFT) +#define MCSR8_L2IUUNIFY_SHIFT 4 +#define MCSR8_L2IUUNIFY (_ULCAST_(1) << MCSR8_L2IUUNIFY_SHIFT) +#define MCSR8_L2IUPRE_SHIFT 3 +#define MCSR8_L2IUPRE (_ULCAST_(1) << MCSR8_L2IUPRE_SHIFT) +#define MCSR8_L1DPRE_SHIFT 2 +#define MCSR8_L1DPRE (_ULCAST_(1) << MCSR8_L1DPRE_SHIFT) +#define MCSR8_L1IUUNIFY_SHIFT 1 +#define MCSR8_L1IUUNIFY (_ULCAST_(1) << MCSR8_L1IUUNIFY_SHIFT) +#define MCSR8_L1IUPRE_SHIFT 0 +#define MCSR8_L1IUPRE (_ULCAST_(1) << MCSR8_L1IUPRE_SHIFT) + +#define LOONGARCH_CSR_MCSR9 0xc9 /* CPUCFG18 and CPUCFG19 */ +#define MCSR9_L2U_SIZE_SHIFT 56 +#define MCSR9_L2U_SIZE_WIDTH 7 +#define MCSR9_L2U_SIZE (_ULCAST_(0x7f) << MCSR9_L2U_SIZE_SHIFT) +#define MCSR9_L2U_IDX_SHIFT 48 +#define MCSR9_L2U_IDX_WIDTH 8 +#define MCSR9_L2U_IDX (_ULCAST_(0xff) << MCSR9_IDX_LOG_SHIFT) +#define MCSR9_L2U_WAY_SHIFT 32 +#define MCSR9_L2U_WAY_WIDTH 16 +#define MCSR9_L2U_WAY (_ULCAST_(0xffff) << MCSR9_L2U_WAY_SHIFT) +#define MCSR9_L1D_SIZE_SHIFT 24 +#define MCSR9_L1D_SIZE_WIDTH 7 +#define MCSR9_L1D_SIZE (_ULCAST_(0x7f) << MCSR9_L1D_SIZE_SHIFT) +#define MCSR9_L1D_IDX_SHIFT 16 +#define MCSR9_L1D_IDX_WIDTH 8 +#define MCSR9_L1D_IDX (_ULCAST_(0xff) << MCSR9_L1D_IDX_SHIFT) +#define MCSR9_L1D_WAY_SHIFT 0 +#define MCSR9_L1D_WAY_WIDTH 16 +#define MCSR9_L1D_WAY (_ULCAST_(0xffff) << MCSR9_L1D_WAY_SHIFT) + +#define LOONGARCH_CSR_MCSR10 0xca /* CPUCFG20 */ +#define MCSR10_L3U_SIZE_SHIFT 24 +#define MCSR10_L3U_SIZE_WIDTH 7 +#define MCSR10_L3U_SIZE (_ULCAST_(0x7f) << MCSR10_L3U_SIZE_SHIFT) +#define MCSR10_L3U_IDX_SHIFT 16 +#define MCSR10_L3U_IDX_WIDTH 8 +#define MCSR10_L3U_IDX (_ULCAST_(0xff) << MCSR10_L3U_IDX_SHIFT) +#define MCSR10_L3U_WAY_SHIFT 0 +#define MCSR10_L3U_WAY_WIDTH 16 +#define MCSR10_L3U_WAY (_ULCAST_(0xffff) << MCSR10_L3U_WAY_SHIFT) + +#define LOONGARCH_CSR_MCSR24 0xf0 /* cpucfg48 */ +#define MCSR24_RAMCG_SHIFT 3 +#define MCSR24_RAMCG (_ULCAST_(1) << MCSR24_RAMCG_SHIFT) +#define MCSR24_VFPUCG_SHIFT 2 +#define MCSR24_VFPUCG (_ULCAST_(1) << MCSR24_VFPUCG_SHIFT) +#define MCSR24_NAPEN_SHIFT 1 +#define MCSR24_NAPEN (_ULCAST_(1) << MCSR24_NAPEN_SHIFT) +#define MCSR24_MCSRLOCK_SHIFT 0 +#define MCSR24_MCSRLOCK (_ULCAST_(1) << MCSR24_MCSRLOCK_SHIFT) + +/* Uncached accelerate windows registers */ +#define LOONGARCH_CSR_UCAWIN 0x100 +#define LOONGARCH_CSR_UCAWIN0_LO 0x102 +#define LOONGARCH_CSR_UCAWIN0_HI 0x103 +#define LOONGARCH_CSR_UCAWIN1_LO 0x104 +#define LOONGARCH_CSR_UCAWIN1_HI 0x105 +#define LOONGARCH_CSR_UCAWIN2_LO 0x106 +#define LOONGARCH_CSR_UCAWIN2_HI 0x107 +#define LOONGARCH_CSR_UCAWIN3_LO 0x108 +#define LOONGARCH_CSR_UCAWIN3_HI 0x109 + +/* Direct Map windows registers */ +#define LOONGARCH_CSR_DMWIN0 0x180 /* 64 direct map win0: MEM & IF */ +#define LOONGARCH_CSR_DMWIN1 0x181 /* 64 direct map win1: MEM & IF */ +#define LOONGARCH_CSR_DMWIN2 0x182 /* 64 direct map win2: MEM */ +#define LOONGARCH_CSR_DMWIN3 0x183 /* 64 direct map win3: MEM */ + +/* Direct Map window 0/1 */ +#define CSR_DMW0_PLV0 _CONST64_(1 << 0) +#define CSR_DMW0_VSEG _CONST64_(0x8000) +#define CSR_DMW0_BASE (CSR_DMW0_VSEG << DMW_PABITS) +#define CSR_DMW0_INIT (CSR_DMW0_BASE | CSR_DMW0_PLV0) + +#define CSR_DMW1_PLV0 _CONST64_(1 << 0) +#define CSR_DMW1_MAT _CONST64_(1 << 4) +#define CSR_DMW1_VSEG _CONST64_(0x9000) +#define CSR_DMW1_BASE (CSR_DMW1_VSEG << DMW_PABITS) +#define CSR_DMW1_INIT (CSR_DMW1_BASE | CSR_DMW1_MAT | CSR_DMW1_PLV0) + +/* Performance Counter registers */ +#define LOONGARCH_CSR_PERFCTRL0 0x200 /* 32 perf event 0 config */ +#define LOONGARCH_CSR_PERFCNTR0 0x201 /* 64 perf event 0 count value */ +#define LOONGARCH_CSR_PERFCTRL1 0x202 /* 32 perf event 1 config */ +#define LOONGARCH_CSR_PERFCNTR1 0x203 /* 64 perf event 1 count value */ +#define LOONGARCH_CSR_PERFCTRL2 0x204 /* 32 perf event 2 config */ +#define LOONGARCH_CSR_PERFCNTR2 0x205 /* 64 perf event 2 count value */ +#define LOONGARCH_CSR_PERFCTRL3 0x206 /* 32 perf event 3 config */ +#define LOONGARCH_CSR_PERFCNTR3 0x207 /* 64 perf event 3 count value */ +#define CSR_PERFCTRL_PLV0 (_ULCAST_(1) << 16) +#define CSR_PERFCTRL_PLV1 (_ULCAST_(1) << 17) +#define CSR_PERFCTRL_PLV2 (_ULCAST_(1) << 18) +#define CSR_PERFCTRL_PLV3 (_ULCAST_(1) << 19) +#define CSR_PERFCTRL_IE (_ULCAST_(1) << 20) +#define CSR_PERFCTRL_EVENT 0x3ff + +/* Debug registers */ +#define LOONGARCH_CSR_MWPC 0x300 /* data breakpoint config */ +#define LOONGARCH_CSR_MWPS 0x301 /* data breakpoint status */ + +#define LOONGARCH_CSR_DB0ADDR 0x310 /* data breakpoint 0 address */ +#define LOONGARCH_CSR_DB0MASK 0x311 /* data breakpoint 0 mask */ +#define LOONGARCH_CSR_DB0CTL 0x312 /* data breakpoint 0 control */ +#define LOONGARCH_CSR_DB0ASID 0x313 /* data breakpoint 0 asid */ + +#define LOONGARCH_CSR_DB1ADDR 0x318 /* data breakpoint 1 address */ +#define LOONGARCH_CSR_DB1MASK 0x319 /* data breakpoint 1 mask */ +#define LOONGARCH_CSR_DB1CTL 0x31a /* data breakpoint 1 control */ +#define LOONGARCH_CSR_DB1ASID 0x31b /* data breakpoint 1 asid */ + +#define LOONGARCH_CSR_DB2ADDR 0x320 /* data breakpoint 2 address */ +#define LOONGARCH_CSR_DB2MASK 0x321 /* data breakpoint 2 mask */ +#define LOONGARCH_CSR_DB2CTL 0x322 /* data breakpoint 2 control */ +#define LOONGARCH_CSR_DB2ASID 0x323 /* data breakpoint 2 asid */ + +#define LOONGARCH_CSR_DB3ADDR 0x328 /* data breakpoint 3 address */ +#define LOONGARCH_CSR_DB3MASK 0x329 /* data breakpoint 3 mask */ +#define LOONGARCH_CSR_DB3CTL 0x32a /* data breakpoint 3 control */ +#define LOONGARCH_CSR_DB3ASID 0x32b /* data breakpoint 3 asid */ + +#define LOONGARCH_CSR_DB4ADDR 0x330 /* data breakpoint 4 address */ +#define LOONGARCH_CSR_DB4MASK 0x331 /* data breakpoint 4 maks */ +#define LOONGARCH_CSR_DB4CTL 0x332 /* data breakpoint 4 control */ +#define LOONGARCH_CSR_DB4ASID 0x333 /* data breakpoint 4 asid */ + +#define LOONGARCH_CSR_DB5ADDR 0x338 /* data breakpoint 5 address */ +#define LOONGARCH_CSR_DB5MASK 0x339 /* data breakpoint 5 mask */ +#define LOONGARCH_CSR_DB5CTL 0x33a /* data breakpoint 5 control */ +#define LOONGARCH_CSR_DB5ASID 0x33b /* data breakpoint 5 asid */ + +#define LOONGARCH_CSR_DB6ADDR 0x340 /* data breakpoint 6 address */ +#define LOONGARCH_CSR_DB6MASK 0x341 /* data breakpoint 6 mask */ +#define LOONGARCH_CSR_DB6CTL 0x342 /* data breakpoint 6 control */ +#define LOONGARCH_CSR_DB6ASID 0x343 /* data breakpoint 6 asid */ + +#define LOONGARCH_CSR_DB7ADDR 0x348 /* data breakpoint 7 address */ +#define LOONGARCH_CSR_DB7MASK 0x349 /* data breakpoint 7 mask */ +#define LOONGARCH_CSR_DB7CTL 0x34a /* data breakpoint 7 control */ +#define LOONGARCH_CSR_DB7ASID 0x34b /* data breakpoint 7 asid */ + +#define LOONGARCH_CSR_FWPC 0x380 /* instruction breakpoint config */ +#define LOONGARCH_CSR_FWPS 0x381 /* instruction breakpoint status */ + +#define LOONGARCH_CSR_IB0ADDR 0x390 /* inst breakpoint 0 address */ +#define LOONGARCH_CSR_IB0MASK 0x391 /* inst breakpoint 0 mask */ +#define LOONGARCH_CSR_IB0CTL 0x392 /* inst breakpoint 0 control */ +#define LOONGARCH_CSR_IB0ASID 0x393 /* inst breakpoint 0 asid */ + +#define LOONGARCH_CSR_IB1ADDR 0x398 /* inst breakpoint 1 address */ +#define LOONGARCH_CSR_IB1MASK 0x399 /* inst breakpoint 1 mask */ +#define LOONGARCH_CSR_IB1CTL 0x39a /* inst breakpoint 1 control */ +#define LOONGARCH_CSR_IB1ASID 0x39b /* inst breakpoint 1 asid */ + +#define LOONGARCH_CSR_IB2ADDR 0x3a0 /* inst breakpoint 2 address */ +#define LOONGARCH_CSR_IB2MASK 0x3a1 /* inst breakpoint 2 mask */ +#define LOONGARCH_CSR_IB2CTL 0x3a2 /* inst breakpoint 2 control */ +#define LOONGARCH_CSR_IB2ASID 0x3a3 /* inst breakpoint 2 asid */ + +#define LOONGARCH_CSR_IB3ADDR 0x3a8 /* inst breakpoint 3 address */ +#define LOONGARCH_CSR_IB3MASK 0x3a9 /* breakpoint 3 mask */ +#define LOONGARCH_CSR_IB3CTL 0x3aa /* inst breakpoint 3 control */ +#define LOONGARCH_CSR_IB3ASID 0x3ab /* inst breakpoint 3 asid */ + +#define LOONGARCH_CSR_IB4ADDR 0x3b0 /* inst breakpoint 4 address */ +#define LOONGARCH_CSR_IB4MASK 0x3b1 /* inst breakpoint 4 mask */ +#define LOONGARCH_CSR_IB4CTL 0x3b2 /* inst breakpoint 4 control */ +#define LOONGARCH_CSR_IB4ASID 0x3b3 /* inst breakpoint 4 asid */ + +#define LOONGARCH_CSR_IB5ADDR 0x3b8 /* inst breakpoint 5 address */ +#define LOONGARCH_CSR_IB5MASK 0x3b9 /* inst breakpoint 5 mask */ +#define LOONGARCH_CSR_IB5CTL 0x3ba /* inst breakpoint 5 control */ +#define LOONGARCH_CSR_IB5ASID 0x3bb /* inst breakpoint 5 asid */ + +#define LOONGARCH_CSR_IB6ADDR 0x3c0 /* inst breakpoint 6 address */ +#define LOONGARCH_CSR_IB6MASK 0x3c1 /* inst breakpoint 6 mask */ +#define LOONGARCH_CSR_IB6CTL 0x3c2 /* inst breakpoint 6 control */ +#define LOONGARCH_CSR_IB6ASID 0x3c3 /* inst breakpoint 6 asid */ + +#define LOONGARCH_CSR_IB7ADDR 0x3c8 /* inst breakpoint 7 address */ +#define LOONGARCH_CSR_IB7MASK 0x3c9 /* inst breakpoint 7 mask */ +#define LOONGARCH_CSR_IB7CTL 0x3ca /* inst breakpoint 7 control */ +#define LOONGARCH_CSR_IB7ASID 0x3cb /* inst breakpoint 7 asid */ + +#define LOONGARCH_CSR_DEBUG 0x500 /* debug config */ +#define LOONGARCH_CSR_DERA 0x501 /* debug era */ +#define LOONGARCH_CSR_DESAVE 0x502 /* debug save */ + +/* + * CSR_ECFG IM + */ +#define ECFG0_IM 0x00001fff +#define ECFGB_SIP0 0 +#define ECFGF_SIP0 (_ULCAST_(1) << ECFGB_SIP0) +#define ECFGB_SIP1 1 +#define ECFGF_SIP1 (_ULCAST_(1) << ECFGB_SIP1) +#define ECFGB_IP0 2 +#define ECFGF_IP0 (_ULCAST_(1) << ECFGB_IP0) +#define ECFGB_IP1 3 +#define ECFGF_IP1 (_ULCAST_(1) << ECFGB_IP1) +#define ECFGB_IP2 4 +#define ECFGF_IP2 (_ULCAST_(1) << ECFGB_IP2) +#define ECFGB_IP3 5 +#define ECFGF_IP3 (_ULCAST_(1) << ECFGB_IP3) +#define ECFGB_IP4 6 +#define ECFGF_IP4 (_ULCAST_(1) << ECFGB_IP4) +#define ECFGB_IP5 7 +#define ECFGF_IP5 (_ULCAST_(1) << ECFGB_IP5) +#define ECFGB_IP6 8 +#define ECFGF_IP6 (_ULCAST_(1) << ECFGB_IP6) +#define ECFGB_IP7 9 +#define ECFGF_IP7 (_ULCAST_(1) << ECFGB_IP7) +#define ECFGB_PMC 10 +#define ECFGF_PMC (_ULCAST_(1) << ECFGB_PMC) +#define ECFGB_TIMER 11 +#define ECFGF_TIMER (_ULCAST_(1) << ECFGB_TIMER) +#define ECFGB_IPI 12 +#define ECFGF_IPI (_ULCAST_(1) << ECFGB_IPI) +#define ECFGF(hwirq) (_ULCAST_(1) << hwirq) + +#define ESTATF_IP 0x00001fff + +#define LOONGARCH_IOCSR_FEATURES 0x8 +#define IOCSRF_TEMP BIT_ULL(0) +#define IOCSRF_NODECNT BIT_ULL(1) +#define IOCSRF_MSI BIT_ULL(2) +#define IOCSRF_EXTIOI BIT_ULL(3) +#define IOCSRF_CSRIPI BIT_ULL(4) +#define IOCSRF_FREQCSR BIT_ULL(5) +#define IOCSRF_FREQSCALE BIT_ULL(6) +#define IOCSRF_DVFSV1 BIT_ULL(7) +#define IOCSRF_EIODECODE BIT_ULL(9) +#define IOCSRF_FLATMODE BIT_ULL(10) +#define IOCSRF_VM BIT_ULL(11) + +#define LOONGARCH_IOCSR_VENDOR 0x10 + +#define LOONGARCH_IOCSR_CPUNAME 0x20 + +#define LOONGARCH_IOCSR_NODECNT 0x408 + +#define LOONGARCH_IOCSR_MISC_FUNC 0x420 +#define IOCSR_MISC_FUNC_TIMER_RESET BIT_ULL(21) +#define IOCSR_MISC_FUNC_EXT_IOI_EN BIT_ULL(48) + +#define LOONGARCH_IOCSR_CPUTEMP 0x428 + +/* PerCore CSR, only accessible by local cores */ +#define LOONGARCH_IOCSR_IPI_STATUS 0x1000 +#define LOONGARCH_IOCSR_IPI_EN 0x1004 +#define LOONGARCH_IOCSR_IPI_SET 0x1008 +#define LOONGARCH_IOCSR_IPI_CLEAR 0x100c +#define LOONGARCH_IOCSR_MBUF0 0x1020 +#define LOONGARCH_IOCSR_MBUF1 0x1028 +#define LOONGARCH_IOCSR_MBUF2 0x1030 +#define LOONGARCH_IOCSR_MBUF3 0x1038 + +#define LOONGARCH_IOCSR_IPI_SEND 0x1040 +#define IOCSR_IPI_SEND_IP_SHIFT 0 +#define IOCSR_IPI_SEND_CPU_SHIFT 16 +#define IOCSR_IPI_SEND_BLOCKING BIT(31) + +#define LOONGARCH_IOCSR_MBUF_SEND 0x1048 +#define IOCSR_MBUF_SEND_BLOCKING BIT_ULL(31) +#define IOCSR_MBUF_SEND_BOX_SHIFT 2 +#define IOCSR_MBUF_SEND_BOX_LO(box) (box << 1) +#define IOCSR_MBUF_SEND_BOX_HI(box) ((box << 1) + 1) +#define IOCSR_MBUF_SEND_CPU_SHIFT 16 +#define IOCSR_MBUF_SEND_BUF_SHIFT 32 +#define IOCSR_MBUF_SEND_H32_MASK 0xFFFFFFFF00000000ULL + +#define LOONGARCH_IOCSR_ANY_SEND 0x1158 +#define IOCSR_ANY_SEND_BLOCKING BIT_ULL(31) +#define IOCSR_ANY_SEND_CPU_SHIFT 16 +#define IOCSR_ANY_SEND_MASK_SHIFT 27 +#define IOCSR_ANY_SEND_BUF_SHIFT 32 +#define IOCSR_ANY_SEND_H32_MASK 0xFFFFFFFF00000000ULL + +/* Register offset and bit definition for CSR access */ +#define LOONGARCH_IOCSR_TIMER_CFG 0x1060 +#define LOONGARCH_IOCSR_TIMER_TICK 0x1070 +#define IOCSR_TIMER_CFG_RESERVED (_ULCAST_(1) << 63) +#define IOCSR_TIMER_CFG_PERIODIC (_ULCAST_(1) << 62) +#define IOCSR_TIMER_CFG_EN (_ULCAST_(1) << 61) +#define IOCSR_TIMER_MASK 0x0ffffffffffffULL +#define IOCSR_TIMER_INITVAL_RST (_ULCAST_(0xffff) << 48) + +#define LOONGARCH_IOCSR_EXTIOI_NODEMAP_BASE 0x14a0 +#define LOONGARCH_IOCSR_EXTIOI_IPMAP_BASE 0x14c0 +#define LOONGARCH_IOCSR_EXTIOI_EN_BASE 0x1600 +#define LOONGARCH_IOCSR_EXTIOI_BOUNCE_BASE 0x1680 +#define LOONGARCH_IOCSR_EXTIOI_ISR_BASE 0x1800 +#define LOONGARCH_IOCSR_EXTIOI_ROUTE_BASE 0x1c00 +#define IOCSR_EXTIOI_VECTOR_NUM 256 + +#ifndef __ASSEMBLY__ + +static inline u64 drdtime(void) +{ + int rID = 0; + u64 val = 0; + + __asm__ __volatile__( + "rdtime.d %0, %1 \n\t" + : "=r"(val), "=r"(rID) + : + ); + return val; +} + +static inline unsigned int get_csr_cpuid(void) +{ + return csr_readl(LOONGARCH_CSR_CPUID); +} + +static inline void csr_any_send(unsigned int addr, unsigned int data, + unsigned int data_mask, unsigned int cpu) +{ + uint64_t val = 0; + + val = IOCSR_ANY_SEND_BLOCKING | addr; + val |= (cpu << IOCSR_ANY_SEND_CPU_SHIFT); + val |= (data_mask << IOCSR_ANY_SEND_MASK_SHIFT); + val |= ((uint64_t)data << IOCSR_ANY_SEND_BUF_SHIFT); + __iocsrwr_d(val, LOONGARCH_IOCSR_ANY_SEND); +} + +static inline unsigned int read_csr_excode(void) +{ + return (csr_readl(LOONGARCH_CSR_ESTAT) & CSR_ESTAT_EXC) >> CSR_ESTAT_EXC_SHIFT; +} + +static inline void write_csr_index(unsigned int idx) +{ + __csrxchg(idx, CSR_TLBIDX_IDXM, LOONGARCH_CSR_TLBIDX); +} + +static inline unsigned int read_csr_pagesize(void) +{ + return (__csrrd(LOONGARCH_CSR_TLBIDX) & CSR_TLBIDX_SIZEM) >> CSR_TLBIDX_SIZE; +} + +static inline void write_csr_pagesize(unsigned int size) +{ + __csrxchg(size << CSR_TLBIDX_SIZE, CSR_TLBIDX_SIZEM, LOONGARCH_CSR_TLBIDX); +} + +static inline unsigned int read_csr_tlbrefill_pagesize(void) +{ + return (__dcsrrd(LOONGARCH_CSR_TLBREHI) & CSR_TLBREHI_PS) >> CSR_TLBREHI_PS_SHIFT; +} + +static inline void write_csr_tlbrefill_pagesize(unsigned int size) +{ + __dcsrxchg(size << CSR_TLBREHI_PS_SHIFT, CSR_TLBREHI_PS, LOONGARCH_CSR_TLBREHI); +} + +#define read_csr_asid() __csrrd(LOONGARCH_CSR_ASID) +#define write_csr_asid(val) __csrwr(val, LOONGARCH_CSR_ASID) +#define read_csr_entryhi() __dcsrrd(LOONGARCH_CSR_TLBEHI) +#define write_csr_entryhi(val) __dcsrwr(val, LOONGARCH_CSR_TLBEHI) +#define read_csr_entrylo0() __dcsrrd(LOONGARCH_CSR_TLBELO0) +#define write_csr_entrylo0(val) __dcsrwr(val, LOONGARCH_CSR_TLBELO0) +#define read_csr_entrylo1() __dcsrrd(LOONGARCH_CSR_TLBELO1) +#define write_csr_entrylo1(val) __dcsrwr(val, LOONGARCH_CSR_TLBELO1) +#define read_csr_ecfg() __csrrd(LOONGARCH_CSR_ECFG) +#define write_csr_ecfg(val) __csrwr(val, LOONGARCH_CSR_ECFG) +#define read_csr_estat() __csrrd(LOONGARCH_CSR_ESTAT) +#define write_csr_estat(val) __csrwr(val, LOONGARCH_CSR_ESTAT) +#define read_csr_tlbidx() __csrrd(LOONGARCH_CSR_TLBIDX) +#define write_csr_tlbidx(val) __csrwr(val, LOONGARCH_CSR_TLBIDX) +#define read_csr_euen() __csrrd(LOONGARCH_CSR_EUEN) +#define write_csr_euen(val) __csrwr(val, LOONGARCH_CSR_EUEN) +#define read_csr_cpuid() __csrrd(LOONGARCH_CSR_CPUID) +#define read_csr_prcfg1() __dcsrrd(LOONGARCH_CSR_PRCFG1) +#define write_csr_prcfg1(val) __dcsrwr(val, LOONGARCH_CSR_PRCFG1) +#define read_csr_prcfg2() __dcsrrd(LOONGARCH_CSR_PRCFG2) +#define write_csr_prcfg2(val) __dcsrwr(val, LOONGARCH_CSR_PRCFG2) +#define read_csr_prcfg3() __dcsrrd(LOONGARCH_CSR_PRCFG3) +#define write_csr_prcfg3(val) __dcsrwr(val, LOONGARCH_CSR_PRCFG3) +#define read_csr_stlbpgsize() __dcsrrd(LOONGARCH_CSR_STLBPGSIZE) +#define write_csr_stlbpgsize(val) __dcsrwr(val, LOONGARCH_CSR_STLBPGSIZE) +#define read_csr_rvacfg() __dcsrrd(LOONGARCH_CSR_RVACFG) +#define write_csr_rvacfg(val) __dcsrwr(val, LOONGARCH_CSR_RVACFG) +#define write_csr_tintclear(val) __dcsrwr(val, LOONGARCH_CSR_TINTCLR) +#define read_csr_impctl1() __dcsrrd(LOONGARCH_CSR_IMPCTL1) +#define write_csr_impctl1(val) __dcsrwr(val, LOONGARCH_CSR_IMPCTL1) +#define write_csr_impctl2(val) __dcsrwr(val, LOONGARCH_CSR_IMPCTL2) + +#define read_csr_perfctrl0() __dcsrrd(LOONGARCH_CSR_PERFCTRL0) +#define read_csr_perfcntr0() __dcsrrd(LOONGARCH_CSR_PERFCNTR0) +#define read_csr_perfctrl1() __dcsrrd(LOONGARCH_CSR_PERFCTRL1) +#define read_csr_perfcntr1() __dcsrrd(LOONGARCH_CSR_PERFCNTR1) +#define read_csr_perfctrl2() __dcsrrd(LOONGARCH_CSR_PERFCTRL2) +#define read_csr_perfcntr2() __dcsrrd(LOONGARCH_CSR_PERFCNTR2) +#define read_csr_perfctrl3() __dcsrrd(LOONGARCH_CSR_PERFCTRL3) +#define read_csr_perfcntr3() __dcsrrd(LOONGARCH_CSR_PERFCNTR3) +#define write_csr_perfctrl0(val) __dcsrwr(val, LOONGARCH_CSR_PERFCTRL0) +#define write_csr_perfcntr0(val) __dcsrwr(val, LOONGARCH_CSR_PERFCNTR0) +#define write_csr_perfctrl1(val) __dcsrwr(val, LOONGARCH_CSR_PERFCTRL1) +#define write_csr_perfcntr1(val) __dcsrwr(val, LOONGARCH_CSR_PERFCNTR1) +#define write_csr_perfctrl2(val) __dcsrwr(val, LOONGARCH_CSR_PERFCTRL2) +#define write_csr_perfcntr2(val) __dcsrwr(val, LOONGARCH_CSR_PERFCNTR2) +#define write_csr_perfctrl3(val) __dcsrwr(val, LOONGARCH_CSR_PERFCTRL3) +#define write_csr_perfcntr3(val) __dcsrwr(val, LOONGARCH_CSR_PERFCNTR3) + +/* + * Manipulate bits in a register. + */ +#define __BUILD_CSR_COMMON(name) \ +static inline unsigned long \ +set_##name(unsigned long set) \ +{ \ + unsigned long res, new; \ + \ + res = read_##name(); \ + new = res | set; \ + write_##name(new); \ + \ + return res; \ +} \ + \ +static inline unsigned long \ +clear_##name(unsigned long clear) \ +{ \ + unsigned long res, new; \ + \ + res = read_##name(); \ + new = res & ~clear; \ + write_##name(new); \ + \ + return res; \ +} \ + \ +static inline unsigned long \ +change_##name(unsigned long change, unsigned long val) \ +{ \ + unsigned long res, new; \ + \ + res = read_##name(); \ + new = res & ~change; \ + new |= (val & change); \ + write_##name(new); \ + \ + return res; \ +} + +#define __BUILD_CSR_OP(name) __BUILD_CSR_COMMON(csr_##name) + +__BUILD_CSR_OP(euen) +__BUILD_CSR_OP(ecfg) +__BUILD_CSR_OP(tlbidx) + +#define set_csr_estat(val) \ + __dcsrxchg(val, val, LOONGARCH_CSR_ESTAT) +#define clear_csr_estat(val) \ + __dcsrxchg(~(val), val, LOONGARCH_CSR_ESTAT) + +#endif /* __ASSEMBLY__ */ + +/* Generic EntryLo bit definitions */ +#define ENTRYLO_V (_ULCAST_(1) << 0) +#define ENTRYLO_D (_ULCAST_(1) << 1) +#define ENTRYLO_PLV_SHIFT 2 +#define ENTRYLO_PLV (_ULCAST_(3) << ENTRYLO_PLV_SHIFT) +#define ENTRYLO_C_SHIFT 4 +#define ENTRYLO_C (_ULCAST_(3) << ENTRYLO_C_SHIFT) +#define ENTRYLO_G (_ULCAST_(1) << 6) +#define ENTRYLO_NR (_ULCAST_(1) << 61) +#define ENTRYLO_NX (_ULCAST_(1) << 62) + +/* LoongArch GlobalNumber definitions */ +#define LOONGARCH_GLOBALNUMBER_VP_SHF 0 +#define LOONGARCH_GLOBALNUMBER_VP (_ULCAST_(0xff) << LOONGARCH_GLOBALNUMBER_VP_SHF) +#define LOONGARCH_GLOBALNUMBER_CORE_SHF 8 +#define LOONGARCH_GLOBALNUMBER_CORE (_ULCAST_(0xff) << LOONGARCH_GLOBALNUMBER_CORE_SHF) +#define LOONGARCH_GLOBALNUMBER_CLUSTER_SHF 16 +#define LOONGARCH_GLOBALNUMBER_CLUSTER (_ULCAST_(0xf) << LOONGARCH_GLOBALNUMBER_CLUSTER_SHF) + +/* Values for PageMask register */ +#define PM_4K 0x00000000 +#define PM_8K 0x00002000 +#define PM_16K 0x00006000 +#define PM_32K 0x0000e000 +#define PM_64K 0x0001e000 +#define PM_128K 0x0003e000 +#define PM_256K 0x0007e000 +#define PM_512K 0x000fe000 +#define PM_1M 0x001fe000 +#define PM_2M 0x003fe000 +#define PM_4M 0x007fe000 +#define PM_8M 0x00ffe000 +#define PM_16M 0x01ffe000 +#define PM_32M 0x03ffe000 +#define PM_64M 0x07ffe000 +#define PM_128M 0x0fffe000 +#define PM_256M 0x1fffe000 +#define PM_512M 0x3fffe000 +#define PM_1G 0x7fffe000 + +#define PS_4K 0x0000000c +#define PS_8K 0x0000000d +#define PS_16K 0x0000000e +#define PS_32K 0x0000000f +#define PS_64K 0x00000010 +#define PS_128K 0x00000011 +#define PS_256K 0x00000012 +#define PS_512K 0x00000013 +#define PS_1M 0x00000014 +#define PS_2M 0x00000015 +#define PS_4M 0x00000016 +#define PS_8M 0x00000017 +#define PS_16M 0x00000018 +#define PS_32M 0x00000019 +#define PS_64M 0x0000001a +#define PS_128M 0x0000001b +#define PS_256M 0x0000001c +#define PS_512M 0x0000001d +#define PS_1G 0x0000001e + +#define PS_MASK 0x3f000000 +#define PS_SHIFT 24 + +/* Default page size for a given kernel configuration */ +#ifdef CONFIG_PAGE_SIZE_4KB +#define PM_DEFAULT_MASK PM_4K +#elif defined(CONFIG_PAGE_SIZE_16KB) +#define PM_DEFAULT_MASK PM_16K +#elif defined(CONFIG_PAGE_SIZE_64KB) +#define PM_DEFAULT_MASK PM_64K +#else +#error Bad page size configuration! +#endif + +#ifdef CONFIG_PAGE_SIZE_4KB +#define PS_DEFAULT_SIZE PS_4K +#elif defined(CONFIG_PAGE_SIZE_16KB) +#define PS_DEFAULT_SIZE PS_16K +#elif defined(CONFIG_PAGE_SIZE_64KB) +#define PS_DEFAULT_SIZE PS_64K +#else +#error Bad page size configuration! +#endif + +/* Default huge tlb size for a given kernel configuration */ +#ifdef CONFIG_PAGE_SIZE_4KB +#define PM_HUGE_MASK PM_1M +#elif defined(CONFIG_PAGE_SIZE_16KB) +#define PM_HUGE_MASK PM_16M +#elif defined(CONFIG_PAGE_SIZE_64KB) +#define PM_HUGE_MASK PM_256M +#else +#error Bad page size configuration for hugetlbfs! +#endif + +#ifdef CONFIG_PAGE_SIZE_4KB +#define PS_HUGE_SIZE PS_1M +#elif defined(CONFIG_PAGE_SIZE_16KB) +#define PS_HUGE_SIZE PS_16M +#elif defined(CONFIG_PAGE_SIZE_64KB) +#define PS_HUGE_SIZE PS_256M +#else +#error Bad page size configuration for hugetlbfs! +#endif + +/* Values used for computation of new tlb entries */ +#define PL_4K 12 +#define PL_16K 14 +#define PL_64K 16 +#define PL_256K 18 +#define PL_1M 20 +#define PL_4M 22 +#define PL_16M 24 +#define PL_64M 26 +#define PL_256M 28 + +/* ExStatus.ExcCode */ +#define EXCCODE_RSV 0 /* Reserved */ +#define EXCCODE_TLBL 1 /* TLB miss on a load */ +#define EXCCODE_TLBS 2 /* TLB miss on a store */ +#define EXCCODE_TLBI 3 /* TLB miss on a ifetch */ +#define EXCCODE_TLBM 4 /* TLB modified fault */ +#define EXCCODE_TLBNR 5 /* TLB Read-Inhibit exception */ +#define EXCCODE_TLBNX 6 /* TLB Execution-Inhibit exception */ +#define EXCCODE_TLBPE 7 /* TLB Privilege Error */ +#define EXCCODE_ADE 8 /* Address Error */ + #define EXSUBCODE_ADEF 0 /* Fetch Instruction */ + #define EXSUBCODE_ADEM 1 /* Access Memory*/ +#define EXCCODE_ALE 9 /* Unalign Access */ +#define EXCCODE_OOB 10 /* Out of bounds */ +#define EXCCODE_SYS 11 /* System call */ +#define EXCCODE_BP 12 /* Breakpoint */ +#define EXCCODE_INE 13 /* Inst. Not Exist */ +#define EXCCODE_IPE 14 /* Inst. Privileged Error */ +#define EXCCODE_FPDIS 15 /* FPU Disabled */ +#define EXCCODE_LSXDIS 16 /* LSX Disabled */ +#define EXCCODE_LASXDIS 17 /* LASX Disabled */ +#define EXCCODE_FPE 18 /* Floating Point Exception */ + #define EXCSUBCODE_FPE 0 /* Floating Point Exception */ + #define EXCSUBCODE_VFPE 1 /* Vector Exception */ +#define EXCCODE_WATCH 19 /* Watch address reference */ +#define EXCCODE_BTDIS 20 /* Binary Trans. Disabled */ +#define EXCCODE_BTE 21 /* Binary Trans. Exception */ +#define EXCCODE_PSI 22 /* Guest Privileged Error */ +#define EXCCODE_HYP 23 /* Hypercall */ +#define EXCCODE_GCM 24 /* Guest CSR modified */ + #define EXCSUBCODE_GCSC 0 /* Software caused */ + #define EXCSUBCODE_GCHC 1 /* Hardware caused */ +#define EXCCODE_SE 25 /* Security */ + +#define EXCCODE_INT_START 64 +#define EXCCODE_SIP0 64 +#define EXCCODE_SIP1 65 +#define EXCCODE_IP0 66 +#define EXCCODE_IP1 67 +#define EXCCODE_IP2 68 +#define EXCCODE_IP3 69 +#define EXCCODE_IP4 70 +#define EXCCODE_IP5 71 +#define EXCCODE_IP6 72 +#define EXCCODE_IP7 73 +#define EXCCODE_PMC 74 /* Performance Counter */ +#define EXCCODE_TIMER 75 +#define EXCCODE_IPI 76 +#define EXCCODE_NMI 77 +#define EXCCODE_INT_END 78 +#define EXCCODE_INT_NUM (EXCCODE_INT_END - EXCCODE_INT_START) + +/* FPU register names */ +#define LOONGARCH_FCSR0 $r0 +#define LOONGARCH_FCSR1 $r1 +#define LOONGARCH_FCSR2 $r2 +#define LOONGARCH_FCSR3 $r3 + +/* FPU Status Register Values */ +#define FPU_CSR_RSVD 0xe0e0fce0 + +/* + * X the exception cause indicator + * E the exception enable + * S the sticky/flag bit + */ +#define FPU_CSR_ALL_X 0x1f000000 +#define FPU_CSR_INV_X 0x10000000 +#define FPU_CSR_DIV_X 0x08000000 +#define FPU_CSR_OVF_X 0x04000000 +#define FPU_CSR_UDF_X 0x02000000 +#define FPU_CSR_INE_X 0x01000000 + +#define FPU_CSR_ALL_S 0x001f0000 +#define FPU_CSR_INV_S 0x00100000 +#define FPU_CSR_DIV_S 0x00080000 +#define FPU_CSR_OVF_S 0x00040000 +#define FPU_CSR_UDF_S 0x00020000 +#define FPU_CSR_INE_S 0x00010000 + +#define FPU_CSR_ALL_E 0x0000001f +#define FPU_CSR_INV_E 0x00000010 +#define FPU_CSR_DIV_E 0x00000008 +#define FPU_CSR_OVF_E 0x00000004 +#define FPU_CSR_UDF_E 0x00000002 +#define FPU_CSR_INE_E 0x00000001 + +/* Bits 8 and 9 of FPU Status Register specify the rounding mode */ +#define FPU_CSR_RM 0x300 +#define FPU_CSR_RN 0x000 /* nearest */ +#define FPU_CSR_RZ 0x100 /* towards zero */ +#define FPU_CSR_RU 0x200 /* towards +Infinity */ +#define FPU_CSR_RD 0x300 /* towards -Infinity */ + +#define write_fcsr(dest, val) \ +do { \ + __asm__ __volatile__( \ + " movgr2fcsr %0, "STR(dest)" \n" \ + : : "r" (val)); \ +} while (0) + +#define read_fcsr(source) \ +({ \ + unsigned int __res; \ +\ + __asm__ __volatile__( \ + " movfcsr2gr %0, "STR(source)" \n" \ + : "=r" (__res)); \ + __res; \ +}) + +#endif /* _ASM_LOONGARCHREGS_H */ diff --git a/arch/loongarch/include/asm/mach-loongson64/boot_param.h b/arch/loongarch/include/asm/mach-loongson64/boot_param.h new file mode 100644 index 000000000000..1169c628cdf3 --- /dev/null +++ b/arch/loongarch/include/asm/mach-loongson64/boot_param.h @@ -0,0 +1,107 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_MACH_LOONGSON64_BOOT_PARAM_H_ +#define __ASM_MACH_LOONGSON64_BOOT_PARAM_H_ + +#ifdef CONFIG_VT +#include +#endif + +#define ADDRESS_TYPE_SYSRAM 1 +#define ADDRESS_TYPE_RESERVED 2 +#define ADDRESS_TYPE_ACPI 3 +#define ADDRESS_TYPE_NVS 4 +#define ADDRESS_TYPE_PMEM 5 + +#define EFI_RUNTIME_MAP_START 100 +#define LOONGSON3_BOOT_MEM_MAP_MAX 128 + +#define LOONGSON_EFIBOOT_SIGNATURE "BPI" +#define LOONGSON_MEM_SIGNATURE "MEM" +#define LOONGSON_VBIOS_SIGNATURE "VBIOS" +#define LOONGSON_SCREENINFO_SIGNATURE "SINFO" + +/* Values for Version BPI */ +enum bpi_version { + BPI_VERSION_V1 = 1000, /* Signature="BPI01000" */ + BPI_VERSION_V2 = 1001, /* Signature="BPI01001" */ + BPI_VERSION_V3 = 1002, /* Signature="BPI01002" */ +}; + +/* Flags in bootparamsinterface */ +#define BPI_FLAGS_UEFI_SUPPORTED BIT(0) + +struct _extention_list_hdr { + u64 signature; + u32 length; + u8 revision; + u8 checksum; + u64 next_offset; +} __packed; + +struct boot_params { + u64 signature; /* {"B", "P", "I", "0", "1", ... } */ + void *systemtable; + u64 extlist_offset; + u64 flags; +} __packed; + +struct loongsonlist_mem_map_legacy { + struct _extention_list_hdr header; /* {"M", "E", "M"} */ + u8 map_count; + struct loongson_mem_map { + u32 mem_type; + u64 mem_start; + u64 mem_size; + } __packed map[LOONGSON3_BOOT_MEM_MAP_MAX]; +} __packed; + +struct loongsonlist_mem_map { + struct _extention_list_hdr header; /* {"M", "E", "M"} */ + u8 map_count; + u32 desc_version; + struct efi_mmap { + u32 mem_type; + u32 padding; + u64 mem_start; + u64 mem_vaddr; + u64 mem_size; + u64 attribute; + } __packed map[LOONGSON3_BOOT_MEM_MAP_MAX]; +} __packed; + +struct loongsonlist_vbios { + struct _extention_list_hdr header; /* {"V", "B", "I", "O", "S"} */ + u64 vbios_addr; +} __packed; + +struct loongsonlist_screeninfo{ + struct _extention_list_hdr header; /* {"S", "I", "N", "F", "O"} */ + struct screen_info si; +} __packed; + +struct loongson_board_info { + int bios_size; + char *bios_vendor; + char *bios_version; + char *bios_release_date; + char *board_name; + char *board_vendor; +}; + +struct loongson_system_configuration { + int bpi_ver; + int nr_cpus; + int nr_nodes; + int nr_io_pics; + int boot_cpu_id; + int cores_per_node; + int cores_per_package; + char *cpuname; + u64 vgabios_addr; +}; + +extern struct boot_params *efi_bp; +extern struct loongson_board_info b_info; +extern struct loongsonlist_mem_map *loongson_mem_map; +extern struct loongson_system_configuration loongson_sysconf; +#endif diff --git a/arch/loongarch/include/asm/mach-loongson64/irq.h b/arch/loongarch/include/asm/mach-loongson64/irq.h new file mode 100644 index 000000000000..78c1921f523b --- /dev/null +++ b/arch/loongarch/include/asm/mach-loongson64/irq.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_MACH_LOONGSON64_IRQ_H_ +#define __ASM_MACH_LOONGSON64_IRQ_H_ + +#define MAX_IO_PICS 2 +#define NR_IRQS (64 + (256 * MAX_IO_PICS)) + +#define CORES_PER_EIO_NODE 4 + +#define LOONGSON_CPU_UART0_VEC 10 /* CPU UART0 */ +#define LOONGSON_CPU_THSENS_VEC 14 /* CPU Thsens */ +#define LOONGSON_CPU_HT0_VEC 16 /* CPU HT0 irq vector base number */ +#define LOONGSON_CPU_HT1_VEC 24 /* CPU HT1 irq vector base number */ + +/* IRQ number definitions */ +#define LOONGSON_LPC_IRQ_BASE 0 +#define LOONGSON_LPC_LAST_IRQ (LOONGSON_LPC_IRQ_BASE + 15) + +#define LOONGSON_CPU_IRQ_BASE 16 +#define LOONGSON_CPU_LAST_IRQ (LOONGSON_CPU_IRQ_BASE + 14) + +#define LOONGSON_PCH_IRQ_BASE 64 +#define LOONGSON_PCH_ACPI_IRQ (LOONGSON_PCH_IRQ_BASE + 47) +#define LOONGSON_PCH_LAST_IRQ (LOONGSON_PCH_IRQ_BASE + 64 - 1) + +#define LOONGSON_MSI_IRQ_BASE (LOONGSON_PCH_IRQ_BASE + 64) +#define LOONGSON_MSI_LAST_IRQ (LOONGSON_PCH_IRQ_BASE + 256 - 1) + +#define GSI_MIN_LPC_IRQ LOONGSON_LPC_IRQ_BASE +#define GSI_MAX_LPC_IRQ (LOONGSON_LPC_IRQ_BASE + 16 - 1) +#define GSI_MIN_CPU_IRQ LOONGSON_CPU_IRQ_BASE +#define GSI_MAX_CPU_IRQ (LOONGSON_CPU_IRQ_BASE + 48 - 1) +#define GSI_MIN_PCH_IRQ LOONGSON_PCH_IRQ_BASE +#define GSI_MAX_PCH_IRQ (LOONGSON_PCH_IRQ_BASE + 256 - 1) + +extern int find_pch_pic(u32 gsi); +extern int eiointc_get_node(int id); + +static inline void eiointc_enable(void) +{ + uint64_t misc; + + misc = iocsr_readq(LOONGARCH_IOCSR_MISC_FUNC); + misc |= IOCSR_MISC_FUNC_EXT_IOI_EN; + iocsr_writeq(misc, LOONGARCH_IOCSR_MISC_FUNC); +} + +struct acpi_madt_lio_pic; +struct acpi_madt_eio_pic; +struct acpi_madt_ht_pic; +struct acpi_madt_bio_pic; +struct acpi_madt_msi_pic; +struct acpi_madt_lpc_pic; + +struct irq_domain *liointc_acpi_init(struct irq_domain *parent, + struct acpi_madt_lio_pic *acpi_liointc); +struct irq_domain *eiointc_acpi_init(struct irq_domain *parent, + struct acpi_madt_eio_pic *acpi_eiointc); + +struct irq_domain *htvec_acpi_init(struct irq_domain *parent, + struct acpi_madt_ht_pic *acpi_htvec); +struct irq_domain *pch_lpc_acpi_init(struct irq_domain *parent, + struct acpi_madt_lpc_pic *acpi_pchlpc); +struct irq_domain *pch_msi_acpi_init(struct irq_domain *parent, + struct acpi_madt_msi_pic *acpi_pchmsi); +struct irq_domain *pch_pic_acpi_init(struct irq_domain *parent, + struct acpi_madt_bio_pic *acpi_pchpic); + +extern struct acpi_madt_lio_pic *acpi_liointc; +extern struct acpi_madt_eio_pic *acpi_eiointc[MAX_IO_PICS]; + +extern struct acpi_madt_ht_pic *acpi_htintc; +extern struct acpi_madt_lpc_pic *acpi_pchlpc; +extern struct acpi_madt_msi_pic *acpi_pchmsi[MAX_IO_PICS]; +extern struct acpi_madt_bio_pic *acpi_pchpic[MAX_IO_PICS]; + +extern struct irq_domain *cpu_domain; +extern struct irq_domain *liointc_domain; +extern struct irq_domain *pch_lpc_domain; +extern struct irq_domain *pch_msi_domain[MAX_IO_PICS]; +extern struct irq_domain *pch_pic_domain[MAX_IO_PICS]; + +#endif /* __ASM_MACH_LOONGSON64_IRQ_H_ */ diff --git a/arch/loongarch/include/asm/mach-loongson64/loongson.h b/arch/loongarch/include/asm/mach-loongson64/loongson.h new file mode 100644 index 000000000000..5e6fc05a5c48 --- /dev/null +++ b/arch/loongarch/include/asm/mach-loongson64/loongson.h @@ -0,0 +1,160 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Author: Huacai Chen + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ + +#ifndef __ASM_MACH_LOONGSON64_LOONGSON_H +#define __ASM_MACH_LOONGSON64_LOONGSON_H + +#include +#include +#include +#include +#include + +#include + +extern const struct plat_smp_ops loongson3_smp_ops; + +/* loongson-specific command line, env and memory initialization */ +extern void __init fw_init_environ(void); +extern void __init fw_init_memory(void); +extern void __init fw_init_numa_memory(void); + +#define LOONGSON_REG(x) \ + (*(volatile u32 *)((char *)TO_UNCAC(LOONGSON_REG_BASE) + (x))) + +#define LOONGSON_LIO_BASE 0x18000000 +#define LOONGSON_LIO_SIZE 0x00100000 /* 1M */ +#define LOONGSON_LIO_TOP (LOONGSON_LIO_BASE+LOONGSON_LIO_SIZE-1) + +#define LOONGSON_BOOT_BASE 0x1c000000 +#define LOONGSON_BOOT_SIZE 0x02000000 /* 32M */ +#define LOONGSON_BOOT_TOP (LOONGSON_BOOT_BASE+LOONGSON_BOOT_SIZE-1) + +#define LOONGSON_REG_BASE 0x1fe00000 +#define LOONGSON_REG_SIZE 0x00100000 /* 1M */ +#define LOONGSON_REG_TOP (LOONGSON_REG_BASE+LOONGSON_REG_SIZE-1) + +/* GPIO Regs - r/w */ + +#define LOONGSON_GPIODATA LOONGSON_REG(0x11c) +#define LOONGSON_GPIOIE LOONGSON_REG(0x120) +#define LOONGSON_REG_GPIO_BASE (LOONGSON_REG_BASE + 0x11c) + +#define MAX_PACKAGES 16 + +/* Chip Config registor of each physical cpu package */ +extern u64 loongson_chipcfg[MAX_PACKAGES]; +#define LOONGSON_CHIPCFG(id) (*(volatile u32 *)(loongson_chipcfg[id])) + +/* Chip Temperature registor of each physical cpu package */ +extern u64 loongson_chiptemp[MAX_PACKAGES]; +#define LOONGSON_CHIPTEMP(id) (*(volatile u32 *)(loongson_chiptemp[id])) + +/* Freq Control register of each physical cpu package */ +extern u64 loongson_freqctrl[MAX_PACKAGES]; +#define LOONGSON_FREQCTRL(id) (*(volatile u32 *)(loongson_freqctrl[id])) + +#define xconf_readl(addr) readl(addr) +#define xconf_readq(addr) readq(addr) + +static inline void xconf_writel(u32 val, volatile void __iomem *addr) +{ + asm volatile ( + " st.w %[v], %[hw], 0 \n" + " ld.b $r0, %[hw], 0 \n" + : + : [hw] "r" (addr), [v] "r" (val) + ); +} + +static inline void xconf_writeq(u64 val64, volatile void __iomem *addr) +{ + asm volatile ( + " st.d %[v], %[hw], 0 \n" + " ld.b $r0, %[hw], 0 \n" + : + : [hw] "r" (addr), [v] "r" (val64) + ); +} + +/* ============== LS7A registers =============== */ +#define LS7A_PCH_REG_BASE 0x10000000UL +/* LPC regs */ +#define LS7A_LPC_REG_BASE (LS7A_PCH_REG_BASE + 0x00002000) +/* CHIPCFG regs */ +#define LS7A_CHIPCFG_REG_BASE (LS7A_PCH_REG_BASE + 0x00010000) +/* MISC reg base */ +#define LS7A_MISC_REG_BASE (LS7A_PCH_REG_BASE + 0x00080000) +/* ACPI regs */ +#define LS7A_ACPI_REG_BASE (LS7A_MISC_REG_BASE + 0x00050000) +/* RTC regs */ +#define LS7A_RTC_REG_BASE (LS7A_MISC_REG_BASE + 0x00050100) + +#define LS7A_DMA_CFG (volatile void *)TO_UNCAC(LS7A_CHIPCFG_REG_BASE + 0x041c) +#define LS7A_DMA_NODE_SHF 8 +#define LS7A_DMA_NODE_MASK 0x1F00 + +#define LS7A_INT_MASK_REG (volatile void *)TO_UNCAC(LS7A_PCH_REG_BASE + 0x020) +#define LS7A_INT_EDGE_REG (volatile void *)TO_UNCAC(LS7A_PCH_REG_BASE + 0x060) +#define LS7A_INT_CLEAR_REG (volatile void *)TO_UNCAC(LS7A_PCH_REG_BASE + 0x080) +#define LS7A_INT_HTMSI_EN_REG (volatile void *)TO_UNCAC(LS7A_PCH_REG_BASE + 0x040) +#define LS7A_INT_ROUTE_ENTRY_REG (volatile void *)TO_UNCAC(LS7A_PCH_REG_BASE + 0x100) +#define LS7A_INT_HTMSI_VEC_REG (volatile void *)TO_UNCAC(LS7A_PCH_REG_BASE + 0x200) +#define LS7A_INT_STATUS_REG (volatile void *)TO_UNCAC(LS7A_PCH_REG_BASE + 0x3a0) +#define LS7A_INT_POL_REG (volatile void *)TO_UNCAC(LS7A_PCH_REG_BASE + 0x3e0) +#define LS7A_LPC_INT_CTL (volatile void *)TO_UNCAC(LS7A_PCH_REG_BASE + 0x2000) +#define LS7A_LPC_INT_ENA (volatile void *)TO_UNCAC(LS7A_PCH_REG_BASE + 0x2004) +#define LS7A_LPC_INT_STS (volatile void *)TO_UNCAC(LS7A_PCH_REG_BASE + 0x2008) +#define LS7A_LPC_INT_CLR (volatile void *)TO_UNCAC(LS7A_PCH_REG_BASE + 0x200c) +#define LS7A_LPC_INT_POL (volatile void *)TO_UNCAC(LS7A_PCH_REG_BASE + 0x2010) + +#define LS7A_PMCON_SOC_REG (volatile void *)TO_UNCAC(LS7A_ACPI_REG_BASE + 0x000) +#define LS7A_PMCON_RESUME_REG (volatile void *)TO_UNCAC(LS7A_ACPI_REG_BASE + 0x004) +#define LS7A_PMCON_RTC_REG (volatile void *)TO_UNCAC(LS7A_ACPI_REG_BASE + 0x008) +#define LS7A_PM1_EVT_REG (volatile void *)TO_UNCAC(LS7A_ACPI_REG_BASE + 0x00c) +#define LS7A_PM1_ENA_REG (volatile void *)TO_UNCAC(LS7A_ACPI_REG_BASE + 0x010) +#define LS7A_PM1_CNT_REG (volatile void *)TO_UNCAC(LS7A_ACPI_REG_BASE + 0x014) +#define LS7A_PM1_TMR_REG (volatile void *)TO_UNCAC(LS7A_ACPI_REG_BASE + 0x018) +#define LS7A_P_CNT_REG (volatile void *)TO_UNCAC(LS7A_ACPI_REG_BASE + 0x01c) +#define LS7A_GPE0_STS_REG (volatile void *)TO_UNCAC(LS7A_ACPI_REG_BASE + 0x028) +#define LS7A_GPE0_ENA_REG (volatile void *)TO_UNCAC(LS7A_ACPI_REG_BASE + 0x02c) +#define LS7A_RST_CNT_REG (volatile void *)TO_UNCAC(LS7A_ACPI_REG_BASE + 0x030) +#define LS7A_WD_SET_REG (volatile void *)TO_UNCAC(LS7A_ACPI_REG_BASE + 0x034) +#define LS7A_WD_TIMER_REG (volatile void *)TO_UNCAC(LS7A_ACPI_REG_BASE + 0x038) +#define LS7A_THSENS_CNT_REG (volatile void *)TO_UNCAC(LS7A_ACPI_REG_BASE + 0x04c) +#define LS7A_GEN_RTC_1_REG (volatile void *)TO_UNCAC(LS7A_ACPI_REG_BASE + 0x050) +#define LS7A_GEN_RTC_2_REG (volatile void *)TO_UNCAC(LS7A_ACPI_REG_BASE + 0x054) +#define LS7A_DPM_CFG_REG (volatile void *)TO_UNCAC(LS7A_ACPI_REG_BASE + 0x400) +#define LS7A_DPM_STS_REG (volatile void *)TO_UNCAC(LS7A_ACPI_REG_BASE + 0x404) +#define LS7A_DPM_CNT_REG (volatile void *)TO_UNCAC(LS7A_ACPI_REG_BASE + 0x408) + +typedef enum { + ACPI_PCI_HOTPLUG_STATUS = 1 << 1, + ACPI_CPU_HOTPLUG_STATUS = 1 << 2, + ACPI_MEM_HOTPLUG_STATUS = 1 << 3, + ACPI_POWERBUTTON_STATUS = 1 << 8, + ACPI_RTC_WAKE_STATUS = 1 << 10, + ACPI_PCI_WAKE_STATUS = 1 << 14, + ACPI_ANY_WAKE_STATUS = 1 << 15, +} AcpiEventStatusBits; + +#define HT1LO_OFFSET 0xe0000000000UL + +/* PCI Configration Space Base */ +#define MCFG_EXT_PCICFG_BASE 0xefe00000000UL + +/* REG ACCESS*/ +#define ls7a_readb(addr) (*(volatile unsigned char *)TO_UNCAC(addr)) +#define ls7a_readw(addr) (*(volatile unsigned short *)TO_UNCAC(addr)) +#define ls7a_readl(addr) (*(volatile unsigned int *)TO_UNCAC(addr)) +#define ls7a_readq(addr) (*(volatile unsigned long *)TO_UNCAC(addr)) +#define ls7a_writeb(val, addr) *(volatile unsigned char *)TO_UNCAC(addr) = (val) +#define ls7a_writew(val, addr) *(volatile unsigned short *)TO_UNCAC(addr) = (val) +#define ls7a_writel(val, addr) ls7a_write_type(val, addr, uint32_t) +#define ls7a_writeq(val, addr) ls7a_write_type(val, addr, uint64_t) +#define ls7a_write(val, addr) ls7a_write_type(val, addr, uint64_t) + +#endif /* __ASM_MACH_LOONGSON64_LOONGSON_H */ diff --git a/arch/loongarch/include/asm/mmu.h b/arch/loongarch/include/asm/mmu.h new file mode 100644 index 000000000000..c2753acaf0ea --- /dev/null +++ b/arch/loongarch/include/asm/mmu.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef __ASM_MMU_H +#define __ASM_MMU_H + +#include +#include + +typedef struct { + u64 asid[NR_CPUS]; + void *vdso; +} mm_context_t; + +#endif /* __ASM_MMU_H */ diff --git a/arch/loongarch/include/asm/mmu_context.h b/arch/loongarch/include/asm/mmu_context.h new file mode 100644 index 000000000000..2bb0fc98b8fd --- /dev/null +++ b/arch/loongarch/include/asm/mmu_context.h @@ -0,0 +1,152 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + * + * Switch a MMU context. + */ +#ifndef _ASM_MMU_CONTEXT_H +#define _ASM_MMU_CONTEXT_H + +#include +#include +#include +#include +#include + +#include +#include +#include + +/* + * All unused by hardware upper bits will be considered + * as a software asid extension. + */ +static inline u64 asid_version_mask(unsigned int cpu) +{ + return ~(u64)(cpu_asid_mask(&cpu_data[cpu])); +} + +static inline u64 asid_first_version(unsigned int cpu) +{ + return cpu_asid_mask(&cpu_data[cpu]) + 1; +} + +#define cpu_context(cpu, mm) ((mm)->context.asid[cpu]) +#define asid_cache(cpu) (cpu_data[cpu].asid_cache) +#define cpu_asid(cpu, mm) (cpu_context((cpu), (mm)) & cpu_asid_mask(&cpu_data[cpu])) + +static inline int asid_valid(struct mm_struct *mm, unsigned int cpu) +{ + if ((cpu_context(cpu, mm) ^ asid_cache(cpu)) & asid_version_mask(cpu)) + return 0; + + return 1; +} + +static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) +{ +} + +/* Normal, classic get_new_mmu_context */ +static inline void +get_new_mmu_context(struct mm_struct *mm, unsigned long cpu) +{ + u64 asid = asid_cache(cpu); + + if (!((++asid) & cpu_asid_mask(&cpu_data[cpu]))) + local_flush_tlb_user(); /* start new asid cycle */ + + cpu_context(cpu, mm) = asid_cache(cpu) = asid; +} + +/* + * Initialize the context related info for a new mm_struct + * instance. + */ +static inline int +init_new_context(struct task_struct *tsk, struct mm_struct *mm) +{ + int i; + + for_each_possible_cpu(i) + cpu_context(i, mm) = 0; + + return 0; +} + +static inline void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk) +{ + unsigned int cpu = smp_processor_id(); + + /* Check if our ASID is of an older version and thus invalid */ + if (!asid_valid(next, cpu)) + get_new_mmu_context(next, cpu); + + write_csr_asid(cpu_asid(cpu, next)); + + if (next != &init_mm) + csr_writeq((unsigned long)next->pgd, LOONGARCH_CSR_PGDL); + else + csr_writeq((unsigned long)invalid_pg_dir, LOONGARCH_CSR_PGDL); + + /* + * Mark current->active_mm as not "active" anymore. + * We don't want to mislead possible IPI tlb flush routines. + */ + cpumask_set_cpu(cpu, mm_cpumask(next)); +} + +#define switch_mm_irqs_off switch_mm_irqs_off + +static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk) +{ + unsigned long flags; + + local_irq_save(flags); + switch_mm_irqs_off(prev, next, tsk); + local_irq_restore(flags); +} + +/* + * Destroy context related info for an mm_struct that is about + * to be put to rest. + */ +static inline void destroy_context(struct mm_struct *mm) +{ +} + +#define activate_mm(prev, next) switch_mm(prev, next, current) +#define deactivate_mm(task, mm) do { } while (0) + +/* + * If mm is currently active, we can't really drop it. + * Instead, we will get a new one for it. + */ +static inline void +drop_mmu_context(struct mm_struct *mm, unsigned cpu) +{ + int asid; + unsigned long flags; + + local_irq_save(flags); + + asid = read_csr_asid() & cpu_asid_mask(¤t_cpu_data); + + if (asid == cpu_asid(cpu, mm)) { + if (!current->mm || (current->mm == mm)) { + get_new_mmu_context(mm, cpu); + write_csr_asid(cpu_asid(cpu, mm)); + goto out; + } + } + + /* Will get a new context next time */ + cpu_context(cpu, mm) = 0; + cpumask_clear_cpu(cpu, mm_cpumask(mm)); +out: + local_irq_restore(flags); +} + +#endif /* _ASM_MMU_CONTEXT_H */ diff --git a/arch/loongarch/include/asm/module.h b/arch/loongarch/include/asm/module.h new file mode 100644 index 000000000000..6e6df997ed2b --- /dev/null +++ b/arch/loongarch/include/asm/module.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_MODULE_H +#define _ASM_MODULE_H + +#include + +#define RELA_STACK_DEPTH 16 + +struct mod_arch_specific { +}; + +#endif /* _ASM_MODULE_H */ diff --git a/arch/loongarch/include/asm/page.h b/arch/loongarch/include/asm/page.h new file mode 100644 index 000000000000..cf0265b08a33 --- /dev/null +++ b/arch/loongarch/include/asm/page.h @@ -0,0 +1,129 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_PAGE_H +#define _ASM_PAGE_H + +#include + +/* + * PAGE_SHIFT determines the page size + */ +#ifdef CONFIG_PAGE_SIZE_4KB +#define PAGE_SHIFT 12 +#endif +#ifdef CONFIG_PAGE_SIZE_16KB +#define PAGE_SHIFT 14 +#endif +#ifdef CONFIG_PAGE_SIZE_64KB +#define PAGE_SHIFT 16 +#endif +#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE - 1)) + +#define HPAGE_SHIFT (PAGE_SHIFT + PAGE_SHIFT - 3) +#define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT) +#define HPAGE_MASK (~(HPAGE_SIZE - 1)) +#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) + +#ifndef __ASSEMBLY__ + +#include +#include + +/* + * It's normally defined only for FLATMEM config but it's + * used in our early mem init code for all memory models. + * So always define it. + */ +#define ARCH_PFN_OFFSET PFN_UP(PHYS_OFFSET) + +extern void clear_page(void *page); +extern void copy_page(void *to, void *from); + +extern unsigned long shm_align_mask; + +struct page; + +static inline void clear_user_page(void *addr, unsigned long vaddr, + struct page *page) +{ + clear_page(addr); +} + +struct vm_area_struct; +extern void copy_user_highpage(struct page *to, struct page *from, + unsigned long vaddr, struct vm_area_struct *vma); + +#define __HAVE_ARCH_COPY_USER_HIGHPAGE + +/* + * These are used to make use of C type-checking.. + */ +typedef struct { unsigned long pte; } pte_t; +#define pte_val(x) ((x).pte) +#define __pte(x) ((pte_t) { (x) }) +typedef struct page *pgtable_t; + +/* + * Finall the top of the hierarchy, the pgd + */ +typedef struct { unsigned long pgd; } pgd_t; +#define pgd_val(x) ((x).pgd) +#define __pgd(x) ((pgd_t) { (x) }) + +/* + * Manipulate page protection bits + */ +typedef struct { unsigned long pgprot; } pgprot_t; +#define pgprot_val(x) ((x).pgprot) +#define __pgprot(x) ((pgprot_t) { (x) }) +#define pte_pgprot(x) __pgprot(pte_val(x) & ~_PFN_MASK) + +#define ptep_buddy(x) ((pte_t *)((unsigned long)(x) ^ sizeof(pte_t))) + +/* + * __pa()/__va() should be used only during mem init. + */ +#define __pa(x) PHYSADDR(x) +#define __va(x) ((void *)((unsigned long)(x) + PAGE_OFFSET - PHYS_OFFSET)) + +#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) + +#ifdef CONFIG_FLATMEM + +static inline int pfn_valid(unsigned long pfn) +{ + /* avoid include hell */ + extern unsigned long max_mapnr; + unsigned long pfn_offset = ARCH_PFN_OFFSET; + + return pfn >= pfn_offset && pfn < max_mapnr; +} + +#endif + +#define virt_to_pfn(kaddr) PFN_DOWN(virt_to_phys((void *)(kaddr))) +#define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr)) + +extern int __virt_addr_valid(volatile void *kaddr); +#define virt_addr_valid(kaddr) __virt_addr_valid((volatile void *)(kaddr)) + +#define VM_DATA_DEFAULT_FLAGS \ + (VM_READ | VM_WRITE | \ + ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) + +extern unsigned long __kaslr_offset; +static inline unsigned long kaslr_offset(void) +{ + return __kaslr_offset; +} + +#include +#include + +#endif /* !__ASSEMBLY__ */ + +#endif /* _ASM_PAGE_H */ diff --git a/arch/loongarch/include/asm/pci.h b/arch/loongarch/include/asm/pci.h new file mode 100644 index 000000000000..6698c01b54f5 --- /dev/null +++ b/arch/loongarch/include/asm/pci.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#ifndef _ASM_PCI_H +#define _ASM_PCI_H + +#include + +/* + * This file essentially defines the interface between board specific + * PCI code and LoongArch common PCI code. Should potentially put into + * include/asm/pci.h file. + */ + +#include +#include + +extern phys_addr_t mcfg_addr_init(int node); + +/* Can be used to override the logic in pci_scan_bus for skipping + already-configured bus numbers - to be used for buggy BIOSes + or architectures with incomplete PCI setup by the loader */ +static inline unsigned int pcibios_assign_all_busses(void) +{ + return 0; +} + +#define PCIBIOS_MIN_IO 0x4000 +#define PCIBIOS_MIN_MEM 0x20000000 +#define PCIBIOS_MIN_CARDBUS_IO 0x4000 + +#define HAVE_PCI_MMAP +#define ARCH_GENERIC_PCI_MMAP_RESOURCE + +/* + * Dynamic DMA mapping stuff. + * LoongArch has everything mapped statically. + */ + +#include +#include +#include +#include +#include + +static inline int pci_proc_domain(struct pci_bus *bus) +{ + return 1; /* always show the domain in /proc */ +} + +/* generic pci stuff */ +#include + +#endif /* _ASM_PCI_H */ diff --git a/arch/loongarch/include/asm/percpu.h b/arch/loongarch/include/asm/percpu.h new file mode 100644 index 000000000000..06dbdd47145c --- /dev/null +++ b/arch/loongarch/include/asm/percpu.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Ltd. + */ +#ifndef __ASM_PERCPU_H +#define __ASM_PERCPU_H + +/* Use r21 for fast access */ +register unsigned long __my_cpu_offset __asm__("$r21"); + +static inline void set_my_cpu_offset(unsigned long off) +{ + __my_cpu_offset = off; + csr_writeq(off, PERCPU_BASE_KS); +} +#define __my_cpu_offset __my_cpu_offset + +#include + +#endif /* __ASM_PERCPU_H */ diff --git a/arch/loongarch/include/asm/perf_event.h b/arch/loongarch/include/asm/perf_event.h new file mode 100644 index 000000000000..0b57817eb6dc --- /dev/null +++ b/arch/loongarch/include/asm/perf_event.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * linux/arch/loongarch/include/asm/perf_event.h + * + * Author: Huacai Chen + * Copyright (C) 2020 Loongson Technology Corporation Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __LOONGARCH_PERF_EVENT_H__ +#define __LOONGARCH_PERF_EVENT_H__ +/* Leave it empty here. The file is required by linux/perf_event.h */ +#endif /* __LOONGARCH_PERF_EVENT_H__ */ diff --git a/arch/loongarch/include/asm/pgalloc.h b/arch/loongarch/include/asm/pgalloc.h new file mode 100644 index 000000000000..93e7a7ff1631 --- /dev/null +++ b/arch/loongarch/include/asm/pgalloc.h @@ -0,0 +1,107 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_PGALLOC_H +#define _ASM_PGALLOC_H + +#include +#include +#include + +#define __HAVE_ARCH_PMD_ALLOC_ONE +#define __HAVE_ARCH_PUD_ALLOC_ONE +#include + +static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, + pte_t *pte) +{ + set_pmd(pmd, __pmd((unsigned long)pte)); +} + +static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, + pgtable_t pte) +{ + set_pmd(pmd, __pmd((unsigned long)page_address(pte))); +} +#define pmd_pgtable(pmd) pmd_page(pmd) + +/* + * Initialize a new pmd table with invalid pointers. + */ +extern void pmd_init(unsigned long page, unsigned long pagetable); + +#ifndef __PAGETABLE_PMD_FOLDED + +static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) +{ + set_pud(pud, __pud((unsigned long)pmd)); +} +#endif + +/* + * Initialize a new pgd / pmd table with invalid pointers. + */ +extern void pgd_init(unsigned long page); +extern pgd_t *pgd_alloc(struct mm_struct *mm); + +#define __pte_free_tlb(tlb, pte, address) \ +do { \ + pgtable_pte_page_dtor(pte); \ + tlb_remove_page((tlb), pte); \ +} while (0) + +#ifndef __PAGETABLE_PMD_FOLDED + +static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) +{ + pmd_t *pmd; + struct page *pg; + + pg = alloc_pages(GFP_KERNEL_ACCOUNT, PMD_ORDER); + if (!pg) + return NULL; + + if (!pgtable_pmd_page_ctor(pg)) { + __free_pages(pg, PMD_ORDER); + return NULL; + } + + pmd = (pmd_t *)page_address(pg); + pmd_init((unsigned long)pmd, (unsigned long)invalid_pte_table); + return pmd; +} + +#define __pmd_free_tlb(tlb, x, addr) pmd_free((tlb)->mm, x) + +#endif + +#ifndef __PAGETABLE_PUD_FOLDED + +static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) +{ + pud_t *pud; + + pud = (pud_t *) __get_free_pages(GFP_KERNEL, PUD_ORDER); + if (pud) + pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table); + return pud; +} + +static inline void pud_free(struct mm_struct *mm, pud_t *pud) +{ + free_pages((unsigned long)pud, PUD_ORDER); +} + +static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud) +{ + set_p4d(p4d, __p4d((unsigned long)pud)); +} + +#define __pud_free_tlb(tlb, x, addr) pud_free((tlb)->mm, x) + +#endif /* __PAGETABLE_PUD_FOLDED */ + +extern void pagetable_init(void); + +#endif /* _ASM_PGALLOC_H */ diff --git a/arch/loongarch/include/asm/pgtable-64.h b/arch/loongarch/include/asm/pgtable-64.h new file mode 100644 index 000000000000..b6cf99d47833 --- /dev/null +++ b/arch/loongarch/include/asm/pgtable-64.h @@ -0,0 +1,263 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_PGTABLE_64_H +#define _ASM_PGTABLE_64_H + +#include +#include + +#include + +#if CONFIG_PGTABLE_LEVELS == 2 +#include +#elif CONFIG_PGTABLE_LEVELS == 3 +#include +#else +#include +#endif + +#define PGD_ORDER 0 +#define PUD_ORDER 0 +#define PMD_ORDER 0 +#define PTE_ORDER 0 + +#if CONFIG_PGTABLE_LEVELS == 2 +#define PGDIR_SHIFT (PAGE_SHIFT + (PAGE_SHIFT + PTE_ORDER - 3)) +#elif CONFIG_PGTABLE_LEVELS == 3 +#define PMD_SHIFT (PAGE_SHIFT + (PAGE_SHIFT + PTE_ORDER - 3)) +#define PMD_SIZE (1UL << PMD_SHIFT) +#define PMD_MASK (~(PMD_SIZE-1)) +#define PGDIR_SHIFT (PMD_SHIFT + (PAGE_SHIFT + PMD_ORDER - 3)) +#elif CONFIG_PGTABLE_LEVELS == 4 +#define PMD_SHIFT (PAGE_SHIFT + (PAGE_SHIFT + PTE_ORDER - 3)) +#define PMD_SIZE (1UL << PMD_SHIFT) +#define PMD_MASK (~(PMD_SIZE-1)) +#define PUD_SHIFT (PMD_SHIFT + (PAGE_SHIFT + PMD_ORDER - 3)) +#define PUD_SIZE (1UL << PUD_SHIFT) +#define PUD_MASK (~(PUD_SIZE-1)) +#define PGDIR_SHIFT (PUD_SHIFT + (PAGE_SHIFT + PUD_ORDER - 3)) +#endif + +#define PGDIR_SIZE (1UL << PGDIR_SHIFT) +#define PGDIR_MASK (~(PGDIR_SIZE-1)) + +#define VA_BITS (PGDIR_SHIFT + (PAGE_SHIFT + PGD_ORDER - 3)) + +#define PTRS_PER_PGD ((PAGE_SIZE << PGD_ORDER) >> 3) +#if CONFIG_PGTABLE_LEVELS > 3 +#define PTRS_PER_PUD ((PAGE_SIZE << PUD_ORDER) >> 3) +#endif +#if CONFIG_PGTABLE_LEVELS > 2 +#define PTRS_PER_PMD ((PAGE_SIZE << PMD_ORDER) >> 3) +#endif +#define PTRS_PER_PTE ((PAGE_SIZE << PTE_ORDER) >> 3) + +#define USER_PTRS_PER_PGD ((TASK_SIZE64 / PGDIR_SIZE)?(TASK_SIZE64 / PGDIR_SIZE):1) +#define FIRST_USER_ADDRESS 0UL + +#ifndef __ASSEMBLY__ + +#include + +/* + * TLB refill handlers may also map the vmalloc area into xkvrange. + * Avoid the first couple of pages so NULL pointer dereferences will + * still reliably trap. + */ +#define MODULES_VADDR (vm_map_base + PCI_IOSIZE + (2 * PAGE_SIZE)) +#define MODULES_END (MODULES_VADDR + SZ_256M) + +#define VMALLOC_START MODULES_END +#define VMALLOC_END \ + (vm_map_base + \ + min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE) + +#define pte_ERROR(e) \ + printk("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e)) +#ifndef __PAGETABLE_PMD_FOLDED +#define pmd_ERROR(e) \ + printk("%s:%d: bad pmd %016lx.\n", __FILE__, __LINE__, pmd_val(e)) +#endif +#ifndef __PAGETABLE_PUD_FOLDED +#define pud_ERROR(e) \ + printk("%s:%d: bad pud %016lx.\n", __FILE__, __LINE__, pud_val(e)) +#endif +#define pgd_ERROR(e) \ + printk("%s:%d: bad pgd %016lx.\n", __FILE__, __LINE__, pgd_val(e)) + +extern pte_t invalid_pte_table[PTRS_PER_PTE]; + +#ifndef __PAGETABLE_PUD_FOLDED +/* + * For 4-level pagetables we defines these ourselves, for 3-level the + * definitions are below, for 2-level the + * definitions are supplied by . + */ +typedef struct { unsigned long pud; } pud_t; +#define pud_val(x) ((x).pud) +#define __pud(x) ((pud_t) { (x) }) + +extern pud_t invalid_pud_table[PTRS_PER_PUD]; + +/* + * Empty pgd entries point to the invalid_pud_table. + */ +static inline int p4d_none(p4d_t p4d) +{ + return p4d_val(p4d) == (unsigned long)invalid_pud_table; +} + +static inline int p4d_bad(p4d_t p4d) +{ + if (unlikely(p4d_val(p4d) & ~PAGE_MASK)) + return 1; + + return 0; +} + +static inline int p4d_present(p4d_t p4d) +{ + return p4d_val(p4d) != (unsigned long)invalid_pud_table; +} + +static inline void p4d_clear(pgd_t *p4dp) +{ + p4d_val(*p4dp) = (unsigned long)invalid_pud_table; +} + +static inline unsigned long p4d_page_vaddr(p4d_t p4d) +{ + return p4d_val(p4d); +} + +static inline void set_p4d(pgd_t *p4d, pgd_t p4dval) +{ + *p4d = p4dval; +} + +#endif + +#ifndef __PAGETABLE_PMD_FOLDED +/* + * For 3-level pagetables we defines these ourselves, for 2-level the + * definitions are supplied by . + */ +typedef struct { unsigned long pmd; } pmd_t; +#define pmd_val(x) ((x).pmd) +#define __pmd(x) ((pmd_t) { (x) }) + +static inline pmd_t *pud_pgtable(pud_t pud) +{ + return (pmd_t *)pud_val(pud); +} +#define pud_phys(pud) virt_to_phys((void *)pud_val(pud)) +#define pud_page(pud) (pfn_to_page(pud_phys(pud) >> PAGE_SHIFT)) + +extern pmd_t invalid_pmd_table[PTRS_PER_PMD]; +#endif + +/* + * Empty pgd/pmd entries point to the invalid_pte_table. + */ +static inline int pmd_none(pmd_t pmd) +{ + return pmd_val(pmd) == (unsigned long) invalid_pte_table; +} + +static inline int pmd_bad(pmd_t pmd) +{ + /* pmd_huge(pmd) but inline */ + if (unlikely(pmd_val(pmd) & _PAGE_HUGE)) + return 0; + + if (unlikely(pmd_val(pmd) & ~PAGE_MASK)) + return 1; + + return 0; +} + +static inline int pmd_present(pmd_t pmd) +{ + if (unlikely(pmd_val(pmd) & _PAGE_HUGE)) + return !!(pmd_val(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE)); + + return pmd_val(pmd) != (unsigned long) invalid_pte_table; +} + +static inline void pmd_clear(pmd_t *pmdp) +{ + pmd_val(*pmdp) = ((unsigned long) invalid_pte_table); +} +#ifndef __PAGETABLE_PMD_FOLDED + +/* + * Empty pud entries point to the invalid_pmd_table. + */ +static inline int pud_none(pud_t pud) +{ + return pud_val(pud) == (unsigned long) invalid_pmd_table; +} + +static inline int pud_bad(pud_t pud) +{ + return pud_val(pud) & ~PAGE_MASK; +} + +static inline int pud_present(pud_t pud) +{ + return pud_val(pud) != (unsigned long) invalid_pmd_table; +} + +static inline void pud_clear(pud_t *pudp) +{ + pud_val(*pudp) = ((unsigned long) invalid_pmd_table); +} +#endif + +#define pte_page(x) pfn_to_page(pte_pfn(x)) + +#define pte_pfn(x) ((unsigned long)(((x).pte & _PFN_MASK) >> _PFN_SHIFT)) +#define pfn_pte(pfn, prot) __pte(((pfn) << _PFN_SHIFT) | pgprot_val(prot)) +#define pfn_pmd(pfn, prot) __pmd(((pfn) << _PFN_SHIFT) | pgprot_val(prot)) + +#define __pgd_offset(address) pgd_index(address) +#define __pud_offset(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) +#define __pmd_offset(address) pmd_index(address) + +#ifndef __PAGETABLE_PMD_FOLDED +static inline unsigned long pud_page_vaddr(pud_t pud) +{ + return pud_val(pud); +} +#define pud_phys(pud) virt_to_phys((void *)pud_val(pud)) +#define pud_page(pud) (pfn_to_page(pud_phys(pud) >> PAGE_SHIFT)) + +#endif + +/* + * Initialize a new pgd / pmd table with invalid pointers. + */ +extern void pgd_init(unsigned long page); +extern void pud_init(unsigned long page, unsigned long pagetable); +extern void pmd_init(unsigned long page, unsigned long pagetable); + +/* + * Non-present pages: high 40 bits are offset, next 8 bits type, + * low 16 bits zero. + */ +static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) +{ pte_t pte; pte_val(pte) = (type << 16) | (offset << 24); return pte; } + +#define __swp_type(x) (((x).val >> 16) & 0xff) +#define __swp_offset(x) ((x).val >> 24) +#define __swp_entry(type, offset) ((swp_entry_t) { pte_val(mk_swap_pte((type), (offset))) }) +#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) +#define __swp_entry_to_pte(x) ((pte_t) { (x).val }) +#define __pmd_to_swp_entry(pmd) ((swp_entry_t) { pmd_val(pmd) }) +#define __swp_entry_to_pmd(x) ((pmd_t) { (x).val | _PAGE_HUGE }) + +#endif /* !__ASSEMBLY__ */ + +#endif /* _ASM_PGTABLE_64_H */ diff --git a/arch/loongarch/include/asm/pgtable-bits.h b/arch/loongarch/include/asm/pgtable-bits.h new file mode 100644 index 000000000000..ea4d0dc7ff70 --- /dev/null +++ b/arch/loongarch/include/asm/pgtable-bits.h @@ -0,0 +1,139 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_PGTABLE_BITS_H +#define _ASM_PGTABLE_BITS_H + +/* Page table bits */ + +#define _PAGE_VALID_SHIFT 0 +#define _PAGE_ACCESSED_SHIFT 0 /* Reuse Valid for Accessed */ +#define _PAGE_DIRTY_SHIFT 1 +#define _PAGE_PLV_SHIFT 2 /* 2~3, two bits */ +#define _CACHE_SHIFT 4 /* 4~5, two bits */ +#define _PAGE_GLOBAL_SHIFT 6 +#define _PAGE_HUGE_SHIFT 6 /* HUGE is a PMD bit */ +#define _PAGE_PRESENT_SHIFT 7 +#define _PAGE_WRITE_SHIFT 8 +#define _PAGE_MODIFIED_SHIFT 9 +#define _PAGE_PROTNONE_SHIFT 10 +#define _PAGE_SPECIAL_SHIFT 11 +#define _PAGE_HGLOBAL_SHIFT 12 /* HGlobal is a PMD bit */ +#define _PAGE_PFN_SHIFT 12 +#define _PAGE_PFN_END_SHIFT 48 +#define _PAGE_NO_READ_SHIFT 61 +#define _PAGE_NO_EXEC_SHIFT 62 +#define _PAGE_RPLV_SHIFT 63 + +/* Used only by software */ +#define _PAGE_PRESENT (_ULCAST_(1) << _PAGE_PRESENT_SHIFT) +#define _PAGE_WRITE (_ULCAST_(1) << _PAGE_WRITE_SHIFT) +#define _PAGE_ACCESSED (_ULCAST_(1) << _PAGE_ACCESSED_SHIFT) +#define _PAGE_MODIFIED (_ULCAST_(1) << _PAGE_MODIFIED_SHIFT) +#define _PAGE_PROTNONE (_ULCAST_(1) << _PAGE_PROTNONE_SHIFT) +#define _PAGE_SPECIAL (_ULCAST_(1) << _PAGE_SPECIAL_SHIFT) + +/* Used by TLB hardware (placed in EntryLo*) */ +#define _PAGE_VALID (_ULCAST_(1) << _PAGE_VALID_SHIFT) +#define _PAGE_DIRTY (_ULCAST_(1) << _PAGE_DIRTY_SHIFT) +#define _PAGE_PLV (_ULCAST_(3) << _PAGE_PLV_SHIFT) +#define _PAGE_GLOBAL (_ULCAST_(1) << _PAGE_GLOBAL_SHIFT) +#define _PAGE_HUGE (_ULCAST_(1) << _PAGE_HUGE_SHIFT) +#define _PAGE_HGLOBAL (_ULCAST_(1) << _PAGE_HGLOBAL_SHIFT) +#define _PAGE_NO_READ (_ULCAST_(1) << _PAGE_NO_READ_SHIFT) +#define _PAGE_NO_EXEC (_ULCAST_(1) << _PAGE_NO_EXEC_SHIFT) +#define _PAGE_RPLV (_ULCAST_(1) << _PAGE_RPLV_SHIFT) +#define _CACHE_MASK (_ULCAST_(3) << _CACHE_SHIFT) +#define _PFN_SHIFT (PAGE_SHIFT - 12 + _PAGE_PFN_SHIFT) + +#define _PAGE_USER (PLV_USER << _PAGE_PLV_SHIFT) +#define _PAGE_KERN (PLV_KERN << _PAGE_PLV_SHIFT) + +#define _PFN_MASK (~((_ULCAST_(1) << (_PFN_SHIFT)) - 1) & \ + ((_ULCAST_(1) << (_PAGE_PFN_END_SHIFT)) - 1)) + +/* + * Cache attributes + */ + +#ifndef _CACHE_SUC +#define _CACHE_SUC (0<<_CACHE_SHIFT) /* Strong-ordered UnCached */ +#endif +#ifndef _CACHE_CC +#define _CACHE_CC (1<<_CACHE_SHIFT) /* Coherent Cached */ +#endif +#ifndef _CACHE_WUC +#define _CACHE_WUC (2<<_CACHE_SHIFT) /* Weak-ordered UnCached */ +#endif + +#define __READABLE (_PAGE_VALID) +#define __WRITEABLE (_PAGE_DIRTY | _PAGE_WRITE) + +#define _PAGE_CHG_MASK (_PAGE_MODIFIED | _PAGE_SPECIAL | _PFN_MASK | _CACHE_MASK | _PAGE_PLV) +#define _HPAGE_CHG_MASK (_PAGE_MODIFIED | _PAGE_SPECIAL | _PFN_MASK | _CACHE_MASK | _PAGE_PLV | _PAGE_HUGE) + +#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_NO_READ | \ + _PAGE_USER | _CACHE_CC) +#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_WRITE | \ + _PAGE_USER | _CACHE_CC) +#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | _CACHE_CC) + +#define PAGE_KERNEL __pgprot(_PAGE_PRESENT | __READABLE | __WRITEABLE | \ + _PAGE_GLOBAL | _PAGE_KERN | _CACHE_CC) +#define PAGE_KERNEL_SUC __pgprot(_PAGE_PRESENT | __READABLE | __WRITEABLE | \ + _PAGE_GLOBAL | _PAGE_KERN | _CACHE_SUC) +#define PAGE_KERNEL_WUC __pgprot(_PAGE_PRESENT | __READABLE | __WRITEABLE | \ + _PAGE_GLOBAL | _PAGE_KERN | _CACHE_WUC) + +#define __P000 __pgprot(_CACHE_CC | _PAGE_USER | _PAGE_PROTNONE | _PAGE_NO_EXEC | _PAGE_NO_READ) +#define __P001 __pgprot(_CACHE_CC | _PAGE_VALID | _PAGE_USER | _PAGE_PRESENT | _PAGE_NO_EXEC) +#define __P010 __pgprot(_CACHE_CC | _PAGE_VALID | _PAGE_USER | _PAGE_PRESENT | _PAGE_NO_EXEC) +#define __P011 __pgprot(_CACHE_CC | _PAGE_VALID | _PAGE_USER | _PAGE_PRESENT | _PAGE_NO_EXEC) +#define __P100 __pgprot(_CACHE_CC | _PAGE_VALID | _PAGE_USER | _PAGE_PRESENT) +#define __P101 __pgprot(_CACHE_CC | _PAGE_VALID | _PAGE_USER | _PAGE_PRESENT) +#define __P110 __pgprot(_CACHE_CC | _PAGE_VALID | _PAGE_USER | _PAGE_PRESENT) +#define __P111 __pgprot(_CACHE_CC | _PAGE_VALID | _PAGE_USER | _PAGE_PRESENT) + +#define __S000 __pgprot(_CACHE_CC | _PAGE_USER | _PAGE_PROTNONE | _PAGE_NO_EXEC | _PAGE_NO_READ) +#define __S001 __pgprot(_CACHE_CC | _PAGE_VALID | _PAGE_USER | _PAGE_PRESENT | _PAGE_NO_EXEC) +#define __S010 __pgprot(_CACHE_CC | _PAGE_VALID | _PAGE_USER | _PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE) +#define __S011 __pgprot(_CACHE_CC | _PAGE_VALID | _PAGE_USER | _PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE) +#define __S100 __pgprot(_CACHE_CC | _PAGE_VALID | _PAGE_USER | _PAGE_PRESENT) +#define __S101 __pgprot(_CACHE_CC | _PAGE_VALID | _PAGE_USER | _PAGE_PRESENT) +#define __S110 __pgprot(_CACHE_CC | _PAGE_VALID | _PAGE_USER | _PAGE_PRESENT | _PAGE_WRITE) +#define __S111 __pgprot(_CACHE_CC | _PAGE_VALID | _PAGE_USER | _PAGE_PRESENT | _PAGE_WRITE) + +#ifndef __ASSEMBLY__ + +/* + * Macro to make mark a page protection value as "uncacheable". Note + * that "protection" is really a misnomer here as the protection value + * contains the memory attribute bits, dirty bits, and various other + * bits as well. + */ +#define pgprot_noncached pgprot_noncached + +static inline pgprot_t pgprot_noncached(pgprot_t _prot) +{ + unsigned long prot = pgprot_val(_prot); + + prot = (prot & ~_CACHE_MASK) | _CACHE_SUC; + + return __pgprot(prot); +} + +#define pgprot_writecombine pgprot_writecombine + +static inline pgprot_t pgprot_writecombine(pgprot_t _prot) +{ + unsigned long prot = pgprot_val(_prot); + + prot = (prot & ~_CACHE_MASK) | _CACHE_WUC; + + return __pgprot(prot); +} + +#endif /* !__ASSEMBLY__ */ + +#endif /* _ASM_PGTABLE_BITS_H */ diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h new file mode 100644 index 000000000000..f2b8341ec03e --- /dev/null +++ b/arch/loongarch/include/asm/pgtable.h @@ -0,0 +1,338 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_PGTABLE_H +#define _ASM_PGTABLE_H + +#include +#include +#include +#include +#include + +struct mm_struct; +struct vm_area_struct; + +/* + * ZERO_PAGE is a global shared page that is always zero; used + * for zero-mapped memory areas etc.. + */ + +extern unsigned long empty_zero_page; +extern unsigned long zero_page_mask; + +#define ZERO_PAGE(vaddr) \ + (virt_to_page((void *)(empty_zero_page + (((unsigned long)(vaddr)) & zero_page_mask)))) +#define __HAVE_COLOR_ZERO_PAGE + +extern void paging_init(void); + +/* + * Conversion functions: convert a page and protection to a page entry, + * and a page entry and page directory to the page they refer to. + */ +#define pmd_phys(pmd) virt_to_phys((void *)pmd_val(pmd)) + +#define __pmd_page(pmd) (pfn_to_page(pmd_phys(pmd) >> PAGE_SHIFT)) +#ifndef CONFIG_TRANSPARENT_HUGEPAGE +#define pmd_page(pmd) __pmd_page(pmd) +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + +#define pmd_page_vaddr(pmd) pmd_val(pmd) + +static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pteval); + +#define pte_none(pte) (!(pte_val(pte) & ~_PAGE_GLOBAL)) +#define pte_present(pte) (pte_val(pte) & (_PAGE_PRESENT | _PAGE_PROTNONE)) +#define pte_no_exec(pte) (pte_val(pte) & _PAGE_NO_EXEC) + +static inline void set_pte(pte_t *ptep, pte_t pteval) +{ + *ptep = pteval; + if (pte_val(pteval) & _PAGE_GLOBAL) { + pte_t *buddy = ptep_buddy(ptep); + /* + * Make sure the buddy is global too (if it's !none, + * it better already be global) + */ + if (pte_none(*buddy)) + pte_val(*buddy) = pte_val(*buddy) | _PAGE_GLOBAL; + } +} + +static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +{ + /* Preserve global status for the pair */ + if (pte_val(*ptep_buddy(ptep)) & _PAGE_GLOBAL) + set_pte_at(mm, addr, ptep, __pte(_PAGE_GLOBAL)); + else + set_pte_at(mm, addr, ptep, __pte(0)); +} + +static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pteval) +{ + extern void __update_cache(unsigned long address, pte_t pte); + + if (!pte_present(pteval)) + goto cache_sync_done; + + if (pte_present(*ptep) && (pte_pfn(*ptep) == pte_pfn(pteval))) + goto cache_sync_done; + + __update_cache(addr, pteval); +cache_sync_done: + set_pte(ptep, pteval); +} + +/* + * (pmds are folded into puds so this doesn't get actually called, + * but the define is needed for a generic inline function.) + */ +#define set_pmd(pmdptr, pmdval) do { *(pmdptr) = (pmdval); } while (0) + +#ifndef __PAGETABLE_PMD_FOLDED +/* + * (puds are folded into pgds so this doesn't get actually called, + * but the define is needed for a generic inline function.) + */ +#define set_pud(pudptr, pudval) do { *(pudptr) = (pudval); } while (0) +#endif + +#define PGD_T_LOG2 (__builtin_ffs(sizeof(pgd_t)) - 1) +#define PMD_T_LOG2 (__builtin_ffs(sizeof(pmd_t)) - 1) +#define PTE_T_LOG2 (__builtin_ffs(sizeof(pte_t)) - 1) + +extern pgd_t swapper_pg_dir[]; +extern pgd_t invalid_pg_dir[]; + +/* + * The following only work if pte_present() is true. + * Undefined behaviour if not.. + */ +static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITE; } +static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } +static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_MODIFIED; } + +static inline pte_t pte_mkold(pte_t pte) +{ + pte_val(pte) &= ~_PAGE_ACCESSED; + return pte; +} + +static inline pte_t pte_mkyoung(pte_t pte) +{ + pte_val(pte) |= _PAGE_ACCESSED; + return pte; +} + +static inline pte_t pte_mkclean(pte_t pte) +{ + pte_val(pte) &= ~(_PAGE_DIRTY | _PAGE_MODIFIED); + return pte; +} + +static inline pte_t pte_mkdirty(pte_t pte) +{ + pte_val(pte) |= (_PAGE_DIRTY | _PAGE_MODIFIED); + return pte; +} + +static inline pte_t pte_mkwrite(pte_t pte) +{ + pte_val(pte) |= (_PAGE_WRITE | _PAGE_DIRTY); + return pte; +} + +static inline pte_t pte_wrprotect(pte_t pte) +{ + pte_val(pte) &= ~(_PAGE_WRITE | _PAGE_DIRTY); + return pte; +} + +static inline int pte_huge(pte_t pte) { return pte_val(pte) & _PAGE_HUGE; } + +static inline pte_t pte_mkhuge(pte_t pte) +{ + pte_val(pte) |= _PAGE_HUGE; + return pte; +} + +#if defined(CONFIG_ARCH_HAS_PTE_SPECIAL) +static inline int pte_special(pte_t pte) { return pte_val(pte) & _PAGE_SPECIAL; } +static inline pte_t pte_mkspecial(pte_t pte) { pte_val(pte) |= _PAGE_SPECIAL; return pte; } +#endif /* CONFIG_ARCH_HAS_PTE_SPECIAL */ + +#define pte_accessible pte_accessible +static inline unsigned long pte_accessible(struct mm_struct *mm, pte_t a) +{ + if (pte_val(a) & _PAGE_PRESENT) + return true; + + if ((pte_val(a) & _PAGE_PROTNONE) && + mm_tlb_flush_pending(mm)) + return true; + + return false; +} + +/* + * Conversion functions: convert a page and protection to a page entry, + * and a page entry and page directory to the page they refer to. + */ +#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) + +static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) +{ + return __pte((pte_val(pte) & _PAGE_CHG_MASK) | + (pgprot_val(newprot) & ~_PAGE_CHG_MASK)); +} + +extern void __update_tlb(struct vm_area_struct *vma, unsigned long address, + pte_t *ptep); + +static inline void update_mmu_cache(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep) +{ + __update_tlb(vma, address, ptep); +} + +static inline void update_mmu_cache_pmd(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp) +{ + __update_tlb(vma, address, (pte_t *)pmdp); +} + +#define kern_addr_valid(addr) (1) + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + +/* We don't have hardware dirty/accessed bits, generic_pmdp_establish is fine.*/ +#define pmdp_establish generic_pmdp_establish + +static inline int pmd_trans_huge(pmd_t pmd) +{ + return !!(pmd_val(pmd) & _PAGE_HUGE) && pmd_present(pmd); +} + +static inline pmd_t pmd_mkhuge(pmd_t pmd) +{ + pmd_val(pmd) = (pmd_val(pmd) & ~(_PAGE_GLOBAL)) | + ((pmd_val(pmd) & _PAGE_GLOBAL) << (_PAGE_HGLOBAL_SHIFT - _PAGE_GLOBAL_SHIFT)); + pmd_val(pmd) |= _PAGE_HUGE; + + return pmd; +} + +extern void set_pmd_at(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t pmd); + +#define pmd_write pmd_write +static inline int pmd_write(pmd_t pmd) +{ + return !!(pmd_val(pmd) & _PAGE_WRITE); +} + +static inline pmd_t pmd_mkwrite(pmd_t pmd) +{ + pmd_val(pmd) |= (_PAGE_WRITE | _PAGE_DIRTY); + return pmd; +} + +static inline pmd_t pmd_wrprotect(pmd_t pmd) +{ + pmd_val(pmd) &= ~(_PAGE_WRITE | _PAGE_DIRTY); + return pmd; +} + +static inline int pmd_dirty(pmd_t pmd) +{ + return !!(pmd_val(pmd) & _PAGE_MODIFIED); +} + +static inline pmd_t pmd_mkclean(pmd_t pmd) +{ + pmd_val(pmd) &= ~(_PAGE_DIRTY | _PAGE_MODIFIED); + return pmd; +} + +static inline pmd_t pmd_mkdirty(pmd_t pmd) +{ + pmd_val(pmd) |= (_PAGE_DIRTY | _PAGE_MODIFIED); + return pmd; +} + +static inline int pmd_young(pmd_t pmd) +{ + return !!(pmd_val(pmd) & _PAGE_ACCESSED); +} + +static inline pmd_t pmd_mkold(pmd_t pmd) +{ + pmd_val(pmd) &= ~_PAGE_ACCESSED; + return pmd; +} + +static inline pmd_t pmd_mkyoung(pmd_t pmd) +{ + pmd_val(pmd) |= _PAGE_ACCESSED; + return pmd; +} + +/* Extern to avoid header file madness */ +extern pmd_t mk_pmd(struct page *page, pgprot_t prot); + +static inline unsigned long pmd_pfn(pmd_t pmd) +{ + return (pmd_val(pmd) & _PFN_MASK) >> _PFN_SHIFT; +} + +static inline struct page *pmd_page(pmd_t pmd) +{ + if (pmd_trans_huge(pmd)) + return pfn_to_page(pmd_pfn(pmd)); + + return pfn_to_page(pmd_phys(pmd) >> PAGE_SHIFT); +} + +static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) +{ + pmd_val(pmd) = (pmd_val(pmd) & _HPAGE_CHG_MASK) | + (pgprot_val(newprot) & ~_HPAGE_CHG_MASK); + return pmd; +} + +static inline pmd_t pmd_mkinvalid(pmd_t pmd) +{ + pmd_val(pmd) &= ~(_PAGE_PRESENT | _PAGE_VALID | _PAGE_DIRTY | _PAGE_PROTNONE); + + return pmd; +} + +/* + * The generic version pmdp_huge_get_and_clear uses a version of pmd_clear() with a + * different prototype. + */ +#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR +static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, + unsigned long address, pmd_t *pmdp) +{ + pmd_t old = *pmdp; + + pmd_clear(pmdp); + + return old; +} + +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + +/* + * We provide our own get_unmapped area to cope with the virtual aliasing + * constraints placed on us by the cache architecture. + */ +#define HAVE_ARCH_UNMAPPED_AREA +#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN + +#endif /* _ASM_PGTABLE_H */ diff --git a/arch/loongarch/include/asm/prefetch.h b/arch/loongarch/include/asm/prefetch.h new file mode 100644 index 000000000000..47169b57ce32 --- /dev/null +++ b/arch/loongarch/include/asm/prefetch.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#ifndef __ASM_PREFETCH_H +#define __ASM_PREFETCH_H + +#define Pref_Load 0 +#define Pref_Store 8 + +#ifdef __ASSEMBLY__ + + .macro __pref hint addr +#ifdef CONFIG_CPU_HAS_PREFETCH + preld \hint, \addr, 0 +#endif + .endm + + .macro pref_load addr + __pref Pref_Load, \addr + .endm + + .macro pref_store addr + __pref Pref_Store, \addr + .endm + +#endif + +#endif /* __ASM_PREFETCH_H */ diff --git a/arch/loongarch/include/asm/processor.h b/arch/loongarch/include/asm/processor.h new file mode 100644 index 000000000000..c8ce892e3aa8 --- /dev/null +++ b/arch/loongarch/include/asm/processor.h @@ -0,0 +1,209 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_PROCESSOR_H +#define _ASM_PROCESSOR_H + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_32BIT +#define TASK_SIZE 0x80000000UL +#define STACK_TOP_MAX TASK_SIZE + +#define TASK_IS_32BIT_ADDR 1 + +#endif + +#ifdef CONFIG_64BIT + +#define TASK_SIZE32 0x100000000UL + +#define TASK_SIZE64 (0x1UL << ((cpu_vabits > VA_BITS) ? VA_BITS : cpu_vabits)) + +#define TASK_SIZE (test_thread_flag(TIF_32BIT_ADDR) ? TASK_SIZE32 : TASK_SIZE64) + +#define STACK_TOP_MAX TASK_SIZE64 + +#define TASK_SIZE_OF(tsk) \ + (test_tsk_thread_flag(tsk, TIF_32BIT_ADDR) ? TASK_SIZE32 : TASK_SIZE64) + +#define TASK_IS_32BIT_ADDR test_thread_flag(TIF_32BIT_ADDR) + +#endif + +#define VDSO_RANDOMIZE_SIZE (TASK_IS_32BIT_ADDR ? SZ_1M : SZ_64M) + +unsigned long stack_top(void); +#define STACK_TOP stack_top() + +/* + * This decides where the kernel will search for a free chunk of vm + * space during mmap's. + */ +#define TASK_UNMAPPED_BASE PAGE_ALIGN(TASK_SIZE / 3) + +#define FPU_REG_WIDTH 256 +#define FPU_ALIGN __attribute__((aligned(32))) + +union fpureg { + __u32 val32[FPU_REG_WIDTH / 32]; + __u64 val64[FPU_REG_WIDTH / 64]; +}; + +#define FPR_IDX(width, idx) (idx) + +#define BUILD_FPR_ACCESS(width) \ +static inline u##width get_fpr##width(union fpureg *fpr, unsigned idx) \ +{ \ + return fpr->val##width[FPR_IDX(width, idx)]; \ +} \ + \ +static inline void set_fpr##width(union fpureg *fpr, unsigned idx, \ + u##width val) \ +{ \ + fpr->val##width[FPR_IDX(width, idx)] = val; \ +} + +BUILD_FPR_ACCESS(32) +BUILD_FPR_ACCESS(64) + +struct loongarch_fpu { + unsigned int fcsr; + unsigned int vcsr; + uint64_t fcc; /* 8x8 */ + union fpureg fpr[NUM_FPU_REGS]; +}; + +#define INIT_CPUMASK { \ + {0,} \ +} + +#define ARCH_MIN_TASKALIGN 32 + +struct loongarch_vdso_info; + +/* + * If you change thread_struct remember to change the #defines below too! + */ +struct thread_struct { + /* Main processor registers. */ + unsigned long reg01, reg03, reg22; /* ra sp fp */ + unsigned long reg23, reg24, reg25, reg26; /* s0-s3 */ + unsigned long reg27, reg28, reg29, reg30, reg31; /* s4-s8 */ + + /* CSR registers */ + unsigned long csr_prmd; + unsigned long csr_crmd; + unsigned long csr_euen; + unsigned long csr_ecfg; + unsigned long csr_badvaddr; /* Last user fault */ + + /* Scratch registers */ + unsigned long scr0; + unsigned long scr1; + unsigned long scr2; + unsigned long scr3; + + /* Eflags register */ + unsigned long eflags; + + /* Other stuff associated with the thread. */ + unsigned long trap_nr; + unsigned long error_code; + struct loongarch_vdso_info *vdso; + + /* + * FPU & vector registers, must be at last because + * they are conditionally copied at fork(). + */ + struct loongarch_fpu fpu FPU_ALIGN; +}; + +#define INIT_THREAD { \ + /* \ + * Main processor registers \ + */ \ + .reg01 = 0, \ + .reg03 = 0, \ + .reg22 = 0, \ + .reg23 = 0, \ + .reg24 = 0, \ + .reg25 = 0, \ + .reg26 = 0, \ + .reg27 = 0, \ + .reg28 = 0, \ + .reg29 = 0, \ + .reg30 = 0, \ + .reg31 = 0, \ + .csr_crmd = 0, \ + .csr_prmd = 0, \ + .csr_euen = 0, \ + .csr_ecfg = 0, \ + .csr_badvaddr = 0, \ + /* \ + * Other stuff associated with the process \ + */ \ + .trap_nr = 0, \ + .error_code = 0, \ + /* \ + * FPU & vector registers \ + */ \ + .fpu = { \ + .fcsr = 0, \ + .vcsr = 0, \ + .fcc = 0, \ + .fpr = {{{0,},},}, \ + }, \ +} + +struct task_struct; + +/* Free all resources held by a thread. */ +#define release_thread(thread) do { } while (0) + +enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_HALT, IDLE_NOMWAIT, + IDLE_POLL}; + +extern unsigned long boot_option_idle_override; +/* + * Do necessary setup to start up a newly executed thread. + */ +extern void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp); + +static inline void flush_thread(void) +{ +} + +unsigned long get_wchan(struct task_struct *p); + +#define __KSTK_TOS(tsk) ((unsigned long)task_stack_page(tsk) + \ + THREAD_SIZE - 32 - sizeof(struct pt_regs)) +#define task_pt_regs(tsk) ((struct pt_regs *)__KSTK_TOS(tsk)) +#define KSTK_EIP(tsk) (task_pt_regs(tsk)->csr_era) +#define KSTK_ESP(tsk) (task_pt_regs(tsk)->regs[3]) +#define KSTK_EUEN(tsk) (task_pt_regs(tsk)->csr_euen) +#define KSTK_ECFG(tsk) (task_pt_regs(tsk)->csr_ecfg) + +#define return_address() ({__asm__ __volatile__("":::"$1"); __builtin_return_address(0); }) + +#ifdef CONFIG_CPU_HAS_PREFETCH + +#define ARCH_HAS_PREFETCH +#define prefetch(x) __builtin_prefetch((x), 0, 1) + +#define ARCH_HAS_PREFETCHW +#define prefetchw(x) __builtin_prefetch((x), 1, 1) + +#endif + +#endif /* _ASM_PROCESSOR_H */ diff --git a/arch/loongarch/include/asm/ptrace.h b/arch/loongarch/include/asm/ptrace.h new file mode 100644 index 000000000000..a42a79ac2a3d --- /dev/null +++ b/arch/loongarch/include/asm/ptrace.h @@ -0,0 +1,154 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#ifndef _ASM_PTRACE_H +#define _ASM_PTRACE_H + +#include +#include +#include + +/* + * This struct defines the way the registers are stored on the stack during + * a system call/exception. If you add a register here, please also add it to + * regoffset_table[] in arch/loongarch/kernel/ptrace.c. + */ +struct pt_regs { + /* Main processor registers. */ + unsigned long regs[32]; + + /* Special CSR registers. */ + unsigned long csr_era; + unsigned long csr_badvaddr; + unsigned long csr_crmd; + unsigned long csr_prmd; + unsigned long csr_euen; + unsigned long csr_ecfg; + unsigned long csr_estat; + unsigned long orig_a0; + unsigned long __last[0]; +} __aligned(8); + +static inline int regs_irqs_disabled(struct pt_regs *regs) +{ + return arch_irqs_disabled_flags(regs->csr_prmd); +} + +static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) +{ + return regs->regs[3]; +} + +/* + * Don't use asm-generic/ptrace.h it defines FP accessors that don't make + * sense on LoongArch. We rather want an error if they get invoked. + */ + +static inline void instruction_pointer_set(struct pt_regs *regs, + unsigned long val) +{ + regs->csr_era = val; +} + +/* Query offset/name of register from its name/offset */ +extern int regs_query_register_offset(const char *name); +#define MAX_REG_OFFSET (offsetof(struct pt_regs, __last)) + +/** + * regs_get_register() - get register value from its offset + * @regs: pt_regs from which register value is gotten. + * @offset: offset number of the register. + * + * regs_get_register returns the value of a register. The @offset is the + * offset of the register in struct pt_regs address which specified by @regs. + * If @offset is bigger than MAX_REG_OFFSET, this returns 0. + */ +static inline unsigned long regs_get_register(struct pt_regs *regs, + unsigned int offset) +{ + if (unlikely(offset > MAX_REG_OFFSET)) + return 0; + + return *(unsigned long *)((unsigned long)regs + offset); +} + +/** + * regs_within_kernel_stack() - check the address in the stack + * @regs: pt_regs which contains kernel stack pointer. + * @addr: address which is checked. + * + * regs_within_kernel_stack() checks @addr is within the kernel stack page(s). + * If @addr is within the kernel stack, it returns true. If not, returns false. + */ +static inline int regs_within_kernel_stack(struct pt_regs *regs, + unsigned long addr) +{ + return ((addr & ~(THREAD_SIZE - 1)) == + (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1))); +} + +/** + * regs_get_kernel_stack_nth() - get Nth entry of the stack + * @regs: pt_regs which contains kernel stack pointer. + * @n: stack entry number. + * + * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which + * is specified by @regs. If the @n th entry is NOT in the kernel stack, + * this returns 0. + */ +static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, + unsigned int n) +{ + unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs); + + addr += n; + if (regs_within_kernel_stack(regs, (unsigned long)addr)) + return *addr; + else + return 0; +} + +struct task_struct; + +/* + * Does the process account for user or for system time? + */ +#define user_mode(regs) (((regs)->csr_prmd & PLV_MASK) == PLV_USER) + +static inline long regs_return_value(struct pt_regs *regs) +{ + return regs->regs[4]; +} + +#define instruction_pointer(regs) ((regs)->csr_era) +#define profile_pc(regs) instruction_pointer(regs) + +extern void die(const char *, struct pt_regs *) __noreturn; + +static inline void die_if_kernel(const char *str, struct pt_regs *regs) +{ + if (unlikely(!user_mode(regs))) + die(str, regs); +} + +#define current_pt_regs() \ +({ \ + unsigned long sp = (unsigned long)__builtin_frame_address(0); \ + (struct pt_regs *)((sp | (THREAD_SIZE - 1)) + 1 - 32) - 1; \ +}) + +/* Helpers for working with the user stack pointer */ + +static inline unsigned long user_stack_pointer(struct pt_regs *regs) +{ + return regs->regs[3]; +} + +static inline void user_stack_pointer_set(struct pt_regs *regs, + unsigned long val) +{ + regs->regs[3] = val; +} + +#endif /* _ASM_PTRACE_H */ diff --git a/arch/loongarch/include/asm/reboot.h b/arch/loongarch/include/asm/reboot.h new file mode 100644 index 000000000000..48c3b1ea7145 --- /dev/null +++ b/arch/loongarch/include/asm/reboot.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#ifndef _ASM_REBOOT_H +#define _ASM_REBOOT_H + +extern void (*pm_restart)(void); + +#endif /* _ASM_REBOOT_H */ diff --git a/arch/loongarch/include/asm/regdef.h b/arch/loongarch/include/asm/regdef.h new file mode 100644 index 000000000000..6025ee809d29 --- /dev/null +++ b/arch/loongarch/include/asm/regdef.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#ifndef _ASM_REGDEF_H +#define _ASM_REGDEF_H + +#define zero $r0 /* wired zero */ +#define ra $r1 /* return address */ +#define tp $r2 +#define sp $r3 /* stack pointer */ +#define v0 $r4 /* return value - caller saved */ +#define v1 $r5 +#define a0 $r4 /* argument registers */ +#define a1 $r5 +#define a2 $r6 +#define a3 $r7 +#define a4 $r8 +#define a5 $r9 +#define a6 $r10 +#define a7 $r11 +#define t0 $r12 /* caller saved */ +#define t1 $r13 +#define t2 $r14 +#define t3 $r15 +#define t4 $r16 +#define t5 $r17 +#define t6 $r18 +#define t7 $r19 +#define t8 $r20 +#define u0 $r21 +#define fp $r22 /* frame pointer */ +#define s0 $r23 /* callee saved */ +#define s1 $r24 +#define s2 $r25 +#define s3 $r26 +#define s4 $r27 +#define s5 $r28 +#define s6 $r29 +#define s7 $r30 +#define s8 $r31 + +#endif /* _ASM_REGDEF_H */ diff --git a/arch/loongarch/include/asm/serial.h b/arch/loongarch/include/asm/serial.h new file mode 100644 index 000000000000..f0bead2e9995 --- /dev/null +++ b/arch/loongarch/include/asm/serial.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#ifndef __ASM__SERIAL_H +#define __ASM__SERIAL_H + +#define BASE_BAUD 0 +#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST) + +#endif /* __ASM__SERIAL_H */ diff --git a/arch/loongarch/include/asm/setup.h b/arch/loongarch/include/asm/setup.h new file mode 100644 index 000000000000..ebd9d210f287 --- /dev/null +++ b/arch/loongarch/include/asm/setup.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ + +#ifndef _LOONGARCH_SETUP_H +#define _LOONGARCH_SETUP_H + +#include +#include + +#define VECSIZE 0x200 + +extern unsigned long eentry; +extern unsigned long tlbrentry; +extern void cpu_cache_init(void); +extern void per_cpu_trap_init(int cpu); +extern void set_handler(unsigned long offset, void *addr, unsigned long len); +extern void set_merr_handler(unsigned long offset, void *addr, unsigned long len); + +#endif /* __SETUP_H */ diff --git a/arch/loongarch/include/asm/shmparam.h b/arch/loongarch/include/asm/shmparam.h new file mode 100644 index 000000000000..0a4af240090c --- /dev/null +++ b/arch/loongarch/include/asm/shmparam.h @@ -0,0 +1,13 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#ifndef _ASM_SHMPARAM_H +#define _ASM_SHMPARAM_H + +#define __ARCH_FORCE_SHMLBA 1 + +#define SHMLBA SZ_64K /* attach addr a multiple of this */ + +#endif /* _ASM_SHMPARAM_H */ diff --git a/arch/loongarch/include/asm/sparsemem.h b/arch/loongarch/include/asm/sparsemem.h new file mode 100644 index 000000000000..3d18cdf1b069 --- /dev/null +++ b/arch/loongarch/include/asm/sparsemem.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LOONGARCH_SPARSEMEM_H +#define _LOONGARCH_SPARSEMEM_H + +#ifdef CONFIG_SPARSEMEM + +/* + * SECTION_SIZE_BITS 2^N: how big each section will be + * MAX_PHYSMEM_BITS 2^N: how much memory we can have in that space + */ +#define SECTION_SIZE_BITS 29 /* 2^29 = Largest Huge Page Size */ +#define MAX_PHYSMEM_BITS 48 + +#endif /* CONFIG_SPARSEMEM */ + +#ifdef CONFIG_MEMORY_HOTPLUG +int memory_add_physaddr_to_nid(u64 addr); +#define memory_add_physaddr_to_nid memory_add_physaddr_to_nid +#endif + +#define INIT_MEMBLOCK_RESERVED_REGIONS (INIT_MEMBLOCK_REGIONS + NR_CPUS) + +#endif /* _LOONGARCH_SPARSEMEM_H */ diff --git a/arch/loongarch/include/asm/spinlock.h b/arch/loongarch/include/asm/spinlock.h new file mode 100644 index 000000000000..52dcc6419525 --- /dev/null +++ b/arch/loongarch/include/asm/spinlock.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1999, 2000, 06 Ralf Baechle (ralf@linux-mips.org) + * Copyright (C) 1999, 2000 Silicon Graphics, Inc. + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_SPINLOCK_H +#define _ASM_SPINLOCK_H + +#include +#include +#include + +#endif /* _ASM_SPINLOCK_H */ diff --git a/arch/loongarch/include/asm/spinlock_types.h b/arch/loongarch/include/asm/spinlock_types.h new file mode 100644 index 000000000000..1cbfe2451529 --- /dev/null +++ b/arch/loongarch/include/asm/spinlock_types.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_SPINLOCK_TYPES_H +#define _ASM_SPINLOCK_TYPES_H + +#include +#include + +#endif diff --git a/arch/loongarch/include/asm/stackframe.h b/arch/loongarch/include/asm/stackframe.h new file mode 100644 index 000000000000..d5580e9de290 --- /dev/null +++ b/arch/loongarch/include/asm/stackframe.h @@ -0,0 +1,212 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#ifndef _ASM_STACKFRAME_H +#define _ASM_STACKFRAME_H + +#include + +#include +#include +#include +#include +#include + +/* Make the addition of cfi info a little easier. */ + .macro cfi_rel_offset reg offset=0 docfi=0 + .if \docfi + .cfi_rel_offset \reg, \offset + .endif + .endm + + .macro cfi_st reg offset=0 docfi=0 + cfi_rel_offset \reg, \offset, \docfi + LONG_S \reg, sp, \offset + .endm + + .macro cfi_restore reg offset=0 docfi=0 + .if \docfi + .cfi_restore \reg + .endif + .endm + + .macro cfi_ld reg offset=0 docfi=0 + LONG_L \reg, sp, \offset + cfi_restore \reg \offset \docfi + .endm + + .macro BACKUP_T0T1 + csrwr t0, EXCEPTION_KS0 + csrwr t1, EXCEPTION_KS1 + .endm + + .macro RELOAD_T0T1 + csrrd t0, EXCEPTION_KS0 + csrrd t1, EXCEPTION_KS1 + .endm + + .macro SAVE_TEMP docfi=0 + RELOAD_T0T1 + cfi_st t0, PT_R12, \docfi + cfi_st t1, PT_R13, \docfi + cfi_st t2, PT_R14, \docfi + cfi_st t3, PT_R15, \docfi + cfi_st t4, PT_R16, \docfi + cfi_st t5, PT_R17, \docfi + cfi_st t6, PT_R18, \docfi + cfi_st t7, PT_R19, \docfi + cfi_st t8, PT_R20, \docfi + .endm + + .macro SAVE_STATIC docfi=0 + cfi_st s0, PT_R23, \docfi + cfi_st s1, PT_R24, \docfi + cfi_st s2, PT_R25, \docfi + cfi_st s3, PT_R26, \docfi + cfi_st s4, PT_R27, \docfi + cfi_st s5, PT_R28, \docfi + cfi_st s6, PT_R29, \docfi + cfi_st s7, PT_R30, \docfi + cfi_st s8, PT_R31, \docfi + .endm + +/* + * get_saved_sp returns the SP for the current CPU by looking in the + * kernelsp array for it. It stores the current sp in t0 and loads the + * new value in sp. + */ + .macro get_saved_sp docfi=0 + la.abs t1, kernelsp + move t0, sp + .if \docfi + .cfi_register sp, t0 + .endif + LONG_L sp, t1, 0 + .endm + + .macro set_saved_sp stackp temp temp2 + la.abs \temp, kernelsp + LONG_S \stackp, \temp, 0 + .endm + + .macro SAVE_SOME docfi=0 + csrrd t1, LOONGARCH_CSR_PRMD + andi t1, t1, 0x3 /* extract pplv bit */ + move t0, sp + beqz t1, 8f + /* Called from user mode, new stack. */ + get_saved_sp docfi=\docfi +8: + PTR_ADDIU sp, sp, -PT_SIZE + .if \docfi + .cfi_def_cfa sp,0 + .endif + cfi_st t0, PT_R3, \docfi + cfi_rel_offset sp, PT_R3, \docfi + LONG_S zero, sp, PT_R0 + csrrd t0, LOONGARCH_CSR_PRMD + LONG_S t0, sp, PT_PRMD + csrrd t0, LOONGARCH_CSR_CRMD + LONG_S t0, sp, PT_CRMD + csrrd t0, LOONGARCH_CSR_EUEN + LONG_S t0, sp, PT_EUEN + csrrd t0, LOONGARCH_CSR_ECFG + LONG_S t0, sp, PT_ECFG + csrrd t0, LOONGARCH_CSR_ESTAT + PTR_S t0, sp, PT_ESTAT + cfi_st ra, PT_R1, \docfi + cfi_st a0, PT_R4, \docfi + cfi_st a1, PT_R5, \docfi + cfi_st a2, PT_R6, \docfi + cfi_st a3, PT_R7, \docfi + cfi_st a4, PT_R8, \docfi + cfi_st a5, PT_R9, \docfi + cfi_st a6, PT_R10, \docfi + cfi_st a7, PT_R11, \docfi + csrrd ra, LOONGARCH_CSR_ERA + LONG_S ra, sp, PT_ERA + .if \docfi + .cfi_rel_offset ra, PT_ERA + .endif + cfi_st tp, PT_R2, \docfi + cfi_st fp, PT_R22, \docfi + + /* Set thread_info if we're coming from user mode */ + csrrd t0, LOONGARCH_CSR_PRMD + andi t0, t0, 0x3 /* extract pplv bit */ + beqz t0, 9f + + li.d tp, ~_THREAD_MASK + and tp, tp, sp + cfi_st u0, PT_R21, \docfi + csrrd u0, PERCPU_BASE_KS +9: + .endm + + .macro SAVE_ALL docfi=0 + SAVE_SOME \docfi + SAVE_TEMP \docfi + SAVE_STATIC \docfi + .endm + + .macro RESTORE_TEMP docfi=0 + cfi_ld t0, PT_R12, \docfi + cfi_ld t1, PT_R13, \docfi + cfi_ld t2, PT_R14, \docfi + cfi_ld t3, PT_R15, \docfi + cfi_ld t4, PT_R16, \docfi + cfi_ld t5, PT_R17, \docfi + cfi_ld t6, PT_R18, \docfi + cfi_ld t7, PT_R19, \docfi + cfi_ld t8, PT_R20, \docfi + .endm + + .macro RESTORE_STATIC docfi=0 + cfi_ld s0, PT_R23, \docfi + cfi_ld s1, PT_R24, \docfi + cfi_ld s2, PT_R25, \docfi + cfi_ld s3, PT_R26, \docfi + cfi_ld s4, PT_R27, \docfi + cfi_ld s5, PT_R28, \docfi + cfi_ld s6, PT_R29, \docfi + cfi_ld s7, PT_R30, \docfi + cfi_ld s8, PT_R31, \docfi + .endm + + .macro RESTORE_SOME docfi=0 + LONG_L a0, sp, PT_PRMD + andi a0, a0, 0x3 /* extract pplv bit */ + beqz a0, 8f + cfi_ld u0, PT_R21, \docfi +8: + LONG_L a0, sp, PT_ERA + csrwr a0, LOONGARCH_CSR_ERA + LONG_L a0, sp, PT_PRMD + csrwr a0, LOONGARCH_CSR_PRMD + cfi_ld ra, PT_R1, \docfi + cfi_ld a0, PT_R4, \docfi + cfi_ld a1, PT_R5, \docfi + cfi_ld a2, PT_R6, \docfi + cfi_ld a3, PT_R7, \docfi + cfi_ld a4, PT_R8, \docfi + cfi_ld a5, PT_R9, \docfi + cfi_ld a6, PT_R10, \docfi + cfi_ld a7, PT_R11, \docfi + cfi_ld tp, PT_R2, \docfi + cfi_ld fp, PT_R22, \docfi + .endm + + .macro RESTORE_SP_AND_RET docfi=0 + cfi_ld sp, PT_R3, \docfi + ertn + .endm + + .macro RESTORE_ALL_AND_RET docfi=0 + RESTORE_STATIC \docfi + RESTORE_TEMP \docfi + RESTORE_SOME \docfi + RESTORE_SP_AND_RET \docfi + .endm + +#endif /* _ASM_STACKFRAME_H */ diff --git a/arch/loongarch/include/asm/stacktrace.h b/arch/loongarch/include/asm/stacktrace.h new file mode 100644 index 000000000000..07adab35ab8a --- /dev/null +++ b/arch/loongarch/include/asm/stacktrace.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020-2021 Loongson Technology Corporation Limited + */ +#ifndef _ASM_STACKTRACE_H +#define _ASM_STACKTRACE_H + +#include +#include +#include +#include + +#define STR_LONG_L __stringify(LONG_L) +#define STR_LONG_S __stringify(LONG_S) +#define STR_LONGSIZE __stringify(LONGSIZE) + +#define STORE_ONE_REG(r) \ + STR_LONG_S " $r" __stringify(r)", %1, "STR_LONGSIZE"*"__stringify(r)"\n\t" + +#define CSRRD_ONE_REG(reg) \ + __stringify(csrrd) " %0, "__stringify(reg)"\n\t" + +static __always_inline void prepare_frametrace(struct pt_regs *regs) +{ + __asm__ __volatile__( + /* Save $r1 */ + STORE_ONE_REG(1) + /* Use $r1 to save PC */ + "pcaddi $r1, 0\n\t" + STR_LONG_S " $r1, %0\n\t" + /* Restore $r1 */ + STR_LONG_L " $r1, %1, "STR_LONGSIZE"\n\t" + STORE_ONE_REG(2) + STORE_ONE_REG(3) + STORE_ONE_REG(4) + STORE_ONE_REG(5) + STORE_ONE_REG(6) + STORE_ONE_REG(7) + STORE_ONE_REG(8) + STORE_ONE_REG(9) + STORE_ONE_REG(10) + STORE_ONE_REG(11) + STORE_ONE_REG(12) + STORE_ONE_REG(13) + STORE_ONE_REG(14) + STORE_ONE_REG(15) + STORE_ONE_REG(16) + STORE_ONE_REG(17) + STORE_ONE_REG(18) + STORE_ONE_REG(19) + STORE_ONE_REG(20) + STORE_ONE_REG(21) + STORE_ONE_REG(22) + STORE_ONE_REG(23) + STORE_ONE_REG(24) + STORE_ONE_REG(25) + STORE_ONE_REG(26) + STORE_ONE_REG(27) + STORE_ONE_REG(28) + STORE_ONE_REG(29) + STORE_ONE_REG(30) + STORE_ONE_REG(31) + : "=m" (regs->csr_era) + : "r" (regs->regs) + : "memory"); + __asm__ __volatile__(CSRRD_ONE_REG(LOONGARCH_CSR_BADV) : "=r" (regs->csr_badvaddr)); + __asm__ __volatile__(CSRRD_ONE_REG(LOONGARCH_CSR_CRMD) : "=r" (regs->csr_crmd)); + __asm__ __volatile__(CSRRD_ONE_REG(LOONGARCH_CSR_PRMD) : "=r" (regs->csr_prmd)); + __asm__ __volatile__(CSRRD_ONE_REG(LOONGARCH_CSR_EUEN) : "=r" (regs->csr_euen)); + __asm__ __volatile__(CSRRD_ONE_REG(LOONGARCH_CSR_ECFG) : "=r" (regs->csr_ecfg)); + __asm__ __volatile__(CSRRD_ONE_REG(LOONGARCH_CSR_ESTAT) : "=r" (regs->csr_estat)); +} + +#endif /* _ASM_STACKTRACE_H */ diff --git a/arch/loongarch/include/asm/string.h b/arch/loongarch/include/asm/string.h new file mode 100644 index 000000000000..812389888e35 --- /dev/null +++ b/arch/loongarch/include/asm/string.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_STRING_H +#define _ASM_STRING_H + +#define __HAVE_ARCH_MEMSET +extern void *memset(void *__s, int __c, size_t __count); + +#define __HAVE_ARCH_MEMCPY +extern void *memcpy(void *__to, __const__ void *__from, size_t __n); + +#define __HAVE_ARCH_MEMMOVE +extern void *memmove(void *__dest, __const__ void *__src, size_t __n); + +#endif /* _ASM_STRING_H */ diff --git a/arch/loongarch/include/asm/switch_to.h b/arch/loongarch/include/asm/switch_to.h new file mode 100644 index 000000000000..e2d4afefce24 --- /dev/null +++ b/arch/loongarch/include/asm/switch_to.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020-2021 Loongson Technology Corporation Limited + */ +#ifndef _ASM_SWITCH_TO_H +#define _ASM_SWITCH_TO_H + +#include +#include + +struct task_struct; + +/** + * __switch_to - switch execution of a task + * @prev: The task previously executed. + * @next: The task to begin executing. + * @next_ti: task_thread_info(next). + * + * This function is used whilst scheduling to save the context of prev & load + * the context of next. Returns prev. + */ +extern asmlinkage struct task_struct *__switch_to(struct task_struct *prev, + struct task_struct *next, struct thread_info *next_ti); + +/* + * For newly created kernel threads switch_to() will return to + * ret_from_kernel_thread, newly created user threads to ret_from_fork. + * That is, everything following __switch_to() will be skipped for new threads. + * So everything that matters to new threads should be placed before __switch_to(). + */ +#define switch_to(prev, next, last) \ +do { \ + lose_fpu_inatomic(1, prev); \ + (last) = __switch_to(prev, next, task_thread_info(next)); \ +} while (0) + +#endif /* _ASM_SWITCH_TO_H */ diff --git a/arch/loongarch/include/asm/syscall.h b/arch/loongarch/include/asm/syscall.h new file mode 100644 index 000000000000..c21f07de70e5 --- /dev/null +++ b/arch/loongarch/include/asm/syscall.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* +* Copyright (C) 2020 Loongson Technology Corporation Limited +* +* Author: Hanlu Li +*/ + +#ifndef __ASM_LOONGARCH_SYSCALL_H +#define __ASM_LOONGARCH_SYSCALL_H + +#include +#include +#include +#include +#include +#include +#include +#include + +extern void *sys_call_table[]; + +static inline long syscall_get_nr(struct task_struct *task, + struct pt_regs *regs) +{ + return regs->regs[11]; +} + +static inline void syscall_rollback(struct task_struct *task, + struct pt_regs *regs) +{ + regs->regs[4] = regs->orig_a0; +} + +static inline long syscall_get_error(struct task_struct *task, + struct pt_regs *regs) +{ + unsigned long error = regs->regs[4]; + + return IS_ERR_VALUE(error) ? error : 0; +} + +static inline long syscall_get_return_value(struct task_struct *task, + struct pt_regs *regs) +{ + return regs->regs[4]; +} + +static inline void syscall_set_return_value(struct task_struct *task, + struct pt_regs *regs, + int error, long val) +{ + regs->regs[4] = (long) error ? error : val; +} + +static inline void syscall_get_arguments(struct task_struct *task, + struct pt_regs *regs, + unsigned long *args) +{ + args[0] = regs->orig_a0; + memcpy(&args[1], ®s->regs[5], 5 * sizeof(long)); +} + +static inline int syscall_get_arch(struct task_struct *task) +{ + return AUDIT_ARCH_LOONGARCH64; +} + +#endif /* __ASM_LOONGARCH_SYSCALL_H */ diff --git a/arch/loongarch/include/asm/thread_info.h b/arch/loongarch/include/asm/thread_info.h new file mode 100644 index 000000000000..356974e33671 --- /dev/null +++ b/arch/loongarch/include/asm/thread_info.h @@ -0,0 +1,124 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * thread_info.h: LoongArch low-level thread information + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ + +#ifndef _ASM_THREAD_INFO_H +#define _ASM_THREAD_INFO_H + +#ifdef __KERNEL__ + +#ifndef __ASSEMBLY__ + +#include + +/* + * low level task data that entry.S needs immediate access to + * - this struct should fit entirely inside of one cache line + * - this struct shares the supervisor stack pages + * - if the contents of this structure are changed, the assembly constants + * must also be changed + */ +struct thread_info { + struct task_struct *task; /* main task structure */ + unsigned long flags; /* low level flags */ + unsigned long tp_value; /* thread pointer */ + __u32 cpu; /* current CPU */ + int preempt_count; /* 0 => preemptible, <0 => BUG */ + struct pt_regs *regs; + long syscall; /* syscall number */ +}; + +/* + * macros/functions for gaining access to the thread information structure + */ +#define INIT_THREAD_INFO(tsk) \ +{ \ + .task = &tsk, \ + .flags = 0, \ + .cpu = 0, \ + .preempt_count = INIT_PREEMPT_COUNT, \ +} + +/* How to get the thread information struct from C. */ +register struct thread_info *__current_thread_info __asm__("$r2"); + +static inline struct thread_info *current_thread_info(void) +{ + return __current_thread_info; +} + +register unsigned long current_stack_pointer __asm__("$r3"); + +#endif /* !__ASSEMBLY__ */ + +/* thread information allocation */ +#if defined(CONFIG_PAGE_SIZE_4KB) && defined(CONFIG_32BIT) +#define THREAD_SIZE_ORDER (1) +#endif +#if defined(CONFIG_PAGE_SIZE_4KB) && defined(CONFIG_64BIT) +#define THREAD_SIZE_ORDER (2) +#endif +#ifdef CONFIG_PAGE_SIZE_16KB +#define THREAD_SIZE_ORDER (0) +#endif +#ifdef CONFIG_PAGE_SIZE_64KB +#define THREAD_SIZE_ORDER (0) +#endif + +#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) +#define THREAD_MASK (THREAD_SIZE - 1UL) + +#define STACK_WARN (THREAD_SIZE / 8) + +/* + * thread information flags + * - these are process state flags that various assembly files may need to + * access + * - pending work-to-be-done flags are in LSW + * - other flags in MSW + */ +#define TIF_SIGPENDING 1 /* signal pending */ +#define TIF_NEED_RESCHED 2 /* rescheduling necessary */ +#define TIF_NOTIFY_RESUME 3 /* callback before returning to user */ +#define TIF_RESTORE_SIGMASK 4 /* restore signal mask in do_signal() */ +#define TIF_NOHZ 5 /* in adaptive nohz mode */ +#define TIF_SYSCALL_AUDIT 6 /* syscall auditing active */ +#define TIF_SYSCALL_TRACE 7 /* syscall trace active */ +#define TIF_SYSCALL_TRACEPOINT 8 /* syscall tracepoint instrumentation */ +#define TIF_SECCOMP 9 /* secure computing */ +#define TIF_UPROBE 10 /* breakpointed or singlestepping */ +#define TIF_USEDFPU 11 /* FPU was used by this task this quantum (SMP) */ +#define TIF_USEDSIMD 12 /* SIMD has been used this quantum */ +#define TIF_MEMDIE 13 /* is terminating due to OOM killer */ +#define TIF_FIXADE 14 /* Fix address errors in software */ +#define TIF_LOGADE 15 /* Log address errors to syslog */ +#define TIF_32BIT_REGS 16 /* 32-bit general purpose registers */ +#define TIF_32BIT_ADDR 17 /* 32-bit address space */ +#define TIF_LOAD_WATCH 18 /* If set, load watch registers */ +#define TIF_LSX_CTX_LIVE 19 /* LSX context must be preserved */ +#define TIF_LASX_CTX_LIVE 20 /* LASX context must be preserved */ + +#define _TIF_SIGPENDING (1< +#include +#include + +extern u64 cpu_clock_freq; +extern u64 const_clock_freq; + +extern void sync_counter(void); + +static inline unsigned int calc_const_freq(void) +{ + unsigned int res; + unsigned int base_freq; + unsigned int cfm, cfd; + + res = read_cpucfg(LOONGARCH_CPUCFG2); + if (!(res & CPUCFG2_LLFTP)) + return 0; + + base_freq = read_cpucfg(LOONGARCH_CPUCFG4); + res = read_cpucfg(LOONGARCH_CPUCFG5); + cfm = res & 0xffff; + cfd = (res >> 16) & 0xffff; + + if (!base_freq || !cfm || !cfd) + return 0; + else + return (base_freq * cfm / cfd); +} + +/* + * Initialize the calling CPU's timer interrupt as clockevent device + */ +extern int constant_clockevent_init(void); +extern int constant_clocksource_init(void); + +static inline void clockevent_set_clock(struct clock_event_device *cd, + unsigned int clock) +{ + clockevents_calc_mult_shift(cd, clock, 4); +} + +#endif /* _ASM_TIME_H */ diff --git a/arch/loongarch/include/asm/timex.h b/arch/loongarch/include/asm/timex.h new file mode 100644 index 000000000000..755d8a9a2b69 --- /dev/null +++ b/arch/loongarch/include/asm/timex.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_TIMEX_H +#define _ASM_TIMEX_H + +#ifdef __KERNEL__ + +#include + +#include +#include +#include + +/* + * Standard way to access the cycle counter. + * Currently only used on SMP for scheduling. + * + * We know that all SMP capable CPUs have cycle counters. + */ + +typedef unsigned long cycles_t; + +static inline cycles_t get_cycles(void) +{ + return drdtime(); +} + +#endif /* __KERNEL__ */ + +#endif /* _ASM_TIMEX_H */ diff --git a/arch/loongarch/include/asm/tlb.h b/arch/loongarch/include/asm/tlb.h new file mode 100644 index 000000000000..ec6d7fa8f02c --- /dev/null +++ b/arch/loongarch/include/asm/tlb.h @@ -0,0 +1,216 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef __ASM_TLB_H +#define __ASM_TLB_H + +#include +#include +#include + +/* + * TLB Invalidate Flush + */ +static inline void tlbclr(void) +{ + __asm__ __volatile__("tlbclr"); +} + +static inline void tlbflush(void) +{ + __asm__ __volatile__("tlbflush"); +} + +/* + * TLB R/W operations. + */ +static inline void tlb_probe(void) +{ + __asm__ __volatile__("tlbsrch"); +} + +static inline void tlb_read(void) +{ + __asm__ __volatile__("tlbrd"); +} + +static inline void tlb_write_indexed(void) +{ + __asm__ __volatile__("tlbwr"); +} + +static inline void tlb_write_random(void) +{ + __asm__ __volatile__("tlbfill"); +} + +/* + * Guest TLB Invalidate Flush + */ +static inline void guest_tlbflush(void) +{ + __asm__ __volatile__( + ".word 0x6482401\n\t"); +} + +/* + * Guest TLB R/W operations. + */ +static inline void guest_tlb_probe(void) +{ + __asm__ __volatile__( + ".word 0x6482801\n\t"); +} + +static inline void guest_tlb_read(void) +{ + __asm__ __volatile__( + ".word 0x6482c01\n\t"); +} + +static inline void guest_tlb_write_indexed(void) +{ + __asm__ __volatile__( + ".word 0x6483001\n\t"); +} + +static inline void guest_tlb_write_random(void) +{ + __asm__ __volatile__( + ".word 0x6483401\n\t"); +} + +enum invtlb_ops { + /* Invalid all tlb */ + INVTLB_ALL = 0x0, + /* Invalid current tlb */ + INVTLB_CURRENT_ALL = 0x1, + /* Invalid all global=1 lines in current tlb */ + INVTLB_CURRENT_GTRUE = 0x2, + /* Invalid all global=0 lines in current tlb */ + INVTLB_CURRENT_GFALSE = 0x3, + /* Invalid global=0 and matched asid lines in current tlb */ + INVTLB_GFALSE_AND_ASID = 0x4, + /* Invalid addr with global=0 and matched asid in current tlb */ + INVTLB_ADDR_GFALSE_AND_ASID = 0x5, + /* Invalid addr with global=1 or matched asid in current tlb */ + INVTLB_ADDR_GTRUE_OR_ASID = 0x6, + /* Invalid matched gid in guest tlb */ + INVGTLB_GID = 0x9, + /* Invalid global=1, matched gid in guest tlb */ + INVGTLB_GID_GTRUE = 0xa, + /* Invalid global=0, matched gid in guest tlb */ + INVGTLB_GID_GFALSE = 0xb, + /* Invalid global=0, matched gid and asid in guest tlb */ + INVGTLB_GID_GFALSE_ASID = 0xc, + /* Invalid global=0 , matched gid, asid and addr in guest tlb */ + INVGTLB_GID_GFALSE_ASID_ADDR = 0xd, + /* Invalid global=1 , matched gid, asid and addr in guest tlb */ + INVGTLB_GID_GTRUE_ASID_ADDR = 0xe, + /* Invalid all gid gva-->gpa guest tlb */ + INVGTLB_ALLGID_GVA_TO_GPA = 0x10, + /* Invalid all gid gpa-->hpa tlb */ + INVTLB_ALLGID_GPA_TO_HPA = 0x11, + /* Invalid all gid tlb, including gva-->gpa and gpa-->hpa */ + INVTLB_ALLGID = 0x12, + /* Invalid matched gid gva-->gpa guest tlb */ + INVGTLB_GID_GVA_TO_GPA = 0x13, + /* Invalid matched gid gpa-->hpa tlb */ + INVTLB_GID_GPA_TO_HPA = 0x14, + /* Invalid matched gid tlb,including gva-->gpa and gpa-->hpa */ + INVTLB_GID_ALL = 0x15, + /* Invalid matched gid and addr gpa-->hpa tlb */ + INVTLB_GID_ADDR = 0x16, +}; + +/* + * invtlb op info addr + * (0x1 << 26) | (0x24 << 20) | (0x13 << 15) | + * (addr << 10) | (info << 5) | op + */ +static inline void invtlb(u32 op, u32 info, u64 addr) +{ + __asm__ __volatile__( + "parse_r addr,%0\n\t" + "parse_r info,%1\n\t" + ".word ((0x6498000) | (addr << 10) | (info << 5) | %2)\n\t" + : + : "r"(addr), "r"(info), "i"(op) + : + ); +} + +static inline void invtlb_addr(u32 op, u32 info, u64 addr) +{ + __asm__ __volatile__( + "parse_r addr,%0\n\t" + ".word ((0x6498000) | (addr << 10) | (0 << 5) | %1)\n\t" + : + : "r"(addr), "i"(op) + : + ); +} + +static inline void invtlb_info(u32 op, u32 info, u64 addr) +{ + __asm__ __volatile__( + "parse_r info,%0\n\t" + ".word ((0x6498000) | (0 << 10) | (info << 5) | %1)\n\t" + : + : "r"(info), "i"(op) + : + ); +} + +static inline void invtlb_all(u32 op, u32 info, u64 addr) +{ + __asm__ __volatile__( + ".word ((0x6498000) | (0 << 10) | (0 << 5) | %0)\n\t" + : + : "i"(op) + : + ); +} + +/* + * LoongArch doesn't need any special per-pte or per-vma handling, except + * we need to flush cache for area to be unmapped. + */ +#define tlb_start_vma(tlb, vma) \ + do { \ + if (!(tlb)->fullmm) \ + flush_cache_range(vma, vma->vm_start, vma->vm_end); \ + } while (0) +#define tlb_end_vma(tlb, vma) do { } while (0) +#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0) + +static void tlb_flush(struct mmu_gather *tlb); + +#define tlb_flush tlb_flush +#include + +static inline void tlb_flush(struct mmu_gather *tlb) +{ + struct vm_area_struct vma; + + vma.vm_mm = tlb->mm; + vma.vm_flags = 0; + if (tlb->fullmm) { + flush_tlb_mm(tlb->mm); + return; + } + + flush_tlb_range(&vma, tlb->start, tlb->end); +} + +extern void handle_tlb_load(void); +extern void handle_tlb_store(void); +extern void handle_tlb_modify(void); +extern void handle_tlb_refill(void); +extern void handle_tlb_protect(void); + +extern void dump_tlb_all(void); +extern void dump_tlb_regs(void); + +#endif /* __ASM_TLB_H */ diff --git a/arch/loongarch/include/asm/tlbflush.h b/arch/loongarch/include/asm/tlbflush.h new file mode 100644 index 000000000000..67e08f8ad794 --- /dev/null +++ b/arch/loongarch/include/asm/tlbflush.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef __ASM_TLBFLUSH_H +#define __ASM_TLBFLUSH_H + +#include + +/* + * TLB flushing: + * + * - flush_tlb_all() flushes all processes TLB entries + * - flush_tlb_mm(mm) flushes the specified mm context TLB entries + * - flush_tlb_page(vma, vmaddr) flushes one page + * - flush_tlb_range(vma, start, end) flushes a range of pages + * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages + */ +extern void local_flush_tlb_all(void); +extern void local_flush_tlb_user(void); +extern void local_flush_tlb_kernel(void); +extern void local_flush_tlb_mm(struct mm_struct *mm); +extern void local_flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end); +extern void local_flush_tlb_kernel_range(unsigned long start, + unsigned long end); +extern void local_flush_tlb_page(struct vm_area_struct *vma, + unsigned long page); +extern void local_flush_tlb_one(unsigned long vaddr); + +#define flush_tlb_all() local_flush_tlb_all() +#define flush_tlb_mm(mm) local_flush_tlb_mm(mm) +#define flush_tlb_range(vma, vmaddr, end) local_flush_tlb_range(vma, vmaddr, end) +#define flush_tlb_kernel_range(vmaddr,end) local_flush_tlb_kernel_range(vmaddr, end) +#define flush_tlb_page(vma, page) local_flush_tlb_page(vma, page) +#define flush_tlb_one(vaddr) local_flush_tlb_one(vaddr) + +#endif /* __ASM_TLBFLUSH_H */ diff --git a/arch/loongarch/include/asm/topology.h b/arch/loongarch/include/asm/topology.h new file mode 100644 index 000000000000..51feaa64339a --- /dev/null +++ b/arch/loongarch/include/asm/topology.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef __ASM_TOPOLOGY_H +#define __ASM_TOPOLOGY_H + +#include + +#define cpu_logical_map(cpu) 0 + +#include + +static inline void arch_fix_phys_package_id(int num, u32 slot) { } +#endif /* __ASM_TOPOLOGY_H */ diff --git a/arch/loongarch/include/asm/types.h b/arch/loongarch/include/asm/types.h new file mode 100644 index 000000000000..b5b1d368a5cb --- /dev/null +++ b/arch/loongarch/include/asm/types.h @@ -0,0 +1,39 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1994, 1995, 1996, 1999 by Ralf Baechle + * Copyright (C) 2008 Wind River Systems, + * written by Ralf Baechle + * Copyright (C) 1999 Silicon Graphics, Inc. + */ +#ifndef _ASM_TYPES_H +#define _ASM_TYPES_H + +#include +#include + +/* + * The following macros are especially useful for __asm__ + * inline assembler. + */ +#ifndef __STR +#define __STR(x) #x +#endif +#ifndef STR +#define STR(x) __STR(x) +#endif + +/* + * Configure language + */ +#ifdef __ASSEMBLY__ +#define _ULCAST_ +#define _U64CAST_ +#else +#define _ULCAST_ (unsigned long) +#define _U64CAST_ (u64) +#endif + +#endif /* _ASM_TYPES_H */ diff --git a/arch/loongarch/include/asm/uaccess.h b/arch/loongarch/include/asm/uaccess.h new file mode 100644 index 000000000000..50d9bb233955 --- /dev/null +++ b/arch/loongarch/include/asm/uaccess.h @@ -0,0 +1,432 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#ifndef _ASM_UACCESS_H +#define _ASM_UACCESS_H + +#include +#include +#include +#include + +#ifdef CONFIG_64BIT + +extern u64 __ua_limit; + +#define __UA_LIMIT __ua_limit + +#define __UA_ADDR ".dword" +#define __UA_ADDU "add.d" +#define __UA_LA "la.abs" + +#endif /* CONFIG_64BIT */ + +/* + * Is a address valid? This does a straightforward calculation rather + * than tests. + * + * Address valid if: + * - "addr" doesn't have any high-bits set + * - AND "size" doesn't have any high-bits set + * - AND "addr+size" doesn't have any high-bits set + * - OR we are in kernel mode. + * + * __ua_size() is a trick to avoid runtime checking of positive constant + * sizes; for those we already know at compile time that the size is ok. + */ +#define __ua_size(size) \ + ((__builtin_constant_p(size) && (signed long) (size) > 0) ? 0 : (size)) + +/* + * access_ok: - Checks if a user space pointer is valid + * @addr: User space pointer to start of block to check + * @size: Size of block to check + * + * Context: User context only. This function may sleep if pagefaults are + * enabled. + * + * Checks if a pointer to a block of memory in user space is valid. + * + * Returns true (nonzero) if the memory block may be valid, false (zero) + * if it is definitely invalid. + * + * Note that, depending on architecture, this function probably just + * checks that the pointer is in the user space range - after calling + * this function, memory access functions may still return -EFAULT. + */ +static inline int __access_ok(const void __user *p, unsigned long size) +{ + unsigned long addr = (unsigned long)p; + unsigned long end = addr + size - !!size; + + return (__UA_LIMIT & (addr | end | __ua_size(size))) == 0; +} + +#define access_ok(addr, size) \ + likely(__access_ok((addr), (size))) + +/* + * get_user: - Get a simple variable from user space. + * @x: Variable to store result. + * @ptr: Source address, in user space. + * + * Context: User context only. This function may sleep if pagefaults are + * enabled. + * + * This macro copies a single simple variable from user space to kernel + * space. It supports simple types like char and int, but not larger + * data types like structures or arrays. + * + * @ptr must have pointer-to-simple-variable type, and the result of + * dereferencing @ptr must be assignable to @x without a cast. + * + * Returns zero on success, or -EFAULT on error. + * On error, the variable @x is set to zero. + */ +#define get_user(x, ptr) \ +({ \ + const __typeof__(*(ptr)) __user *__p = (ptr); \ + \ + might_fault(); \ + access_ok(__p, sizeof(*__p)) ? __get_user((x), __p) : \ + ((x) = 0, -EFAULT); \ +}) + +/* + * put_user: - Write a simple value into user space. + * @x: Value to copy to user space. + * @ptr: Destination address, in user space. + * + * Context: User context only. This function may sleep if pagefaults are + * enabled. + * + * This macro copies a single simple value from kernel space to user + * space. It supports simple types like char and int, but not larger + * data types like structures or arrays. + * + * @ptr must have pointer-to-simple-variable type, and @x must be assignable + * to the result of dereferencing @ptr. + * + * Returns zero on success, or -EFAULT on error. + */ +#define put_user(x, ptr) \ +({ \ + __typeof__(*(ptr)) __user *__p = (ptr); \ + \ + might_fault(); \ + access_ok(__p, sizeof(*__p)) ? __put_user((x), __p) : -EFAULT; \ +}) + +/* + * __get_user: - Get a simple variable from user space, with less checking. + * @x: Variable to store result. + * @ptr: Source address, in user space. + * + * Context: User context only. This function may sleep if pagefaults are + * enabled. + * + * This macro copies a single simple variable from user space to kernel + * space. It supports simple types like char and int, but not larger + * data types like structures or arrays. + * + * @ptr must have pointer-to-simple-variable type, and the result of + * dereferencing @ptr must be assignable to @x without a cast. + * + * Caller must check the pointer with access_ok() before calling this + * function. + * + * Returns zero on success, or -EFAULT on error. + * On error, the variable @x is set to zero. + */ +#define __get_user(x, ptr) \ +({ \ + int __gu_err = 0; \ + \ + __chk_user_ptr(ptr); \ + __get_user_common((x), sizeof(*(ptr)), ptr); \ + __gu_err; \ +}) + +/* + * __put_user: - Write a simple value into user space, with less checking. + * @x: Value to copy to user space. + * @ptr: Destination address, in user space. + * + * Context: User context only. This function may sleep if pagefaults are + * enabled. + * + * This macro copies a single simple value from kernel space to user + * space. It supports simple types like char and int, but not larger + * data types like structures or arrays. + * + * @ptr must have pointer-to-simple-variable type, and @x must be assignable + * to the result of dereferencing @ptr. + * + * Caller must check the pointer with access_ok() before calling this + * function. + * + * Returns zero on success, or -EFAULT on error. + */ +#define __put_user(x, ptr) \ +({ \ + int __pu_err = 0; \ + __typeof__(*(ptr)) __pu_val; \ + \ + __pu_val = (x); \ + __chk_user_ptr(ptr); \ + __put_user_common(ptr, sizeof(*(ptr))); \ + __pu_err; \ +}) + +struct __large_struct { unsigned long buf[100]; }; +#define __m(x) (*(struct __large_struct __user *)(x)) + +#ifdef CONFIG_32BIT +#define __GET_DW(val, insn, ptr) __get_data_asm_ll32(val, insn, ptr) +#endif +#ifdef CONFIG_64BIT +#define __GET_DW(val, insn, ptr) __get_data_asm(val, insn, ptr) +#endif + +#define __get_user_common(val, size, ptr) \ +do { \ + switch (size) { \ + case 1: __get_data_asm(val, "ld.b", ptr); break; \ + case 2: __get_data_asm(val, "ld.h", ptr); break; \ + case 4: __get_data_asm(val, "ld.w", ptr); break; \ + case 8: __GET_DW(val, "ld.d", ptr); break; \ + default: BUILD_BUG(); break; \ + } \ +} while (0) + +#define __get_kernel_common(val, size, ptr) __get_user_common(val, size, ptr) + +#define __get_data_asm(val, insn, addr) \ +{ \ + long __gu_tmp; \ + \ + __asm__ __volatile__( \ + "1: " insn " %1, %3 \n" \ + "2: \n" \ + " .section .fixup,\"ax\" \n" \ + "3: li.w %0, %4 \n" \ + " or %1, $r0, $r0 \n" \ + " b 2b \n" \ + " .previous \n" \ + " .section __ex_table,\"a\" \n" \ + " "__UA_ADDR "\t1b, 3b \n" \ + " .previous \n" \ + : "=r" (__gu_err), "=r" (__gu_tmp) \ + : "0" (0), "o" (__m(addr)), "i" (-EFAULT)); \ + \ + (val) = (__typeof__(*(addr))) __gu_tmp; \ +} + +/* + * Get a long long 64 using 32 bit registers. + */ +#define __get_data_asm_ll32(val, insn, addr) \ +{ \ + union { \ + unsigned long long l; \ + __typeof__(*(addr)) t; \ + } __gu_tmp; \ + \ + __asm__ __volatile__( \ + "1: ld.w %1, (%3) \n" \ + "2: ld.w %D1, 4(%3) \n" \ + "3: \n" \ + " .section .fixup,\"ax\" \n" \ + "4: li.w %0, %4 \n" \ + " slli.d %1, $r0, 0 \n" \ + " slli.d %D1, $r0, 0 \n" \ + " b 3b \n" \ + " .previous \n" \ + " .section __ex_table,\"a\" \n" \ + " " __UA_ADDR " 1b, 4b \n" \ + " " __UA_ADDR " 2b, 4b \n" \ + " .previous \n" \ + : "=r" (__gu_err), "=&r" (__gu_tmp.l) \ + : "0" (0), "r" (addr), "i" (-EFAULT)); \ + \ + (val) = __gu_tmp.t; \ +} + +#ifdef CONFIG_32BIT +#define __PUT_DW(insn, ptr) __put_data_asm_ll32(insn, ptr) +#endif +#ifdef CONFIG_64BIT +#define __PUT_DW(insn, ptr) __put_data_asm(insn, ptr) +#endif + +#define __put_user_common(ptr, size) \ +do { \ + switch (size) { \ + case 1: __put_data_asm("st.b", ptr); break; \ + case 2: __put_data_asm("st.h", ptr); break; \ + case 4: __put_data_asm("st.w", ptr); break; \ + case 8: __PUT_DW("st.d", ptr); break; \ + default: BUILD_BUG(); break; \ + } \ +} while (0) + +#define __put_kernel_common(ptr, size) __put_user_common(ptr, size) + +#define __put_data_asm(insn, ptr) \ +{ \ + __asm__ __volatile__( \ + "1: " insn " %z2, %3 # __put_user_asm\n" \ + "2: \n" \ + " .section .fixup,\"ax\" \n" \ + "3: li.w %0, %4 \n" \ + " b 2b \n" \ + " .previous \n" \ + " .section __ex_table,\"a\" \n" \ + " " __UA_ADDR " 1b, 3b \n" \ + " .previous \n" \ + : "=r" (__pu_err) \ + : "0" (0), "Jr" (__pu_val), "o" (__m(ptr)), \ + "i" (-EFAULT)); \ +} + +#define __put_data_asm_ll32(insn, ptr) \ +{ \ + __asm__ __volatile__( \ + "1: st.w %2, (%3) # __put_user_asm_ll32 \n" \ + "2: st.w %D2, 4(%3) \n" \ + "3: \n" \ + " .section .fixup,\"ax\" \n" \ + "4: li.w %0, %4 \n" \ + " b 3b \n" \ + " .previous \n" \ + " .section __ex_table,\"a\" \n" \ + " " __UA_ADDR " 1b, 4b \n" \ + " " __UA_ADDR " 2b, 4b \n" \ + " .previous" \ + : "=r" (__pu_err) \ + : "0" (0), "r" (__pu_val), "r" (ptr), \ + "i" (-EFAULT)); \ +} + +#define HAVE_GET_KERNEL_NOFAULT + +#define __get_kernel_nofault(dst, src, type, err_label) \ +do { \ + int __gu_err = 0; \ + \ + __get_kernel_common(*((type *)(dst)), sizeof(type), \ + (__force type *)(src)); \ + if (unlikely(__gu_err)) \ + goto err_label; \ +} while (0) + +#define __put_kernel_nofault(dst, src, type, err_label) \ +do { \ + type __pu_val; \ + int __pu_err = 0; \ + \ + __pu_val = *(__force type *)(src); \ + __put_kernel_common(((type *)(dst)), sizeof(type)); \ + if (unlikely(__pu_err)) \ + goto err_label; \ +} while (0) + +extern unsigned long __copy_user(void *to, const void *from, __kernel_size_t n); + +static inline unsigned long __must_check +raw_copy_from_user(void *to, const void __user *from, unsigned long n) +{ + return __copy_user(to, from, n); +} + +static inline unsigned long __must_check +raw_copy_to_user(void __user *to, const void *from, unsigned long n) +{ + return __copy_user(to, from, n); +} + +#define INLINE_COPY_FROM_USER +#define INLINE_COPY_TO_USER + +/* + * __clear_user: - Zero a block of memory in user space, with less checking. + * @addr: Destination address, in user space. + * @to: Number of bytes to zero. + * + * Zero a block of memory in user space. Caller must check + * the specified block with access_ok() before calling this function. + * + * Returns number of bytes that could not be cleared. + * On success, this will be zero. + */ +extern unsigned long __clear_user(void __user *addr, __kernel_size_t size); + +#define clear_user(addr, n) \ +({ \ + void __user *__cl_addr = (addr); \ + unsigned long __cl_size = (n); \ + if (__cl_size && access_ok(__cl_addr, __cl_size)) \ + __cl_size = __clear_user(__cl_addr, __cl_size); \ + __cl_size; \ +}) + +extern long __strncpy_from_user(char *to, const char __user *from, long len); + +/* + * strncpy_from_user: - Copy a NUL terminated string from userspace. + * @to: Destination address, in kernel space. This buffer must be at + * least @len bytes long. + * @from: Source address, in user space. + * @len: Maximum number of bytes to copy, including the trailing NUL. + * + * Copies a NUL-terminated string from userspace to kernel space. + * + * On success, returns the length of the string (not including the trailing + * NUL). + * + * If access to userspace fails, returns -EFAULT (some data may have been + * copied). + * + * If @len is smaller than the length of the string, copies @len bytes + * and returns @len. + */ +static inline long +strncpy_from_user(char *to, const char __user *from, long len) +{ + if (!access_ok(from, len)) + return -EFAULT; + + might_fault(); + return __strncpy_from_user(to, from, len); +} + +extern long __strnlen_user(const char __user *s, long n); + +/* + * strnlen_user: - Get the size of a string in user space. + * @s: The string to measure. + * + * Context: User context only. This function may sleep if pagefaults are + * enabled. + * + * Get the size of a NUL-terminated string in user space. + * + * Returns the size of the string INCLUDING the terminating NUL. + * On exception, returns 0. + * If the string is too long, returns a value greater than @n. + */ +static inline long strnlen_user(const char __user *s, long n) +{ + if (!access_ok(s, 1)) + return 0; + + might_fault(); + return __strnlen_user(s, n); +} + +#endif /* _ASM_UACCESS_H */ diff --git a/arch/loongarch/include/asm/unistd.h b/arch/loongarch/include/asm/unistd.h new file mode 100644 index 000000000000..b21b66cca8d9 --- /dev/null +++ b/arch/loongarch/include/asm/unistd.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* +* Copyright (C) 2020 Loongson Technology Corporation Limited +* +* Author: Hanlu Li +*/ + +#include + +#define NR_syscalls (__NR_syscalls) diff --git a/arch/loongarch/include/asm/vdso.h b/arch/loongarch/include/asm/vdso.h new file mode 100644 index 000000000000..93d47adba469 --- /dev/null +++ b/arch/loongarch/include/asm/vdso.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Author: Huacai Chen + * Copyright (C) 2020 Loongson Technology Corporation Limited + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#ifndef __ASM_VDSO_H +#define __ASM_VDSO_H + +#include +#include + +#include + +/** + * struct loongarch_vdso_info - Details of a VDSO image. + * @vdso: Pointer to VDSO image (page-aligned). + * @size: Size of the VDSO image (page-aligned). + * @off_rt_sigreturn: Offset of the rt_sigreturn() trampoline. + * @code_mapping: Special mapping structure for vdso code. + * @code_mapping: Special mapping structure for vdso data. + * + * This structure contains details of a VDSO image, including the image data + * and offsets of certain symbols required by the kernel. It is generated as + * part of the VDSO build process, aside from the mapping page array, which is + * populated at runtime. + */ +struct loongarch_vdso_info { + void *vdso; + unsigned long size; + unsigned long offset_sigreturn; + struct vm_special_mapping code_mapping; + struct vm_special_mapping data_mapping; +}; + +extern struct loongarch_vdso_info vdso_info; + +#endif /* __ASM_VDSO_H */ diff --git a/arch/loongarch/include/asm/vdso/clocksource.h b/arch/loongarch/include/asm/vdso/clocksource.h new file mode 100644 index 000000000000..13cd580d406d --- /dev/null +++ b/arch/loongarch/include/asm/vdso/clocksource.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef __ASM_VDSOCLOCKSOURCE_H +#define __ASM_VDSOCLOCKSOURCE_H + +#define VDSO_ARCH_CLOCKMODES \ + VDSO_CLOCKMODE_CPU + +#endif /* __ASM_VDSOCLOCKSOURCE_H */ diff --git a/arch/loongarch/include/asm/vdso/gettimeofday.h b/arch/loongarch/include/asm/vdso/gettimeofday.h new file mode 100644 index 000000000000..66e9a3b8b834 --- /dev/null +++ b/arch/loongarch/include/asm/vdso/gettimeofday.h @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Author: Huacai Chen + * Copyright (C) 2020 Loongson Technology Corporation Limited + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ +#ifndef __ASM_VDSO_GETTIMEOFDAY_H +#define __ASM_VDSO_GETTIMEOFDAY_H + +#ifndef __ASSEMBLY__ + +#include +#include +#include + +#define VDSO_HAS_CLOCK_GETRES 1 + +static __always_inline long gettimeofday_fallback( + struct __kernel_old_timeval *_tv, + struct timezone *_tz) +{ + register struct __kernel_old_timeval *tv asm("a0") = _tv; + register struct timezone *tz asm("a1") = _tz; + register long nr asm("a7") = __NR_gettimeofday; + register long ret asm("v0"); + + asm volatile( + " syscall 0\n" + : "=r" (ret) + : "r" (nr), "r" (tv), "r" (tz) + : "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", + "$t8", "memory"); + + return ret; +} + +static __always_inline long clock_gettime_fallback( + clockid_t _clkid, + struct __kernel_timespec *_ts) +{ + register clockid_t clkid asm("a0") = _clkid; + register struct __kernel_timespec *ts asm("a1") = _ts; + register long nr asm("a7") = __NR_clock_gettime; + register long ret asm("v0"); + + asm volatile( + " syscall 0\n" + : "=r" (ret) + : "r" (nr), "r" (clkid), "r" (ts) + : "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", + "$t8", "memory"); + + return ret; +} + +static __always_inline int clock_getres_fallback( + clockid_t _clkid, + struct __kernel_timespec *_ts) +{ + register clockid_t clkid asm("a0") = _clkid; + register struct __kernel_timespec *ts asm("a1") = _ts; + register long nr asm("a7") = __NR_clock_getres; + register long ret asm("v0"); + + asm volatile( + " syscall 0\n" + : "=r" (ret) + : "r" (nr), "r" (clkid), "r" (ts) + : "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", + "$t8", "memory"); + + return ret; +} + +static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, + const struct vdso_data *vd) +{ + unsigned int count; + + __asm__ __volatile__( + " rdtime.d %0, $zero\n" + : "=r" (count)); + + return count; +} + +static inline bool loongarch_vdso_hres_capable(void) +{ + return true; +} +#define __arch_vdso_hres_capable loongarch_vdso_hres_capable + +static __always_inline const struct vdso_data *__arch_get_vdso_data(void) +{ + return get_vdso_data(); +} + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_VDSO_GETTIMEOFDAY_H */ diff --git a/arch/loongarch/include/asm/vdso/processor.h b/arch/loongarch/include/asm/vdso/processor.h new file mode 100644 index 000000000000..99aa47ca40e4 --- /dev/null +++ b/arch/loongarch/include/asm/vdso/processor.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef __ASM_VDSO_PROCESSOR_H +#define __ASM_VDSO_PROCESSOR_H + +#ifndef __ASSEMBLY__ + +#define cpu_relax() barrier() + +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_VDSO_PROCESSOR_H */ diff --git a/arch/loongarch/include/asm/vdso/vdso.h b/arch/loongarch/include/asm/vdso/vdso.h new file mode 100644 index 000000000000..3ddfde3f7557 --- /dev/null +++ b/arch/loongarch/include/asm/vdso/vdso.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + * Author: Huacai Chen + */ + +#ifndef __ASSEMBLY__ + +#include +#include +#include + +static inline unsigned long get_vdso_base(void) +{ + unsigned long addr; + + __asm__( + " la.pcrel %0, _start \n" + : "=r" (addr) + : + :); + + return addr; +} + +static inline const struct vdso_data *get_vdso_data(void) +{ + return (const struct vdso_data *)(get_vdso_base() - PAGE_SIZE); +} + +#endif /* __ASSEMBLY__ */ diff --git a/arch/loongarch/include/asm/vdso/vsyscall.h b/arch/loongarch/include/asm/vdso/vsyscall.h new file mode 100644 index 000000000000..5de615383a22 --- /dev/null +++ b/arch/loongarch/include/asm/vdso/vsyscall.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_VDSO_VSYSCALL_H +#define __ASM_VDSO_VSYSCALL_H + +#ifndef __ASSEMBLY__ + +#include +#include + +extern struct vdso_data *vdso_data; + +/* + * Update the vDSO data page to keep in sync with kernel timekeeping. + */ +static __always_inline +struct vdso_data *__loongarch_get_k_vdso_data(void) +{ + return vdso_data; +} +#define __arch_get_k_vdso_data __loongarch_get_k_vdso_data + +/* The asm-generic header needs to be included after the definitions above */ +#include + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_VDSO_VSYSCALL_H */ diff --git a/arch/loongarch/include/asm/vermagic.h b/arch/loongarch/include/asm/vermagic.h new file mode 100644 index 000000000000..aacbce545a79 --- /dev/null +++ b/arch/loongarch/include/asm/vermagic.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_VERMAGIC_H +#define _ASM_VERMAGIC_H + +#define MODULE_PROC_FAMILY "LOONGARCH " + +#ifdef CONFIG_32BIT +#define MODULE_KERNEL_TYPE "32BIT " +#elif defined CONFIG_64BIT +#define MODULE_KERNEL_TYPE "64BIT " +#endif + +#define MODULE_ARCH_VERMAGIC \ + MODULE_PROC_FAMILY MODULE_KERNEL_TYPE + +#endif /* _ASM_VERMAGIC_H */ diff --git a/arch/loongarch/include/asm/vmalloc.h b/arch/loongarch/include/asm/vmalloc.h new file mode 100644 index 000000000000..965a0d41ac2d --- /dev/null +++ b/arch/loongarch/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_LOONGARCH_VMALLOC_H +#define _ASM_LOONGARCH_VMALLOC_H + +#endif /* _ASM_LOONGARCH_VMALLOC_H */ diff --git a/arch/loongarch/include/uapi/asm/Kbuild b/arch/loongarch/include/uapi/asm/Kbuild new file mode 100644 index 000000000000..4aa680ca2e5f --- /dev/null +++ b/arch/loongarch/include/uapi/asm/Kbuild @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 +generic-y += kvm_para.h diff --git a/arch/loongarch/include/uapi/asm/auxvec.h b/arch/loongarch/include/uapi/asm/auxvec.h new file mode 100644 index 000000000000..bb0c71b4d64c --- /dev/null +++ b/arch/loongarch/include/uapi/asm/auxvec.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* +* Copyright (C) 2020 Loongson Technology Corporation Limited +* +* Author: Hanlu Li +*/ + +#ifndef __ASM_AUXVEC_H +#define __ASM_AUXVEC_H + +/* Location of VDSO image. */ +#define AT_SYSINFO_EHDR 33 + +#define AT_VECTOR_SIZE_ARCH 1 /* entries in ARCH_DLINFO */ + +#endif /* __ASM_AUXVEC_H */ diff --git a/arch/loongarch/include/uapi/asm/bitfield.h b/arch/loongarch/include/uapi/asm/bitfield.h new file mode 100644 index 000000000000..1bdadee88617 --- /dev/null +++ b/arch/loongarch/include/uapi/asm/bitfield.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* +* Copyright (C) 2020 Loongson Technology Corporation Limited +* +* Author: Hanlu Li +*/ +#ifndef __UAPI_ASM_BITFIELD_H +#define __UAPI_ASM_BITFIELD_H + +/* + * * Damn ... bitfields depend from byteorder :-( + * */ +#define __BITFIELD_FIELD(field, more) \ + more \ + field; + +#endif /* __UAPI_ASM_BITFIELD_H */ diff --git a/arch/loongarch/include/uapi/asm/bitsperlong.h b/arch/loongarch/include/uapi/asm/bitsperlong.h new file mode 100644 index 000000000000..5c2c8779a695 --- /dev/null +++ b/arch/loongarch/include/uapi/asm/bitsperlong.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __ASM_LOONGARCH_BITSPERLONG_H +#define __ASM_LOONGARCH_BITSPERLONG_H + +#define __BITS_PER_LONG _LOONGARCH_SZLONG + +#include + +#endif /* __ASM_LOONGARCH_BITSPERLONG_H */ diff --git a/arch/loongarch/include/uapi/asm/break.h b/arch/loongarch/include/uapi/asm/break.h new file mode 100644 index 000000000000..414b0f49e8ea --- /dev/null +++ b/arch/loongarch/include/uapi/asm/break.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1999 Silicon Graphics, Inc. + */ +#ifndef __UAPI_ASM_BREAK_H +#define __UAPI_ASM_BREAK_H + +#define BRK_DEFAULT 0 /* Used as default */ +#define BRK_BUG 1 /* Used by BUG() */ +#define BRK_KDB 2 /* Used in KDB_ENTER() */ +#define BRK_MATHEMU 3 /* Used by FPU emulator */ +#define BRK_USERBP 4 /* User bp (used by debuggers) */ +#define BRK_SSTEPBP 5 /* User bp (used by debuggers) */ +#define BRK_OVERFLOW 6 /* Overflow check */ +#define BRK_DIVZERO 7 /* Divide by zero check */ +#define BRK_RANGE 8 /* Range error check */ +#define BRK_MULOVFL 9 /* Multiply overflow */ +#define BRK_KPROBE_BP 10 /* Kprobe break */ +#define BRK_KPROBE_SSTEPBP 11 /* Kprobe single step break */ +#define BRK_UPROBE_BP 12 /* See */ +#define BRK_UPROBE_XOLBP 13 /* See */ + +#endif /* __UAPI_ASM_BREAK_H */ diff --git a/arch/loongarch/include/uapi/asm/byteorder.h b/arch/loongarch/include/uapi/asm/byteorder.h new file mode 100644 index 000000000000..bbb85c62740c --- /dev/null +++ b/arch/loongarch/include/uapi/asm/byteorder.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* +* Copyright (C) 2020 Loongson Technology Corporation Limited +* +* Author: Hanlu Li +*/ +#ifndef _ASM_BYTEORDER_H +#define _ASM_BYTEORDER_H + +#include + +#endif /* _ASM_BYTEORDER_H */ diff --git a/arch/loongarch/include/uapi/asm/hwcap.h b/arch/loongarch/include/uapi/asm/hwcap.h new file mode 100644 index 000000000000..8840b72fa8e8 --- /dev/null +++ b/arch/loongarch/include/uapi/asm/hwcap.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_ASM_HWCAP_H +#define _UAPI_ASM_HWCAP_H + +/* HWCAP flags */ +#define HWCAP_LOONGARCH_CPUCFG (1 << 0) +#define HWCAP_LOONGARCH_LAM (1 << 1) +#define HWCAP_LOONGARCH_UAL (1 << 2) +#define HWCAP_LOONGARCH_FPU (1 << 3) +#define HWCAP_LOONGARCH_LSX (1 << 4) +#define HWCAP_LOONGARCH_LASX (1 << 5) +#define HWCAP_LOONGARCH_CRC32 (1 << 6) +#define HWCAP_LOONGARCH_COMPLEX (1 << 7) +#define HWCAP_LOONGARCH_CRYPTO (1 << 8) +#define HWCAP_LOONGARCH_LVZ (1 << 9) +#define HWCAP_LOONGARCH_LBT_X86 (1 << 10) +#define HWCAP_LOONGARCH_LBT_ARM (1 << 11) +#define HWCAP_LOONGARCH_LBT_MIPS (1 << 12) + +#endif /* _UAPI_ASM_HWCAP_H */ diff --git a/arch/loongarch/include/uapi/asm/inst.h b/arch/loongarch/include/uapi/asm/inst.h new file mode 100644 index 000000000000..4682f9ab6dc6 --- /dev/null +++ b/arch/loongarch/include/uapi/asm/inst.h @@ -0,0 +1,470 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Format of an instruction in memory. + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _UAPI_ASM_INST_H +#define _UAPI_ASM_INST_H + +#include + +enum reg0_op { + tlbclr_op = 0x19208, gtlbclr_op = 0x19208, + tlbflush_op = 0x19209, gtlbflush_op = 0x19209, + tlbsrch_op = 0x1920a, gtlbsrch_op = 0x1920a, + tlbrd_op = 0x1920b, gtlbrd_op = 0x1920b, + tlbwr_op = 0x1920c, gtlbwr_op = 0x1920c, + tlbfill_op = 0x1920d, gtlbfill_op = 0x1920d, + ertn_op = 0x1920e, +}; + +enum reg0i15_op { + break_op = 0x54, dbcl_op, syscall_op, hypcall_op, + idle_op = 0xc91, dbar_op = 0x70e4, ibar_op, +}; + +enum reg0i26_op { + b_op = 0x14, bl_op, +}; + +enum reg1i20_op { + lu12iw_op = 0xa, lu32id_op, pcaddi_op, pcalau12i_op, + pcaddu12i_op, pcaddu18i_op, +}; + +enum reg1i21_op { + beqz_op = 0x10, bnez_op, bceqz_op, bcnez_op = 0x12, jiscr0_op = 0x12, jiscr1_op = 0x12, +}; + +enum reg2_op { + gr2scr_op = 0x2, scr2gr_op, clow_op, + clzw_op, ctow_op, ctzw_op, clod_op, + clzd_op, ctod_op, ctzd_op, revb2h_op, + revb4h_op, revb2w_op, revbd_op, revh2w_op, + revhd_op, bitrev4b_op, bitrev8b_op, bitrevw_op, + bitrevd_op, extwh_op, extwb_op, rdtimelw_op, + rdtimehw_op, rdtimed_op, cpucfg_op, + iocsrrdb_op = 0x19200, iocsrrdh_op, iocsrrdw_op, iocsrrdd_op, + iocsrwrb_op, iocsrwrh_op, iocsrwrw_op, iocsrwrd_op, + movgr2fcsr_op = 0x4530, movfcsr2gr_op = 0x4532, + movgr2cf_op = 0x4536, movcf2gr_op = 0x4537, +}; + +enum reg2ui3_op { + rotrib_op = 0x261, rcrib_op = 0x281, +}; + +enum reg2ui4_op { + rotrih_op = 0x131, rcrih_op = 0x141, +}; + +enum reg2ui5_op { + slliw_op = 0x81, srliw_op = 0x89, sraiw_op = 0x91, rotriw_op = 0x99, + rcriw_op = 0xa1, +}; + +enum reg2ui6_op { + sllid_op = 0x41, srlid_op = 0x45, sraid_op = 0x49, rotrid_op = 0x4d, + rcrid_op = 0x51, +}; + +enum reg2lsbw_op { + bstrinsw_op = 0x3, bstrpickw_op = 0x3, +}; + +enum reg2lsbd_op { + bstrinsd_op = 0x2, bstrpickd_op = 0x3, +}; + +enum reg2i8_op { + lddir_op = 0x190, ldpte_op, +}; + +enum reg2i8idx1_op { + vstelmd_op = 0x622, +}; + +enum reg2i8idx2_op { + vstelmw_op = 0x312, xvstelmd_op = 0x331, +}; + +enum reg2i8idx3_op { + vstelmh_op = 0x18a, xvstelmw_op = 0x199, +}; + +enum reg2i8idx4_op { + vstelmb_op = 0xc6, xvstelmh_op = 0xcd, +}; + +enum reg2i8idx5_op { + xvstelmb_op = 0x67, +}; + +enum reg2i9_op { + vldrepld_op = 0x602, xvldrepld_op = 0x642, +}; + +enum reg2i10_op { + vldreplw_op = 0x302, xvldreplw_op = 0x322, +}; + +enum reg2i11_op { + vldreplh_op = 0x182, xvldreplh_op = 0x192, +}; + +enum reg2i12_op { + slti_op = 0x8, sltui_op, addiw_op, addid_op, + lu52id_op, andi_op, ori_op, xori_op, + cacop_op = 0x18, xvldreplb_op = 0xca, + ldb_op = 0xa0, ldh_op, ldw_op, ldd_op, stb_op, sth_op, + stw_op, std_op, ldbu_op, ldhu_op, ldwu_op, preld_op, + flds_op, fsts_op, fldd_op, fstd_op, vld_op, vst_op, xvld_op, + xvst_op, ldlw_op = 0xb8, ldrw_op, ldld_op, ldrd_op, stlw_op, + strw_op, stld_op, strd_op, vldreplb_op = 0xc2, +}; + +enum reg2i14_op { + llw_op = 0x20, scw_op, lld_op, scd_op, ldptrw_op, stptrw_op, + ldptrd_op, stptrd_op, +}; + +enum reg2i16_op { + addu16id_op = 0x4, jirl_op = 0x13, beq_op = 0x16, bne_op, blt_op, bge_op, bltu_op, bgeu_op, +}; + +enum reg2csr_op { + csrrd_op = 0x4, csrwr_op = 0x4, csrxchg_op = 0x4, + gcsrrd_op = 0x5, gcsrwr_op = 0x5, gcsrxchg_op = 0x5, +}; + +enum reg3_op { + asrtled_op = 0x2, asrtgtd_op, + addw_op = 0x20, addd_op, subw_op, subd_op, + slt_op, sltu_op, maskeqz_op, masknez_op, + nor_op, and_op, or_op, xor_op, orn_op, + andn_op, sllw_op, srlw_op, sraw_op, slld_op, + srld_op, srad_op, rotrb_op, rotrh_op, + rotrw_op, rotrd_op, mulw_op, mulhw_op, + mulhwu_op, muld_op, mulhd_op, mulhdu_op, + mulwdw_op, mulwdwu_op, divw_op, modw_op, + divwu_op, modwu_op, divd_op, modd_op, + divdu_op, moddu_op, crcwbw_op, + crcwhw_op, crcwww_op, crcwdw_op, crccwbw_op, + crccwhw_op, crccwww_op, crccwdw_op, addu12iw_op, + addu12id_op, + adcb_op = 0x60, adch_op, adcw_op, adcd_op, + sbcb_op, sbch_op, sbcw_op, sbcd_op, + rcrb_op, rcrh_op, rcrw_op, rcrd_op, + ldxb_op = 0x7000, ldxh_op = 0x7008, ldxw_op = 0x7010, ldxd_op = 0x7018, + stxb_op = 0x7020, stxh_op = 0x7028, stxw_op = 0x7030, stxd_op = 0x7038, + ldxbu_op = 0x7040, ldxhu_op = 0x7048, ldxwu_op = 0x7050, + preldx_op = 0x7058, fldxs_op = 0x7060, fldxd_op = 0x7068, + fstxs_op = 0x7070, fstxd_op = 0x7078, vldx_op = 0x7080, + vstx_op = 0x7088, xvldx_op = 0x7090, xvstx_op = 0x7098, + amswapw_op = 0x70c0, amswapd_op, amaddw_op, amaddd_op, amandw_op, + amandd_op, amorw_op, amord_op, amxorw_op, amxord_op, ammaxw_op, + ammaxd_op, amminw_op, ammind_op, ammaxwu_op, ammaxdu_op, + amminwu_op, ammindu_op, amswap_dbw_op, amswap_dbd_op, amadd_dbw_op, + amadd_dbd_op, amand_dbw_op, amand_dbd_op, amor_dbw_op, amor_dbd_op, + amxor_dbw_op, amxor_dbd_op, ammax_dbw_op, ammax_dbd_op, ammin_dbw_op, + ammin_dbd_op, ammax_dbwu_op, ammax_dbdu_op, ammin_dbwu_op, + ammin_dbdu_op, fldgts_op = 0x70e8, fldgtd_op, + fldles_op, fldled_op, fstgts_op, fstgtd_op, fstles_op, fstled_op, + ldgtb_op, ldgth_op, ldgtw_op, ldgtd_op, ldleb_op, ldleh_op, ldlew_op, + ldled_op, stgtb_op, stgth_op, stgtw_op, stgtd_op, stleb_op, stleh_op, + stlew_op, stled_op, +}; + +enum reg3sa2_op { + alslw_op = 0x2, alslwu_op, bytepickw_op, alsld_op = 0x16, + +}; + +enum reg3sa3_op { + bytepickd_op = 0x3, +}; + +struct reg2_format { + __BITFIELD_FIELD(unsigned int opcode : 22, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;))) +}; + +struct reg2ui3_format { + __BITFIELD_FIELD(unsigned int opcode : 19, + __BITFIELD_FIELD(unsigned int simmediate : 3, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;)))) +}; + +struct reg2ui4_format { + __BITFIELD_FIELD(unsigned int opcode : 18, + __BITFIELD_FIELD(unsigned int simmediate : 4, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;)))) +}; + +struct reg2ui5_format { + __BITFIELD_FIELD(unsigned int opcode : 17, + __BITFIELD_FIELD(unsigned int simmediate : 5, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;)))) +}; + +struct reg2ui6_format { + __BITFIELD_FIELD(unsigned int opcode : 16, + __BITFIELD_FIELD(unsigned int simmediate : 6, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;)))) +}; + +struct reg2lsbw_format { + __BITFIELD_FIELD(unsigned int opcode : 11, + __BITFIELD_FIELD(unsigned int msbw : 5, + __BITFIELD_FIELD(unsigned int op : 1, + __BITFIELD_FIELD(unsigned int lsbw : 5, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;)))))) +}; + +struct reg2lsbd_format { + __BITFIELD_FIELD(unsigned int opcode : 10, + __BITFIELD_FIELD(unsigned int msbd : 6, + __BITFIELD_FIELD(unsigned int lsbd : 6, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;))))) +}; + +struct reg3_format { + __BITFIELD_FIELD(unsigned int opcode : 17, + __BITFIELD_FIELD(unsigned int rk : 5, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;)))) +}; + +struct reg3sa2_format { + __BITFIELD_FIELD(unsigned int opcode : 15, + __BITFIELD_FIELD(unsigned int simmediate : 2, + __BITFIELD_FIELD(unsigned int rk : 5, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;))))) +}; + +struct reg3sa3_format { + __BITFIELD_FIELD(unsigned int opcode : 14, + __BITFIELD_FIELD(unsigned int simmediate : 3, + __BITFIELD_FIELD(unsigned int rk : 5, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;))))) +}; + +struct reg3sa4_format { + __BITFIELD_FIELD(unsigned int opcode : 13, + __BITFIELD_FIELD(unsigned int simmediate : 4, + __BITFIELD_FIELD(unsigned int rk : 5, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;))))) +}; + +struct reg4_format { + __BITFIELD_FIELD(unsigned int opcode : 12, + __BITFIELD_FIELD(unsigned int fa : 5, + __BITFIELD_FIELD(unsigned int fk : 5, + __BITFIELD_FIELD(unsigned int fj : 5, + __BITFIELD_FIELD(unsigned int fd : 5, + ;))))) +}; + +struct reg2i8_format { + __BITFIELD_FIELD(unsigned int opcode : 14, + __BITFIELD_FIELD(unsigned int simmediate : 8, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;)))) +}; + +struct reg2i8idx1_format { + __BITFIELD_FIELD(unsigned int opcode : 13, + __BITFIELD_FIELD(unsigned int idx : 1, + __BITFIELD_FIELD(unsigned int simmediate : 8, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;))))) +}; + +struct reg2i8idx2_format { + __BITFIELD_FIELD(unsigned int opcode : 12, + __BITFIELD_FIELD(unsigned int idx : 2, + __BITFIELD_FIELD(unsigned int simmediate : 8, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;))))) +}; + +struct reg2i8idx3_format { + __BITFIELD_FIELD(unsigned int opcode : 11, + __BITFIELD_FIELD(unsigned int idx : 3, + __BITFIELD_FIELD(unsigned int simmediate : 8, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;))))) +}; + +struct reg2i8idx4_format { + __BITFIELD_FIELD(unsigned int opcode : 10, + __BITFIELD_FIELD(unsigned int idx : 4, + __BITFIELD_FIELD(unsigned int simmediate : 8, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;))))) +}; + +struct reg2i8idx5_format { + __BITFIELD_FIELD(unsigned int opcode : 9, + __BITFIELD_FIELD(unsigned int idx : 5, + __BITFIELD_FIELD(unsigned int simmediate : 8, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;))))) +}; + +struct reg2i9_format { + __BITFIELD_FIELD(unsigned int opcode : 13, + __BITFIELD_FIELD(unsigned int simmediate : 9, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;)))) +}; + +struct reg2i10_format { + __BITFIELD_FIELD(unsigned int opcode : 12, + __BITFIELD_FIELD(unsigned int simmediate : 10, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;)))) +}; + +struct reg2i11_format { + __BITFIELD_FIELD(unsigned int opcode : 11, + __BITFIELD_FIELD(unsigned int simmediate : 11, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;)))) +}; + +struct reg2i12_format { + __BITFIELD_FIELD(unsigned int opcode : 10, + __BITFIELD_FIELD(signed int simmediate : 12, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;)))) +}; + +struct reg2u12_format { + __BITFIELD_FIELD(unsigned int opcode : 10, + __BITFIELD_FIELD(unsigned int simmediate : 12, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;)))) +}; + +struct reg2i14_format { + __BITFIELD_FIELD(unsigned int opcode : 8, + __BITFIELD_FIELD(unsigned int simmediate : 14, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;)))) +}; + +struct reg2i16_format { + __BITFIELD_FIELD(unsigned int opcode : 6, + __BITFIELD_FIELD(unsigned int simmediate : 16, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;)))) +}; + +struct reg2csr_format { + __BITFIELD_FIELD(unsigned int opcode : 8, + __BITFIELD_FIELD(unsigned int csr : 14, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;)))) +}; + +struct reg1i21_format { + __BITFIELD_FIELD(unsigned int opcode : 6, + __BITFIELD_FIELD(unsigned int simmediate_l : 16, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int simmediate_h : 5, + ;)))) +}; + +struct reg1i20_format { + __BITFIELD_FIELD(unsigned int opcode : 7, + __BITFIELD_FIELD(unsigned int simmediate : 20, + __BITFIELD_FIELD(unsigned int rd : 5, + ;))) +}; + +struct reg0i15_format { + __BITFIELD_FIELD(unsigned int opcode : 17, + __BITFIELD_FIELD(unsigned int simmediate : 15, + ;)) +}; + +struct reg0i26_format { + __BITFIELD_FIELD(unsigned int opcode : 6, + __BITFIELD_FIELD(unsigned int simmediate_l : 16, + __BITFIELD_FIELD(unsigned int simmediate_h : 10, + ;))) +}; + +union loongarch_instruction { + unsigned int word; + unsigned short halfword[2]; + unsigned char byte[4]; + struct reg2_format reg2_format; + struct reg2ui3_format reg2ui3_format; + struct reg2ui4_format reg2ui4_format; + struct reg2ui5_format reg2ui5_format; + struct reg2ui6_format reg2ui6_format; + struct reg2lsbw_format reg2lsbw_format; + struct reg2lsbd_format reg2lsbd_format; + struct reg3_format reg3_format; + struct reg3sa2_format reg3sa2_format; + struct reg3sa3_format reg3sa3_format; + struct reg3sa4_format reg3sa4_format; + struct reg4_format reg4_format; + struct reg2i8_format reg2i8_format; + struct reg2i8idx1_format reg2i8idx1_format; + struct reg2i8idx2_format reg2i8idx2_format; + struct reg2i8idx3_format reg2i8idx3_format; + struct reg2i8idx4_format reg2i8idx4_format; + struct reg2i8idx5_format reg2i8idx5_format; + struct reg2i9_format reg2i9_format; + struct reg2i10_format reg2i10_format; + struct reg2i11_format reg2i11_format; + struct reg2i12_format reg2i12_format; + struct reg2i14_format reg2i14_format; + struct reg2i16_format reg2i16_format; + struct reg2csr_format reg2csr_format; + struct reg1i21_format reg1i21_format; + struct reg1i20_format reg1i20_format; + struct reg0i15_format reg0i15_format; + struct reg0i26_format reg0i26_format; +}; + +#endif /* _UAPI_ASM_INST_H */ diff --git a/arch/loongarch/include/uapi/asm/ptrace.h b/arch/loongarch/include/uapi/asm/ptrace.h new file mode 100644 index 000000000000..dc78bbeab3b0 --- /dev/null +++ b/arch/loongarch/include/uapi/asm/ptrace.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* +* Copyright (C) 2020 Loongson Technology Corporation Limited +* +* Author: Hanlu Li +* Huacai Chen +*/ +#ifndef _UAPI_ASM_PTRACE_H +#define _UAPI_ASM_PTRACE_H + +#include + +#ifndef __KERNEL__ +#include +#endif + +/* + * For PTRACE_{POKE,PEEK}USR. 0 - 31 are GPRs, + * 32 is syscall's original ARG0, 33 is PC, 34 is BADVADDR. + */ +#define GPR_BASE 0 +#define GPR_NUM 32 +#define GPR_END (GPR_BASE + GPR_NUM - 1) +#define PC (GPR_END + 1) +#define BADVADDR (GPR_END + 2) + +#define NUM_FPU_REGS 32 + +struct user_pt_regs { + /* Main processor registers. */ + unsigned long regs[32]; + + /* Special CSR registers. */ + unsigned long csr_era; + unsigned long csr_badv; + unsigned long reserved[11]; +} __attribute__((aligned(8))); + +struct user_fp_state { + uint64_t fpr[32]; + uint64_t fcc; + uint32_t fcsr; +}; + +#define PTRACE_SYSEMU 0x1f +#define PTRACE_SYSEMU_SINGLESTEP 0x20 + +#endif /* _UAPI_ASM_PTRACE_H */ diff --git a/arch/loongarch/include/uapi/asm/reg.h b/arch/loongarch/include/uapi/asm/reg.h new file mode 100644 index 000000000000..12f498f22157 --- /dev/null +++ b/arch/loongarch/include/uapi/asm/reg.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Various register offset definitions for debuggers, core file + * examiners and whatnot. + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ + +#ifndef __UAPI_ASM_LOONGARCH_REG_H +#define __UAPI_ASM_LOONGARCH_REG_H + +#define LOONGARCH_EF_R0 0 +#define LOONGARCH_EF_R1 1 +#define LOONGARCH_EF_R2 2 +#define LOONGARCH_EF_R3 3 +#define LOONGARCH_EF_R4 4 +#define LOONGARCH_EF_R5 5 +#define LOONGARCH_EF_R6 6 +#define LOONGARCH_EF_R7 7 +#define LOONGARCH_EF_R8 8 +#define LOONGARCH_EF_R9 9 +#define LOONGARCH_EF_R10 10 +#define LOONGARCH_EF_R11 11 +#define LOONGARCH_EF_R12 12 +#define LOONGARCH_EF_R13 13 +#define LOONGARCH_EF_R14 14 +#define LOONGARCH_EF_R15 15 +#define LOONGARCH_EF_R16 16 +#define LOONGARCH_EF_R17 17 +#define LOONGARCH_EF_R18 18 +#define LOONGARCH_EF_R19 19 +#define LOONGARCH_EF_R20 20 +#define LOONGARCH_EF_R21 21 +#define LOONGARCH_EF_R22 22 +#define LOONGARCH_EF_R23 23 +#define LOONGARCH_EF_R24 24 +#define LOONGARCH_EF_R25 25 +#define LOONGARCH_EF_R26 26 +#define LOONGARCH_EF_R27 27 +#define LOONGARCH_EF_R28 28 +#define LOONGARCH_EF_R29 29 +#define LOONGARCH_EF_R30 30 +#define LOONGARCH_EF_R31 31 + +/* + * Saved special registers + */ +#define LOONGARCH_EF_CSR_ERA 32 +#define LOONGARCH_EF_CSR_BADV 33 +#define LOONGARCH_EF_CSR_CRMD 34 +#define LOONGARCH_EF_CSR_PRMD 35 +#define LOONGARCH_EF_CSR_EUEN 36 +#define LOONGARCH_EF_CSR_ECFG 37 +#define LOONGARCH_EF_CSR_ESTAT 38 + +#define LOONGARCH_EF_SIZE 312 /* size in bytes */ + +#endif /* __UAPI_ASM_LOONGARCH_REG_H */ diff --git a/arch/loongarch/include/uapi/asm/sigcontext.h b/arch/loongarch/include/uapi/asm/sigcontext.h new file mode 100644 index 000000000000..d9bf4a937f61 --- /dev/null +++ b/arch/loongarch/include/uapi/asm/sigcontext.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* +* Copyright (C) 2020 Loongson Technology Corporation Limited +* +* Author: Hanlu Li +*/ +#ifndef _UAPI_ASM_SIGCONTEXT_H +#define _UAPI_ASM_SIGCONTEXT_H + +#include +#include + +/* FP context was used */ +#define USED_FP (1 << 0) +#define ADRERR_RD (1 << 30) +#define ADRERR_WR (1 << 31) + +#define FPU_REG_WIDTH 256 +#define FPU_ALIGN __attribute__((aligned(32))) + +struct sigcontext { + __u64 sc_pc; + __u64 sc_regs[32]; + __u32 sc_flags; + __u32 sc_fcsr; + __u32 sc_vcsr; + __u64 sc_fcc; + /* For Binary Translation */ + __u64 sc_scr[4]; + union { + __u32 val32[FPU_REG_WIDTH / 32]; + __u64 val64[FPU_REG_WIDTH / 64]; + } sc_fpregs[32] FPU_ALIGN; + + /* Reserved for future scalable vectors */ + __u8 sc_reserved[4096] __attribute__((__aligned__(16))); +}; + +#endif /* _UAPI_ASM_SIGCONTEXT_H */ diff --git a/arch/loongarch/include/uapi/asm/signal.h b/arch/loongarch/include/uapi/asm/signal.h new file mode 100644 index 000000000000..c1333dc49f82 --- /dev/null +++ b/arch/loongarch/include/uapi/asm/signal.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (C) 2020-2021 Loongson Technology Corporation Limited + */ +#ifndef _UAPI_ASM_SIGNAL_H +#define _UAPI_ASM_SIGNAL_H + +#define MINSIGSTKSZ 4096 +#define SIGSTKSZ 16384 + +#include + +#endif diff --git a/arch/loongarch/include/uapi/asm/swab.h b/arch/loongarch/include/uapi/asm/swab.h new file mode 100644 index 000000000000..ea16e98e18a6 --- /dev/null +++ b/arch/loongarch/include/uapi/asm/swab.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2020 Loongson Technologies, Inc. All rights reserved. + * Authors: Jun Yi + */ +#ifndef _ASM_SWAB_H +#define _ASM_SWAB_H + +#include +#include + +#define __SWAB_64_THRU_32__ + +static inline __attribute_const__ __u16 __arch_swab16(__u16 x) +{ + __asm__( + " revb.2h %0, %1 \n" + : "=r" (x) + : "r" (x)); + + return x; +} +#define __arch_swab16 __arch_swab16 + +static inline __attribute_const__ __u32 __arch_swab32(__u32 x) +{ + __asm__( + " revb.2h %0, %1 \n" + " rotri.w %0, %0, 16 \n" + : "=r" (x) + : "r" (x)); + + return x; +} +#define __arch_swab32 __arch_swab32 + +#ifdef __loongarch64 +static inline __attribute_const__ __u64 __arch_swab64(__u64 x) +{ + __asm__( + " revb.4h %0, %1 \n" + " revh.d %0, %0 \n" + : "=r" (x) + : "r" (x)); + + return x; +} +#define __arch_swab64 __arch_swab64 +#endif /* __loongarch64 */ +#endif /* _ASM_SWAB_H */ diff --git a/arch/loongarch/include/uapi/asm/ucontext.h b/arch/loongarch/include/uapi/asm/ucontext.h new file mode 100644 index 000000000000..19ebdb033282 --- /dev/null +++ b/arch/loongarch/include/uapi/asm/ucontext.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __LOONGARCH_UAPI_ASM_UCONTEXT_H +#define __LOONGARCH_UAPI_ASM_UCONTEXT_H + +/** + * struct ucontext - user context structure + * @uc_flags: + * @uc_link: + * @uc_stack: + * @uc_mcontext: holds basic processor state + * @uc_sigmask: + * @uc_extcontext: holds extended processor state + */ +struct ucontext { + /* Historic fields matching asm-generic */ + unsigned long uc_flags; + struct ucontext *uc_link; + stack_t uc_stack; + struct sigcontext uc_mcontext; + sigset_t uc_sigmask; + /** + * There's some padding here to allow sigset_t to be expanded in the + * future. Though this is unlikely, other architectures put uc_sigmask + * at the end of this structure and explicitly state it can be + * expanded, so we didn't want to box ourselves in here. + */ + __u8 __unused[1024 / 8 - sizeof(sigset_t)]; +}; + +#endif /* __LOONGARCH_UAPI_ASM_UCONTEXT_H */ diff --git a/arch/loongarch/include/uapi/asm/unistd.h b/arch/loongarch/include/uapi/asm/unistd.h new file mode 100644 index 000000000000..b344b1f91715 --- /dev/null +++ b/arch/loongarch/include/uapi/asm/unistd.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#define __ARCH_WANT_NEW_STAT +#define __ARCH_WANT_SYS_CLONE +#define __ARCH_WANT_SYS_CLONE3 + +#include diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile new file mode 100644 index 000000000000..de13ac4fd2cb --- /dev/null +++ b/arch/loongarch/kernel/Makefile @@ -0,0 +1,22 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the Linux/LoongArch kernel. +# + +extra-y := head.o vmlinux.lds + +obj-y += cpu-probe.o elf.o entry.o genex.o idle.o irq.o \ + process.o ptrace.o reset.o setup.o signal.o io.o \ + syscall.o time.o topology.o traps.o unaligned.o \ + cmdline.o switch.o cacheinfo.o cmpxchg.o vdso.o + +obj-$(CONFIG_ACPI) += acpi.o +obj-$(CONFIG_EFI) += efi.o + +obj-$(CONFIG_MODULES) += module.o + +obj-$(CONFIG_CPU_HAS_FPU) += fpu.o + +obj-$(CONFIG_PROC_FS) += proc.o + +CPPFLAGS_vmlinux.lds := $(KBUILD_CFLAGS) diff --git a/arch/loongarch/kernel/access-helper.h b/arch/loongarch/kernel/access-helper.h new file mode 100644 index 000000000000..4a35ca81bd08 --- /dev/null +++ b/arch/loongarch/kernel/access-helper.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include + +static inline int __get_inst(u32 *i, u32 *p, bool user) +{ + return user ? get_user(*i, (u32 __user *)p) : get_kernel_nofault(*i, p); +} + +static inline int __get_addr(unsigned long *a, unsigned long *p, bool user) +{ + return user ? get_user(*a, (unsigned long __user *)p) : get_kernel_nofault(*a, p); +} diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c new file mode 100644 index 000000000000..5172581441da --- /dev/null +++ b/arch/loongarch/kernel/acpi.c @@ -0,0 +1,529 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * acpi.c - Architecture-Specific Low-Level ACPI Boot Support + * + * Author: Jianmin Lv + * Author: Huacai Chen + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define MSI_MSG_ADDRESS 0x2FF00000 +#define MSI_MSG_DEFAULT_COUNT 0xC0 + +int acpi_disabled; +EXPORT_SYMBOL(acpi_disabled); +int acpi_noirq; +int acpi_pci_disabled; +EXPORT_SYMBOL(acpi_pci_disabled); +int acpi_strict = 1; /* We have no workarounds on LoongArch */ +int num_processors; +int disabled_cpus; +enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PLATFORM; + +u64 acpi_saved_sp; + +#define MAX_CORE_PIC 256 + +#define PREFIX "ACPI: " + +struct acpi_madt_lio_pic liointc_default = { + .address = LOONGSON_REG_BASE + 0x1400, + .size = 256, + .cascade = {2, 3}, + .cascade_map = {0x00FFFFFF, 0xff000000}, +}; + +struct acpi_madt_lpc_pic pchlpc_default = { + .address = LS7A_LPC_REG_BASE, + .size = SZ_4K, + .cascade = 19, +}; + +struct acpi_madt_eio_pic eiointc_default[MAX_IO_PICS]; +struct acpi_madt_msi_pic pchmsi_default[MAX_IO_PICS]; +struct acpi_madt_bio_pic pchpic_default[MAX_IO_PICS]; + +int acpi_gsi_to_irq(u32 gsi, unsigned int *irqp) +{ + if (irqp != NULL) + *irqp = acpi_register_gsi(NULL, gsi, -1, -1); + return (*irqp >= 0) ? 0 : -EINVAL; +} +EXPORT_SYMBOL_GPL(acpi_gsi_to_irq); + +int acpi_isa_irq_to_gsi(unsigned int isa_irq, u32 *gsi) +{ + if (gsi) + *gsi = isa_irq; + return 0; +} + +/* + * success: return IRQ number (>=0) + * failure: return < 0 + */ +int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) +{ + int id; + struct irq_fwspec fwspec; + + switch (gsi) { + case GSI_MIN_CPU_IRQ ... GSI_MAX_CPU_IRQ: + fwspec.fwnode = liointc_domain->fwnode; + fwspec.param[0] = gsi - GSI_MIN_CPU_IRQ; + fwspec.param_count = 1; + + return irq_create_fwspec_mapping(&fwspec); + + case GSI_MIN_LPC_IRQ ... GSI_MAX_LPC_IRQ: + if (!pch_lpc_domain) + return -EINVAL; + + fwspec.fwnode = pch_lpc_domain->fwnode; + fwspec.param[0] = gsi - GSI_MIN_LPC_IRQ; + fwspec.param[1] = acpi_dev_get_irq_type(trigger, polarity); + fwspec.param_count = 2; + + return irq_create_fwspec_mapping(&fwspec); + + case GSI_MIN_PCH_IRQ ... GSI_MAX_PCH_IRQ: + id = find_pch_pic(gsi); + if (id < 0) + return -EINVAL; + + fwspec.fwnode = pch_pic_domain[id]->fwnode; + fwspec.param[0] = gsi - acpi_pchpic[id]->gsi_base; + fwspec.param[1] = IRQ_TYPE_LEVEL_HIGH; + fwspec.param_count = 2; + + return irq_create_fwspec_mapping(&fwspec); + } + + return -EINVAL; +} +EXPORT_SYMBOL_GPL(acpi_register_gsi); + +void acpi_unregister_gsi(u32 gsi) +{ + +} +EXPORT_SYMBOL_GPL(acpi_unregister_gsi); + +void __init __iomem *__acpi_map_table(unsigned long phys, unsigned long size) +{ + + if (!phys || !size) + return NULL; + + return early_memremap(phys, size); +} +void __init __acpi_unmap_table(void __iomem *map, unsigned long size) +{ + if (!map || !size) + return; + + early_memunmap(map, size); +} + +void __init __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) +{ + if (!memblock_is_memory(phys)) + return ioremap(phys, size); + else + return ioremap_cache(phys, size); +} + +void __init acpi_boot_table_init(void) +{ + /* + * If acpi_disabled, bail out + */ + if (acpi_disabled) + return; + + /* + * Initialize the ACPI boot-time table parser. + */ + if (acpi_table_init()) { + disable_acpi(); + return; + } +} + +static int __init +acpi_parse_cpuintc(union acpi_subtable_headers *header, const unsigned long end) +{ + struct acpi_madt_core_pic *processor = NULL; + + processor = (struct acpi_madt_core_pic *)header; + if (BAD_MADT_ENTRY(processor, end)) + return -EINVAL; + + acpi_table_print_madt_entry(&header->common); + + return 0; +} + +static int __init +acpi_parse_lapic(union acpi_subtable_headers *header, const unsigned long end) +{ + struct acpi_madt_local_apic *processor = NULL; + + processor = (struct acpi_madt_local_apic *)header; + if (BAD_MADT_ENTRY(processor, end)) + return -EINVAL; + + acpi_table_print_madt_entry(&header->common); + set_processor_mask(processor->id, processor->lapic_flags); + + return 0; +} + +static int __init acpi_parse_madt_lapic(void) +{ + int ret; + struct acpi_subtable_proc madt_proc[1]; + + memset(madt_proc, 0, sizeof(madt_proc)); + madt_proc[0].id = ACPI_MADT_TYPE_LOCAL_APIC; + madt_proc[0].handler = acpi_parse_lapic; + ret = acpi_table_parse_entries_array(ACPI_SIG_MADT, + sizeof(struct acpi_table_madt), + madt_proc, ARRAY_SIZE(madt_proc), + MAX_CORE_PIC); + if (ret < 0) { + pr_err(PREFIX "Error parsing LAPIC entries\n"); + return ret; + } + + return 0; +} + +static int __init +acpi_parse_liointc(union acpi_subtable_headers *header, const unsigned long end) +{ + struct acpi_madt_lio_pic *liointc = NULL; + + liointc = (struct acpi_madt_lio_pic *)header; + + if (BAD_MADT_ENTRY(liointc, end)) + return -EINVAL; + + acpi_liointc = liointc; + + return 0; +} + +static int __init +acpi_parse_eiointc(union acpi_subtable_headers *header, const unsigned long end) +{ + static int id; + struct acpi_madt_eio_pic *eiointc = NULL; + + eiointc = (struct acpi_madt_eio_pic *)header; + + if (BAD_MADT_ENTRY(eiointc, end)) + return -EINVAL; + + acpi_eiointc[id++] = eiointc; + loongson_sysconf.nr_io_pics = id; + + return 0; +} + +static int __init +acpi_parse_htintc(union acpi_subtable_headers *header, const unsigned long end) +{ + struct acpi_madt_ht_pic *htintc = NULL; + + htintc = (struct acpi_madt_ht_pic *)header; + + if (BAD_MADT_ENTRY(htintc, end)) + return -EINVAL; + + acpi_htintc = htintc; + loongson_sysconf.nr_io_pics = 1; + + return 0; +} + +static int __init +acpi_parse_pch_pic(union acpi_subtable_headers *header, const unsigned long end) +{ + static int id; + struct acpi_madt_bio_pic *pchpic = NULL; + + pchpic = (struct acpi_madt_bio_pic *)header; + + if (BAD_MADT_ENTRY(pchpic, end)) + return -EINVAL; + + acpi_pchpic[id++] = pchpic; + + return 0; +} + +static int __init +acpi_parse_pch_msi(union acpi_subtable_headers *header, const unsigned long end) +{ + static int id; + struct acpi_madt_msi_pic *pchmsi = NULL; + + pchmsi = (struct acpi_madt_msi_pic *)header; + + if (BAD_MADT_ENTRY(pchmsi, end)) + return -EINVAL; + + acpi_pchmsi[id++] = pchmsi; + + return 0; +} + +static int __init +acpi_parse_pch_lpc(union acpi_subtable_headers *header, const unsigned long end) +{ + struct acpi_madt_lpc_pic *pchlpc = NULL; + + pchlpc = (struct acpi_madt_lpc_pic *)header; + + if (BAD_MADT_ENTRY(pchlpc, end)) + return -EINVAL; + + acpi_pchlpc = pchlpc; + + return 0; +} + +static __init int bad_pch_pic(unsigned long address) +{ + if (loongson_sysconf.nr_io_pics >= MAX_IO_PICS) { + pr_warn("WARNING: Max # of I/O PCH_PICs (%d) exceeded (found %d), skipping\n", + MAX_IO_PICS, loongson_sysconf.nr_io_pics); + return 1; + } + if (!address) { + pr_warn("WARNING: Bogus (zero) I/O PCH_PIC address found in table, skipping!\n"); + return 1; + } + return 0; +} + +void __init register_default_pic(int id, u32 address, u32 irq_base) +{ + int idx, entries; + unsigned long addr; + + if (bad_pch_pic(address)) + return; + + idx = loongson_sysconf.nr_io_pics; + + pchpic_default[idx].address = address; + if (idx) + pchpic_default[idx].address |= nid_to_addrbase(id) | HT1LO_OFFSET; + pchpic_default[idx].id = id; + pchpic_default[idx].version = 0; + pchpic_default[idx].size = 0x1000; + pchpic_default[idx].gsi_base = irq_base; + + + addr = pchpic_default[idx].address; + entries = (((unsigned long)ls7a_readq(addr) >> 48) & 0xff) + 1; + pchmsi_default[idx].msg_address = MSI_MSG_ADDRESS; + pchmsi_default[idx].start = entries; + pchmsi_default[idx].count = MSI_MSG_DEFAULT_COUNT; + + eiointc_default[idx].cascade = 3; + eiointc_default[idx].node = id; + eiointc_default[idx].node_map = 1; + + if (idx) { + eiointc_default[idx].cascade = 0x4; + eiointc_default[0].node_map = 0x1DF; + eiointc_default[idx].node_map = 0xFE20; + } + + acpi_pchpic[idx] = &pchpic_default[idx]; + acpi_pchmsi[idx] = &pchmsi_default[idx]; + acpi_eiointc[idx] = &eiointc_default[idx]; + + loongson_sysconf.nr_io_pics++; +} + +static int __init +acpi_parse_legacy_pch_pic(struct acpi_subtable_header *header, + const unsigned long end) +{ + struct acpi_madt_io_apic *pch_pic = NULL; + + pch_pic = (struct acpi_madt_io_apic *)header; + + if (BAD_MADT_ENTRY(pch_pic, end)) + return -EINVAL; + + acpi_table_print_madt_entry(header); + + register_default_pic(pch_pic->id, pch_pic->address, + pch_pic->global_irq_base); + + return 0; +} + +/* + * Parse PCH_PIC related entries in MADT + * returns 0 on success, < 0 on error + */ +static int __init acpi_parse_madt_pch_pic_entries(void) +{ + int count; + + /* + * ACPI interpreter is required to complete interrupt setup, + * so if it is off, don't enumerate the io-apics with ACPI. + * If MPS is present, it will handle them, + * otherwise the system will stay in PIC mode + */ + if (acpi_disabled || acpi_noirq) + return -ENODEV; + + count = acpi_table_parse_madt(ACPI_MADT_TYPE_IO_APIC, + (void *)acpi_parse_legacy_pch_pic, MAX_IO_PICS); + if (!count) { + printk(KERN_ERR PREFIX "No PCH_PIC entries present\n"); + return -ENODEV; + } else if (count < 0) { + printk(KERN_ERR PREFIX "Error parsing PCH_PIC entry\n"); + return count; + } + + return 0; +} + +static int legacy_madt_table_init(void) +{ + int error; + + /* Parse MADT LAPIC entries */ + error = acpi_parse_madt_lapic(); + if (!error) { + acpi_liointc = &liointc_default; + acpi_pchlpc = &pchlpc_default; + acpi_parse_madt_pch_pic_entries(); + } + if (error == -EINVAL) { + pr_err(PREFIX + "Invalid BIOS MADT, disabling ACPI\n"); + disable_acpi(); + return -1; + } + + loongson_sysconf.nr_cpus = num_processors; + return 0; +} + +static void __init acpi_process_madt(void) +{ + int error; + + if (loongson_sysconf.bpi_ver <= BPI_VERSION_V1) { + error = legacy_madt_table_init(); + if (error < 0) { + disable_acpi(); + pr_err(PREFIX "Invalid BIOS MADT (legacy), ACPI disabled\n"); + } + return; + } + + /* Parse MADT CPUINTC entries */ + error = acpi_table_parse_madt(ACPI_MADT_TYPE_CORE_PIC, acpi_parse_cpuintc, MAX_CORE_PIC); + if (error < 0) { + disable_acpi(); + pr_err(PREFIX "Invalid BIOS MADT (CPUINTC entries), ACPI disabled\n"); + return; + } + + loongson_sysconf.nr_cpus = num_processors; + + /* Parse MADT LIOINTC entries */ + error = acpi_table_parse_madt(ACPI_MADT_TYPE_LIO_PIC, acpi_parse_liointc, 1); + if (error < 0) { + disable_acpi(); + pr_err(PREFIX "Invalid BIOS MADT (LIOINTC entries), ACPI disabled\n"); + return; + } + + /* Parse MADT EIOINTC entries */ + error = acpi_table_parse_madt(ACPI_MADT_TYPE_EIO_PIC, acpi_parse_eiointc, MAX_IO_PICS); + if (error < 0) { + disable_acpi(); + pr_err(PREFIX "Invalid BIOS MADT (EIOINTC entries), ACPI disabled\n"); + return; + } + + /* Parse MADT HTVEC entries */ + error = acpi_table_parse_madt(ACPI_MADT_TYPE_HT_PIC, acpi_parse_htintc, 1); + if (error < 0) { + disable_acpi(); + pr_err(PREFIX "Invalid BIOS MADT (HTVEC entries), ACPI disabled\n"); + return; + } + + /* Parse MADT PCHPIC entries */ + error = acpi_table_parse_madt(ACPI_MADT_TYPE_BIO_PIC, acpi_parse_pch_pic, MAX_IO_PICS); + if (error < 0) { + disable_acpi(); + pr_err(PREFIX "Invalid BIOS MADT (PCHPIC entries), ACPI disabled\n"); + return; + } + + /* Parse MADT PCHMSI entries */ + error = acpi_table_parse_madt(ACPI_MADT_TYPE_MSI_PIC, acpi_parse_pch_msi, MAX_IO_PICS); + if (error < 0) { + disable_acpi(); + pr_err(PREFIX "Invalid BIOS MADT (PCHMSI entries), ACPI disabled\n"); + return; + } + + /* Parse MADT PCHLPC entries */ + error = acpi_table_parse_madt(ACPI_MADT_TYPE_LPC_PIC, acpi_parse_pch_lpc, 1); + if (error < 0) { + disable_acpi(); + pr_err(PREFIX "Invalid BIOS MADT (PCHLPC entries), ACPI disabled\n"); + return; + } +} + +int __init acpi_boot_init(void) +{ + /* + * If acpi_disabled, bail out + */ + if (acpi_disabled) + return -1; + + loongson_sysconf.boot_cpu_id = read_csr_cpuid(); + + /* + * Process the Multiple APIC Description Table (MADT), if present + */ + acpi_process_madt(); + + /* Do not enable ACPI SPCR console by default */ + acpi_parse_spcr(earlycon_acpi_spcr_enable, false); + + return 0; +} + +void __init arch_reserve_mem_area(acpi_physical_address addr, size_t size) +{ + memblock_reserve(addr, size); +} diff --git a/arch/loongarch/kernel/asm-offsets.c b/arch/loongarch/kernel/asm-offsets.c new file mode 100644 index 000000000000..776051030a98 --- /dev/null +++ b/arch/loongarch/kernel/asm-offsets.c @@ -0,0 +1,255 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * asm-offsets.c: Calculate pt_regs and task_struct offsets. + * + * Copyright (C) 2020 Loongson Technologies, Inc. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +void output_ptreg_defines(void) +{ + COMMENT("LoongArch pt_regs offsets."); + OFFSET(PT_R0, pt_regs, regs[0]); + OFFSET(PT_R1, pt_regs, regs[1]); + OFFSET(PT_R2, pt_regs, regs[2]); + OFFSET(PT_R3, pt_regs, regs[3]); + OFFSET(PT_R4, pt_regs, regs[4]); + OFFSET(PT_R5, pt_regs, regs[5]); + OFFSET(PT_R6, pt_regs, regs[6]); + OFFSET(PT_R7, pt_regs, regs[7]); + OFFSET(PT_R8, pt_regs, regs[8]); + OFFSET(PT_R9, pt_regs, regs[9]); + OFFSET(PT_R10, pt_regs, regs[10]); + OFFSET(PT_R11, pt_regs, regs[11]); + OFFSET(PT_R12, pt_regs, regs[12]); + OFFSET(PT_R13, pt_regs, regs[13]); + OFFSET(PT_R14, pt_regs, regs[14]); + OFFSET(PT_R15, pt_regs, regs[15]); + OFFSET(PT_R16, pt_regs, regs[16]); + OFFSET(PT_R17, pt_regs, regs[17]); + OFFSET(PT_R18, pt_regs, regs[18]); + OFFSET(PT_R19, pt_regs, regs[19]); + OFFSET(PT_R20, pt_regs, regs[20]); + OFFSET(PT_R21, pt_regs, regs[21]); + OFFSET(PT_R22, pt_regs, regs[22]); + OFFSET(PT_R23, pt_regs, regs[23]); + OFFSET(PT_R24, pt_regs, regs[24]); + OFFSET(PT_R25, pt_regs, regs[25]); + OFFSET(PT_R26, pt_regs, regs[26]); + OFFSET(PT_R27, pt_regs, regs[27]); + OFFSET(PT_R28, pt_regs, regs[28]); + OFFSET(PT_R29, pt_regs, regs[29]); + OFFSET(PT_R30, pt_regs, regs[30]); + OFFSET(PT_R31, pt_regs, regs[31]); + OFFSET(PT_CRMD, pt_regs, csr_crmd); + OFFSET(PT_PRMD, pt_regs, csr_prmd); + OFFSET(PT_EUEN, pt_regs, csr_euen); + OFFSET(PT_ECFG, pt_regs, csr_ecfg); + OFFSET(PT_ESTAT, pt_regs, csr_estat); + OFFSET(PT_ERA, pt_regs, csr_era); + OFFSET(PT_BVADDR, pt_regs, csr_badvaddr); + OFFSET(PT_ORIG_A0, pt_regs, orig_a0); + DEFINE(PT_SIZE, sizeof(struct pt_regs)); + BLANK(); +} + +void output_task_defines(void) +{ + COMMENT("LoongArch task_struct offsets."); + OFFSET(TASK_STATE, task_struct, state); + OFFSET(TASK_THREAD_INFO, task_struct, stack); + OFFSET(TASK_FLAGS, task_struct, flags); + OFFSET(TASK_MM, task_struct, mm); + OFFSET(TASK_PID, task_struct, pid); + DEFINE(TASK_STRUCT_SIZE, sizeof(struct task_struct)); + BLANK(); +} + +void output_thread_info_defines(void) +{ + COMMENT("LoongArch thread_info offsets."); + OFFSET(TI_TASK, thread_info, task); + OFFSET(TI_FLAGS, thread_info, flags); + OFFSET(TI_TP_VALUE, thread_info, tp_value); + OFFSET(TI_CPU, thread_info, cpu); + OFFSET(TI_PRE_COUNT, thread_info, preempt_count); + OFFSET(TI_REGS, thread_info, regs); + DEFINE(_THREAD_SIZE, THREAD_SIZE); + DEFINE(_THREAD_MASK, THREAD_MASK); + DEFINE(_IRQ_STACK_SIZE, IRQ_STACK_SIZE); + DEFINE(_IRQ_STACK_START, IRQ_STACK_START); + BLANK(); +} + +void output_thread_defines(void) +{ + COMMENT("LoongArch specific thread_struct offsets."); + OFFSET(THREAD_REG01, task_struct, thread.reg01); + OFFSET(THREAD_REG03, task_struct, thread.reg03); + OFFSET(THREAD_REG22, task_struct, thread.reg22); + OFFSET(THREAD_REG23, task_struct, thread.reg23); + OFFSET(THREAD_REG24, task_struct, thread.reg24); + OFFSET(THREAD_REG25, task_struct, thread.reg25); + OFFSET(THREAD_REG26, task_struct, thread.reg26); + OFFSET(THREAD_REG27, task_struct, thread.reg27); + OFFSET(THREAD_REG28, task_struct, thread.reg28); + OFFSET(THREAD_REG29, task_struct, thread.reg29); + OFFSET(THREAD_REG30, task_struct, thread.reg30); + OFFSET(THREAD_REG31, task_struct, thread.reg31); + OFFSET(THREAD_CSRCRMD, task_struct, + thread.csr_crmd); + OFFSET(THREAD_CSRPRMD, task_struct, + thread.csr_prmd); + OFFSET(THREAD_CSREUEN, task_struct, + thread.csr_euen); + OFFSET(THREAD_CSRECFG, task_struct, + thread.csr_ecfg); + + OFFSET(THREAD_SCR0, task_struct, thread.scr0); + OFFSET(THREAD_SCR1, task_struct, thread.scr1); + OFFSET(THREAD_SCR2, task_struct, thread.scr2); + OFFSET(THREAD_SCR3, task_struct, thread.scr3); + + OFFSET(THREAD_EFLAGS, task_struct, thread.eflags); + + OFFSET(THREAD_FPU, task_struct, thread.fpu); + + OFFSET(THREAD_BVADDR, task_struct, \ + thread.csr_badvaddr); + OFFSET(THREAD_ECODE, task_struct, \ + thread.error_code); + OFFSET(THREAD_TRAPNO, task_struct, thread.trap_nr); + BLANK(); +} + +void output_thread_fpu_defines(void) +{ + OFFSET(THREAD_FPR0, loongarch_fpu, fpr[0]); + OFFSET(THREAD_FPR1, loongarch_fpu, fpr[1]); + OFFSET(THREAD_FPR2, loongarch_fpu, fpr[2]); + OFFSET(THREAD_FPR3, loongarch_fpu, fpr[3]); + OFFSET(THREAD_FPR4, loongarch_fpu, fpr[4]); + OFFSET(THREAD_FPR5, loongarch_fpu, fpr[5]); + OFFSET(THREAD_FPR6, loongarch_fpu, fpr[6]); + OFFSET(THREAD_FPR7, loongarch_fpu, fpr[7]); + OFFSET(THREAD_FPR8, loongarch_fpu, fpr[8]); + OFFSET(THREAD_FPR9, loongarch_fpu, fpr[9]); + OFFSET(THREAD_FPR10, loongarch_fpu, fpr[10]); + OFFSET(THREAD_FPR11, loongarch_fpu, fpr[11]); + OFFSET(THREAD_FPR12, loongarch_fpu, fpr[12]); + OFFSET(THREAD_FPR13, loongarch_fpu, fpr[13]); + OFFSET(THREAD_FPR14, loongarch_fpu, fpr[14]); + OFFSET(THREAD_FPR15, loongarch_fpu, fpr[15]); + OFFSET(THREAD_FPR16, loongarch_fpu, fpr[16]); + OFFSET(THREAD_FPR17, loongarch_fpu, fpr[17]); + OFFSET(THREAD_FPR18, loongarch_fpu, fpr[18]); + OFFSET(THREAD_FPR19, loongarch_fpu, fpr[19]); + OFFSET(THREAD_FPR20, loongarch_fpu, fpr[20]); + OFFSET(THREAD_FPR21, loongarch_fpu, fpr[21]); + OFFSET(THREAD_FPR22, loongarch_fpu, fpr[22]); + OFFSET(THREAD_FPR23, loongarch_fpu, fpr[23]); + OFFSET(THREAD_FPR24, loongarch_fpu, fpr[24]); + OFFSET(THREAD_FPR25, loongarch_fpu, fpr[25]); + OFFSET(THREAD_FPR26, loongarch_fpu, fpr[26]); + OFFSET(THREAD_FPR27, loongarch_fpu, fpr[27]); + OFFSET(THREAD_FPR28, loongarch_fpu, fpr[28]); + OFFSET(THREAD_FPR29, loongarch_fpu, fpr[29]); + OFFSET(THREAD_FPR30, loongarch_fpu, fpr[30]); + OFFSET(THREAD_FPR31, loongarch_fpu, fpr[31]); + + OFFSET(THREAD_FCSR, loongarch_fpu, fcsr); + OFFSET(THREAD_FCC, loongarch_fpu, fcc); + OFFSET(THREAD_VCSR, loongarch_fpu, vcsr); + BLANK(); +} + +void output_mm_defines(void) +{ + COMMENT("Size of struct page"); + DEFINE(STRUCT_PAGE_SIZE, sizeof(struct page)); + BLANK(); + COMMENT("Linux mm_struct offsets."); + OFFSET(MM_USERS, mm_struct, mm_users); + OFFSET(MM_PGD, mm_struct, pgd); + OFFSET(MM_CONTEXT, mm_struct, context); + BLANK(); + DEFINE(_PGD_T_SIZE, sizeof(pgd_t)); + DEFINE(_PMD_T_SIZE, sizeof(pmd_t)); + DEFINE(_PTE_T_SIZE, sizeof(pte_t)); + BLANK(); + DEFINE(_PGD_T_LOG2, PGD_T_LOG2); +#ifndef __PAGETABLE_PMD_FOLDED + DEFINE(_PMD_T_LOG2, PMD_T_LOG2); +#endif + DEFINE(_PTE_T_LOG2, PTE_T_LOG2); + BLANK(); + DEFINE(_PGD_ORDER, PGD_ORDER); +#ifndef __PAGETABLE_PMD_FOLDED + DEFINE(_PMD_ORDER, PMD_ORDER); +#endif + DEFINE(_PTE_ORDER, PTE_ORDER); + BLANK(); + DEFINE(_PMD_SHIFT, PMD_SHIFT); + DEFINE(_PGDIR_SHIFT, PGDIR_SHIFT); + BLANK(); + DEFINE(_PTRS_PER_PGD, PTRS_PER_PGD); + DEFINE(_PTRS_PER_PMD, PTRS_PER_PMD); + DEFINE(_PTRS_PER_PTE, PTRS_PER_PTE); + BLANK(); + DEFINE(_PAGE_SHIFT, PAGE_SHIFT); + DEFINE(_PAGE_SIZE, PAGE_SIZE); + BLANK(); +} + +void output_sc_defines(void) +{ + COMMENT("Linux sigcontext offsets."); + OFFSET(SC_REGS, sigcontext, sc_regs); + OFFSET(SC_PC, sigcontext, sc_pc); + BLANK(); +} + +void output_signal_defines(void) +{ + COMMENT("Linux signal numbers."); + DEFINE(_SIGHUP, SIGHUP); + DEFINE(_SIGINT, SIGINT); + DEFINE(_SIGQUIT, SIGQUIT); + DEFINE(_SIGILL, SIGILL); + DEFINE(_SIGTRAP, SIGTRAP); + DEFINE(_SIGIOT, SIGIOT); + DEFINE(_SIGABRT, SIGABRT); + DEFINE(_SIGFPE, SIGFPE); + DEFINE(_SIGKILL, SIGKILL); + DEFINE(_SIGBUS, SIGBUS); + DEFINE(_SIGSEGV, SIGSEGV); + DEFINE(_SIGSYS, SIGSYS); + DEFINE(_SIGPIPE, SIGPIPE); + DEFINE(_SIGALRM, SIGALRM); + DEFINE(_SIGTERM, SIGTERM); + DEFINE(_SIGUSR1, SIGUSR1); + DEFINE(_SIGUSR2, SIGUSR2); + DEFINE(_SIGCHLD, SIGCHLD); + DEFINE(_SIGPWR, SIGPWR); + DEFINE(_SIGWINCH, SIGWINCH); + DEFINE(_SIGURG, SIGURG); + DEFINE(_SIGIO, SIGIO); + DEFINE(_SIGSTOP, SIGSTOP); + DEFINE(_SIGTSTP, SIGTSTP); + DEFINE(_SIGCONT, SIGCONT); + DEFINE(_SIGTTIN, SIGTTIN); + DEFINE(_SIGTTOU, SIGTTOU); + DEFINE(_SIGVTALRM, SIGVTALRM); + DEFINE(_SIGPROF, SIGPROF); + DEFINE(_SIGXCPU, SIGXCPU); + DEFINE(_SIGXFSZ, SIGXFSZ); + BLANK(); +} diff --git a/arch/loongarch/kernel/cacheinfo.c b/arch/loongarch/kernel/cacheinfo.c new file mode 100644 index 000000000000..32eee1cbffda --- /dev/null +++ b/arch/loongarch/kernel/cacheinfo.c @@ -0,0 +1,126 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * LoongArch cacheinfo support + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#include +#include + +/* Populates leaf and increments to next leaf */ +#define populate_cache(cache, leaf, c_level, c_type) \ +do { \ + leaf->type = c_type; \ + leaf->level = c_level; \ + leaf->coherency_line_size = c->cache.linesz; \ + leaf->number_of_sets = c->cache.sets; \ + leaf->ways_of_associativity = c->cache.ways; \ + leaf->size = c->cache.linesz * c->cache.sets * \ + c->cache.ways; \ + leaf++; \ +} while (0) + +int __init_cache_level(unsigned int cpu) +{ + struct cpuinfo_loongarch *c = ¤t_cpu_data; + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); + int levels = 0, leaves = 0; + + /* + * If Dcache is not set, we assume the cache structures + * are not properly initialized. + */ + if (c->dcache.waysize) + levels += 1; + else + return -ENOENT; + + + leaves += (c->icache.waysize) ? 2 : 1; + + if (c->vcache.waysize) { + levels++; + leaves++; + } + + if (c->scache.waysize) { + levels++; + leaves++; + } + + if (c->tcache.waysize) { + levels++; + leaves++; + } + + this_cpu_ci->num_levels = levels; + this_cpu_ci->num_leaves = leaves; + return 0; +} + +static inline bool cache_leaves_are_shared(struct cacheinfo *this_leaf, + struct cacheinfo *sib_leaf) +{ + return !((this_leaf->level == 1) || (this_leaf->level == 2)); +} + +static void __cache_cpumap_setup(unsigned int cpu) +{ + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); + struct cacheinfo *this_leaf, *sib_leaf; + unsigned int index; + + for (index = 0; index < this_cpu_ci->num_leaves; index++) { + unsigned int i; + + this_leaf = this_cpu_ci->info_list + index; + /* skip if shared_cpu_map is already populated */ + if (!cpumask_empty(&this_leaf->shared_cpu_map)) + continue; + + cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map); + for_each_online_cpu(i) { + struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i); + + if (i == cpu || !sib_cpu_ci->info_list) + continue;/* skip if itself or no cacheinfo */ + sib_leaf = sib_cpu_ci->info_list + index; + if (cache_leaves_are_shared(this_leaf, sib_leaf)) { + cpumask_set_cpu(cpu, &sib_leaf->shared_cpu_map); + cpumask_set_cpu(i, &this_leaf->shared_cpu_map); + } + } + } +} + +int __populate_cache_leaves(unsigned int cpu) +{ + int level = 1; + + struct cpuinfo_loongarch *c = ¤t_cpu_data; + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); + struct cacheinfo *this_leaf = this_cpu_ci->info_list; + + if (c->icache.waysize) { + populate_cache(dcache, this_leaf, level, CACHE_TYPE_DATA); + populate_cache(icache, this_leaf, level++, CACHE_TYPE_INST); + } else { + populate_cache(dcache, this_leaf, level++, CACHE_TYPE_UNIFIED); + } + + if (c->vcache.waysize) + populate_cache(vcache, this_leaf, level++, CACHE_TYPE_UNIFIED); + + if (c->scache.waysize) + populate_cache(scache, this_leaf, level++, CACHE_TYPE_UNIFIED); + + if (c->tcache.waysize) + populate_cache(tcache, this_leaf, level++, CACHE_TYPE_UNIFIED); + + if (!of_have_populated_dt()) { + __cache_cpumap_setup(cpu); + this_cpu_ci->cpu_map_populated = true; + } + + return 0; +} diff --git a/arch/loongarch/kernel/cmdline.c b/arch/loongarch/kernel/cmdline.c new file mode 100644 index 000000000000..56b5cbe3fa4f --- /dev/null +++ b/arch/loongarch/kernel/cmdline.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include + +#include +#include + +int fw_argc; +long *_fw_argv, *_fw_envp; + +void __init fw_init_cmdline(void) +{ + int i; + + fw_argc = fw_arg0; + _fw_argv = (long *)early_memremap_ro(fw_arg1, SZ_16K); + _fw_envp = (long *)early_memremap_ro(fw_arg2, SZ_64K); + + arcs_cmdline[0] = '\0'; + for (i = 1; i < fw_argc; i++) { + strlcat(arcs_cmdline, fw_argv(i), COMMAND_LINE_SIZE); + if (i < (fw_argc - 1)) + strlcat(arcs_cmdline, " ", COMMAND_LINE_SIZE); + } +} diff --git a/arch/loongarch/kernel/cmpxchg.c b/arch/loongarch/kernel/cmpxchg.c new file mode 100644 index 000000000000..aac3547c1779 --- /dev/null +++ b/arch/loongarch/kernel/cmpxchg.c @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Author: Huacai Chen + * Copyright (C) 2020-2021 Loongson Technology Corporation Limited + * + * Derived from MIPS: + * Copyright (C) 2017 Imagination Technologies + * Author: Paul Burton + */ + +#include +#include + +unsigned long __xchg_small(volatile void *ptr, unsigned long val, unsigned int size) +{ + u32 old32, mask, temp; + volatile u32 *ptr32; + unsigned int shift; + + /* Check that ptr is naturally aligned */ + WARN_ON((unsigned long)ptr & (size - 1)); + + /* Mask value to the correct size. */ + mask = GENMASK((size * BITS_PER_BYTE) - 1, 0); + val &= mask; + + /* + * Calculate a shift & mask that correspond to the value we wish to + * exchange within the naturally aligned 4 byte integerthat includes + * it. + */ + shift = (unsigned long)ptr & 0x3; + shift *= BITS_PER_BYTE; + mask <<= shift; + + /* + * Calculate a pointer to the naturally aligned 4 byte integer that + * includes our byte of interest, and load its value. + */ + ptr32 = (volatile u32 *)((unsigned long)ptr & ~0x3); + + asm volatile ( + "1: ll.w %0, %3 \n" + " and %1, %0, %4 \n" + " or %1, %1, %5 \n" + " sc.w %1, %2 \n" + " beqz %1, 1b \n" + : "=&r" (old32), "=&r" (temp), "=" GCC_OFF_SMALL_ASM() (*ptr32) + : GCC_OFF_SMALL_ASM() (*ptr32), "Jr" (~mask), "Jr" (val << shift) + : "memory"); + + return (old32 & mask) >> shift; +} + +unsigned long __cmpxchg_small(volatile void *ptr, unsigned long old, + unsigned long new, unsigned int size) +{ + u32 old32, mask, temp; + volatile u32 *ptr32; + unsigned int shift; + + /* Check that ptr is naturally aligned */ + WARN_ON((unsigned long)ptr & (size - 1)); + + /* Mask inputs to the correct size. */ + mask = GENMASK((size * BITS_PER_BYTE) - 1, 0); + old &= mask; + new &= mask; + + /* + * Calculate a shift & mask that correspond to the value we wish to + * compare & exchange within the naturally aligned 4 byte integer + * that includes it. + */ + shift = (unsigned long)ptr & 0x3; + shift *= BITS_PER_BYTE; + old <<= shift; + new <<= shift; + mask <<= shift; + + /* + * Calculate a pointer to the naturally aligned 4 byte integer that + * includes our byte of interest, and load its value. + */ + ptr32 = (volatile u32 *)((unsigned long)ptr & ~0x3); + + asm volatile ( + "1: ll.w %0, %3 \n" + " and %1, %0, %4 \n" + " bne %1, %5, 2f \n" + " or %1, %1, %6 \n" + " sc.w %1, %2 \n" + " beqz %1, 1b \n" + "2: \n" + : "=&r" (old32), "=&r" (temp), "=" GCC_OFF_SMALL_ASM() (*ptr32) + : GCC_OFF_SMALL_ASM() (*ptr32), "Jr" (~mask), "Jr" (old), "Jr" (new) + : "memory"); + + return (old32 & mask) >> shift; +} diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c new file mode 100644 index 000000000000..dcf58e464dce --- /dev/null +++ b/arch/loongarch/kernel/cpu-probe.c @@ -0,0 +1,305 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Processor capabilities determination functions. + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +/* Hardware capabilities */ +unsigned int elf_hwcap __read_mostly; +EXPORT_SYMBOL_GPL(elf_hwcap); + +/* + * Determine the FCSR mask for FPU hardware. + */ +static inline void cpu_set_fpu_fcsr_mask(struct cpuinfo_loongarch *c) +{ + unsigned long sr, mask, fcsr, fcsr0, fcsr1; + + fcsr = c->fpu_csr0; + mask = FPU_CSR_ALL_X | FPU_CSR_ALL_E | FPU_CSR_ALL_S | FPU_CSR_RM; + + sr = read_csr_euen(); + enable_fpu(); + + fcsr0 = fcsr & mask; + write_fcsr(LOONGARCH_FCSR0, fcsr0); + fcsr0 = read_fcsr(LOONGARCH_FCSR0); + + fcsr1 = fcsr | ~mask; + write_fcsr(LOONGARCH_FCSR0, fcsr1); + fcsr1 = read_fcsr(LOONGARCH_FCSR0); + + write_fcsr(LOONGARCH_FCSR0, fcsr); + + write_csr_euen(sr); + + c->fpu_mask = ~(fcsr0 ^ fcsr1) & ~mask; +} + +static inline void set_elf_platform(int cpu, const char *plat) +{ + if (cpu == 0) + __elf_platform = plat; +} + +/* MAP BASE */ +unsigned long vm_map_base; +EXPORT_SYMBOL_GPL(vm_map_base); + +static void cpu_probe_addrbits(struct cpuinfo_loongarch *c) +{ +#ifdef __NEED_ADDRBITS_PROBE + c->pabits = (read_cpucfg(LOONGARCH_CPUCFG1) & CPUCFG1_PABITS) >> 4; + c->vabits = (read_cpucfg(LOONGARCH_CPUCFG1) & CPUCFG1_VABITS) >> 12; + vm_map_base = 0UL - (1UL << c->vabits); +#endif +} + +static void set_isa(struct cpuinfo_loongarch *c, unsigned int isa) +{ + switch (isa) { + case LOONGARCH_CPU_ISA_LA64: + c->isa_level |= LOONGARCH_CPU_ISA_LA64; + fallthrough; + case LOONGARCH_CPU_ISA_LA32S: + c->isa_level |= LOONGARCH_CPU_ISA_LA32S; + fallthrough; + case LOONGARCH_CPU_ISA_LA32R: + c->isa_level |= LOONGARCH_CPU_ISA_LA32R; + break; + } +} + +static void cpu_probe_common(struct cpuinfo_loongarch *c) +{ + unsigned int config; + unsigned long asid_mask; + + c->options = LOONGARCH_CPU_CPUCFG | LOONGARCH_CPU_CSR | + LOONGARCH_CPU_TLB | LOONGARCH_CPU_VINT | LOONGARCH_CPU_WATCH; + + elf_hwcap |= HWCAP_LOONGARCH_CRC32; + + config = read_cpucfg(LOONGARCH_CPUCFG1); + if (config & CPUCFG1_UAL) { + c->options |= LOONGARCH_CPU_UAL; + elf_hwcap |= HWCAP_LOONGARCH_UAL; + } + + config = read_cpucfg(LOONGARCH_CPUCFG2); + if (config & CPUCFG2_LAM) { + c->options |= LOONGARCH_CPU_LAM; + elf_hwcap |= HWCAP_LOONGARCH_LAM; + } + if (config & CPUCFG2_FP) { + c->options |= LOONGARCH_CPU_FPU; + elf_hwcap |= HWCAP_LOONGARCH_FPU; + } + if (config & CPUCFG2_COMPLEX) { + c->options |= LOONGARCH_CPU_COMPLEX; + elf_hwcap |= HWCAP_LOONGARCH_COMPLEX; + } + if (config & CPUCFG2_CRYPTO) { + c->options |= LOONGARCH_CPU_CRYPTO; + elf_hwcap |= HWCAP_LOONGARCH_CRYPTO; + } + if (config & CPUCFG2_LVZP) { + c->options |= LOONGARCH_CPU_LVZ; + elf_hwcap |= HWCAP_LOONGARCH_LVZ; + } + + config = read_cpucfg(LOONGARCH_CPUCFG6); + if (config & CPUCFG6_PMP) + c->options |= LOONGARCH_CPU_PMP; + + config = iocsr_readl(LOONGARCH_IOCSR_FEATURES); + if (config & IOCSRF_CSRIPI) + c->options |= LOONGARCH_CPU_CSRIPI; + if (config & IOCSRF_EXTIOI) + c->options |= LOONGARCH_CPU_EXTIOI; + if (config & IOCSRF_FREQSCALE) + c->options |= LOONGARCH_CPU_SCALEFREQ; + if (config & IOCSRF_FLATMODE) + c->options |= LOONGARCH_CPU_FLATMODE; + if (config & IOCSRF_EIODECODE) + c->options |= LOONGARCH_CPU_EIODECODE; + if (config & IOCSRF_VM) + c->options |= LOONGARCH_CPU_HYPERVISOR; + + config = csr_readl(LOONGARCH_CSR_ASID); + config = (config & CSR_ASID_BIT) >> CSR_ASID_BIT_SHIFT; + asid_mask = GENMASK(config - 1, 0); + set_cpu_asid_mask(c, asid_mask); + + config = read_csr_prcfg1(); + c->kscratch_mask = GENMASK((config & CSR_CONF1_KSNUM) - 1, 0); + c->kscratch_mask &= ~(EXC_KSCRATCH_MASK | PERCPU_KSCRATCH_MASK | KVM_KSCRATCH_MASK); + + config = read_csr_prcfg3(); + switch (config & CSR_CONF3_TLBTYPE) { + case 0: + c->tlbsizemtlb = 0; + c->tlbsizestlbsets = 0; + c->tlbsizestlbways = 0; + c->tlbsize = 0; + break; + case 1: + c->tlbsizemtlb = ((config & CSR_CONF3_MTLBSIZE) >> CSR_CONF3_MTLBSIZE_SHIFT) + 1; + c->tlbsizestlbsets = 0; + c->tlbsizestlbways = 0; + c->tlbsize = c->tlbsizemtlb + c->tlbsizestlbsets * c->tlbsizestlbways; + break; + case 2: + c->tlbsizemtlb = ((config & CSR_CONF3_MTLBSIZE) >> CSR_CONF3_MTLBSIZE_SHIFT) + 1; + c->tlbsizestlbsets = 1 << ((config & CSR_CONF3_STLBIDX) >> CSR_CONF3_STLBIDX_SHIFT); + c->tlbsizestlbways = ((config & CSR_CONF3_STLBWAYS) >> CSR_CONF3_STLBWAYS_SHIFT) + 1; + c->tlbsize = c->tlbsizemtlb + c->tlbsizestlbsets * c->tlbsizestlbways; + break; + default: + pr_warn("Warning: unimplemented tlb type\n"); + } +} + +#define MAX_NAME_LEN 32 +#define VENDOR_OFFSET 0 +#define CPUNAME_OFFSET 9 + +static char cpu_full_name[MAX_NAME_LEN] = " - "; + +static inline void cpu_probe_loongson(struct cpuinfo_loongarch *c, unsigned int cpu) +{ + uint64_t *vendor = (void *)(&cpu_full_name[VENDOR_OFFSET]); + uint64_t *cpuname = (void *)(&cpu_full_name[CPUNAME_OFFSET]); + + __cpu_full_name[cpu] = cpu_full_name; + *vendor = iocsr_readq(LOONGARCH_IOCSR_VENDOR); + *cpuname = iocsr_readq(LOONGARCH_IOCSR_CPUNAME); + + switch (c->processor_id & PRID_IMP_MASK) { + case PRID_IMP_LOONGSON_32: + c->cputype = CPU_LOONGSON32; + set_isa(c, LOONGARCH_CPU_ISA_LA32S); + __cpu_family[cpu] = "Loongson-32bit"; + pr_info("Standard 32-bit Loongson Processor probed\n"); + break; + case PRID_IMP_LOONGSON_64R: + c->cputype = CPU_LOONGSON64; + set_isa(c, LOONGARCH_CPU_ISA_LA64); + __cpu_family[cpu] = "Loongson-64bit"; + pr_info("Reduced 64-bit Loongson Processor probed\n"); + break; + case PRID_IMP_LOONGSON_64C: + c->cputype = CPU_LOONGSON64; + set_isa(c, LOONGARCH_CPU_ISA_LA64); + __cpu_family[cpu] = "Loongson-64bit"; + pr_info("Classic 64-bit Loongson Processor probed\n"); + break; + case PRID_IMP_LOONGSON_64G: + c->cputype = CPU_LOONGSON64; + set_isa(c, LOONGARCH_CPU_ISA_LA64); + __cpu_family[cpu] = "Loongson-64bit"; + pr_info("Generic 64-bit Loongson Processor probed\n"); + break; + default: /* Default to 64 bit */ + c->cputype = CPU_LOONGSON64; + set_isa(c, LOONGARCH_CPU_ISA_LA64); + __cpu_family[cpu] = "Loongson-64bit"; + pr_info("Unknown 64-bit Loongson Processor probed\n"); + } +} + +#ifdef CONFIG_64BIT +/* For use by uaccess.h */ +u64 __ua_limit; +EXPORT_SYMBOL(__ua_limit); +#endif + +const char *__cpu_family[NR_CPUS]; +const char *__cpu_full_name[NR_CPUS]; +const char *__elf_platform; + +static void cpu_report(void) +{ + struct cpuinfo_loongarch *c = ¤t_cpu_data; + + pr_info("CPU%d revision is: %08x (%s)\n", + smp_processor_id(), c->processor_id, cpu_family_string()); + if (c->options & LOONGARCH_CPU_FPU) + pr_info("FPU%d revision is: %08x\n", smp_processor_id(), c->fpu_vers); +} + +void cpu_probe(void) +{ + unsigned int cpu = smp_processor_id(); + struct cpuinfo_loongarch *c = ¤t_cpu_data; + + /* + * Set a default elf platform, cpu probe may later + * overwrite it with a more precise value + */ + set_elf_platform(cpu, "loongarch"); + + c->cputype = CPU_UNKNOWN; + c->processor_id = read_cpucfg(LOONGARCH_CPUCFG0); + c->fpu_vers = (read_cpucfg(LOONGARCH_CPUCFG2) >> 3) & 0x3; + + c->fpu_csr0 = FPU_CSR_RN; + c->fpu_mask = FPU_CSR_RSVD; + + cpu_probe_common(c); + + per_cpu_trap_init(cpu); + + switch (c->processor_id & PRID_COMP_MASK) { + case PRID_COMP_LOONGSON: + cpu_probe_loongson(c, cpu); + break; + } + + BUG_ON(!__cpu_family[cpu]); + BUG_ON(c->cputype == CPU_UNKNOWN); + + cpu_probe_addrbits(c); + +#ifdef CONFIG_64BIT + if (cpu == 0) + __ua_limit = ~((1ull << cpu_vabits) - 1); +#endif + + cpu_report(); +} + +void cpu_set_cluster(struct cpuinfo_loongarch *cpuinfo, unsigned int cluster) +{ + /* Ensure the core number fits in the field */ + WARN_ON(cluster > (LOONGARCH_GLOBALNUMBER_CLUSTER >> + LOONGARCH_GLOBALNUMBER_CLUSTER_SHF)); + + cpuinfo->globalnumber &= ~LOONGARCH_GLOBALNUMBER_CLUSTER; + cpuinfo->globalnumber |= cluster << LOONGARCH_GLOBALNUMBER_CLUSTER_SHF; +} + +void cpu_set_core(struct cpuinfo_loongarch *cpuinfo, unsigned int core) +{ + /* Ensure the core number fits in the field */ + WARN_ON(core > (LOONGARCH_GLOBALNUMBER_CORE >> LOONGARCH_GLOBALNUMBER_CORE_SHF)); + + cpuinfo->globalnumber &= ~LOONGARCH_GLOBALNUMBER_CORE; + cpuinfo->globalnumber |= core << LOONGARCH_GLOBALNUMBER_CORE_SHF; +} diff --git a/arch/loongarch/kernel/efi.c b/arch/loongarch/kernel/efi.c new file mode 100644 index 000000000000..e710a90ce6c0 --- /dev/null +++ b/arch/loongarch/kernel/efi.c @@ -0,0 +1,213 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * EFI partition + * + * Just for ACPI here, complete it when implementing EFI runtime. + * + * Copyright (C) 2020 Loongson Technology Co., Ltd. + * + * Jianmin Lv: + * Huacai Chen: + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +static unsigned long efi_nr_tables; +static unsigned long efi_config_table; +static efi_system_table_t *efi_systab; +static efi_config_table_type_t arch_tables[] __initdata = {{},}; + +static void __init create_tlb(u32 index, u64 vppn, u32 ps, u32 mat) +{ + unsigned long tlblo0, tlblo1; + + write_csr_pagesize(ps); + + tlblo0 = vppn | CSR_TLBLO0_V | CSR_TLBLO0_WE | + CSR_TLBLO0_GLOBAL | (mat << CSR_TLBLO0_CCA_SHIFT); + tlblo1 = tlblo0 + (1 << ps); + + csr_writeq(vppn, LOONGARCH_CSR_TLBEHI); + csr_writeq(tlblo0, LOONGARCH_CSR_TLBELO0); + csr_writeq(tlblo1, LOONGARCH_CSR_TLBELO1); + csr_xchgl(0, CSR_TLBIDX_EHINV, LOONGARCH_CSR_TLBIDX); + csr_xchgl(index, CSR_TLBIDX_IDX, LOONGARCH_CSR_TLBIDX); + + tlb_write_indexed(); +} + +#define MTLB_ENTRY_INDEX 0x800 + +/* Create VA == PA mapping as UEFI */ +static void __init fix_efi_mapping(void) +{ + unsigned int i; + unsigned int index = MTLB_ENTRY_INDEX; + unsigned int tlbnr = boot_cpu_data.tlbsizemtlb - 2; + unsigned long vppn; + + /* Low Memory, Cached */ + create_tlb(index++, 0x00000000, PS_256M, 1); + /* MMIO Registers, Uncached */ + create_tlb(index++, 0x10000000, PS_256M, 0); + + /* High Memory, Cached */ + for (i = 0; i < tlbnr; i++) { + vppn = 0x80000000 + (SZ_2G * i); + create_tlb(index++, vppn, PS_1G, 1); + } +} + +/* + * set_virtual_map() - create a virtual mapping for the EFI memory map and call + * efi_set_virtual_address_map enter virtual for runtime service + * + * This function populates the virt_addr fields of all memory region descriptors + * in @memory_map whose EFI_MEMORY_RUNTIME attribute is set. Those descriptors + * are also copied to @runtime_map, and their total count is returned in @count. + */ +static unsigned int __init set_virtual_map(void) +{ + int i, count = 0; + unsigned int size; + unsigned long attr; + efi_status_t status; + efi_runtime_services_t *rt; + efi_set_virtual_address_map_t *svam; + efi_memory_desc_t *runtime_map, *out; + struct loongsonlist_mem_map *map = loongson_mem_map; + + if (loongson_sysconf.bpi_ver < BPI_VERSION_V3) + return EFI_SUCCESS; + + size = sizeof(struct efi_mmap); + out = runtime_map = (efi_memory_desc_t *)&map->map[EFI_RUNTIME_MAP_START]; + + for (i = 0; i < map->map_count; i++) { + attr = map->map[i].attribute; + if (!(attr & EFI_MEMORY_RUNTIME)) + continue; + + map->map[i].mem_vaddr = TO_CAC(map->map[i].mem_start); + map->map[i].mem_size = map->map[i].mem_size >> EFI_PAGE_SHIFT; + + memcpy(out, &map->map[i], size); + out = (void *)out + size; + ++count; + + } + + rt = early_memremap_ro((unsigned long)efi_systab->runtime, sizeof(*rt)); + + /* Install the new virtual address map */ + svam = rt->set_virtual_address_map; + + fix_efi_mapping(); + + status = svam(size * count, size, map->desc_version, + (efi_memory_desc_t *)TO_PHYS((unsigned long)runtime_map)); + + local_flush_tlb_all(); + + if (status != EFI_SUCCESS) + return -1; + + return 0; +} + +void __init efi_runtime_init(void) +{ + efi_status_t status; + + if (!efi_enabled(EFI_BOOT)) + return; + + if (!efi_systab->runtime) + return; + + status = set_virtual_map(); + if (status < 0) + return; + + if (efi_runtime_disabled()) { + pr_info("EFI runtime services will be disabled.\n"); + return; + } + + efi.runtime = (efi_runtime_services_t *)efi_systab->runtime; + efi.runtime_version = (unsigned int)efi.runtime->hdr.revision; + + efi_native_runtime_setup(); + set_bit(EFI_RUNTIME_SERVICES, &efi.flags); +} + +void __init efi_init(void) +{ + int size; + void *config_tables; + + if (!efi_bp) + return; + + efi_systab = (efi_system_table_t *)early_memremap_ro + ((unsigned long)efi_bp->systemtable, sizeof(efi_systab)); + + if (!efi_systab) { + pr_err("Can't find EFI system table.\n"); + return; + } + + set_bit(EFI_64BIT, &efi.flags); + efi_nr_tables = efi_systab->nr_tables; + efi_config_table = (unsigned long)efi_systab->tables; + + size = sizeof(efi_config_table_t); + config_tables = early_memremap(efi_config_table, efi_nr_tables * size); + efi_config_parse_tables(config_tables, efi_systab->nr_tables, arch_tables); + early_memunmap(config_tables, efi_nr_tables * size); +} + +static ssize_t boardinfo_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, + "BIOS Information\n" + "Vendor\t\t\t: %s\n" + "Version\t\t\t: %s\n" + "ROM Size\t\t: %d KB\n" + "Release Date\t\t: %s\n\n" + "Board Information\n" + "Manufacturer\t\t: %s\n" + "Board Name\t\t: %s\n" + "Family\t\t\t: LOONGSON64\n\n", + b_info.bios_vendor, b_info.bios_version, + b_info.bios_size, b_info.bios_release_date, + b_info.board_vendor, b_info.board_name); +} + +static struct kobj_attribute boardinfo_attr = __ATTR(boardinfo, 0444, + boardinfo_show, NULL); + +static int __init boardinfo_init(void) +{ + if (!efi_kobj) + return -EINVAL; + + return sysfs_create_file(efi_kobj, &boardinfo_attr.attr); +} +late_initcall(boardinfo_init); diff --git a/arch/loongarch/kernel/elf.c b/arch/loongarch/kernel/elf.c new file mode 100644 index 000000000000..900e8d377b7a --- /dev/null +++ b/arch/loongarch/kernel/elf.c @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Author: Huacai Chen + * Copyright (C) 2020 Loongson Technology Corporation Limited + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include +#include +#include +#include + +#include +#include + +int arch_elf_pt_proc(void *_ehdr, void *_phdr, struct file *elf, + bool is_interp, struct arch_elf_state *state) +{ + return 0; +} + +int arch_check_elf(void *_ehdr, bool has_interpreter, void *_interp_ehdr, + struct arch_elf_state *state) +{ + return 0; +} + +void loongarch_set_personality_fcsr(struct arch_elf_state *state) +{ + current->thread.fpu.fcsr = boot_cpu_data.fpu_csr0; +} diff --git a/arch/loongarch/kernel/entry.S b/arch/loongarch/kernel/entry.S new file mode 100644 index 000000000000..1803840381d3 --- /dev/null +++ b/arch/loongarch/kernel/entry.S @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ + +#include +#include +#include +#include +#include +#include +#include + + .text + .cfi_sections .debug_frame + .align 5 +SYM_FUNC_START(handle_syscall) + csrrd t0, PERCPU_BASE_KS + la.abs t1, kernelsp + add.d t1, t1, t0 + move t2, sp + ld.d sp, t1, 0 + + addi.d sp, sp, -PT_SIZE + cfi_st t2, PT_R3 + cfi_rel_offset sp, PT_R3 + st.d zero, sp, PT_R0 + csrrd t2, LOONGARCH_CSR_PRMD + st.d t2, sp, PT_PRMD + csrrd t2, LOONGARCH_CSR_CRMD + st.d t2, sp, PT_CRMD + csrrd t2, LOONGARCH_CSR_EUEN + st.d t2, sp, PT_EUEN + csrrd t2, LOONGARCH_CSR_ECFG + st.d t2, sp, PT_ECFG + csrrd t2, LOONGARCH_CSR_ESTAT + st.d t2, sp, PT_ESTAT + cfi_st ra, PT_R1 + cfi_st a0, PT_R4 + cfi_st a1, PT_R5 + cfi_st a2, PT_R6 + cfi_st a3, PT_R7 + cfi_st a4, PT_R8 + cfi_st a5, PT_R9 + cfi_st a6, PT_R10 + cfi_st a7, PT_R11 + csrrd ra, LOONGARCH_CSR_ERA + st.d ra, sp, PT_ERA + cfi_rel_offset ra, PT_ERA + + cfi_st tp, PT_R2 + cfi_st u0, PT_R21 + cfi_st fp, PT_R22 + + SAVE_STATIC + + move u0, t0 + li.d tp, ~_THREAD_MASK + and tp, tp, sp + + move a0, sp + bl do_syscall + + RESTORE_ALL_AND_RET +SYM_FUNC_END(handle_syscall) + +SYM_CODE_START(ret_from_fork) + bl schedule_tail # a0 = struct task_struct *prev + move a0, sp + bl syscall_exit_to_user_mode + RESTORE_STATIC + RESTORE_SOME + RESTORE_SP_AND_RET +SYM_CODE_END(ret_from_fork) + +SYM_CODE_START(ret_from_kernel_thread) + bl schedule_tail # a0 = struct task_struct *prev + move a0, s1 + jirl ra, s0, 0 + move a0, sp + bl syscall_exit_to_user_mode + RESTORE_STATIC + RESTORE_SOME + RESTORE_SP_AND_RET +SYM_CODE_END(ret_from_kernel_thread) diff --git a/arch/loongarch/kernel/fpu.S b/arch/loongarch/kernel/fpu.S new file mode 100644 index 000000000000..52a956a1a64c --- /dev/null +++ b/arch/loongarch/kernel/fpu.S @@ -0,0 +1,267 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2019 Pei Huang + * Copyright (C) 2019 Lu Zeng + * Copyright (C) 2020 Huacai Chen + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#undef v0 +#undef v1 + +#define FPU_REG_WIDTH 8 +#define LSX_REG_WIDTH 16 +#define LASX_REG_WIDTH 32 + + .macro EX insn, reg, src, offs +.ex\@: \insn \reg, \src, \offs + .section __ex_table,"a" + PTR .ex\@, fault + .previous + .endm + + .macro sc_save_fp base + EX fst.d $f0, \base, (0 * FPU_REG_WIDTH) + EX fst.d $f1, \base, (1 * FPU_REG_WIDTH) + EX fst.d $f2, \base, (2 * FPU_REG_WIDTH) + EX fst.d $f3, \base, (3 * FPU_REG_WIDTH) + EX fst.d $f4, \base, (4 * FPU_REG_WIDTH) + EX fst.d $f5, \base, (5 * FPU_REG_WIDTH) + EX fst.d $f6, \base, (6 * FPU_REG_WIDTH) + EX fst.d $f7, \base, (7 * FPU_REG_WIDTH) + EX fst.d $f8, \base, (8 * FPU_REG_WIDTH) + EX fst.d $f9, \base, (9 * FPU_REG_WIDTH) + EX fst.d $f10, \base, (10 * FPU_REG_WIDTH) + EX fst.d $f11, \base, (11 * FPU_REG_WIDTH) + EX fst.d $f12, \base, (12 * FPU_REG_WIDTH) + EX fst.d $f13, \base, (13 * FPU_REG_WIDTH) + EX fst.d $f14, \base, (14 * FPU_REG_WIDTH) + EX fst.d $f15, \base, (15 * FPU_REG_WIDTH) + EX fst.d $f16, \base, (16 * FPU_REG_WIDTH) + EX fst.d $f17, \base, (17 * FPU_REG_WIDTH) + EX fst.d $f18, \base, (18 * FPU_REG_WIDTH) + EX fst.d $f19, \base, (19 * FPU_REG_WIDTH) + EX fst.d $f20, \base, (20 * FPU_REG_WIDTH) + EX fst.d $f21, \base, (21 * FPU_REG_WIDTH) + EX fst.d $f22, \base, (22 * FPU_REG_WIDTH) + EX fst.d $f23, \base, (23 * FPU_REG_WIDTH) + EX fst.d $f24, \base, (24 * FPU_REG_WIDTH) + EX fst.d $f25, \base, (25 * FPU_REG_WIDTH) + EX fst.d $f26, \base, (26 * FPU_REG_WIDTH) + EX fst.d $f27, \base, (27 * FPU_REG_WIDTH) + EX fst.d $f28, \base, (28 * FPU_REG_WIDTH) + EX fst.d $f29, \base, (29 * FPU_REG_WIDTH) + EX fst.d $f30, \base, (30 * FPU_REG_WIDTH) + EX fst.d $f31, \base, (31 * FPU_REG_WIDTH) + .endm + + .macro sc_restore_fp base + EX fld.d $f0, \base, (0 * FPU_REG_WIDTH) + EX fld.d $f1, \base, (1 * FPU_REG_WIDTH) + EX fld.d $f2, \base, (2 * FPU_REG_WIDTH) + EX fld.d $f3, \base, (3 * FPU_REG_WIDTH) + EX fld.d $f4, \base, (4 * FPU_REG_WIDTH) + EX fld.d $f5, \base, (5 * FPU_REG_WIDTH) + EX fld.d $f6, \base, (6 * FPU_REG_WIDTH) + EX fld.d $f7, \base, (7 * FPU_REG_WIDTH) + EX fld.d $f8, \base, (8 * FPU_REG_WIDTH) + EX fld.d $f9, \base, (9 * FPU_REG_WIDTH) + EX fld.d $f10, \base, (10 * FPU_REG_WIDTH) + EX fld.d $f11, \base, (11 * FPU_REG_WIDTH) + EX fld.d $f12, \base, (12 * FPU_REG_WIDTH) + EX fld.d $f13, \base, (13 * FPU_REG_WIDTH) + EX fld.d $f14, \base, (14 * FPU_REG_WIDTH) + EX fld.d $f15, \base, (15 * FPU_REG_WIDTH) + EX fld.d $f16, \base, (16 * FPU_REG_WIDTH) + EX fld.d $f17, \base, (17 * FPU_REG_WIDTH) + EX fld.d $f18, \base, (18 * FPU_REG_WIDTH) + EX fld.d $f19, \base, (19 * FPU_REG_WIDTH) + EX fld.d $f20, \base, (20 * FPU_REG_WIDTH) + EX fld.d $f21, \base, (21 * FPU_REG_WIDTH) + EX fld.d $f22, \base, (22 * FPU_REG_WIDTH) + EX fld.d $f23, \base, (23 * FPU_REG_WIDTH) + EX fld.d $f24, \base, (24 * FPU_REG_WIDTH) + EX fld.d $f25, \base, (25 * FPU_REG_WIDTH) + EX fld.d $f26, \base, (26 * FPU_REG_WIDTH) + EX fld.d $f27, \base, (27 * FPU_REG_WIDTH) + EX fld.d $f28, \base, (28 * FPU_REG_WIDTH) + EX fld.d $f29, \base, (29 * FPU_REG_WIDTH) + EX fld.d $f30, \base, (30 * FPU_REG_WIDTH) + EX fld.d $f31, \base, (31 * FPU_REG_WIDTH) + .endm + + .macro sc_save_fcc base, tmp0, tmp1 + movcf2gr \tmp0, $fcc0 + move \tmp1, \tmp0 + movcf2gr \tmp0, $fcc1 + bstrins.d \tmp1, \tmp0, 15, 8 + movcf2gr \tmp0, $fcc2 + bstrins.d \tmp1, \tmp0, 23, 16 + movcf2gr \tmp0, $fcc3 + bstrins.d \tmp1, \tmp0, 31, 24 + movcf2gr \tmp0, $fcc4 + bstrins.d \tmp1, \tmp0, 39, 32 + movcf2gr \tmp0, $fcc5 + bstrins.d \tmp1, \tmp0, 47, 40 + movcf2gr \tmp0, $fcc6 + bstrins.d \tmp1, \tmp0, 55, 48 + movcf2gr \tmp0, $fcc7 + bstrins.d \tmp1, \tmp0, 63, 56 + EX st.d \tmp1, \base, 0 + .endm + + .macro sc_restore_fcc base, tmp0, tmp1 + EX ld.d \tmp0, \base, 0 + bstrpick.d \tmp1, \tmp0, 7, 0 + movgr2cf $fcc0, \tmp1 + bstrpick.d \tmp1, \tmp0, 15, 8 + movgr2cf $fcc1, \tmp1 + bstrpick.d \tmp1, \tmp0, 23, 16 + movgr2cf $fcc2, \tmp1 + bstrpick.d \tmp1, \tmp0, 31, 24 + movgr2cf $fcc3, \tmp1 + bstrpick.d \tmp1, \tmp0, 39, 32 + movgr2cf $fcc4, \tmp1 + bstrpick.d \tmp1, \tmp0, 47, 40 + movgr2cf $fcc5, \tmp1 + bstrpick.d \tmp1, \tmp0, 55, 48 + movgr2cf $fcc6, \tmp1 + bstrpick.d \tmp1, \tmp0, 63, 56 + movgr2cf $fcc7, \tmp1 + .endm + + .macro sc_save_fcsr base, tmp0 + movfcsr2gr \tmp0, fcsr0 + EX st.w \tmp0, \base, 0 + .endm + + .macro sc_restore_fcsr base, tmp0 + EX ld.w \tmp0, \base, 0 + movgr2fcsr fcsr0, \tmp0 + .endm + + .macro sc_save_vcsr base, tmp0 + movfcsr2gr \tmp0, vcsr16 + EX st.w \tmp0, \base, 0 + .endm + + .macro sc_restore_vcsr base, tmp0 + EX ld.w \tmp0, \base, 0 + movgr2fcsr vcsr16, \tmp0 + .endm + +/* + * Save a thread's fp context. + */ +SYM_FUNC_START(_save_fp) + fpu_save_csr a0 t1 + fpu_save_double a0 t1 # clobbers t1 + fpu_save_cc a0 t1 t2 # clobbers t1, t2 + jirl zero, ra, 0 +SYM_FUNC_END(_save_fp) +EXPORT_SYMBOL(_save_fp) + +/* + * Restore a thread's fp context. + */ +SYM_FUNC_START(_restore_fp) + fpu_restore_double a0 t1 # clobbers t1 + fpu_restore_csr a0 t1 + fpu_restore_cc a0 t1 t2 # clobbers t1, t2 + jirl zero, ra, 0 +SYM_FUNC_END(_restore_fp) + +/* + * Load the FPU with signalling NANS. This bit pattern we're using has + * the property that no matter whether considered as single or as double + * precision represents signaling NANS. + * + * The value to initialize fcsr0 to comes in $a0. + */ + +SYM_FUNC_START(_init_fpu) + li.w t1, CSR_EUEN_FPEN + csrxchg t1, t1, LOONGARCH_CSR_EUEN + + movgr2fcsr fcsr0, a0 + + li.w t1, -1 # SNaN + + movgr2fr.d $f0, t1 + movgr2fr.d $f1, t1 + movgr2fr.d $f2, t1 + movgr2fr.d $f3, t1 + movgr2fr.d $f4, t1 + movgr2fr.d $f5, t1 + movgr2fr.d $f6, t1 + movgr2fr.d $f7, t1 + movgr2fr.d $f8, t1 + movgr2fr.d $f9, t1 + movgr2fr.d $f10, t1 + movgr2fr.d $f11, t1 + movgr2fr.d $f12, t1 + movgr2fr.d $f13, t1 + movgr2fr.d $f14, t1 + movgr2fr.d $f15, t1 + movgr2fr.d $f16, t1 + movgr2fr.d $f17, t1 + movgr2fr.d $f18, t1 + movgr2fr.d $f19, t1 + movgr2fr.d $f20, t1 + movgr2fr.d $f21, t1 + movgr2fr.d $f22, t1 + movgr2fr.d $f23, t1 + movgr2fr.d $f24, t1 + movgr2fr.d $f25, t1 + movgr2fr.d $f26, t1 + movgr2fr.d $f27, t1 + movgr2fr.d $f28, t1 + movgr2fr.d $f29, t1 + movgr2fr.d $f30, t1 + movgr2fr.d $f31, t1 + + jirl zero, ra, 0 +SYM_FUNC_END(_init_fpu) + +/* + * a0: fpregs + * a1: fcc + * a2: fcsr + */ +SYM_FUNC_START(_save_fp_context) + sc_save_fcc a1 t1 t2 + sc_save_fcsr a2 t1 + sc_save_fp a0 + li.w a0, 0 # success + jirl zero, ra, 0 +SYM_FUNC_END(_save_fp_context) + +/* + * a0: fpregs + * a1: fcc + * a2: fcsr + */ +SYM_FUNC_START(_restore_fp_context) + sc_restore_fp a0 + sc_restore_fcc a1 t1 t2 + sc_restore_fcsr a2 t1 + li.w a0, 0 # success + jirl zero, ra, 0 +SYM_FUNC_END(_restore_fp_context) + +SYM_FUNC_START(fault) + li.w a0, -EFAULT # failure + jirl zero, ra, 0 +SYM_FUNC_END(fault) diff --git a/arch/loongarch/kernel/genex.S b/arch/loongarch/kernel/genex.S new file mode 100644 index 000000000000..03a058c48a5f --- /dev/null +++ b/arch/loongarch/kernel/genex.S @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020-2021 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include +#include +#include +#include + + .align 5 +SYM_FUNC_START(__arch_cpu_idle) + /* start of rollback region */ + LONG_L t0, tp, TI_FLAGS + nop + andi t0, t0, _TIF_NEED_RESCHED + bnez t0, 1f + nop + nop + nop + idle 0 + /* end of rollback region */ +1: jirl zero, ra, 0 +SYM_FUNC_END(__arch_cpu_idle) + +SYM_FUNC_START(handle_vint) + BACKUP_T0T1 + SAVE_ALL + la.abs t1, __arch_cpu_idle + LONG_L t0, sp, PT_ERA + /* 32 byte rollback region */ + ori t0, t0, 0x1f + xori t0, t0, 0x1f + bne t0, t1, 1f + LONG_S t0, sp, PT_ERA +1: move a0, sp + move a1, sp + la.abs t0, do_vint + jirl ra, t0, 0 + RESTORE_ALL_AND_RET +SYM_FUNC_END(handle_vint) + +SYM_FUNC_START(except_vec_cex) + b cache_parity_error +SYM_FUNC_END(except_vec_cex) + + .macro build_prep_badv + csrrd t0, LOONGARCH_CSR_BADV + PTR_S t0, sp, PT_BVADDR + .endm + + .macro build_prep_fcsr + movfcsr2gr a1, fcsr0 + .endm + + .macro build_prep_none + .endm + + .macro BUILD_HANDLER exception handler prep + .align 5 + SYM_FUNC_START(handle_\exception) + BACKUP_T0T1 + SAVE_ALL + build_prep_\prep + move a0, sp + la.abs t0, do_\handler + jirl ra, t0, 0 + RESTORE_ALL_AND_RET + SYM_FUNC_END(handle_\exception) + .endm + + BUILD_HANDLER ade ade badv + BUILD_HANDLER ale ale badv + BUILD_HANDLER bp bp none + BUILD_HANDLER fpe fpe fcsr + BUILD_HANDLER fpu fpu none + BUILD_HANDLER lsx lsx none + BUILD_HANDLER lasx lasx none + BUILD_HANDLER lbt lbt none + BUILD_HANDLER ri ri none + BUILD_HANDLER watch watch none + BUILD_HANDLER reserved reserved none /* others */ + +SYM_FUNC_START(handle_sys) + la.abs t0, handle_syscall + jirl zero, t0, 0 +SYM_FUNC_END(handle_sys) diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S new file mode 100644 index 000000000000..b4223842f690 --- /dev/null +++ b/arch/loongarch/kernel/head.S @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#include +#include + +#include +#include +#include +#include +#include +#include + +SYM_ENTRY(_stext, SYM_L_GLOBAL, SYM_A_NONE) + + __REF + +SYM_CODE_START(kernel_entry) # kernel entry point + + /* Config direct window and set PG */ + li.d t0, CSR_DMW0_INIT # UC, PLV0, 0x8000 xxxx xxxx xxxx + csrwr t0, LOONGARCH_CSR_DMWIN0 + li.d t0, CSR_DMW1_INIT # CA, PLV0, 0x9000 xxxx xxxx xxxx + csrwr t0, LOONGARCH_CSR_DMWIN1 + /* Enable PG */ + li.w t0, 0xb0 # PLV=0, IE=0, PG=1 + csrwr t0, LOONGARCH_CSR_CRMD + li.w t0, 0x04 # PLV=0, PIE=1, PWE=0 + csrwr t0, LOONGARCH_CSR_PRMD + li.w t0, 0x00 # FPE=0, SXE=0, ASXE=0, BTE=0 + csrwr t0, LOONGARCH_CSR_EUEN + + /* We might not get launched at the address the kernel is linked to, + so we jump there. */ + la.abs t0, 0f + jirl zero, t0, 0 +0: + la t0, __bss_start # clear .bss + st.d zero, t0, 0 + la t1, __bss_stop - LONGSIZE +1: + addi.d t0, t0, LONGSIZE + st.d zero, t0, 0 + bne t0, t1, 1b + + la t0, fw_arg0 + st.d a0, t0, 0 # firmware arguments + la t0, fw_arg1 + st.d a1, t0, 0 + la t0, fw_arg2 + st.d a2, t0, 0 + la t0, fw_arg3 + st.d a3, t0, 0 + + /* KScratch3 used for percpu base, initialized as 0 */ + csrwr zero, PERCPU_BASE_KS + /* GPR21 used for percpu base (runtime), initialized as 0 */ + or u0, zero, zero + + la tp, init_thread_union + /* Set the SP after an empty pt_regs. */ + PTR_LI sp, (_THREAD_SIZE - 32 - PT_SIZE) + PTR_ADDU sp, sp, tp + set_saved_sp sp, t0, t1 + PTR_ADDIU sp, sp, -4 * SZREG # init stack pointer + + bl start_kernel + +SYM_CODE_END(kernel_entry) + +SYM_ENTRY(kernel_entry_end, SYM_L_GLOBAL, SYM_A_NONE) diff --git a/arch/loongarch/kernel/idle.c b/arch/loongarch/kernel/idle.c new file mode 100644 index 000000000000..27030472ce84 --- /dev/null +++ b/arch/loongarch/kernel/idle.c @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * LoongArch idle loop support. + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include + +void __cpuidle arch_cpu_idle(void) +{ + raw_local_irq_enable(); + __arch_cpu_idle(); /* idle instruction needs irq enabled */ +} diff --git a/arch/loongarch/kernel/io.c b/arch/loongarch/kernel/io.c new file mode 100644 index 000000000000..07c5fb99cbb4 --- /dev/null +++ b/arch/loongarch/kernel/io.c @@ -0,0 +1,98 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#include +#include +#include + +/* + * Copy data from IO memory space to "real" memory space. + */ +void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t count) +{ + while (count && !IS_ALIGNED((unsigned long)from, 8)) { + *(u8 *)to = __raw_readb(from); + from++; + to++; + count--; + } + + while (count >= 8) { + *(u64 *)to = __raw_readq(from); + from += 8; + to += 8; + count -= 8; + } + + while (count) { + *(u8 *)to = __raw_readb(from); + from++; + to++; + count--; + } +} +EXPORT_SYMBOL(__memcpy_fromio); + +/* + * Copy data from "real" memory space to IO memory space. + */ +void __memcpy_toio(volatile void __iomem *to, const void *from, size_t count) +{ + while (count && !IS_ALIGNED((unsigned long)to, 8)) { + __raw_writeb(*(u8 *)from, to); + from++; + to++; + count--; + } + + while (count >= 8) { + __raw_writeq(*(u64 *)from, to); + from += 8; + to += 8; + count -= 8; + } + + while (count) { + __raw_writeb(*(u8 *)from, to); + from++; + to++; + count--; + } +} +EXPORT_SYMBOL(__memcpy_toio); + +/* + * "memset" on IO memory space. + */ +void __memset_io(volatile void __iomem *dst, int c, size_t count) +{ + u64 qc = (u8)c; + + qc |= qc << 8; + qc |= qc << 16; + qc |= qc << 32; + + while (count && !IS_ALIGNED((unsigned long)dst, 8)) { + __raw_writeb(c, dst); + dst++; + count--; + } + + while (count >= 8) { + __raw_writeq(qc, dst); + dst += 8; + count -= 8; + } + + while (count) { + __raw_writeb(c, dst); + dst++; + count--; + } +} +EXPORT_SYMBOL(__memset_io); diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c new file mode 100644 index 000000000000..3c97cbeac9f2 --- /dev/null +++ b/arch/loongarch/kernel/irq.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +DEFINE_PER_CPU(unsigned long, irq_stack); + +/* + * 'what should we do if we get a hw irq event on an illegal vector'. + * each architecture has to answer this themselves. + */ +void ack_bad_irq(unsigned int irq) +{ + printk("unexpected IRQ # %d\n", irq); +} + +atomic_t irq_err_count; + +int arch_show_interrupts(struct seq_file *p, int prec) +{ + seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count)); + return 0; +} + +asmlinkage void spurious_interrupt(void) +{ + atomic_inc(&irq_err_count); +} + +void __init init_IRQ(void) +{ + int i; + unsigned int order = get_order(IRQ_STACK_SIZE); + + for (i = 0; i < NR_IRQS; i++) + irq_set_noprobe(i); + + arch_init_irq(); + + for_each_possible_cpu(i) { + void *s = (void *)__get_free_pages(GFP_KERNEL, order); + + per_cpu(irq_stack, i) = (unsigned long)s; + pr_debug("CPU%d IRQ stack at 0x%lx - 0x%lx\n", i, + per_cpu(irq_stack, i), per_cpu(irq_stack, i) + IRQ_STACK_SIZE); + } +} diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c new file mode 100644 index 000000000000..daef311069c9 --- /dev/null +++ b/arch/loongarch/kernel/module.c @@ -0,0 +1,658 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* +* Copyright (C) 2020 Loongson Technology Corporation Limited +* +* Author: Hanlu Li +* Author: Huacai Chen +*/ + +#define pr_fmt(fmt) "kmod: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include + +static int rela_stack_push(s64 stack_value, s64 *rela_stack, size_t *rela_stack_top) +{ + if (RELA_STACK_DEPTH <= *rela_stack_top) + return -ENOEXEC; + + rela_stack[(*rela_stack_top)++] = stack_value; + pr_debug("%s stack_value = 0x%llx\n", __func__, stack_value); + + return 0; +} + +static int rela_stack_pop(s64 *stack_value, s64 *rela_stack, size_t *rela_stack_top) +{ + if (*rela_stack_top == 0) + return -ENOEXEC; + + *stack_value = rela_stack[--(*rela_stack_top)]; + pr_debug("%s stack_value = 0x%llx\n", __func__, *stack_value); + + return 0; +} + +static int apply_r_larch_none(struct module *mod, u32 *location, Elf_Addr v, + s64 *rela_stack, size_t *rela_stack_top) +{ + return 0; +} + +static int apply_r_larch_32(struct module *mod, u32 *location, Elf_Addr v, + s64 *rela_stack, size_t *rela_stack_top) +{ + *location = v; + return 0; +} + +static int apply_r_larch_64(struct module *mod, u32 *location, Elf_Addr v, + s64 *rela_stack, size_t *rela_stack_top) +{ + *(Elf_Addr *)location = v; + return 0; +} + +static int apply_r_larch_mark_la(struct module *mod, u32 *location, Elf_Addr v, + s64 *rela_stack, size_t *rela_stack_top) +{ + return 0; +} + +static int apply_r_larch_mark_pcrel(struct module *mod, u32 *location, Elf_Addr v, + s64 *rela_stack, size_t *rela_stack_top) +{ + return 0; +} + +static int apply_r_larch_sop_push_pcrel(struct module *mod, u32 *location, Elf_Addr v, + s64 *rela_stack, size_t *rela_stack_top) +{ + return rela_stack_push(v - (u64)location, rela_stack, rela_stack_top); +} + +static int apply_r_larch_sop_push_absolute(struct module *mod, u32 *location, Elf_Addr v, + s64 *rela_stack, size_t *rela_stack_top) +{ + return rela_stack_push(v, rela_stack, rela_stack_top); +} + +static int apply_r_larch_sop_push_dup(struct module *mod, u32 *location, Elf_Addr v, + s64 *rela_stack, size_t *rela_stack_top) +{ + int err = 0; + s64 opr1; + + err = rela_stack_pop(&opr1, rela_stack, rela_stack_top); + if (err) + return err; + err = rela_stack_push(opr1, rela_stack, rela_stack_top); + if (err) + return err; + err = rela_stack_push(opr1, rela_stack, rela_stack_top); + if (err) + return err; + + return 0; +} + +static int apply_r_larch_sop_push_plt_pcrel(struct module *mod, u32 *location, Elf_Addr v, + s64 *rela_stack, size_t *rela_stack_top) +{ + return apply_r_larch_sop_push_pcrel(mod, location, v, rela_stack, rela_stack_top); +} + +static int apply_r_larch_sop_sub(struct module *mod, u32 *location, Elf_Addr v, + s64 *rela_stack, size_t *rela_stack_top) +{ + int err = 0; + s64 opr1, opr2; + + err = rela_stack_pop(&opr2, rela_stack, rela_stack_top); + if (err) + return err; + err = rela_stack_pop(&opr1, rela_stack, rela_stack_top); + if (err) + return err; + err = rela_stack_push(opr1 - opr2, rela_stack, rela_stack_top); + if (err) + return err; + + return 0; +} + +static int apply_r_larch_sop_sl(struct module *mod, u32 *location, Elf_Addr v, + s64 *rela_stack, size_t *rela_stack_top) +{ + int err = 0; + s64 opr1, opr2; + + err = rela_stack_pop(&opr2, rela_stack, rela_stack_top); + if (err) + return err; + err = rela_stack_pop(&opr1, rela_stack, rela_stack_top); + if (err) + return err; + err = rela_stack_push(opr1 << opr2, rela_stack, rela_stack_top); + if (err) + return err; + + return 0; +} + +static int apply_r_larch_sop_sr(struct module *mod, u32 *location, Elf_Addr v, + s64 *rela_stack, size_t *rela_stack_top) +{ + int err = 0; + s64 opr1, opr2; + + err = rela_stack_pop(&opr2, rela_stack, rela_stack_top); + if (err) + return err; + err = rela_stack_pop(&opr1, rela_stack, rela_stack_top); + if (err) + return err; + err = rela_stack_push(opr1 >> opr2, rela_stack, rela_stack_top); + if (err) + return err; + + return 0; +} + +static int apply_r_larch_sop_add(struct module *mod, u32 *location, Elf_Addr v, + s64 *rela_stack, size_t *rela_stack_top) +{ + int err = 0; + s64 opr1, opr2; + + err = rela_stack_pop(&opr2, rela_stack, rela_stack_top); + if (err) + return err; + err = rela_stack_pop(&opr1, rela_stack, rela_stack_top); + if (err) + return err; + err = rela_stack_push(opr1 + opr2, rela_stack, rela_stack_top); + if (err) + return err; + + return 0; +} +static int apply_r_larch_sop_and(struct module *mod, u32 *location, Elf_Addr v, + s64 *rela_stack, size_t *rela_stack_top) +{ + int err = 0; + s64 opr1, opr2; + + err = rela_stack_pop(&opr2, rela_stack, rela_stack_top); + if (err) + return err; + err = rela_stack_pop(&opr1, rela_stack, rela_stack_top); + if (err) + return err; + err = rela_stack_push(opr1 & opr2, rela_stack, rela_stack_top); + if (err) + return err; + + return 0; +} + +static int apply_r_larch_sop_if_else(struct module *mod, u32 *location, Elf_Addr v, + s64 *rela_stack, size_t *rela_stack_top) +{ + int err = 0; + s64 opr1, opr2, opr3; + + err = rela_stack_pop(&opr3, rela_stack, rela_stack_top); + if (err) + return err; + err = rela_stack_pop(&opr2, rela_stack, rela_stack_top); + if (err) + return err; + err = rela_stack_pop(&opr1, rela_stack, rela_stack_top); + if (err) + return err; + err = rela_stack_push(opr1 ? opr2 : opr3, rela_stack, rela_stack_top); + if (err) + return err; + + return 0; +} + +static int apply_r_larch_sop_pop_32_s_10_5(struct module *mod, u32 *location, Elf_Addr v, + s64 *rela_stack, size_t *rela_stack_top) +{ + int err = 0; + s64 opr1; + + err = rela_stack_pop(&opr1, rela_stack, rela_stack_top); + if (err) + return err; + + /* check 5-bit signed */ + if ((opr1 & ~(u64)0xf) && + (opr1 & ~(u64)0xf) != ~(u64)0xf) { + pr_err("module %s: opr1 = 0x%llx overflow! dangerous %s relocation\n", + mod->name, opr1, __func__); + return -ENOEXEC; + } + + /* (*(uint32_t *) PC) [14 ... 10] = opr [4 ... 0] */ + *location = (*location & (~(u32)0x7c00)) | ((opr1 & 0x1f) << 10); + + return 0; +} + +static int apply_r_larch_sop_pop_32_u_10_12(struct module *mod, u32 *location, Elf_Addr v, + s64 *rela_stack, size_t *rela_stack_top) +{ + int err = 0; + s64 opr1; + + err = rela_stack_pop(&opr1, rela_stack, rela_stack_top); + if (err) + return err; + + /* check 12-bit unsigned */ + if (opr1 & ~(u64)0xfff) { + pr_err("module %s: opr1 = 0x%llx overflow! dangerous %s relocation\n", + mod->name, opr1, __func__); + return -ENOEXEC; + } + + /* (*(uint32_t *) PC) [21 ... 10] = opr [11 ... 0] */ + *location = (*location & (~(u32)0x3ffc00)) | ((opr1 & 0xfff) << 10); + + return 0; +} + +static int apply_r_larch_sop_pop_32_s_10_12(struct module *mod, u32 *location, Elf_Addr v, + s64 *rela_stack, size_t *rela_stack_top) +{ + int err = 0; + s64 opr1; + + err = rela_stack_pop(&opr1, rela_stack, rela_stack_top); + if (err) + return err; + + /* check 12-bit signed */ + if ((opr1 & ~(u64)0x7ff) && + (opr1 & ~(u64)0x7ff) != ~(u64)0x7ff) { + pr_err("module %s: opr1 = 0x%llx overflow! dangerous %s relocation\n", + mod->name, opr1, __func__); + return -ENOEXEC; + } + + /* (*(uint32_t *) PC) [21 ... 10] = opr [11 ... 0] */ + *location = (*location & (~(u32)0x3ffc00)) | ((opr1 & 0xfff) << 10); + + return 0; +} + +static int apply_r_larch_sop_pop_32_s_10_16(struct module *mod, u32 *location, Elf_Addr v, + s64 *rela_stack, size_t *rela_stack_top) +{ + int err = 0; + s64 opr1; + + err = rela_stack_pop(&opr1, rela_stack, rela_stack_top); + if (err) + return err; + + /* check 16-bit signed */ + if ((opr1 & ~(u64)0x7fff) && + (opr1 & ~(u64)0x7fff) != ~(u64)0x7fff) { + pr_err("module %s: opr1 = 0x%llx overflow! dangerous %s relocation\n", + mod->name, opr1, __func__); + return -ENOEXEC; + } + + /* (*(uint32_t *) PC) [25 ... 10] = opr [15 ... 0] */ + *location = (*location & 0xfc0003ff) | ((opr1 & 0xffff) << 10); + + return 0; +} + +static int apply_r_larch_sop_pop_32_s_10_16_s2(struct module *mod, u32 *location, Elf_Addr v, + s64 *rela_stack, size_t *rela_stack_top) +{ + int err = 0; + s64 opr1; + + err = rela_stack_pop(&opr1, rela_stack, rela_stack_top); + if (err) + return err; + + /* check 4-aligned */ + if (opr1 % 4) { + pr_err("module %s: opr1 = 0x%llx unaligned! dangerous %s relocation\n", + mod->name, opr1, __func__); + return -ENOEXEC; + } + + opr1 >>= 2; + /* check 18-bit signed */ + if ((opr1 & ~(u64)0x7fff) && + (opr1 & ~(u64)0x7fff) != ~(u64)0x7fff) { + pr_err("module %s: opr1 = 0x%llx overflow! dangerous %s relocation\n", + mod->name, opr1, __func__); + return -ENOEXEC; + } + + /* (*(uint32_t *) PC) [25 ... 10] = opr [17 ... 2] */ + *location = (*location & 0xfc0003ff) | ((opr1 & 0xffff) << 10); + + return 0; +} + +static int apply_r_larch_sop_pop_32_s_5_20(struct module *mod, u32 *location, Elf_Addr v, + s64 *rela_stack, size_t *rela_stack_top) +{ + int err = 0; + s64 opr1; + + err = rela_stack_pop(&opr1, rela_stack, rela_stack_top); + if (err) + return err; + + /* check 20-bit signed */ + if ((opr1 & ~(u64)0x7ffff) && + (opr1 & ~(u64)0x7ffff) != ~(u64)0x7ffff) { + pr_err("module %s: opr1 = 0x%llx overflow! dangerous %s relocation\n", + mod->name, opr1, __func__); + return -ENOEXEC; + } + + /* (*(uint32_t *) PC) [24 ... 5] = opr [19 ... 0] */ + *location = (*location & (~(u32)0x1ffffe0)) | ((opr1 & 0xfffff) << 5); + + return 0; +} + +static int apply_r_larch_sop_pop_32_s_0_5_10_16_s2(struct module *mod, u32 *location, Elf_Addr v, + s64 *rela_stack, size_t *rela_stack_top) +{ + int err = 0; + s64 opr1; + + err = rela_stack_pop(&opr1, rela_stack, rela_stack_top); + if (err) + return err; + + /* check 4-aligned */ + if (opr1 % 4) { + pr_err("module %s: opr1 = 0x%llx unaligned! dangerous %s relocation\n", + mod->name, opr1, __func__); + return -ENOEXEC; + } + + opr1 >>= 2; + /* check 23-bit signed */ + if ((opr1 & ~(u64)0xfffff) && + (opr1 & ~(u64)0xfffff) != ~(u64)0xfffff) { + pr_err("module %s: opr1 = 0x%llx overflow! dangerous %s relocation\n", + mod->name, opr1, __func__); + return -ENOEXEC; + } + + /* + * (*(uint32_t *) PC) [4 ... 0] = opr [22 ... 18] + * (*(uint32_t *) PC) [25 ... 10] = opr [17 ... 2] + */ + *location = (*location & 0xfc0003e0) + | ((opr1 & 0x1f0000) >> 16) | ((opr1 & 0xffff) << 10); + + return 0; +} + +static int apply_r_larch_sop_pop_32_s_0_10_10_16_s2(struct module *mod, u32 *location, Elf_Addr v, + s64 *rela_stack, size_t *rela_stack_top) +{ + int err = 0; + s64 opr1; + + err = rela_stack_pop(&opr1, rela_stack, rela_stack_top); + if (err) + return err; + + /* check 4-aligned */ + if (opr1 % 4) { + pr_err("module %s: opr1 = 0x%llx unaligned! dangerous %s relocation\n", + mod->name, opr1, __func__); + return -ENOEXEC; + } + + opr1 >>= 2; + /* check 28-bit signed */ + if ((opr1 & ~(u64)0x1ffffff) && + (opr1 & ~(u64)0x1ffffff) != ~(u64)0x1ffffff) { + pr_err("module %s: opr1 = 0x%llx overflow! dangerous %s relocation\n", + mod->name, opr1, __func__); + return -ENOEXEC; + } + + /* + * (*(uint32_t *) PC) [9 ... 0] = opr [27 ... 18] + * (*(uint32_t *) PC) [25 ... 10] = opr [17 ... 2] + */ + *location = (*location & 0xfc000000) + | ((opr1 & 0x3ff0000) >> 16) | ((opr1 & 0xffff) << 10); + + return 0; +} + +static int apply_r_larch_sop_pop_32_u(struct module *mod, u32 *location, Elf_Addr v, + s64 *rela_stack, size_t *rela_stack_top) +{ + int err = 0; + s64 opr1; + + err = rela_stack_pop(&opr1, rela_stack, rela_stack_top); + if (err) + return err; + + /* check 32-bit unsigned */ + if (opr1 & ~(u64)0xffffffff) { + pr_err("module %s: opr1 = 0x%llx overflow! dangerous %s relocation\n", + mod->name, opr1, __func__); + return -ENOEXEC; + } + + /* (*(uint32_t *) PC) = opr */ + *location = (u32)opr1; + + return 0; +} + +static int apply_r_larch_add32(struct module *mod, u32 *location, Elf_Addr v, + s64 *rela_stack, size_t *rela_stack_top) +{ + *(s32 *)location += v; + return 0; +} + +static int apply_r_larch_add64(struct module *mod, u32 *location, Elf_Addr v, + s64 *rela_stack, size_t *rela_stack_top) +{ + *(s64 *)location += v; + return 0; +} + +static int apply_r_larch_sub32(struct module *mod, u32 *location, Elf_Addr v, + s64 *rela_stack, size_t *rela_stack_top) +{ + *(s32 *)location -= v; + return 0; +} + +static int apply_r_larch_sub64(struct module *mod, u32 *location, Elf_Addr v, + s64 *rela_stack, size_t *rela_stack_top) +{ + *(s64 *)location -= v; + return 0; +} + +/* + * reloc_handlers_rela() - Apply a particular relocation to a module + * @mod: the module to apply the reloc to + * @location: the address at which the reloc is to be applied + * @v: the value of the reloc, with addend for RELA-style + * @rela_stack: the stack used for store relocation info, LOCAL to THIS module + * @rela_stac_top: where the stack operation(pop/push) applies to + * + * Return: 0 upon success, else -ERRNO + */ +typedef int (*reloc_rela_handler)(struct module *mod, u32 *location, Elf_Addr v, + s64 *rela_stack, size_t *rela_stack_top); + +/* The handlers for known reloc types */ +static reloc_rela_handler reloc_rela_handlers[] = { + [R_LARCH_NONE] = apply_r_larch_none, + [R_LARCH_32] = apply_r_larch_32, + [R_LARCH_64] = apply_r_larch_64, + [R_LARCH_MARK_LA] = apply_r_larch_mark_la, + [R_LARCH_MARK_PCREL] = apply_r_larch_mark_pcrel, + [R_LARCH_SOP_PUSH_PCREL] = apply_r_larch_sop_push_pcrel, + [R_LARCH_SOP_PUSH_ABSOLUTE] = apply_r_larch_sop_push_absolute, + [R_LARCH_SOP_PUSH_DUP] = apply_r_larch_sop_push_dup, + [R_LARCH_SOP_PUSH_PLT_PCREL] = apply_r_larch_sop_push_plt_pcrel, + [R_LARCH_SOP_SUB] = apply_r_larch_sop_sub, + [R_LARCH_SOP_SL] = apply_r_larch_sop_sl, + [R_LARCH_SOP_SR] = apply_r_larch_sop_sr, + [R_LARCH_SOP_ADD] = apply_r_larch_sop_add, + [R_LARCH_SOP_AND] = apply_r_larch_sop_and, + [R_LARCH_SOP_IF_ELSE] = apply_r_larch_sop_if_else, + [R_LARCH_SOP_POP_32_S_10_5] = apply_r_larch_sop_pop_32_s_10_5, + [R_LARCH_SOP_POP_32_U_10_12] = apply_r_larch_sop_pop_32_u_10_12, + [R_LARCH_SOP_POP_32_S_10_12] = apply_r_larch_sop_pop_32_s_10_12, + [R_LARCH_SOP_POP_32_S_10_16] = apply_r_larch_sop_pop_32_s_10_16, + [R_LARCH_SOP_POP_32_S_10_16_S2] = apply_r_larch_sop_pop_32_s_10_16_s2, + [R_LARCH_SOP_POP_32_S_5_20] = apply_r_larch_sop_pop_32_s_5_20, + [R_LARCH_SOP_POP_32_S_0_5_10_16_S2] = apply_r_larch_sop_pop_32_s_0_5_10_16_s2, + [R_LARCH_SOP_POP_32_S_0_10_10_16_S2] = apply_r_larch_sop_pop_32_s_0_10_10_16_s2, + [R_LARCH_SOP_POP_32_U] = apply_r_larch_sop_pop_32_u, + [R_LARCH_ADD32] = apply_r_larch_add32, + [R_LARCH_ADD64] = apply_r_larch_add64, + [R_LARCH_SUB32] = apply_r_larch_sub32, + [R_LARCH_SUB64] = apply_r_larch_sub64, +}; + +int apply_relocate(Elf_Shdr *sechdrs, const char *strtab, + unsigned int symindex, unsigned int relsec, + struct module *mod) +{ + int i, err; + unsigned int type; + s64 rela_stack[RELA_STACK_DEPTH]; + size_t rela_stack_top = 0; + reloc_rela_handler handler; + void *location; + Elf_Addr v; + Elf_Sym *sym; + Elf_Rel *rel = (void *) sechdrs[relsec].sh_addr; + + pr_debug("%s: Applying relocate section %u to %u\n", __func__, relsec, + sechdrs[relsec].sh_info); + + rela_stack_top = 0; + for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { + /* This is where to make the change */ + location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr + rel[i].r_offset; + /* This is the symbol it is referring to */ + sym = (Elf_Sym *)sechdrs[symindex].sh_addr + ELF_R_SYM(rel[i].r_info); + if (IS_ERR_VALUE(sym->st_value)) { + /* Ignore unresolved weak symbol */ + if (ELF_ST_BIND(sym->st_info) == STB_WEAK) + continue; + pr_warn("%s: Unknown symbol %s\n", mod->name, strtab + sym->st_name); + return -ENOENT; + } + + type = ELF_R_TYPE(rel[i].r_info); + + if (type < ARRAY_SIZE(reloc_rela_handlers)) + handler = reloc_rela_handlers[type]; + else + handler = NULL; + + if (!handler) { + pr_err("%s: Unknown relocation type %u\n", mod->name, type); + return -EINVAL; + } + + v = sym->st_value; + err = handler(mod, location, v, rela_stack, &rela_stack_top); + if (err) + return err; + } + + return 0; +} + +int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, + unsigned int symindex, unsigned int relsec, + struct module *mod) +{ + int i, err; + unsigned int type; + s64 rela_stack[RELA_STACK_DEPTH]; + size_t rela_stack_top = 0; + reloc_rela_handler handler; + void *location; + Elf_Addr v; + Elf_Sym *sym; + Elf_Rela *rel = (void *) sechdrs[relsec].sh_addr; + + pr_debug("%s: Applying relocate section %u to %u\n", __func__, relsec, + sechdrs[relsec].sh_info); + + rela_stack_top = 0; + for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { + /* This is where to make the change */ + location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr + rel[i].r_offset; + /* This is the symbol it is referring to */ + sym = (Elf_Sym *)sechdrs[symindex].sh_addr + ELF_R_SYM(rel[i].r_info); + if (IS_ERR_VALUE(sym->st_value)) { + /* Ignore unresolved weak symbol */ + if (ELF_ST_BIND(sym->st_info) == STB_WEAK) + continue; + pr_warn("%s: Unknown symbol %s\n", mod->name, strtab + sym->st_name); + return -ENOENT; + } + + type = ELF_R_TYPE(rel[i].r_info); + + if (type < ARRAY_SIZE(reloc_rela_handlers)) + handler = reloc_rela_handlers[type]; + else + handler = NULL; + + if (!handler) { + pr_err("%s: Unknown relocation type %u\n", mod->name, type); + return -EINVAL; + } + + pr_debug("type %d st_value %llx r_addend %llx loc %llx\n", + (int)ELF64_R_TYPE(rel[i].r_info), + sym->st_value, rel[i].r_addend, (u64)location); + + v = sym->st_value + rel[i].r_addend; + err = handler(mod, location, v, rela_stack, &rela_stack_top); + if (err) + return err; + } + + return 0; +} + +void *module_alloc(unsigned long size) +{ + return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, + GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE, __builtin_return_address(0)); +} diff --git a/arch/loongarch/kernel/proc.c b/arch/loongarch/kernel/proc.c new file mode 100644 index 000000000000..23de61e565c9 --- /dev/null +++ b/arch/loongarch/kernel/proc.c @@ -0,0 +1,122 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * * No lock; only written during early bootup by CPU 0. + * */ +static RAW_NOTIFIER_HEAD(proc_cpuinfo_chain); + +int __ref register_proc_cpuinfo_notifier(struct notifier_block *nb) +{ + return raw_notifier_chain_register(&proc_cpuinfo_chain, nb); +} + +int proc_cpuinfo_notifier_call_chain(unsigned long val, void *v) +{ + return raw_notifier_call_chain(&proc_cpuinfo_chain, val, v); +} + +static int show_cpuinfo(struct seq_file *m, void *v) +{ + unsigned long n = (unsigned long) v - 1; + unsigned int version = cpu_data[n].processor_id & 0xff; + unsigned int fp_version = cpu_data[n].fpu_vers; + struct proc_cpuinfo_notifier_args proc_cpuinfo_notifier_args; + + /* + * For the first processor also print the system type + */ + if (n == 0) + seq_printf(m, "system type\t\t: %s\n", get_system_type()); + + seq_printf(m, "processor\t\t: %ld\n", n); + seq_printf(m, "package\t\t\t: %d\n", cpu_data[n].package); + seq_printf(m, "core\t\t\t: %d\n", cpu_core(&cpu_data[n])); + seq_printf(m, "CPU Family\t\t: %s\n", __cpu_family[n]); + seq_printf(m, "Model Name\t\t: %s\n", __cpu_full_name[n]); + seq_printf(m, "CPU Revision\t\t: 0x%02x\n", version); + seq_printf(m, "FPU Revision\t\t: 0x%02x\n", fp_version); + seq_printf(m, "CPU MHz\t\t\t: %llu.%02llu\n", + cpu_clock_freq / 1000000, (cpu_clock_freq / 10000) % 100); + seq_printf(m, "BogoMIPS\t\t: %llu.%02llu\n", + (lpj_fine * cpu_clock_freq / const_clock_freq) / (500000/HZ), + ((lpj_fine * cpu_clock_freq / const_clock_freq) / (5000/HZ)) % 100); + seq_printf(m, "TLB Entries\t\t: %d\n", cpu_data[n].tlbsize); + seq_printf(m, "Address Sizes\t\t: %d bits physical, %d bits virtual\n", + cpu_pabits + 1, cpu_vabits + 1); + + seq_printf(m, "ISA\t\t\t:"); + if (cpu_has_loongarch32) + seq_printf(m, "%s", " loongarch32"); + if (cpu_has_loongarch64) + seq_printf(m, "%s", " loongarch64"); + seq_printf(m, "\n"); + + seq_printf(m, "Features\t\t:"); + if (cpu_has_cpucfg) seq_printf(m, "%s", " cpucfg"); + if (cpu_has_lam) seq_printf(m, "%s", " lam"); + if (cpu_has_ual) seq_printf(m, "%s", " ual"); + if (cpu_has_fpu) seq_printf(m, "%s", " fpu"); + if (cpu_has_lsx) seq_printf(m, "%s", " lsx"); + if (cpu_has_lasx) seq_printf(m, "%s", " lasx"); + if (cpu_has_complex) seq_printf(m, "%s", " complex"); + if (cpu_has_crypto) seq_printf(m, "%s", " crypto"); + if (cpu_has_lvz) seq_printf(m, "%s", " lvz"); + if (cpu_has_lbt_x86) seq_printf(m, "%s", " lbt_x86"); + if (cpu_has_lbt_arm) seq_printf(m, "%s", " lbt_arm"); + if (cpu_has_lbt_mips) seq_printf(m, "%s", " lbt_mips"); + seq_printf(m, "\n"); + + seq_printf(m, "Hardware Watchpoint\t: %s", + cpu_has_watch ? "yes, " : "no\n"); + if (cpu_has_watch) { + seq_printf(m, "iwatch count: %d, dwatch count: %d\n", + cpu_data[n].watch_ireg_count, cpu_data[n].watch_dreg_count); + } + + proc_cpuinfo_notifier_args.m = m; + proc_cpuinfo_notifier_args.n = n; + + raw_notifier_call_chain(&proc_cpuinfo_chain, 0, + &proc_cpuinfo_notifier_args); + + seq_printf(m, "\n"); + + return 0; +} + +static void *c_start(struct seq_file *m, loff_t *pos) +{ + unsigned long i = *pos; + + return i < NR_CPUS ? (void *) (i + 1) : NULL; +} + +static void *c_next(struct seq_file *m, void *v, loff_t *pos) +{ + ++*pos; + return c_start(m, pos); +} + +static void c_stop(struct seq_file *m, void *v) +{ +} + +const struct seq_operations cpuinfo_op = { + .start = c_start, + .next = c_next, + .stop = c_stop, + .show = show_cpuinfo, +}; diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c new file mode 100644 index 000000000000..15dec62bbe63 --- /dev/null +++ b/arch/loongarch/kernel/process.c @@ -0,0 +1,261 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Author: Huacai Chen + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Idle related variables and functions + */ + +unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE; +EXPORT_SYMBOL(boot_option_idle_override); + +#ifdef CONFIG_HOTPLUG_CPU +void arch_cpu_idle_dead(void) +{ + play_dead(); +} +#endif + +asmlinkage void ret_from_fork(void); +asmlinkage void ret_from_kernel_thread(void); + +void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp) +{ + unsigned long crmd; + unsigned long prmd; + unsigned long euen; + + /* New thread loses kernel privileges. */ + crmd = regs->csr_crmd & ~(PLV_MASK); + crmd |= PLV_USER; + regs->csr_crmd = crmd; + + prmd = regs->csr_prmd & ~(PLV_MASK); + prmd |= PLV_USER; + regs->csr_prmd = prmd; + + euen = regs->csr_euen & ~(CSR_EUEN_FPEN); + regs->csr_euen = euen; + lose_fpu(0); + + clear_thread_flag(TIF_LSX_CTX_LIVE); + clear_thread_flag(TIF_LASX_CTX_LIVE); + clear_used_math(); + regs->csr_era = pc; + regs->regs[3] = sp; +} + +void exit_thread(struct task_struct *tsk) +{ +} + +int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) +{ + /* + * Save any process state which is live in hardware registers to the + * parent context prior to duplication. This prevents the new child + * state becoming stale if the parent is preempted before copy_thread() + * gets a chance to save the parent's live hardware registers to the + * child context. + */ + preempt_disable(); + + if (is_fpu_owner()) + save_fp(current); + + preempt_enable(); + + if (used_math()) + memcpy(dst, src, sizeof(struct task_struct)); + else + memcpy(dst, src, offsetof(struct task_struct, thread.fpu.fpr)); + + return 0; +} + +/* + * Copy architecture-specific thread state + */ +int copy_thread(unsigned long clone_flags, unsigned long usp, + unsigned long kthread_arg, struct task_struct *p, unsigned long tls) +{ + unsigned long childksp; + struct pt_regs *childregs, *regs = current_pt_regs(); + + childksp = (unsigned long)task_stack_page(p) + THREAD_SIZE - 32; + + /* set up new TSS. */ + childregs = (struct pt_regs *) childksp - 1; + /* Put the stack after the struct pt_regs. */ + childksp = (unsigned long) childregs; + p->thread.csr_euen = 0; + p->thread.csr_crmd = csr_readl(LOONGARCH_CSR_CRMD); + p->thread.csr_prmd = csr_readl(LOONGARCH_CSR_PRMD); + p->thread.csr_ecfg = csr_readl(LOONGARCH_CSR_ECFG); + if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { + /* kernel thread */ + p->thread.reg23 = usp; /* fn */ + p->thread.reg24 = kthread_arg; + p->thread.reg03 = childksp; + p->thread.reg01 = (unsigned long) ret_from_kernel_thread; + memset(childregs, 0, sizeof(struct pt_regs)); + childregs->csr_euen = p->thread.csr_euen; + childregs->csr_crmd = p->thread.csr_crmd; + childregs->csr_prmd = p->thread.csr_prmd; + childregs->csr_ecfg = p->thread.csr_ecfg; + return 0; + } + + /* user thread */ + *childregs = *regs; + childregs->regs[4] = 0; /* Child gets zero as return value */ + if (usp) + childregs->regs[3] = usp; + + p->thread.reg03 = (unsigned long) childregs; + p->thread.reg01 = (unsigned long) ret_from_fork; + + /* + * New tasks lose permission to use the fpu. This accelerates context + * switching for most programs since they don't use the fpu. + */ + childregs->csr_euen = 0; + + clear_tsk_thread_flag(p, TIF_USEDFPU); + clear_tsk_thread_flag(p, TIF_USEDSIMD); + clear_tsk_thread_flag(p, TIF_LSX_CTX_LIVE); + clear_tsk_thread_flag(p, TIF_LASX_CTX_LIVE); + + if (clone_flags & CLONE_SETTLS) + childregs->regs[2] = tls; + + return 0; +} + +unsigned long get_wchan(struct task_struct *task) +{ + return 0; +} + +unsigned long stack_top(void) +{ + unsigned long top = TASK_SIZE & PAGE_MASK; + + /* Space for the VDSO & data page */ + top -= PAGE_ALIGN(current->thread.vdso->size); + top -= PAGE_SIZE; + + /* Space to randomize the VDSO base */ + if (current->flags & PF_RANDOMIZE) + top -= VDSO_RANDOMIZE_SIZE; + + return top; +} + +/* + * Don't forget that the stack pointer must be aligned on a 8 bytes + * boundary for 32-bits ABI and 16 bytes for 64-bits ABI. + */ +unsigned long arch_align_stack(unsigned long sp) +{ + if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) + sp -= get_random_int() & ~PAGE_MASK; + + return sp & ALMASK; +} + +static DEFINE_PER_CPU(call_single_data_t, backtrace_csd); +static struct cpumask backtrace_csd_busy; + +static void handle_backtrace(void *info) +{ + nmi_cpu_backtrace(get_irq_regs()); + cpumask_clear_cpu(smp_processor_id(), &backtrace_csd_busy); +} + +static void raise_backtrace(cpumask_t *mask) +{ + call_single_data_t *csd; + int cpu; + + for_each_cpu(cpu, mask) { + /* + * If we previously sent an IPI to the target CPU & it hasn't + * cleared its bit in the busy cpumask then it didn't handle + * our previous IPI & it's not safe for us to reuse the + * call_single_data_t. + */ + if (cpumask_test_and_set_cpu(cpu, &backtrace_csd_busy)) { + pr_warn("Unable to send backtrace IPI to CPU%u - perhaps it hung?\n", + cpu); + continue; + } + + csd = &per_cpu(backtrace_csd, cpu); + csd->func = handle_backtrace; + smp_call_function_single_async(cpu, csd); + } +} + +bool arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self) +{ + nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace); + return true; +} + +#ifdef CONFIG_64BIT +void loongarch_dump_regs64(u64 *uregs, const struct pt_regs *regs) +{ + unsigned int i; + + for (i = LOONGARCH_EF_R1; i <= LOONGARCH_EF_R31; i++) { + uregs[i] = regs->regs[i - LOONGARCH_EF_R0]; + } + + uregs[LOONGARCH_EF_CSR_ERA] = regs->csr_era; + uregs[LOONGARCH_EF_CSR_BADV] = regs->csr_badvaddr; + uregs[LOONGARCH_EF_CSR_CRMD] = regs->csr_crmd; + uregs[LOONGARCH_EF_CSR_PRMD] = regs->csr_prmd; + uregs[LOONGARCH_EF_CSR_EUEN] = regs->csr_euen; + uregs[LOONGARCH_EF_CSR_ECFG] = regs->csr_ecfg; + uregs[LOONGARCH_EF_CSR_ESTAT] = regs->csr_estat; +} +#endif /* CONFIG_64BIT */ diff --git a/arch/loongarch/kernel/ptrace.c b/arch/loongarch/kernel/ptrace.c new file mode 100644 index 000000000000..5d1429d737af --- /dev/null +++ b/arch/loongarch/kernel/ptrace.c @@ -0,0 +1,411 @@ +// SPDX-License-Identifier: GPL-2.0 +/* +* Copyright (C) 2020 Loongson Technology Corporation Limited +* +* Author: Hanlu Li +* Author: Huacai Chen +*/ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void init_fp_ctx(struct task_struct *target) +{ + /* The target already has context */ + if (tsk_used_math(target)) + return; + + /* Begin with data registers set to all 1s... */ + memset(&target->thread.fpu.fpr, ~0, sizeof(target->thread.fpu.fpr)); + set_stopped_child_used_math(target); +} + +/* + * Called by kernel/ptrace.c when detaching.. + * + * Make sure single step bits etc are not set. + */ +void ptrace_disable(struct task_struct *child) +{ + /* Don't load the watchpoint registers for the ex-child. */ + clear_tsk_thread_flag(child, TIF_LOAD_WATCH); +} + +/* regset get/set implementations */ + +static int gpr_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + int r; + struct pt_regs *regs = task_pt_regs(target); + + r = membuf_write(&to, ®s->regs, sizeof(u64) * GPR_NUM); + r = membuf_write(&to, ®s->csr_era, sizeof(u64)); + r = membuf_write(&to, ®s->csr_badvaddr, sizeof(u64)); + + return r; +} + +static int gpr_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int err; + int era_start = sizeof(u64) * GPR_NUM; + int badvaddr_start = era_start + sizeof(u64); + struct pt_regs *regs = task_pt_regs(target); + + err = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + ®s->regs, + 0, era_start); + err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf, + ®s->csr_era, + era_start, era_start + sizeof(u64)); + err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf, + ®s->csr_badvaddr, + badvaddr_start, badvaddr_start + sizeof(u64)); + + return err; +} + + +/* + * Get the general floating-point registers. + */ +static int gfpr_get(struct task_struct *target, struct membuf *to) +{ + return membuf_write(to, &target->thread.fpu.fpr, + sizeof(elf_fpreg_t) * NUM_FPU_REGS); +} + +static int gfpr_get_simd(struct task_struct *target, struct membuf *to) +{ + int i, r; + u64 fpr_val; + + BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t)); + for (i = 0; i < NUM_FPU_REGS; i++) { + fpr_val = get_fpr64(&target->thread.fpu.fpr[i], 0); + r = membuf_write(to, &fpr_val, sizeof(elf_fpreg_t)); + } + + return r; +} + +/* + * Choose the appropriate helper for general registers, and then copy + * the FCC and FCSR registers separately. + */ +static int fpr_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + int r; + + if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t)) + r = gfpr_get(target, &to); + else + r = gfpr_get_simd(target, &to); + + r = membuf_write(&to, &target->thread.fpu.fcc, sizeof(target->thread.fpu.fcc)); + r = membuf_write(&to, &target->thread.fpu.fcsr, sizeof(target->thread.fpu.fcsr)); + + return r; +} + +static int gfpr_set(struct task_struct *target, + unsigned int *pos, unsigned int *count, + const void **kbuf, const void __user **ubuf) +{ + return user_regset_copyin(pos, count, kbuf, ubuf, + &target->thread.fpu.fpr, + 0, NUM_FPU_REGS * sizeof(elf_fpreg_t)); +} + +static int gfpr_set_simd(struct task_struct *target, + unsigned int *pos, unsigned int *count, + const void **kbuf, const void __user **ubuf) +{ + int i, err; + u64 fpr_val; + + BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t)); + for (i = 0; i < NUM_FPU_REGS && *count > 0; i++) { + err = user_regset_copyin(pos, count, kbuf, ubuf, + &fpr_val, i * sizeof(elf_fpreg_t), + (i + 1) * sizeof(elf_fpreg_t)); + if (err) + return err; + set_fpr64(&target->thread.fpu.fpr[i], 0, fpr_val); + } + + return 0; +} + +/* + * Choose the appropriate helper for general registers, and then copy + * the FCC register separately. + */ +static int fpr_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + const int fcc_start = NUM_FPU_REGS * sizeof(elf_fpreg_t); + const int fcc_end = fcc_start + sizeof(u64); + int err; + + BUG_ON(count % sizeof(elf_fpreg_t)); + if (pos + count > sizeof(elf_fpregset_t)) + return -EIO; + + init_fp_ctx(target); + + if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t)) + err = gfpr_set(target, &pos, &count, &kbuf, &ubuf); + else + err = gfpr_set_simd(target, &pos, &count, &kbuf, &ubuf); + if (err) + return err; + + if (count > 0) + err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.fpu.fcc, + fcc_start, fcc_end); + + return err; +} + +static int cfg_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + int i, r; + u32 cfg_val; + + i = 0; + while (to.left > 0) { + cfg_val = read_cpucfg(i++); + r = membuf_write(&to, &cfg_val, sizeof(u32)); + } + + return r; +} + +/* + * CFG registers are read-only. + */ +static int cfg_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + return 0; +} + +struct pt_regs_offset { + const char *name; + int offset; +}; + +#define REG_OFFSET_NAME(n, r) {.name = #n, .offset = offsetof(struct pt_regs, r)} +#define REG_OFFSET_END {.name = NULL, .offset = 0} + +static const struct pt_regs_offset regoffset_table[] = { + REG_OFFSET_NAME(r0, regs[0]), + REG_OFFSET_NAME(r1, regs[1]), + REG_OFFSET_NAME(r2, regs[2]), + REG_OFFSET_NAME(r3, regs[3]), + REG_OFFSET_NAME(r4, regs[4]), + REG_OFFSET_NAME(r5, regs[5]), + REG_OFFSET_NAME(r6, regs[6]), + REG_OFFSET_NAME(r7, regs[7]), + REG_OFFSET_NAME(r8, regs[8]), + REG_OFFSET_NAME(r9, regs[9]), + REG_OFFSET_NAME(r10, regs[10]), + REG_OFFSET_NAME(r11, regs[11]), + REG_OFFSET_NAME(r12, regs[12]), + REG_OFFSET_NAME(r13, regs[13]), + REG_OFFSET_NAME(r14, regs[14]), + REG_OFFSET_NAME(r15, regs[15]), + REG_OFFSET_NAME(r16, regs[16]), + REG_OFFSET_NAME(r17, regs[17]), + REG_OFFSET_NAME(r18, regs[18]), + REG_OFFSET_NAME(r19, regs[19]), + REG_OFFSET_NAME(r20, regs[20]), + REG_OFFSET_NAME(r21, regs[21]), + REG_OFFSET_NAME(r22, regs[22]), + REG_OFFSET_NAME(r23, regs[23]), + REG_OFFSET_NAME(r24, regs[24]), + REG_OFFSET_NAME(r25, regs[25]), + REG_OFFSET_NAME(r26, regs[26]), + REG_OFFSET_NAME(r27, regs[27]), + REG_OFFSET_NAME(r28, regs[28]), + REG_OFFSET_NAME(r29, regs[29]), + REG_OFFSET_NAME(r30, regs[30]), + REG_OFFSET_NAME(r31, regs[31]), + REG_OFFSET_NAME(csr_era, csr_era), + REG_OFFSET_NAME(csr_badvaddr, csr_badvaddr), + REG_OFFSET_NAME(csr_crmd, csr_crmd), + REG_OFFSET_NAME(csr_prmd, csr_prmd), + REG_OFFSET_NAME(csr_euen, csr_euen), + REG_OFFSET_NAME(csr_ecfg, csr_ecfg), + REG_OFFSET_NAME(csr_estat, csr_estat), + REG_OFFSET_END, +}; + +/** + * regs_query_register_offset() - query register offset from its name + * @name: the name of a register + * + * regs_query_register_offset() returns the offset of a register in struct + * pt_regs from its name. If the name is invalid, this returns -EINVAL; + */ +int regs_query_register_offset(const char *name) +{ + const struct pt_regs_offset *roff; + for (roff = regoffset_table; roff->name != NULL; roff++) + if (!strcmp(roff->name, name)) + return roff->offset; + return -EINVAL; +} + +enum loongarch_regset { + REGSET_GPR, + REGSET_FPR, + REGSET_CPUCFG, +}; + +static const struct user_regset loongarch64_regsets[] = { + [REGSET_GPR] = { + .core_note_type = NT_PRSTATUS, + .n = ELF_NGREG, + .size = sizeof(elf_greg_t), + .align = sizeof(elf_greg_t), + .regset_get = gpr_get, + .set = gpr_set, + }, + [REGSET_FPR] = { + .core_note_type = NT_PRFPREG, + .n = ELF_NFPREG, + .size = sizeof(elf_fpreg_t), + .align = sizeof(elf_fpreg_t), + .regset_get = fpr_get, + .set = fpr_set, + }, + [REGSET_CPUCFG] = { + .core_note_type = NT_LOONGARCH_CPUCFG, + .n = 64, + .size = sizeof(u32), + .align = sizeof(u32), + .regset_get = cfg_get, + .set = cfg_set, + }, +}; + +static const struct user_regset_view user_loongarch64_view = { + .name = "loongarch64", + .e_machine = ELF_ARCH, + .regsets = loongarch64_regsets, + .n = ARRAY_SIZE(loongarch64_regsets), +}; + + +const struct user_regset_view *task_user_regset_view(struct task_struct *task) +{ + return &user_loongarch64_view; +} + +static inline int read_user(struct task_struct *target, unsigned long addr, + unsigned long __user *data) +{ + unsigned long tmp = 0; + + switch (addr) { + case 0 ... 31: + tmp = task_pt_regs(target)->regs[addr]; + break; + case PC: + tmp = task_pt_regs(target)->csr_era; + break; + case BADVADDR: + tmp = task_pt_regs(target)->csr_badvaddr; + break; + default: + return -EIO; + } + + return put_user(tmp, data); +} + +static inline int write_user(struct task_struct *target, unsigned long addr, + unsigned long data) +{ + switch (addr) { + case 0 ... 31: + task_pt_regs(target)->regs[addr] = data; + break; + case PC: + task_pt_regs(target)->csr_era = data; + break; + case BADVADDR: + task_pt_regs(target)->csr_badvaddr = data; + break; + default: + return -EIO; + } + + return 0; +} + +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) +{ + int ret; + unsigned long __user *datap = (void __user *) data; + + switch (request) { + case PTRACE_PEEKUSR: + ret = read_user(child, addr, datap); + break; + + case PTRACE_POKEUSR: + ret = write_user(child, addr, data); + break; + + default: + ret = ptrace_request(child, request, addr, data); + break; + } + + return ret; +} diff --git a/arch/loongarch/kernel/reset.c b/arch/loongarch/kernel/reset.c new file mode 100644 index 000000000000..f39d911254a2 --- /dev/null +++ b/arch/loongarch/kernel/reset.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +static void machine_hang(void) +{ + local_irq_disable(); + clear_csr_ecfg(ECFG0_IM); + + pr_notice("\n\n** You can safely turn off the power now **\n\n"); + console_flush_on_panic(CONSOLE_FLUSH_PENDING); + + while (true) { + __arch_cpu_idle(); + local_irq_disable(); + } +} + +void (*pm_restart)(void) = machine_hang; +void (*pm_power_off)(void) = machine_hang; + +EXPORT_SYMBOL(pm_power_off); + +void machine_halt(void) +{ + machine_hang(); +} + +void machine_power_off(void) +{ + pm_power_off(); +} + +void machine_restart(char *command) +{ + do_kernel_restart(command); + pm_restart(); +} diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c new file mode 100644 index 000000000000..5b3b8e5fb8c0 --- /dev/null +++ b/arch/loongarch/kernel/setup.c @@ -0,0 +1,392 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +DEFINE_PER_CPU(unsigned long, kernelsp); +unsigned long fw_arg0, fw_arg1, fw_arg2, fw_arg3; +struct cpuinfo_loongarch cpu_data[NR_CPUS] __read_mostly; + +EXPORT_SYMBOL(cpu_data); + +#ifdef CONFIG_VT +struct screen_info screen_info; +#endif + +/* + * Setup information + * + * These are initialized so they are in the .data section + */ + +char __initdata arcs_cmdline[COMMAND_LINE_SIZE]; +static char __initdata command_line[COMMAND_LINE_SIZE]; + +static int num_standard_resources; +static struct resource *standard_resources; + +static struct resource code_resource = { .name = "Kernel code", }; +static struct resource data_resource = { .name = "Kernel data", }; +static struct resource bss_resource = { .name = "Kernel bss", }; + +unsigned long __kaslr_offset __ro_after_init; +EXPORT_SYMBOL(__kaslr_offset); + +static void *detect_magic __initdata = detect_memory_region; + +void __init detect_memory_region(phys_addr_t start, phys_addr_t sz_min, phys_addr_t sz_max) +{ + void *dm = &detect_magic; + phys_addr_t size; + + for (size = sz_min; size < sz_max; size <<= 1) { + if (!memcmp(dm, dm + size, sizeof(detect_magic))) + break; + } + + pr_debug("Memory: %lluMB of RAM detected at 0x%llx (min: %lluMB, max: %lluMB)\n", + ((unsigned long long) size) / SZ_1M, + (unsigned long long) start, + ((unsigned long long) sz_min) / SZ_1M, + ((unsigned long long) sz_max) / SZ_1M); + + memblock_add(start, size); +} + +/* + * Manage initrd + */ +#ifdef CONFIG_BLK_DEV_INITRD + +static unsigned long __init init_initrd(void) +{ + if (!phys_initrd_start || !phys_initrd_size) + goto disable; + + initrd_start = (unsigned long)phys_to_virt(phys_initrd_start); + initrd_end = (unsigned long)phys_to_virt(phys_initrd_start + phys_initrd_size); + + if (!initrd_start || initrd_end <= initrd_start) + goto disable; + + if (initrd_start & ~PAGE_MASK) { + pr_err("initrd start must be page aligned\n"); + goto disable; + } + if (initrd_start < PAGE_OFFSET) { + pr_err("initrd start < PAGE_OFFSET\n"); + goto disable; + } + + ROOT_DEV = Root_RAM0; + + initrd_below_start_ok = 1; + memblock_reserve(phys_initrd_start, phys_initrd_size); + + pr_info("Initial ramdisk at: 0x%lx (%lu bytes)\n", + initrd_start, initrd_end - initrd_start); + + return 0; + +disable: + initrd_start = 0; + initrd_end = 0; + printk(KERN_CONT " - disabling initrd\n"); + + return 0; +} + +static int __init early_initrd(char *p) +{ + unsigned long start, size; + char *endp; + + start = memparse(p, &endp); + if (*endp == ',') + size = memparse(endp + 1, NULL); + + if (start + size > PFN_PHYS(max_low_pfn)) { + pr_err("Initrd physical address is out of memory!"); + return 0; + } + + phys_initrd_start = start; + phys_initrd_size = size; + + return 0; +} +early_param("initrd", early_initrd); + +static int __init rd_start_early(char *p) +{ + unsigned long start = memparse(p, &p); + + phys_initrd_start = TO_PHYS(start); + + return 0; +} +early_param("rd_start", rd_start_early); + +static int __init rd_size_early(char *p) +{ + unsigned long size; + + size = memparse(p, &p); + phys_initrd_size = size; + + return 0; +} +early_param("rd_size", rd_size_early); + +#else /* !CONFIG_BLK_DEV_INITRD */ + +static unsigned long __init init_initrd(void) +{ + return 0; +} + +#endif + +static int usermem __initdata; + +static int __init early_parse_mem(char *p) +{ + phys_addr_t start, size; + + /* + * If a user specifies memory size, we + * blow away any automatically generated + * size. + */ + if (usermem == 0) { + usermem = 1; + memblock_remove(memblock_start_of_DRAM(), + memblock_end_of_DRAM() - memblock_start_of_DRAM()); + } + start = 0; + size = memparse(p, &p); + + memblock_add(start, size); + + return 0; +} +early_param("mem", early_parse_mem); + +static int __init early_parse_memmap(char *p) +{ + phys_addr_t start, size; + + /* + * If a user specifies memory size, we + * blow away any automatically generated + * size. + */ + if (usermem == 0) { + usermem = 1; + memblock_remove(memblock_start_of_DRAM(), + memblock_end_of_DRAM() - memblock_start_of_DRAM()); + } + size = memparse(p, &p); + if (*p == '@') + start = memparse(p + 1, &p); + else { + pr_err("\"memmap\" invalid format!\n"); + return -EINVAL; + } + + memblock_add(start, size); + +#ifdef CONFIG_CRASH_DUMP + if (start && size) { + crashmem_start = start; + crashmem_size = size; + } +#endif + + return 0; +} +early_param("memmap", early_parse_memmap); + +static void __init check_kernel_sections_mem(void) +{ + phys_addr_t start = __pa_symbol(&_text); + phys_addr_t size = __pa_symbol(&_end) - start; + + if (!memblock_is_region_memory(start, size)) { + pr_info("Kernel sections are not in the memory maps\n"); + memblock_add(start, size); + } +} + +static void __init bootcmdline_append(const char *s, size_t max) +{ + if (!s[0] || !max) + return; + + if (boot_command_line[0]) + strlcat(boot_command_line, " ", COMMAND_LINE_SIZE); + + strlcat(boot_command_line, s, max); +} + +static void __init bootcmdline_init(char **cmdline_p) +{ + boot_command_line[0] = 0; + + /* + * Take arguments from the bootloader at first. Early code should have + * filled arcs_cmdline with arguments from the bootloader. + */ + bootcmdline_append(arcs_cmdline, COMMAND_LINE_SIZE); + + strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); + *cmdline_p = command_line; + + parse_early_param(); +} + +/* + * arch_mem_init - initialize memory management subsystem + */ +static void __init arch_mem_init(char **cmdline_p) +{ + /* call board setup routine */ + plat_mem_setup(); + memblock_set_bottom_up(true); + + if (usermem) + pr_info("User-defined physical RAM map overwrite\n"); + + check_kernel_sections_mem(); + + /* + * In order to reduce the possibility of kernel panic when failed to + * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate + * low memory as small as possible before plat_swiotlb_setup(), so + * make sparse_init() using top-down allocation. + */ + memblock_set_bottom_up(false); + sparse_init(); + memblock_set_bottom_up(true); + + swiotlb_init(1); + + dma_contiguous_reserve(PFN_PHYS(max_low_pfn)); + + memblock_dump_all(); + + early_memtest(PFN_PHYS(ARCH_PFN_OFFSET), PFN_PHYS(max_low_pfn)); +} + +static void __init resource_init(void) +{ + long i = 0; + size_t res_size; + struct resource *res; + struct memblock_region *region; + + code_resource.start = __pa_symbol(&_text); + code_resource.end = __pa_symbol(&_etext) - 1; + data_resource.start = __pa_symbol(&_etext); + data_resource.end = __pa_symbol(&_edata) - 1; + bss_resource.start = __pa_symbol(&__bss_start); + bss_resource.end = __pa_symbol(&__bss_stop) - 1; + + num_standard_resources = memblock.memory.cnt; + res_size = num_standard_resources * sizeof(*standard_resources); + standard_resources = memblock_alloc(res_size, SMP_CACHE_BYTES); + + for_each_mem_region(region) { + res = &standard_resources[i++]; + if (!memblock_is_nomap(region)) { + res->name = "System RAM"; + res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; + res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region)); + res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1; + } else { + res->name = "Reserved"; + res->flags = IORESOURCE_MEM; + res->start = __pfn_to_phys(memblock_region_reserved_base_pfn(region)); + res->end = __pfn_to_phys(memblock_region_reserved_end_pfn(region)) - 1; + } + + request_resource(&iomem_resource, res); + + /* + * We don't know which RAM region contains kernel data, + * so we try it repeatedly and let the resource manager + * test it. + */ + request_resource(res, &code_resource); + request_resource(res, &data_resource); + request_resource(res, &bss_resource); + } +} + +static int __init reserve_memblock_reserved_regions(void) +{ + u64 i, j; + + for (i = 0; i < num_standard_resources; ++i) { + struct resource *mem = &standard_resources[i]; + phys_addr_t r_start, r_end, mem_size = resource_size(mem); + + if (!memblock_is_region_reserved(mem->start, mem_size)) + continue; + + for_each_reserved_mem_range(j, &r_start, &r_end) { + resource_size_t start, end; + + start = max(PFN_PHYS(PFN_DOWN(r_start)), mem->start); + end = min(PFN_PHYS(PFN_UP(r_end)) - 1, mem->end); + + if (start > mem->end || end < mem->start) + continue; + + reserve_region_with_split(mem, start, end, "Reserved"); + } + } + + return 0; +} +arch_initcall(reserve_memblock_reserved_regions); + +void __init setup_arch(char **cmdline_p) +{ + cpu_probe(); + + early_init(); + bootcmdline_init(cmdline_p); + + init_initrd(); + platform_init(); + pagetable_init(); + + arch_mem_init(cmdline_p); + + resource_init(); + + paging_init(); +} diff --git a/arch/loongarch/kernel/signal-common.h b/arch/loongarch/kernel/signal-common.h new file mode 100644 index 000000000000..7cfb80e087cc --- /dev/null +++ b/arch/loongarch/kernel/signal-common.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* +* Copyright (C) 2020 Loongson Technology Corporation Limited +* +* Author: Hanlu Li +*/ + +#ifndef __SIGNAL_COMMON_H +#define __SIGNAL_COMMON_H + +/* #define DEBUG_SIG */ + +#ifdef DEBUG_SIG +# define DEBUGP(fmt, args...) printk("%s: " fmt, __func__, ##args) +#else +# define DEBUGP(fmt, args...) +#endif + +/* + * Determine which stack to use.. + */ +extern void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, + size_t frame_size); +/* Check and clear pending FPU exceptions in saved CSR */ +extern int fpcsr_pending(unsigned int __user *fpcsr); + +/* Make sure we will not lose FPU ownership */ +#define lock_fpu_owner() ({ preempt_disable(); pagefault_disable(); }) +#define unlock_fpu_owner() ({ pagefault_enable(); preempt_enable(); }) + +/* Assembly functions to move context to/from the FPU */ +extern asmlinkage int +_save_fp_context(void __user *fpregs, void __user *fcc, void __user *csr); +extern asmlinkage int +_restore_fp_context(void __user *fpregs, void __user *fcc, void __user *csr); + +#endif /* __SIGNAL_COMMON_H */ diff --git a/arch/loongarch/kernel/signal.c b/arch/loongarch/kernel/signal.c new file mode 100644 index 000000000000..286291db7b36 --- /dev/null +++ b/arch/loongarch/kernel/signal.c @@ -0,0 +1,460 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* +* Author: Hanlu Li +* Author: Huacai Chen +* Copyright (C) 2020 Loongson Technology Corporation Limited +*/ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "signal-common.h" + +static int (*save_fp_context)(struct sigcontext __user *sc); +static int (*restore_fp_context)(struct sigcontext __user *sc); + +struct rt_sigframe { + u32 rs_ass[4]; /* argument save space for o32 */ + u32 rs_pad[2]; /* Was: signal trampoline */ + struct siginfo rs_info; + struct ucontext rs_uctx; +}; + +/* + * Thread saved context copy to/from a signal context presumed to be on the + * user stack, and therefore accessed with appropriate macros from uaccess.h. + */ +static int copy_fp_to_sigcontext(struct sigcontext __user *sc) +{ + int i; + int err = 0; + int inc = 1; + uint64_t __user *fcc = &sc->sc_fcc; + uint32_t __user *csr = &sc->sc_fcsr; + uint64_t __user *fpregs = (uint64_t *)&sc->sc_fpregs; + + for (i = 0; i < NUM_FPU_REGS; i += inc) { + err |= + __put_user(get_fpr64(¤t->thread.fpu.fpr[i], 0), + &fpregs[4*i]); + } + err |= __put_user(current->thread.fpu.fcsr, csr); + err |= __put_user(current->thread.fpu.fcc, fcc); + + return err; +} + +static int copy_fp_from_sigcontext(struct sigcontext __user *sc) +{ + int i; + int err = 0; + int inc = 1; + u64 fpr_val; + uint64_t __user *fcc = &sc->sc_fcc; + uint32_t __user *csr = &sc->sc_fcsr; + uint64_t __user *fpregs = (uint64_t *)&sc->sc_fpregs; + + for (i = 0; i < NUM_FPU_REGS; i += inc) { + err |= __get_user(fpr_val, &fpregs[4*i]); + set_fpr64(¤t->thread.fpu.fpr[i], 0, fpr_val); + } + err |= __get_user(current->thread.fpu.fcsr, csr); + err |= __get_user(current->thread.fpu.fcc, fcc); + + return err; +} + +/* + * Wrappers for the assembly _{save,restore}_fp_context functions. + */ +static int save_hw_fp_context(struct sigcontext __user *sc) +{ + uint64_t __user *fcc = &sc->sc_fcc; + uint32_t __user *fcsr = &sc->sc_fcsr; + uint64_t __user *fpregs = (uint64_t *)&sc->sc_fpregs; + + return _save_fp_context(fpregs, fcc, fcsr); +} + +static int restore_hw_fp_context(struct sigcontext __user *sc) +{ + uint64_t __user *fcc = &sc->sc_fcc; + uint32_t __user *csr = &sc->sc_fcsr; + uint64_t __user *fpregs = (uint64_t *)&sc->sc_fpregs; + + return _restore_fp_context(fpregs, fcc, csr); +} + + +/* + * Helper routines + */ +static int protected_save_fp_context(struct sigcontext __user *sc) +{ + int err = 0; + unsigned int used; + uint32_t __user *fcc = &sc->sc_fcsr; + uint32_t __user *fcsr = &sc->sc_fcsr; + uint32_t __user *vcsr = &sc->sc_vcsr; + uint32_t __user *flags = &sc->sc_flags; + uint64_t __user *fpregs = (uint64_t *)&sc->sc_fpregs; + + used = used_math() ? USED_FP : 0; + if (!used) + goto fp_done; + + while (1) { + lock_fpu_owner(); + if (is_fpu_owner()) + err = save_fp_context(sc); + else + err |= copy_fp_to_sigcontext(sc); +finish: + unlock_fpu_owner(); + if (likely(!err)) + break; + /* touch the sigcontext and try again */ + err = __put_user(0, &fpregs[0]) | + __put_user(0, &fpregs[32*4 - 1]) | + __put_user(0, fcc) | + __put_user(0, fcsr) | + __put_user(0, vcsr); + if (err) + return err; /* really bad sigcontext */ + } + +fp_done: + return __put_user(used, flags); +} + +static int protected_restore_fp_context(struct sigcontext __user *sc) +{ + unsigned int used; + int err = 0, sig = 0, tmp __maybe_unused; + uint32_t __user *fcc = &sc->sc_fcsr; + uint32_t __user *fcsr = &sc->sc_fcsr; + uint32_t __user *vcsr = &sc->sc_vcsr; + uint32_t __user *flags = &sc->sc_flags; + uint64_t __user *fpregs = (uint64_t *)&sc->sc_fpregs; + + err = __get_user(used, flags); + conditional_used_math(used & USED_FP); + + /* + * The signal handler may have used FPU; give it up if the program + * doesn't want it following sigreturn. + */ + if (err || !(used & USED_FP)) + lose_fpu(0); + + if (err) + return err; + + if (!(used & USED_FP)) + goto fp_done; + + err = sig = fpcsr_pending(fcsr); + if (err < 0) + return err; + + err = 0; + + while (1) { + lock_fpu_owner(); + if (is_fpu_owner()) + err = restore_fp_context(sc); + else + err |= copy_fp_from_sigcontext(sc); +finish: + unlock_fpu_owner(); + if (likely(!err)) + break; + /* touch the sigcontext and try again */ + err = __get_user(tmp, &fpregs[0]) | + __get_user(tmp, &fpregs[32*4 - 1]) | + __get_user(tmp, fcc) | + __get_user(tmp, fcsr) | + __get_user(tmp, vcsr); + if (err) + break; /* really bad sigcontext */ + } + +fp_done: + return err ?: sig; +} + +static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) +{ + int i, err = 0; + unsigned int sc_flags = 0; + + err |= __put_user(regs->csr_era, &sc->sc_pc); + + err |= __put_user(0, &sc->sc_regs[0]); + for (i = 1; i < 32; i++) + err |= __put_user(regs->regs[i], &sc->sc_regs[i]); + + /* + * Save FPU state to signal context. Signal handler + * will "inherit" current FPU state. + */ + err |= protected_save_fp_context(sc); + + if (current->thread.error_code == 1) { + err |= __get_user(sc_flags, &sc->sc_flags); + sc_flags |= ADRERR_RD; + err |= __put_user(sc_flags, &sc->sc_flags); + } else if (current->thread.error_code == 2) { + err |= __get_user(sc_flags, &sc->sc_flags); + sc_flags |= ADRERR_WR; + err |= __put_user(sc_flags, &sc->sc_flags); + } + + return err; +} + +int fpcsr_pending(unsigned int __user *fpcsr) +{ + int err, sig = 0; + unsigned int csr, enabled; + + err = __get_user(csr, fpcsr); + enabled = ((csr & FPU_CSR_ALL_E) << 24); + /* + * If the signal handler set some FPU exceptions, clear it and + * send SIGFPE. + */ + if (csr & enabled) { + csr &= ~enabled; + err |= __put_user(csr, fpcsr); + sig = SIGFPE; + } + return err ?: sig; +} + +static int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) +{ + int i, err = 0; + + /* Always make any pending restarted system calls return -EINTR */ + current->restart_block.fn = do_no_restart_syscall; + + err |= __get_user(regs->csr_era, &sc->sc_pc); + + for (i = 1; i < 32; i++) + err |= __get_user(regs->regs[i], &sc->sc_regs[i]); + + return err ?: protected_restore_fp_context(sc); +} + +void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, + size_t frame_size) +{ + unsigned long sp; + + /* Default to using normal stack */ + sp = regs->regs[3]; + + /* + * If we are on the alternate signal stack and would overflow it, don't. + * Return an always-bogus address instead so we will die with SIGSEGV. + */ + if (on_sig_stack(sp) && !likely(on_sig_stack(sp - frame_size))) + return (void __user __force *)(-1UL); + + sp = sigsp(sp, ksig); + + return (void __user *)((sp - frame_size) & ALMASK); +} + +/* + * Atomically swap in the new signal mask, and wait for a signal. + */ + +asmlinkage long sys_rt_sigreturn(void) +{ + int sig; + sigset_t set; + struct pt_regs *regs; + struct rt_sigframe __user *frame; + + regs = current_pt_regs(); + frame = (struct rt_sigframe __user *)regs->regs[3]; + if (!access_ok(frame, sizeof(*frame))) + goto badframe; + if (__copy_from_user(&set, &frame->rs_uctx.uc_sigmask, sizeof(set))) + goto badframe; + + set_current_blocked(&set); + + sig = restore_sigcontext(regs, &frame->rs_uctx.uc_mcontext); + if (sig < 0) + goto badframe; + else if (sig) + force_sig(sig); + + regs->regs[0] = 0; /* No syscall restarting */ + if (restore_altstack(&frame->rs_uctx.uc_stack)) + goto badframe; + + return regs->regs[4]; + +badframe: + force_sig(SIGSEGV); + return 0; +} + +static int setup_rt_frame(void *sig_return, struct ksignal *ksig, + struct pt_regs *regs, sigset_t *set) +{ + int err = 0; + struct rt_sigframe __user *frame; + + frame = get_sigframe(ksig, regs, sizeof(*frame)); + if (!access_ok(frame, sizeof (*frame))) + return -EFAULT; + + /* Create siginfo. */ + err |= copy_siginfo_to_user(&frame->rs_info, &ksig->info); + + /* Create the ucontext. */ + err |= __put_user(0, &frame->rs_uctx.uc_flags); + err |= __put_user(NULL, &frame->rs_uctx.uc_link); + err |= __save_altstack(&frame->rs_uctx.uc_stack, regs->regs[3]); + err |= setup_sigcontext(regs, &frame->rs_uctx.uc_mcontext); + err |= __copy_to_user(&frame->rs_uctx.uc_sigmask, set, sizeof(*set)); + + if (err) + return -EFAULT; + + /* + * Arguments to signal handler: + * + * a0 = signal number + * a1 = pointer to siginfo + * a2 = pointer to ucontext + * + * c0_era point to the signal handler, $r3 (sp) points to + * the struct rt_sigframe. + */ + regs->regs[4] = ksig->sig; + regs->regs[5] = (unsigned long) &frame->rs_info; + regs->regs[6] = (unsigned long) &frame->rs_uctx; + regs->regs[3] = (unsigned long) frame; + regs->regs[1] = (unsigned long) sig_return; + regs->csr_era = (unsigned long) ksig->ka.sa.sa_handler; + + DEBUGP("SIG deliver (%s:%d): sp=0x%p pc=0x%lx ra=0x%lx\n", + current->comm, current->pid, + frame, regs->csr_era, regs->regs[1]); + + return 0; +} + +static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) +{ + int ret; + sigset_t *oldset = sigmask_to_save(); + void *vdso = current->mm->context.vdso; + + /* Are we from a system call? */ + if (regs->regs[0]) { + switch (regs->regs[4]) { + case -ERESTART_RESTARTBLOCK: + case -ERESTARTNOHAND: + regs->regs[4] = -EINTR; + break; + case -ERESTARTSYS: + if (!(ksig->ka.sa.sa_flags & SA_RESTART)) { + regs->regs[4] = -EINTR; + break; + } + fallthrough; + case -ERESTARTNOINTR: + regs->regs[4] = regs->orig_a0; + regs->csr_era -= 4; + } + + regs->regs[0] = 0; /* Don't deal with this again. */ + } + + rseq_signal_deliver(ksig, regs); + + ret = setup_rt_frame(vdso + current->thread.vdso->offset_sigreturn, ksig, regs, oldset); + + signal_setup_done(ret, ksig, 0); +} + +void arch_do_signal(struct pt_regs *regs) +{ + struct ksignal ksig; + + if (get_signal(&ksig)) { + /* Whee! Actually deliver the signal. */ + handle_signal(&ksig, regs); + return; + } + + /* Are we from a system call? */ + if (regs->regs[0]) { + switch (regs->regs[4]) { + case -ERESTARTNOHAND: + case -ERESTARTSYS: + case -ERESTARTNOINTR: + regs->regs[4] = regs->orig_a0; + regs->csr_era -= 4; + break; + + case -ERESTART_RESTARTBLOCK: + regs->regs[4] = regs->orig_a0; + regs->regs[11] = __NR_restart_syscall; + regs->csr_era -= 4; + break; + } + regs->regs[0] = 0; /* Don't deal with this again. */ + } + + /* + * If there's no signal to deliver, we just put the saved sigmask + * back + */ + restore_saved_sigmask(); +} + +static int signal_setup(void) +{ + if (cpu_has_fpu) { + save_fp_context = save_hw_fp_context; + restore_fp_context = restore_hw_fp_context; + } else { + save_fp_context = copy_fp_to_sigcontext; + restore_fp_context = copy_fp_from_sigcontext; + } + + return 0; +} + +arch_initcall(signal_setup); diff --git a/arch/loongarch/kernel/switch.S b/arch/loongarch/kernel/switch.S new file mode 100644 index 000000000000..ac145f7c1583 --- /dev/null +++ b/arch/loongarch/kernel/switch.S @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include +#include +#include + +#include + +/* + * task_struct *__switch_to(task_struct *prev, task_struct *next, + * struct thread_info *next_ti) + */ + .align 5 +SYM_FUNC_START(__switch_to) + csrrd t1, LOONGARCH_CSR_PRMD + stptr.d t1, a0, THREAD_CSRPRMD + + cpu_save_nonscratch a0 + stptr.d ra, a0, THREAD_REG01 + move tp, a2 + cpu_restore_nonscratch a1 + + li.w t0, _THREAD_SIZE - 32 + PTR_ADDU t0, t0, tp + set_saved_sp t0, t1, t2 + + ldptr.d t1, a1, THREAD_CSRPRMD + csrwr t1, LOONGARCH_CSR_PRMD + + jr ra +SYM_FUNC_END(__switch_to) diff --git a/arch/loongarch/kernel/syscall.c b/arch/loongarch/kernel/syscall.c new file mode 100644 index 000000000000..105c370a8c22 --- /dev/null +++ b/arch/loongarch/kernel/syscall.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* +* Copyright (C) 2020 Loongson Technology Corporation Limited +* +* Author: Hanlu Li +* Author: Huacai Chen +*/ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#undef __SYSCALL +#define __SYSCALL(nr, call) [nr] = (call), + +SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, unsigned long, + prot, unsigned long, flags, unsigned long, fd, off_t, offset) +{ + if (offset & ~PAGE_MASK) + return -EINVAL; + + return ksys_mmap_pgoff(addr, len, prot, flags, fd, offset >> PAGE_SHIFT); +} + +void *sys_call_table[__NR_syscalls] = { + [0 ... __NR_syscalls - 1] = sys_ni_syscall, +#include +}; + +typedef long (*sys_call_fn)(unsigned long, unsigned long, + unsigned long, unsigned long, unsigned long, unsigned long); + +void noinstr do_syscall(struct pt_regs *regs) +{ + unsigned long nr; + sys_call_fn syscall_fn; + + nr = regs->regs[11]; + /* Set for syscall restarting */ + if (nr < NR_syscalls) + regs->regs[0] = nr + 1; + + regs->csr_era += 4; + regs->orig_a0 = regs->regs[4]; + regs->regs[4] = -ENOSYS; + + nr = syscall_enter_from_user_mode(regs, nr); + + if (nr < NR_syscalls) { + syscall_fn = sys_call_table[nr]; + regs->regs[4] = syscall_fn(regs->orig_a0, regs->regs[5], regs->regs[6], + regs->regs[7], regs->regs[8], regs->regs[9]); + } + + syscall_exit_to_user_mode(regs); +} diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c new file mode 100644 index 000000000000..cd86575dcedc --- /dev/null +++ b/arch/loongarch/kernel/time.c @@ -0,0 +1,225 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + * + * Common time service routines for LoongArch machines. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +u64 cpu_clock_freq; +EXPORT_SYMBOL(cpu_clock_freq); +u64 const_clock_freq; +EXPORT_SYMBOL(const_clock_freq); + +static DEFINE_RAW_SPINLOCK(state_lock); +static DEFINE_PER_CPU(struct clock_event_device, constant_clockevent_device); + +static void constant_event_handler(struct clock_event_device *dev) +{ +} + +irqreturn_t constant_timer_interrupt(int irq, void *data) +{ + int cpu = smp_processor_id(); + struct clock_event_device *cd; + + /* Clear Timer Interrupt */ + write_csr_tintclear(CSR_TINTCLR_TI); + cd = &per_cpu(constant_clockevent_device, cpu); + cd->event_handler(cd); + + return IRQ_HANDLED; +} + +static int constant_set_state_oneshot(struct clock_event_device *evt) +{ + unsigned long timer_config; + + raw_spin_lock(&state_lock); + + timer_config = csr_readq(LOONGARCH_CSR_TCFG); + timer_config |= CSR_TCFG_EN; + timer_config &= ~CSR_TCFG_PERIOD; + csr_writeq(timer_config, LOONGARCH_CSR_TCFG); + + raw_spin_unlock(&state_lock); + + return 0; +} + +static int constant_set_state_oneshot_stopped(struct clock_event_device *evt) +{ + unsigned long timer_config; + + raw_spin_lock(&state_lock); + + timer_config = csr_readq(LOONGARCH_CSR_TCFG); + timer_config &= ~CSR_TCFG_EN; + csr_writeq(timer_config, LOONGARCH_CSR_TCFG); + + raw_spin_unlock(&state_lock); + + return 0; +} + +static int constant_set_state_periodic(struct clock_event_device *evt) +{ + unsigned long period; + unsigned long timer_config; + + raw_spin_lock(&state_lock); + + period = const_clock_freq / HZ; + timer_config = period & CSR_TCFG_VAL; + timer_config |= (CSR_TCFG_PERIOD | CSR_TCFG_EN); + csr_writeq(timer_config, LOONGARCH_CSR_TCFG); + + raw_spin_unlock(&state_lock); + + return 0; +} + +static int constant_set_state_shutdown(struct clock_event_device *evt) +{ + return 0; +} + +static int constant_timer_next_event(unsigned long delta, struct clock_event_device *evt) +{ + unsigned long timer_config; + + delta &= CSR_TCFG_VAL; + timer_config = delta | CSR_TCFG_EN; + csr_writeq(timer_config, LOONGARCH_CSR_TCFG); + + return 0; +} + +static unsigned long __init get_loops_per_jiffy(void) +{ + unsigned long lpj = (unsigned long)const_clock_freq; + + do_div(lpj, HZ); + + return lpj; +} + +static long init_timeval; + +void sync_counter(void) +{ + /* Ensure counter begin at 0 */ + csr_writeq(-init_timeval, LOONGARCH_CSR_CNTC); +} + +int constant_clockevent_init(void) +{ + unsigned int irq; + unsigned int cpu = smp_processor_id(); + unsigned long min_delta = 0x600; + unsigned long max_delta = (1UL << 48) - 1; + struct clock_event_device *cd; + static int timer_irq_installed; + + irq = get_timer_irq(); + + cd = &per_cpu(constant_clockevent_device, cpu); + + cd->name = "Constant"; + cd->features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_PERCPU; + + cd->irq = irq; + cd->rating = 320; + cd->cpumask = cpumask_of(cpu); + cd->set_state_oneshot = constant_set_state_oneshot; + cd->set_state_oneshot_stopped = constant_set_state_oneshot_stopped; + cd->set_state_periodic = constant_set_state_periodic; + cd->set_state_shutdown = constant_set_state_shutdown; + cd->set_next_event = constant_timer_next_event; + cd->event_handler = constant_event_handler; + + clockevents_config_and_register(cd, const_clock_freq, min_delta, max_delta); + + if (timer_irq_installed) + return 0; + + timer_irq_installed = 1; + + sync_counter(); + + if (request_irq(irq, constant_timer_interrupt, IRQF_PERCPU | IRQF_TIMER, "timer", NULL)) + pr_err("Failed to request irq %d (timer)\n", irq); + + lpj_fine = get_loops_per_jiffy(); + pr_info("Constant clock event device register\n"); + + return 0; +} + +static u64 read_const_counter(struct clocksource *clk) +{ + return drdtime(); +} + +static u64 native_sched_clock(void) +{ + return read_const_counter(NULL); +} + +static struct clocksource clocksource_const = { + .name = "Constant", + .rating = 400, + .read = read_const_counter, + .mask = CLOCKSOURCE_MASK(64), + .flags = CLOCK_SOURCE_IS_CONTINUOUS, + .mult = 0, + .shift = 10, +}; + +int __init constant_clocksource_init(void) +{ + int res; + unsigned long freq; + + freq = const_clock_freq; + + clocksource_const.mult = + clocksource_hz2mult(freq, clocksource_const.shift); + + res = clocksource_register_hz(&clocksource_const, freq); + + sched_clock_register(native_sched_clock, 64, freq); + + pr_info("Constant clock source device register\n"); + + return res; +} + +void __init time_init(void) +{ + if (!cpu_has_cpucfg) + const_clock_freq = cpu_clock_freq; + else + const_clock_freq = calc_const_freq(); + + init_timeval = drdtime() - csr_readq(LOONGARCH_CSR_CNTC); + + constant_clockevent_init(); + constant_clocksource_init(); +} diff --git a/arch/loongarch/kernel/topology.c b/arch/loongarch/kernel/topology.c new file mode 100644 index 000000000000..3b2cbb95875b --- /dev/null +++ b/arch/loongarch/kernel/topology.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include + +static struct cpu cpu_device; + +static int __init topology_init(void) +{ + return register_cpu(&cpu_device, 0); +} + +subsys_initcall(topology_init); diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c new file mode 100644 index 000000000000..1919db64eb3d --- /dev/null +++ b/arch/loongarch/kernel/traps.c @@ -0,0 +1,721 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Author: Huacai Chen + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "access-helper.h" + +extern asmlinkage void handle_ade(void); +extern asmlinkage void handle_ale(void); +extern asmlinkage void handle_sys(void); +extern asmlinkage void handle_bp(void); +extern asmlinkage void handle_ri(void); +extern asmlinkage void handle_fpu(void); +extern asmlinkage void handle_fpe(void); +extern asmlinkage void handle_lbt(void); +extern asmlinkage void handle_lsx(void); +extern asmlinkage void handle_lasx(void); +extern asmlinkage void handle_reserved(void); +extern asmlinkage void handle_watch(void); +extern asmlinkage void handle_vint(void); + +static void show_backtrace(struct task_struct *task, const struct pt_regs *regs, + const char *loglvl, bool user) +{ + unsigned long addr; + unsigned long *sp = (unsigned long *)(regs->regs[3] & ~3); + + printk("%sCall Trace:", loglvl); +#ifdef CONFIG_KALLSYMS + printk("%s\n", loglvl); +#endif + while (!kstack_end(sp)) { + if (__get_addr(&addr, sp++, user)) { + printk("%s (Bad stack address)", loglvl); + break; + } + if (__kernel_text_address(addr)) + print_ip_sym(loglvl, addr); + } + printk("%s\n", loglvl); +} + +static void show_stacktrace(struct task_struct *task, + const struct pt_regs *regs, const char *loglvl, bool user) +{ + int i; + const int field = 2 * sizeof(unsigned long); + unsigned long stackdata; + unsigned long *sp = (unsigned long *)regs->regs[3]; + + printk("%sStack :", loglvl); + i = 0; + while ((unsigned long) sp & (PAGE_SIZE - 1)) { + if (i && ((i % (64 / field)) == 0)) { + pr_cont("\n"); + printk("%s ", loglvl); + } + if (i > 39) { + pr_cont(" ..."); + break; + } + + if (__get_addr(&stackdata, sp++, user)) { + pr_cont(" (Bad stack address)"); + break; + } + + pr_cont(" %0*lx", field, stackdata); + i++; + } + pr_cont("\n"); + show_backtrace(task, regs, loglvl, user); +} + +void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl) +{ + struct pt_regs regs; + + regs.csr_crmd = 0; + if (sp) { + regs.csr_era = 0; + regs.regs[1] = 0; + regs.regs[3] = (unsigned long)sp; + } else { + if (!task || task == current) + prepare_frametrace(®s); + else { + regs.csr_era = task->thread.reg01; + regs.regs[1] = 0; + regs.regs[3] = task->thread.reg03; + regs.regs[22] = task->thread.reg22; + } + } + + show_stacktrace(task, ®s, loglvl, false); +} + +static void show_code(void *pc, bool user) +{ + long i; + unsigned int insn; + + printk("Code:"); + + for (i = -3 ; i < 6 ; i++) { + if (__get_inst(&insn, pc + i, user)) { + pr_cont(" (Bad address in era)\n"); + break; + } + pr_cont("%c%08x%c", (i?' ':'<'), insn, (i?' ':'>')); + } + pr_cont("\n"); +} + +static void __show_regs(const struct pt_regs *regs) +{ + const int field = 2 * sizeof(unsigned long); + unsigned int excsubcode; + unsigned int exccode; + int i; + + show_regs_print_info(KERN_DEFAULT); + + /* + * Saved main processor registers + */ + for (i = 0; i < 32; ) { + if ((i % 4) == 0) + printk("$%2d :", i); + pr_cont(" %0*lx", field, regs->regs[i]); + + i++; + if ((i % 4) == 0) + pr_cont("\n"); + } + + /* + * Saved csr registers + */ + printk("era : %0*lx %pS\n", field, regs->csr_era, + (void *) regs->csr_era); + printk("ra : %0*lx %pS\n", field, regs->regs[1], + (void *) regs->regs[1]); + + printk("CSR crmd: %08lx ", regs->csr_crmd); + printk("CSR prmd: %08lx ", regs->csr_prmd); + printk("CSR euen: %08lx ", regs->csr_euen); + printk("CSR ecfg: %08lx ", regs->csr_ecfg); + printk("CSR estat: %08lx ", regs->csr_estat); + + pr_cont("\n"); + + exccode = ((regs->csr_estat) & CSR_ESTAT_EXC) >> CSR_ESTAT_EXC_SHIFT; + excsubcode = ((regs->csr_estat) & CSR_ESTAT_ESUBCODE) >> CSR_ESTAT_ESUBCODE_SHIFT; + printk("ExcCode : %x (SubCode %x)\n", exccode, excsubcode); + + if (exccode >= EXCCODE_TLBL && exccode <= EXCCODE_ALE) + printk("BadVA : %0*lx\n", field, regs->csr_badvaddr); + + printk("PrId : %08x (%s)\n", read_cpucfg(LOONGARCH_CPUCFG0), + cpu_family_string()); +} + +void show_regs(struct pt_regs *regs) +{ + __show_regs((struct pt_regs *)regs); + dump_stack(); +} + +void show_registers(struct pt_regs *regs) +{ + __show_regs(regs); + print_modules(); + printk("Process %s (pid: %d, threadinfo=%p, task=%p)\n", + current->comm, current->pid, current_thread_info(), current); + + show_stacktrace(current, regs, KERN_DEFAULT, user_mode(regs)); + show_code((void *)regs->csr_era, user_mode(regs)); + printk("\n"); +} + +static DEFINE_RAW_SPINLOCK(die_lock); + +void __noreturn die(const char *str, struct pt_regs *regs) +{ + static int die_counter; + int sig = SIGSEGV; + + oops_enter(); + + if (notify_die(DIE_OOPS, str, regs, 0, current->thread.trap_nr, + SIGSEGV) == NOTIFY_STOP) + sig = 0; + + console_verbose(); + raw_spin_lock_irq(&die_lock); + bust_spinlocks(1); + + printk("%s[#%d]:\n", str, ++die_counter); + show_registers(regs); + add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); + raw_spin_unlock_irq(&die_lock); + + oops_exit(); + + if (in_interrupt()) + panic("Fatal exception in interrupt"); + + if (panic_on_oops) + panic("Fatal exception"); + + do_exit(sig); +} + +static inline void setup_vint_size(unsigned int size) +{ + unsigned int vs; + + vs = ilog2(size/4); + + if (vs == 0 || vs > 7) + panic("vint_size %d Not support yet", vs); + + csr_xchgl(vs<mm); + vma = find_vma(current->mm, (unsigned long)fault_addr); + if (vma && (vma->vm_start <= (unsigned long)fault_addr)) + si_code = SEGV_ACCERR; + else + si_code = SEGV_MAPERR; + mmap_read_unlock(current->mm); + force_sig_fault(SIGSEGV, si_code, fault_addr); + return 1; + + default: + force_sig(sig); + return 1; + } +} + +/* + * Delayed fp exceptions when doing a lazy ctx switch + */ +asmlinkage void noinstr do_fpe(struct pt_regs *regs, unsigned long fcsr) +{ + int sig; + void __user *fault_addr; + irqentry_state_t state = irqentry_enter(regs); + + if (notify_die(DIE_FP, "FP exception", regs, 0, current->thread.trap_nr, + SIGFPE) == NOTIFY_STOP) + goto out; + + /* Clear FCSR.Cause before enabling interrupts */ + write_fcsr(LOONGARCH_FCSR0, fcsr & ~mask_fcsr_x(fcsr)); + local_irq_enable(); + + die_if_kernel("FP exception in kernel code", regs); + + sig = SIGFPE; + fault_addr = (void __user *) regs->csr_era; + + /* Send a signal if required. */ + process_fpemu_return(sig, fault_addr, fcsr); + +out: + local_irq_disable(); + irqentry_exit(regs, state); +} + +asmlinkage void noinstr do_bp(struct pt_regs *regs) +{ + bool user = user_mode(regs); + unsigned int opcode, bcode; + unsigned long era = exception_era(regs); + irqentry_state_t state = irqentry_enter(regs); + + local_irq_enable(); + current->thread.trap_nr = read_csr_excode(); + if (__get_inst(&opcode, (u32 *)era, user)) + goto out_sigsegv; + + bcode = (opcode & 0x7fff); + + /* + * notify the kprobe handlers, if instruction is likely to + * pertain to them. + */ + switch (bcode) { + case BRK_KPROBE_BP: + if (notify_die(DIE_BREAK, "Kprobe", regs, bcode, + current->thread.trap_nr, SIGTRAP) == NOTIFY_STOP) + goto out; + else + break; + case BRK_KPROBE_SSTEPBP: + if (notify_die(DIE_SSTEPBP, "Kprobe_SingleStep", regs, bcode, + current->thread.trap_nr, SIGTRAP) == NOTIFY_STOP) + goto out; + else + break; + case BRK_UPROBE_BP: + if (notify_die(DIE_UPROBE, "Uprobe", regs, bcode, + current->thread.trap_nr, SIGTRAP) == NOTIFY_STOP) + goto out; + else + break; + case BRK_UPROBE_XOLBP: + if (notify_die(DIE_UPROBE_XOL, "Uprobe_XOL", regs, bcode, + current->thread.trap_nr, SIGTRAP) == NOTIFY_STOP) + goto out; + else + break; + default: + if (notify_die(DIE_TRAP, "Break", regs, bcode, + current->thread.trap_nr, SIGTRAP) == NOTIFY_STOP) + goto out; + else + break; + } + + switch (bcode) { + case BRK_BUG: + die_if_kernel("Kernel bug detected", regs); + force_sig(SIGTRAP); + break; + case BRK_DIVZERO: + die_if_kernel("Break instruction in kernel code", regs); + force_sig_fault(SIGFPE, FPE_INTDIV, (void __user *)regs->csr_era); + break; + case BRK_OVERFLOW: + die_if_kernel("Break instruction in kernel code", regs); + force_sig_fault(SIGFPE, FPE_INTOVF, (void __user *)regs->csr_era); + break; + default: + die_if_kernel("Break instruction in kernel code", regs); + force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->csr_era); + break; + } + +out: + local_irq_disable(); + irqentry_exit(regs, state); + return; + +out_sigsegv: + force_sig(SIGSEGV); + goto out; +} + +asmlinkage void noinstr do_watch(struct pt_regs *regs) +{ + pr_warn("Hardware watch point handler not implemented!\n"); +} + +asmlinkage void noinstr do_ri(struct pt_regs *regs) +{ + int status = -1; + unsigned int opcode = 0; + unsigned int __user *era = (unsigned int __user *)exception_era(regs); + unsigned long old_era = regs->csr_era; + unsigned long old_ra = regs->regs[1]; + irqentry_state_t state = irqentry_enter(regs); + + local_irq_enable(); + current->thread.trap_nr = read_csr_excode(); + + if (notify_die(DIE_RI, "RI Fault", regs, 0, current->thread.trap_nr, + SIGILL) == NOTIFY_STOP) + goto out; + + die_if_kernel("Reserved instruction in kernel code", regs); + + if (unlikely(compute_return_era(regs) < 0)) + goto out; + + if (unlikely(get_user(opcode, era) < 0)) { + status = SIGSEGV; + current->thread.error_code = 1; + } + + if (status < 0) + status = SIGILL; + + if (unlikely(status > 0)) { + regs->csr_era = old_era; /* Undo skip-over. */ + regs->regs[1] = old_ra; + force_sig(status); + } + +out: + local_irq_disable(); + irqentry_exit(regs, state); +} + +static void init_restore_fp(void) +{ + if (!used_math()) { + /* First time FP context user. */ + init_fpu(); + } else { + /* This task has formerly used the FP context */ + if (!is_fpu_owner()) + own_fpu_inatomic(1); + } + + BUG_ON(!is_fp_enabled()); +} + +asmlinkage void noinstr do_fpu(struct pt_regs *regs) +{ + irqentry_state_t state = irqentry_enter(regs); + + local_irq_enable(); + die_if_kernel("do_fpu invoked from kernel context!", regs); + + preempt_disable(); + init_restore_fp(); + preempt_enable(); + + local_irq_disable(); + irqentry_exit(regs, state); +} + +asmlinkage void noinstr do_lsx(struct pt_regs *regs) +{ + irqentry_state_t state = irqentry_enter(regs); + local_irq_enable(); + force_sig(SIGILL); + local_irq_disable(); + irqentry_exit(regs, state); +} + +asmlinkage void noinstr do_lasx(struct pt_regs *regs) +{ + irqentry_state_t state = irqentry_enter(regs); + local_irq_enable(); + force_sig(SIGILL); + local_irq_disable(); + irqentry_exit(regs, state); +} + +asmlinkage void noinstr do_lbt(struct pt_regs *regs) +{ + irqentry_state_t state = irqentry_enter(regs); + local_irq_enable(); + force_sig(SIGILL); + local_irq_disable(); + irqentry_exit(regs, state); +} + +asmlinkage void noinstr do_reserved(struct pt_regs *regs) +{ + irqentry_state_t state = irqentry_enter(regs); + /* + * Game over - no way to handle this if it ever occurs. Most probably + * caused by a new unknown cpu type or after another deadly + * hard/software error. + */ + local_irq_enable(); + show_regs(regs); + panic("Caught reserved exception %u - should not happen.", read_csr_excode()); + local_irq_disable(); + irqentry_exit(regs, state); +} + +asmlinkage void cache_parity_error(void) +{ + const int field = 2 * sizeof(unsigned long); + unsigned int reg_val; + + /* For the moment, report the problem and hang. */ + printk("Cache error exception:\n"); + printk("csr_merrera == %0*llx\n", field, csr_readq(LOONGARCH_CSR_MERRERA)); + reg_val = csr_readl(LOONGARCH_CSR_MERRCTL); + printk("csr_merrctl == %08x\n", reg_val); + + printk("Decoded c0_cacheerr: %s cache fault in %s reference.\n", + reg_val & (1<<30) ? "secondary" : "primary", + reg_val & (1<<31) ? "data" : "insn"); + if (((current_cpu_data.processor_id & 0xff0000) == PRID_COMP_LOONGSON)) { + pr_err("Error bits: %s%s%s%s%s%s%s%s\n", + reg_val & (1<<29) ? "ED " : "", + reg_val & (1<<28) ? "ET " : "", + reg_val & (1<<27) ? "ES " : "", + reg_val & (1<<26) ? "EE " : "", + reg_val & (1<<25) ? "EB " : "", + reg_val & (1<<24) ? "EI " : "", + reg_val & (1<<23) ? "E1 " : "", + reg_val & (1<<22) ? "E0 " : ""); + } else { + pr_err("Error bits: %s%s%s%s%s%s%s\n", + reg_val & (1<<29) ? "ED " : "", + reg_val & (1<<28) ? "ET " : "", + reg_val & (1<<26) ? "EE " : "", + reg_val & (1<<25) ? "EB " : "", + reg_val & (1<<24) ? "EI " : "", + reg_val & (1<<23) ? "E1 " : "", + reg_val & (1<<22) ? "E0 " : ""); + } + printk("IDX: 0x%08x\n", reg_val & ((1<<22)-1)); + + panic("Can't handle the cache error!"); +} + +asmlinkage void noinstr do_vint(struct pt_regs *regs, unsigned long sp) +{ + register int cpu; + register unsigned long stack; + irqentry_state_t state = irqentry_enter(regs); + + cpu = smp_processor_id(); + + if (on_irq_stack(cpu, sp)) + handle_arch_irq(regs); + else { + stack = per_cpu(irq_stack, cpu) + IRQ_STACK_START; + + /* Save task's sp on IRQ stack for unwinding */ + *(unsigned long *)stack = sp; + + __asm__ __volatile__( + "move $s0, $sp \n" /* Preserve sp */ + "move $sp, %[stk] \n" /* Switch stack */ + "move $a0, %[regs] \n" + "la $t0, handle_arch_irq \n" + "ld.d $t1, $t0, 0 \n" + "jirl $ra, $t1, 0 \n" + "move $sp, $s0 \n" /* Restore sp */ + : /* No outputs */ + : [stk] "r" (stack), [regs] "r" (regs) + : "$a0", "$a1", "$a2", "$a3", "$a4", "$a5", "$a6", "$a7", "$s0", + "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t8", + "memory"); + } + + irqentry_exit(regs, state); +} + +extern void tlb_init(void); +extern void cache_error_setup(void); + +unsigned long eentry; +EXPORT_SYMBOL_GPL(eentry); +unsigned long tlbrentry; +EXPORT_SYMBOL_GPL(tlbrentry); + +long exception_handlers[VECSIZE * 128 / sizeof(long)] __aligned(SZ_64K); + +static void configure_exception_vector(void) +{ + eentry = (unsigned long)exception_handlers; + tlbrentry = (unsigned long)exception_handlers + 80*VECSIZE; + + csr_writeq(eentry, LOONGARCH_CSR_EENTRY); + csr_writeq(eentry, LOONGARCH_CSR_MERRENTRY); + csr_writeq(tlbrentry, LOONGARCH_CSR_TLBRENTRY); +} + +void per_cpu_trap_init(int cpu) +{ + unsigned int i; + + setup_vint_size(VECSIZE); + + configure_exception_vector(); + + if (!cpu_data[cpu].asid_cache) + cpu_data[cpu].asid_cache = asid_first_version(cpu); + + mmgrab(&init_mm); + current->active_mm = &init_mm; + BUG_ON(current->mm); + enter_lazy_tlb(&init_mm, current); + + /* Initialise exception handlers */ + if (cpu == 0) + for (i = 0; i < 64; i++) + set_handler(i * VECSIZE, handle_reserved, VECSIZE); + + tlb_init(); + cpu_cache_init(); +} + +/* Install CPU exception handler */ +void set_handler(unsigned long offset, void *addr, unsigned long size) +{ + memcpy((void *)(eentry + offset), addr, size); + local_flush_icache_range(eentry + offset, eentry + offset + size); +} + +static const char panic_null_cerr[] = + "Trying to set NULL cache error exception handler\n"; + +/* + * Install uncached CPU exception handler. + * This is suitable only for the cache error exception which is the only + * exception handler that is being run uncached. + */ +void set_merr_handler(unsigned long offset, void *addr, unsigned long size) +{ + unsigned long uncached_eentry = TO_UNCAC(__pa(eentry)); + + if (!addr) + panic(panic_null_cerr); + + memcpy((void *)(uncached_eentry + offset), addr, size); +} + +void __init trap_init(void) +{ + long i; + + /* Set interrupt vector handler */ + for (i = EXCCODE_INT_START; i < EXCCODE_INT_END; i++) + set_handler(i * VECSIZE, handle_vint, VECSIZE); + + set_handler(EXCCODE_ADE * VECSIZE, handle_ade, VECSIZE); + set_handler(EXCCODE_ALE * VECSIZE, handle_ale, VECSIZE); + set_handler(EXCCODE_SYS * VECSIZE, handle_sys, VECSIZE); + set_handler(EXCCODE_BP * VECSIZE, handle_bp, VECSIZE); + set_handler(EXCCODE_INE * VECSIZE, handle_ri, VECSIZE); + set_handler(EXCCODE_IPE * VECSIZE, handle_ri, VECSIZE); + set_handler(EXCCODE_FPDIS * VECSIZE, handle_fpu, VECSIZE); + set_handler(EXCCODE_LSXDIS * VECSIZE, handle_lsx, VECSIZE); + set_handler(EXCCODE_LASXDIS * VECSIZE, handle_lasx, VECSIZE); + set_handler(EXCCODE_FPE * VECSIZE, handle_fpe, VECSIZE); + set_handler(EXCCODE_BTDIS * VECSIZE, handle_lbt, VECSIZE); + set_handler(EXCCODE_WATCH * VECSIZE, handle_watch, VECSIZE); + + cache_error_setup(); + + local_flush_icache_range(eentry, eentry + 0x400); +} diff --git a/arch/loongarch/kernel/unaligned.c b/arch/loongarch/kernel/unaligned.c new file mode 100644 index 000000000000..88064deb6b68 --- /dev/null +++ b/arch/loongarch/kernel/unaligned.c @@ -0,0 +1,416 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Loongson Technology Co., Ltd. + * + * Handle unaligned accesses by emulation. + * + * This file contains exception handler for address error exception with the + * special capability to execute faulting instructions in software. The + * handler does not try to handle the case when the program counter points + * to an address not aligned to a word boundary. + * + */ +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "access-helper.h" + +static inline void write_fpr(unsigned int fd, unsigned long value) +{ +#define WRITE_FPR(fd, value) \ +{ \ + __asm__ __volatile__( \ + "movgr2fr.d $f%1, %0\n\t" \ + :: "r"(value), "i"(fd)); \ +} + + switch (fd) { + case 0: + WRITE_FPR(0, value); + break; + case 1: + WRITE_FPR(1, value); + break; + case 2: + WRITE_FPR(2, value); + break; + case 3: + WRITE_FPR(3, value); + break; + case 4: + WRITE_FPR(4, value); + break; + case 5: + WRITE_FPR(5, value); + break; + case 6: + WRITE_FPR(6, value); + break; + case 7: + WRITE_FPR(7, value); + break; + case 8: + WRITE_FPR(8, value); + break; + case 9: + WRITE_FPR(9, value); + break; + case 10: + WRITE_FPR(10, value); + break; + case 11: + WRITE_FPR(11, value); + break; + case 12: + WRITE_FPR(12, value); + break; + case 13: + WRITE_FPR(13, value); + break; + case 14: + WRITE_FPR(14, value); + break; + case 15: + WRITE_FPR(15, value); + break; + case 16: + WRITE_FPR(16, value); + break; + case 17: + WRITE_FPR(17, value); + break; + case 18: + WRITE_FPR(18, value); + break; + case 19: + WRITE_FPR(19, value); + break; + case 20: + WRITE_FPR(20, value); + break; + case 21: + WRITE_FPR(21, value); + break; + case 22: + WRITE_FPR(22, value); + break; + case 23: + WRITE_FPR(23, value); + break; + case 24: + WRITE_FPR(24, value); + break; + case 25: + WRITE_FPR(25, value); + break; + case 26: + WRITE_FPR(26, value); + break; + case 27: + WRITE_FPR(27, value); + break; + case 28: + WRITE_FPR(28, value); + break; + case 29: + WRITE_FPR(29, value); + break; + case 30: + WRITE_FPR(30, value); + break; + case 31: + WRITE_FPR(31, value); + break; + default: + panic("unexpected fd '%d'", fd); + } +#undef WRITE_FPR +} + +static inline unsigned long read_fpr(unsigned int fd) +{ +#define READ_FPR(fd, __value) \ +{ \ + __asm__ __volatile__( \ + "movfr2gr.d\t%0, $f%1\n\t" \ + : "=r"(__value) : "i"(fd)); \ +} + + unsigned long __value; + + switch (fd) { + case 0: + READ_FPR(0, __value); + break; + case 1: + READ_FPR(1, __value); + break; + case 2: + READ_FPR(2, __value); + break; + case 3: + READ_FPR(3, __value); + break; + case 4: + READ_FPR(4, __value); + break; + case 5: + READ_FPR(5, __value); + break; + case 6: + READ_FPR(6, __value); + break; + case 7: + READ_FPR(7, __value); + break; + case 8: + READ_FPR(8, __value); + break; + case 9: + READ_FPR(9, __value); + break; + case 10: + READ_FPR(10, __value); + break; + case 11: + READ_FPR(11, __value); + break; + case 12: + READ_FPR(12, __value); + break; + case 13: + READ_FPR(13, __value); + break; + case 14: + READ_FPR(14, __value); + break; + case 15: + READ_FPR(15, __value); + break; + case 16: + READ_FPR(16, __value); + break; + case 17: + READ_FPR(17, __value); + break; + case 18: + READ_FPR(18, __value); + break; + case 19: + READ_FPR(19, __value); + break; + case 20: + READ_FPR(20, __value); + break; + case 21: + READ_FPR(21, __value); + break; + case 22: + READ_FPR(22, __value); + break; + case 23: + READ_FPR(23, __value); + break; + case 24: + READ_FPR(24, __value); + break; + case 25: + READ_FPR(25, __value); + break; + case 26: + READ_FPR(26, __value); + break; + case 27: + READ_FPR(27, __value); + break; + case 28: + READ_FPR(28, __value); + break; + case 29: + READ_FPR(29, __value); + break; + case 30: + READ_FPR(30, __value); + break; + case 31: + READ_FPR(31, __value); + break; + default: + panic("unexpected fd '%d'", fd); + } +#undef READ_FPR + return __value; +} + +static void emulate_load_store_insn(struct pt_regs *regs, void __user *addr, unsigned int *pc) +{ + bool user = user_mode(regs); + unsigned int res; + unsigned long value; + unsigned long origpc; + unsigned long origra; + union loongarch_instruction insn; + + origpc = (unsigned long)pc; + origra = regs->regs[1]; + + perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0); + + /* + * This load never faults. + */ + __get_inst(&insn.word, pc, user); + if (user && !access_ok(addr, 8)) + goto sigbus; + + if (insn.reg2i12_format.opcode == ldd_op || + insn.reg2i14_format.opcode == ldptrd_op || + insn.reg3_format.opcode == ldxd_op) { + LoadDW(addr, value, res); + if (res) + goto fault; + regs->regs[insn.reg2i12_format.rd] = value; + } else if (insn.reg2i12_format.opcode == ldw_op || + insn.reg2i14_format.opcode == ldptrw_op || + insn.reg3_format.opcode == ldxw_op) { + LoadW(addr, value, res); + if (res) + goto fault; + regs->regs[insn.reg2i12_format.rd] = value; + } else if (insn.reg2i12_format.opcode == ldwu_op || + insn.reg3_format.opcode == ldxwu_op) { + LoadWU(addr, value, res); + if (res) + goto fault; + regs->regs[insn.reg2i12_format.rd] = value; + } else if (insn.reg2i12_format.opcode == ldh_op || + insn.reg3_format.opcode == ldxh_op) { + LoadHW(addr, value, res); + if (res) + goto fault; + regs->regs[insn.reg2i12_format.rd] = value; + } else if (insn.reg2i12_format.opcode == ldhu_op || + insn.reg3_format.opcode == ldxhu_op) { + LoadHWU(addr, value, res); + if (res) + goto fault; + regs->regs[insn.reg2i12_format.rd] = value; + } else if (insn.reg2i12_format.opcode == std_op || + insn.reg2i14_format.opcode == stptrd_op || + insn.reg3_format.opcode == stxd_op) { + value = regs->regs[insn.reg2i12_format.rd]; + StoreDW(addr, value, res); + if (res) + goto fault; + } else if (insn.reg2i12_format.opcode == stw_op || + insn.reg2i14_format.opcode == stptrw_op || + insn.reg3_format.opcode == stxw_op) { + value = regs->regs[insn.reg2i12_format.rd]; + StoreW(addr, value, res); + if (res) + goto fault; + } else if (insn.reg2i12_format.opcode == sth_op || + insn.reg3_format.opcode == stxh_op) { + value = regs->regs[insn.reg2i12_format.rd]; + StoreHW(addr, value, res); + if (res) + goto fault; + } else if (insn.reg2i12_format.opcode == fldd_op || + insn.reg3_format.opcode == fldxd_op) { + LoadDW(addr, value, res); + if (res) + goto fault; + write_fpr(insn.reg2i12_format.rd, value); + } else if (insn.reg2i12_format.opcode == flds_op || + insn.reg3_format.opcode == fldxs_op) { + LoadW(addr, value, res); + if (res) + goto fault; + write_fpr(insn.reg2i12_format.rd, value); + } else if (insn.reg2i12_format.opcode == fstd_op || + insn.reg3_format.opcode == fstxd_op) { + value = read_fpr(insn.reg2i12_format.rd); + StoreDW(addr, value, res); + if (res) + goto fault; + } else if (insn.reg2i12_format.opcode == fsts_op || + insn.reg3_format.opcode == fstxs_op) { + value = read_fpr(insn.reg2i12_format.rd); + StoreW(addr, value, res); + if (res) + goto fault; + } else + goto sigbus; + + compute_return_era(regs); + return; + +fault: + /* roll back jump/branch */ + regs->csr_era = origpc; + regs->regs[1] = origra; + /* Did we have an exception handler installed? */ + if (fixup_exception(regs)) + return; + + die_if_kernel("Unhandled kernel unaligned access", regs); + force_sig(SIGSEGV); + + return; + +sigbus: + die_if_kernel("Unhandled kernel unaligned access", regs); + force_sig(SIGBUS); + + return; +} + +asmlinkage void noinstr do_ade(struct pt_regs *regs) +{ + irqentry_state_t state = irqentry_enter(regs); + + die_if_kernel("Kernel ade access", regs); + force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)regs->csr_badvaddr); + + irqentry_exit(regs, state); +} + +asmlinkage void noinstr do_ale(struct pt_regs *regs) +{ + unsigned int *pc; + irqentry_state_t state = irqentry_enter(regs); + + perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, + 1, regs, regs->csr_badvaddr); + /* + * Did we catch a fault trying to load an instruction? + */ + if (regs->csr_badvaddr == regs->csr_era) + goto sigbus; + + if (user_mode(regs) && !test_thread_flag(TIF_FIXADE)) + goto sigbus; + + pc = (unsigned int *)exception_era(regs); + + emulate_load_store_insn(regs, (void __user *)regs->csr_badvaddr, pc); + + goto out; + +sigbus: + die_if_kernel("Kernel unaligned instruction access", regs); + force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)regs->csr_badvaddr); + +out: + irqentry_exit(regs, state); +} diff --git a/arch/loongarch/kernel/vdso.c b/arch/loongarch/kernel/vdso.c new file mode 100644 index 000000000000..e47c3175bef5 --- /dev/null +++ b/arch/loongarch/kernel/vdso.c @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + * Author: Huacai Chen + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +extern char vdso_start[], vdso_end[]; + +/* Kernel-provided data used by the VDSO. */ +static union loongarch_vdso_data { + u8 page[PAGE_SIZE]; + struct vdso_data data[CS_BASES]; +} loongarch_vdso_data __page_aligned_data; +struct vdso_data *vdso_data = loongarch_vdso_data.data; +static struct page *vdso_pages[] = { NULL }; + +static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma) +{ + current->mm->context.vdso = (void *)(new_vma->vm_start); + + return 0; +} + +struct loongarch_vdso_info vdso_info = { + .vdso = vdso_start, + .size = PAGE_SIZE, + .code_mapping = { + .name = "[vdso]", + .pages = vdso_pages, + .mremap = vdso_mremap, + }, + .data_mapping = { + .name = "[vvar]", + }, + .offset_sigreturn = vdso_offset_sigreturn, +}; + +static int __init init_vdso(void) +{ + unsigned long i, pfn; + + BUG_ON(!PAGE_ALIGNED(vdso_info.vdso)); + BUG_ON(!PAGE_ALIGNED(vdso_info.size)); + + pfn = __phys_to_pfn(__pa_symbol(vdso_info.vdso)); + for (i = 0; i < vdso_info.size / PAGE_SIZE; i++) + vdso_info.code_mapping.pages[i] = pfn_to_page(pfn + i); + + return 0; +} +subsys_initcall(init_vdso); + +static unsigned long vdso_base(void) +{ + unsigned long base = STACK_TOP; + + if (current->flags & PF_RANDOMIZE) { + base += get_random_int() & (VDSO_RANDOMIZE_SIZE - 1); + base = PAGE_ALIGN(base); + } + + return base; +} + +int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) +{ + int ret; + unsigned long vvar_size, size, data_addr, vdso_addr; + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + struct loongarch_vdso_info *info = current->thread.vdso; + + if (mmap_write_lock_killable(mm)) + return -EINTR; + + /* + * Determine total area size. This includes the VDSO data itself + * and the data page. + */ + vvar_size = PAGE_SIZE; + size = vvar_size + info->size; + + data_addr = get_unmapped_area(NULL, vdso_base(), size, 0, 0); + if (IS_ERR_VALUE(data_addr)) { + ret = data_addr; + goto out; + } + vdso_addr = data_addr + PAGE_SIZE; + + vma = _install_special_mapping(mm, data_addr, vvar_size, + VM_READ | VM_MAYREAD, + &info->data_mapping); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto out; + } + + /* Map VDSO data page. */ + ret = remap_pfn_range(vma, data_addr, + virt_to_phys(vdso_data) >> PAGE_SHIFT, + PAGE_SIZE, PAGE_READONLY); + if (ret) + goto out; + + /* Map VDSO code page. */ + vma = _install_special_mapping(mm, vdso_addr, info->size, + VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC, + &info->code_mapping); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto out; + } + + mm->context.vdso = (void *)vdso_addr; + ret = 0; + +out: + mmap_write_unlock(mm); + return ret; +} diff --git a/arch/loongarch/kernel/vmlinux.lds.S b/arch/loongarch/kernel/vmlinux.lds.S new file mode 100644 index 000000000000..3ae212100de5 --- /dev/null +++ b/arch/loongarch/kernel/vmlinux.lds.S @@ -0,0 +1,112 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include +#include + +#define PAGE_SIZE _PAGE_SIZE + +/* + * Put .bss..swapper_pg_dir as the first thing in .bss. This will + * ensure that it has .bss alignment (64K). + */ +#define BSS_FIRST_SECTIONS *(.bss..swapper_pg_dir) + +#include + +OUTPUT_ARCH(loongarch) +ENTRY(kernel_entry) +PHDRS { + text PT_LOAD FLAGS(7); /* RWX */ + note PT_NOTE FLAGS(4); /* R__ */ +} + +jiffies = jiffies_64; + +SECTIONS +{ + . = VMLINUX_LOAD_ADDRESS; + /* Read-only */ + _text = .; /* Text and read-only data */ + .text : { + TEXT_TEXT + SCHED_TEXT + CPUIDLE_TEXT + LOCK_TEXT + KPROBES_TEXT + IRQENTRY_TEXT + SOFTIRQENTRY_TEXT + *(.fixup) + *(.gnu.warning) + } :text = 0 + _etext = .; /* End of text section */ + + EXCEPTION_TABLE(16) + + _sdata = .; /* Start of data section */ + RO_DATA(4096) + RW_DATA(1 << CONFIG_L1_CACHE_SHIFT, PAGE_SIZE, THREAD_SIZE) + + /* We want the small data sections together, so single-instruction offsets + can access them all, and initialized data all before uninitialized, so + we can shorten the on-disk segment size. */ + .sdata : { + *(.sdata) + } + _edata = .; /* End of data section */ + + /* Will be freed after init */ + . = ALIGN(PAGE_SIZE); /* Init code and data */ + __init_begin = .; + INIT_TEXT_SECTION(PAGE_SIZE) + INIT_DATA_SECTION(16) + + . = ALIGN(4); + + /* .exit.text is discarded at runtime, not link time, to deal with + * references from .rodata + */ + .exit.text : { + EXIT_TEXT + } + .exit.data : { + EXIT_DATA + } + + /* + * Align to 64K in attempt to eliminate holes before the + * .bss..swapper_pg_dir section at the start of .bss. This + * also satisfies PAGE_SIZE alignment as the largest page size + * allowed is 64K. + */ + . = ALIGN(0x10000); + __init_end = .; + /* freed after init ends here */ + + /* + * Force .bss to 64K alignment so that .bss..swapper_pg_dir + * gets that alignment. .sbss should be empty, so there will be + * no holes after __init_end. */ + BSS_SECTION(0, 0x10000, 8) + + _end = . ; + + STABS_DEBUG + DWARF_DEBUG + + /* These must appear regardless of . */ + .gptab.sdata : { + *(.gptab.data) + *(.gptab.sdata) + } + .gptab.sbss : { + *(.gptab.bss) + *(.gptab.sbss) + } + + /* Sections to be discarded */ + DISCARDS + /DISCARD/ : { + *(.gnu.attributes) + *(.options) + *(.eh_frame) + } +} diff --git a/arch/loongarch/lib/Makefile b/arch/loongarch/lib/Makefile new file mode 100644 index 000000000000..2f337487878e --- /dev/null +++ b/arch/loongarch/lib/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for LoongArch-specific library files.. +# + +lib-y += delay.o memset.o memcpy.o memmove.o clear_user.o \ + copy_user.o strncpy_user.o strnlen_user.o dump_tlb.o diff --git a/arch/loongarch/lib/clear_user.S b/arch/loongarch/lib/clear_user.S new file mode 100644 index 000000000000..97b9998360cc --- /dev/null +++ b/arch/loongarch/lib/clear_user.S @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020-2021 Loongson Technology Corporation Limited + */ + +#include +#include +#include +#include + +.macro fixup_ex from, to, offset, fix +.if \fix + .section .fixup, "ax" +\to: addi.d v0, a1, \offset + jr ra + .previous +.endif + .section __ex_table, "a" + PTR \from\()b, \to\()b + .previous +.endm + +/* + * unsigned long __clear_user(void *addr, size_t size) + * + * a0: addr + * a1: size + */ +SYM_FUNC_START(__clear_user) + beqz a1, 2f + +1: st.b zero, a0, 0 + addi.d a0, a0, 1 + addi.d a1, a1, -1 + bgt a1, zero, 1b + +2: move v0, a1 + jr ra + + fixup_ex 1, 3, 0, 1 +SYM_FUNC_END(__clear_user) + +EXPORT_SYMBOL(__clear_user) diff --git a/arch/loongarch/lib/copy_user.S b/arch/loongarch/lib/copy_user.S new file mode 100644 index 000000000000..b814efd9cbd2 --- /dev/null +++ b/arch/loongarch/lib/copy_user.S @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020-2021 Loongson Technology Corporation Limited + */ + +#include +#include +#include +#include + +.macro fixup_ex from, to, offset, fix +.if \fix + .section .fixup, "ax" +\to: addi.d v0, a2, \offset + jr ra + .previous +.endif + .section __ex_table, "a" + PTR \from\()b, \to\()b + .previous +.endm + +/* + * unsigned long __copy_user(void *to, const void *from, size_t n) + * + * a0: to + * a1: from + * a2: n + */ +SYM_FUNC_START(__copy_user) + beqz a2, 3f + +1: ld.b t0, a1, 0 +2: st.b t0, a0, 0 + addi.d a0, a0, 1 + addi.d a1, a1, 1 + addi.d a2, a2, -1 + bgt a2, zero, 1b + +3: move v0, a2 + jr ra + + fixup_ex 1, 4, 0, 1 + fixup_ex 2, 4, 0, 0 +SYM_FUNC_END(__copy_user) + +EXPORT_SYMBOL(__copy_user) diff --git a/arch/loongarch/lib/delay.c b/arch/loongarch/lib/delay.c new file mode 100644 index 000000000000..4a3c209bb335 --- /dev/null +++ b/arch/loongarch/lib/delay.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include + +#include +#include + +void __delay(unsigned long cycles) +{ + u64 t0 = get_cycles(); + + while ((unsigned long)(get_cycles() - t0) < cycles) + cpu_relax(); +} +EXPORT_SYMBOL(__delay); + +/* + * Division by multiplication: you don't have to worry about + * loss of precision. + * + * Use only for very small delays ( < 1 msec). Should probably use a + * lookup table, really, as the multiplications take much too long with + * short delays. This is a "reasonable" implementation, though (and the + * first constant multiplications gets optimized away if the delay is + * a constant) + */ + +void __udelay(unsigned long us) +{ + __delay((us * 0x000010c7ull * HZ * lpj_fine) >> 32); +} +EXPORT_SYMBOL(__udelay); + +void __ndelay(unsigned long ns) +{ + __delay((ns * 0x00000005ull * HZ * lpj_fine) >> 32); +} +EXPORT_SYMBOL(__ndelay); diff --git a/arch/loongarch/lib/dump_tlb.c b/arch/loongarch/lib/dump_tlb.c new file mode 100644 index 000000000000..f2b93320664d --- /dev/null +++ b/arch/loongarch/lib/dump_tlb.c @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#include +#include + +#include +#include +#include +#include + +void dump_tlb_regs(void) +{ + const int field = 2 * sizeof(unsigned long); + + pr_info("Index : %0x\n", read_csr_tlbidx()); + pr_info("PageSize : %0x\n", read_csr_pagesize()); + pr_info("EntryHi : %0*lx\n", field, read_csr_entryhi()); + pr_info("EntryLo0 : %0*lx\n", field, read_csr_entrylo0()); + pr_info("EntryLo1 : %0*lx\n", field, read_csr_entrylo1()); +} + +static void dump_tlb(int first, int last) +{ + unsigned long s_entryhi, entryhi, asid; + unsigned long long entrylo0, entrylo1, pa; + unsigned int index; + unsigned int s_index, s_asid; + unsigned int pagesize, c0, c1, i; + unsigned long asidmask = cpu_asid_mask(¤t_cpu_data); + int pwidth = 11; + int vwidth = 11; + int asidwidth = DIV_ROUND_UP(ilog2(asidmask) + 1, 4); + + s_entryhi = read_csr_entryhi(); + s_index = read_csr_tlbidx(); + s_asid = read_csr_asid(); + + for (i = first; i <= last; i++) { + write_csr_index(i); + tlb_read(); + pagesize = read_csr_pagesize(); + entryhi = read_csr_entryhi(); + entrylo0 = read_csr_entrylo0(); + entrylo1 = read_csr_entrylo1(); + index = read_csr_tlbidx(); + asid = read_csr_asid(); + + /* EHINV bit marks entire entry as invalid */ + if (index & CSR_TLBIDX_EHINV) + continue; + /* + * ASID takes effect in absence of G (global) bit. + */ + if (!((entrylo0 | entrylo1) & ENTRYLO_G) && + asid != s_asid) + continue; + + /* + * Only print entries in use + */ + printk("Index: %2d pgsize=%x ", i, (1 << pagesize)); + + c0 = (entrylo0 & ENTRYLO_C) >> ENTRYLO_C_SHIFT; + c1 = (entrylo1 & ENTRYLO_C) >> ENTRYLO_C_SHIFT; + + pr_cont("va=%0*lx asid=%0*lx", + vwidth, (entryhi & ~0x1fffUL), asidwidth, asid & asidmask); + + /* NR/NX are in awkward places, so mask them off separately */ + pa = entrylo0 & ~(ENTRYLO_NR | ENTRYLO_NX); + pa = pa & PAGE_MASK; + pr_cont("\n\t["); + pr_cont("ri=%d xi=%d ", + (entrylo0 & ENTRYLO_NR) ? 1 : 0, + (entrylo0 & ENTRYLO_NX) ? 1 : 0); + pr_cont("pa=%0*llx c=%d d=%d v=%d g=%d plv=%lld] [", + pwidth, pa, c0, + (entrylo0 & ENTRYLO_D) ? 1 : 0, + (entrylo0 & ENTRYLO_V) ? 1 : 0, + (entrylo0 & ENTRYLO_G) ? 1 : 0, + (entrylo0 & ENTRYLO_PLV) >> ENTRYLO_PLV_SHIFT); + /* NR/NX are in awkward places, so mask them off separately */ + pa = entrylo1 & ~(ENTRYLO_NR | ENTRYLO_NX); + pa = pa & PAGE_MASK; + pr_cont("ri=%d xi=%d ", + (entrylo1 & ENTRYLO_NR) ? 1 : 0, + (entrylo1 & ENTRYLO_NX) ? 1 : 0); + pr_cont("pa=%0*llx c=%d d=%d v=%d g=%d plv=%lld]\n", + pwidth, pa, c1, + (entrylo1 & ENTRYLO_D) ? 1 : 0, + (entrylo1 & ENTRYLO_V) ? 1 : 0, + (entrylo1 & ENTRYLO_G) ? 1 : 0, + (entrylo1 & ENTRYLO_PLV) >> ENTRYLO_PLV_SHIFT); + } + printk("\n"); + + write_csr_entryhi(s_entryhi); + write_csr_tlbidx(s_index); + write_csr_asid(s_asid); +} + +void dump_tlb_all(void) +{ + dump_tlb(0, current_cpu_data.tlbsize - 1); +} diff --git a/arch/loongarch/lib/memcpy.S b/arch/loongarch/lib/memcpy.S new file mode 100644 index 000000000000..e94899d99624 --- /dev/null +++ b/arch/loongarch/lib/memcpy.S @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020-2021 Loongson Technology Corporation Limited + */ + +#include +#include +#include + +/* + * void *memcpy(void *dst, const void *src, size_t n) + * + * a0: dst + * a1: src + * a2: n + */ +SYM_FUNC_START(memcpy) + move a3, a0 + beqz a2, 2f + +1: ld.b t0, a1, 0 + st.b t0, a0, 0 + addi.d a0, a0, 1 + addi.d a1, a1, 1 + addi.d a2, a2, -1 + bgt a2, zero, 1b + +2: move v0, a3 + jr ra +SYM_FUNC_END(memcpy) + +EXPORT_SYMBOL(memcpy) diff --git a/arch/loongarch/lib/memmove.S b/arch/loongarch/lib/memmove.S new file mode 100644 index 000000000000..f55d8dcd09dd --- /dev/null +++ b/arch/loongarch/lib/memmove.S @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020-2021 Loongson Technology Corporation Limited + */ + +#include +#include +#include + +/* + * void *rmemcpy(void *dst, const void *src, size_t n) + * + * a0: dst + * a1: src + * a2: n + */ +SYM_FUNC_START(rmemcpy) + move a3, a0 + beqz a2, 2f + + add.d a0, a0, a2 + add.d a1, a1, a2 + +1: ld.b t0, a1, -1 + st.b t0, a0, -1 + addi.d a0, a0, -1 + addi.d a1, a1, -1 + addi.d a2, a2, -1 + bgt a2, zero, 1b + +2: move v0, a3 + jr ra +SYM_FUNC_END(rmemcpy) + +SYM_FUNC_START(memmove) + blt a0, a1, 1f /* dst < src, memcpy */ + blt a1, a0, 2f /* src < dst, rmemcpy */ + jr ra /* dst == src, return */ + +1: b memcpy + +2: b rmemcpy +SYM_FUNC_END(memmove) + +EXPORT_SYMBOL(memmove) diff --git a/arch/loongarch/lib/memset.S b/arch/loongarch/lib/memset.S new file mode 100644 index 000000000000..009d8ec7fc60 --- /dev/null +++ b/arch/loongarch/lib/memset.S @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020-2021 Loongson Technology Co., Ltd. + */ + +#include +#include +#include + +/* + * void *memset(void *s, int c, size_t n) + * + * a0: s + * a1: c + * a2: n + */ +SYM_FUNC_START(memset) + move a3, a0 + beqz a2, 2f + +1: st.b a1, a0, 0 + addi.d a0, a0, 1 + addi.d a2, a2, -1 + bgt a2, zero, 1b + +2: move v0, a3 + jr ra +SYM_FUNC_END(memset) + +EXPORT_SYMBOL(memset) diff --git a/arch/loongarch/lib/strncpy_user.S b/arch/loongarch/lib/strncpy_user.S new file mode 100644 index 000000000000..7f40ceed10e0 --- /dev/null +++ b/arch/loongarch/lib/strncpy_user.S @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020-2021 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include +#include + +/* + * long __strncpy_from_user(char *to, const char *from, long len) + * + * a0: to + * a1: from + * a2: len + */ +SYM_FUNC_START(__strncpy_from_user) + move a3, zero + +1: ld.b t0, a1, 0 + st.b t0, a0, 0 + addi.d a0, a0, 1 + addi.d a1, a1, 1 + beqz t0, 2f + + addi.d a3, a3, 1 + blt a3, a2, 1b + + /* + * return len if the entire buffer filled, + * return strlen else + */ +2: move v0, a3 + jr ra + + .section .fixup, "ax" + /* return -EFAULT if exception before terminator */ +3: li.w a0, -EFAULT + jr ra + .previous + + .section __ex_table, "a" + PTR 1b, 3b + .previous +SYM_FUNC_END(__strncpy_from_user) + +EXPORT_SYMBOL(__strncpy_from_user) diff --git a/arch/loongarch/lib/strnlen_user.S b/arch/loongarch/lib/strnlen_user.S new file mode 100644 index 000000000000..eb0472e7dfaa --- /dev/null +++ b/arch/loongarch/lib/strnlen_user.S @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020-2021 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include + +/* + * long __strnlen_user(const char *s, long n) + * + * a0: s + * a1: n + */ +SYM_FUNC_START(__strnlen_user) + move a2, zero + +1: ld.b t0, a0, 0 + addi.d a0, a0, 1 + addi.d a2, a2, 1 + beqz t0, 2f + + bge a1, a2, 1b + + /* + * return the size of a string including the ending NUL character + * up to a maximum of n + */ +2: move v0, a2 + jr ra + + .section .fixup, "ax" + /* return 0 in case of error */ +3: move v0, zero + jr ra + .previous + + .section __ex_table, "a" + PTR 1b, 3b + .previous +SYM_FUNC_END(__strnlen_user) + +EXPORT_SYMBOL(__strnlen_user) diff --git a/arch/loongarch/loongson64/Makefile b/arch/loongarch/loongson64/Makefile new file mode 100644 index 000000000000..d89d3c8fe382 --- /dev/null +++ b/arch/loongarch/loongson64/Makefile @@ -0,0 +1,5 @@ +# +# All Loongson based systems +# + +obj-y += setup.o init.o env.o reset.o irq.o mem.o rtc.o diff --git a/arch/loongarch/loongson64/Platform b/arch/loongarch/loongson64/Platform new file mode 100644 index 000000000000..15a033be4699 --- /dev/null +++ b/arch/loongarch/loongson64/Platform @@ -0,0 +1,13 @@ +# +# Loongson Processors' Support +# + +cflags-$(CONFIG_CPU_LOONGSON64) += $(call as-option,-Wa$(comma)-mno-fix-loongson3-llsc,) + +# +# Loongson Machines Support +# + +platform-$(CONFIG_MACH_LOONGSON64) += loongson64/ +cflags-$(CONFIG_MACH_LOONGSON64) += -I$(srctree)/arch/loongarch/include/asm/mach-loongson64 +load-$(CONFIG_MACH_LOONGSON64) += 0x9000000000200000 diff --git a/arch/loongarch/loongson64/env.c b/arch/loongarch/loongson64/env.c new file mode 100644 index 000000000000..e8ebcf12802c --- /dev/null +++ b/arch/loongarch/loongson64/env.c @@ -0,0 +1,209 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Loongson Technology Co., Ltd. + * Author: Huacai Chen + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ +#include +#include +#include +#include +#include +#include +#include +#include + +struct boot_params *efi_bp; +struct loongsonlist_mem_map *loongson_mem_map; +struct loongsonlist_vbios *pvbios; +struct loongson_system_configuration loongson_sysconf; +EXPORT_SYMBOL(loongson_sysconf); + +u64 loongson_chipcfg[MAX_PACKAGES]; +u64 loongson_chiptemp[MAX_PACKAGES]; +u64 loongson_freqctrl[MAX_PACKAGES]; +unsigned long long smp_group[MAX_PACKAGES]; + +static void __init register_addrs_set(u64 *registers, const u64 addr, int num) +{ + u64 i; + + for (i = 0; i < num; i++) { + *registers = (i << 44) | addr; + registers++; + } +} + +static u8 ext_listhdr_checksum(u8 *buffer, u32 length) +{ + u8 sum = 0; + u8 *end = buffer + length; + + while (buffer < end) { + sum = (u8)(sum + *(buffer++)); + } + + return (sum); +} + +static int parse_mem(struct _extention_list_hdr *head) +{ + struct loongsonlist_mem_map_legacy *ptr; + static struct loongsonlist_mem_map mem_map; + int i; + + loongson_mem_map = (struct loongsonlist_mem_map *)head; + + if (ext_listhdr_checksum((u8 *)loongson_mem_map, head->length)) { + printk("mem checksum error\n"); + return -EPERM; + } + + if (loongson_sysconf.bpi_ver < BPI_VERSION_V3) { + ptr = (struct loongsonlist_mem_map_legacy *)head; + + pr_info("convert legacy mem map to new mem map.\n"); + memcpy(&mem_map, ptr, sizeof(mem_map.header)); + mem_map.map_count = ptr->map_count; + for (i = 0; i < ptr->map_count; i++) { + mem_map.map[i].mem_type = ptr->map[i].mem_type; + mem_map.map[i].mem_start = ptr->map[i].mem_start; + mem_map.map[i].mem_size = ptr->map[i].mem_size; + } + loongson_mem_map = &mem_map; + } + return 0; +} + +static int parse_vbios(struct _extention_list_hdr *head) +{ + pvbios = (struct loongsonlist_vbios *)head; + + if (ext_listhdr_checksum((u8 *)pvbios, head->length)) { + printk("vbios_addr checksum error\n"); + return -EPERM; + } + + loongson_sysconf.vgabios_addr = + (u64)early_memremap_ro(pvbios->vbios_addr, 64); + + return 0; +} + +static int parse_screeninfo(struct _extention_list_hdr *head) +{ + struct loongsonlist_screeninfo *pscreeninfo; + + pscreeninfo = (struct loongsonlist_screeninfo *)head; + if (ext_listhdr_checksum((u8 *)pscreeninfo, head->length)) { + printk("screeninfo_addr checksum error\n"); + return -EPERM; + } + + memcpy(&screen_info, &pscreeninfo->si, sizeof(screen_info)); + + return 0; +} + +static int parse_extlist(struct boot_params *bp) +{ + unsigned long next_offset; + struct _extention_list_hdr *fhead; + + if (loongson_sysconf.bpi_ver >= BPI_VERSION_V3) + fhead = (struct _extention_list_hdr *)((char *)bp + + bp->extlist_offset); + else + fhead = (struct _extention_list_hdr *)bp->extlist_offset; + + if (fhead == NULL) { + printk("the ext struct is empty!\n"); + return -1; + } + + do { + if (memcmp(&(fhead->signature), LOONGSON_MEM_SIGNATURE, 3) == 0) { + if (parse_mem(fhead) != 0) { + printk("parse mem failed\n"); + return -EPERM; + } + } else if (memcmp(&(fhead->signature), LOONGSON_VBIOS_SIGNATURE, 5) == 0) { + if (parse_vbios(fhead) != 0) { + printk("parse vbios failed\n"); + return -EPERM; + } + } else if (memcmp(&(fhead->signature), LOONGSON_SCREENINFO_SIGNATURE, 5) == 0) { + if (parse_screeninfo(fhead) != 0) { + printk("parse screeninfo failed\n"); + return -EPERM; + } + } + + if (loongson_sysconf.bpi_ver >= BPI_VERSION_V3) { + next_offset = fhead->next_offset; + fhead = (struct _extention_list_hdr *)((void *)bp + next_offset); + } else { + fhead = (struct _extention_list_hdr *)fhead->next_offset; + next_offset = (unsigned long)fhead; + } + + } while (next_offset); + + return 0; +} + +static void __init parse_flags(u64 flags) +{ + if (flags & BPI_FLAGS_UEFI_SUPPORTED) + set_bit(EFI_BOOT, &efi.flags); + else + clear_bit(EFI_BOOT, &efi.flags); +} + +static int get_bpi_version(void *signature) +{ + char data[8]; + int r, version = 0; + + memset(data, 0, 8); + memcpy(data, signature + 4, 4); + r = kstrtoint(data, 10, &version); + + if (r < 0 || version < BPI_VERSION_V1) + panic("Fatal error, invalid BPI version: %d\n", version); + + if (version >= BPI_VERSION_V2) + parse_flags(efi_bp->flags); + + return version; +} + +void __init fw_init_environ(void) +{ + efi_bp = (struct boot_params *)_fw_envp; + loongson_sysconf.bpi_ver = get_bpi_version(&efi_bp->signature); + + register_addrs_set(smp_group, TO_UNCAC(0x1fe01000), 16); + register_addrs_set(loongson_chipcfg, TO_UNCAC(0x1fe00180), 16); + register_addrs_set(loongson_chiptemp, TO_UNCAC(0x1fe0019c), 16); + register_addrs_set(loongson_freqctrl, TO_UNCAC(0x1fe001d0), 16); + + if (parse_extlist(efi_bp)) + printk("Scan bootparm failed\n"); +} + +static int __init init_cpu_fullname(void) +{ + int cpu; + + if (loongson_sysconf.cpuname && !strncmp(loongson_sysconf.cpuname, "Loongson", 8)) { + for (cpu = 0; cpu < NR_CPUS; cpu++) + __cpu_full_name[cpu] = loongson_sysconf.cpuname; + } + return 0; +} +arch_initcall(init_cpu_fullname); diff --git a/arch/loongarch/loongson64/init.c b/arch/loongarch/loongson64/init.c new file mode 100644 index 000000000000..5f65558425e0 --- /dev/null +++ b/arch/loongarch/loongson64/init.c @@ -0,0 +1,133 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Author: Huacai Chen + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define SMBIOS_BIOSSIZE_OFFSET 0x09 +#define SMBIOS_BIOSEXTERN_OFFSET 0x13 +#define SMBIOS_FREQLOW_OFFSET 0x16 +#define SMBIOS_FREQHIGH_OFFSET 0x17 +#define SMBIOS_FREQLOW_MASK 0xFF +#define SMBIOS_CORE_PACKAGE_OFFSET 0x23 +#define LOONGSON_EFI_ENABLE (1 << 3) + +struct loongson_board_info b_info; +static const char dmi_empty_string[] = " "; + +static const char *dmi_string_parse(const struct dmi_header *dm, u8 s) +{ + const u8 *bp = ((u8 *) dm) + dm->length; + + if (s) { + s--; + while (s > 0 && *bp) { + bp += strlen(bp) + 1; + s--; + } + + if (*bp != 0) { + size_t len = strlen(bp)+1; + size_t cmp_len = len > 8 ? 8 : len; + + if (!memcmp(bp, dmi_empty_string, cmp_len)) + return dmi_empty_string; + + return bp; + } + } + + return ""; + +} + +static void __init parse_cpu_table(const struct dmi_header *dm) +{ + long freq_temp = 0; + char *dmi_data = (char *)dm; + + freq_temp = ((*(dmi_data + SMBIOS_FREQHIGH_OFFSET) << 8) + \ + ((*(dmi_data + SMBIOS_FREQLOW_OFFSET)) & SMBIOS_FREQLOW_MASK)); + cpu_clock_freq = freq_temp * 1000000; + + loongson_sysconf.cpuname = (void *)dmi_string_parse(dm, dmi_data[16]); + loongson_sysconf.cores_per_package = *(dmi_data + SMBIOS_CORE_PACKAGE_OFFSET); + + pr_info("CpuClock = %llu\n", cpu_clock_freq); + +} + +static void __init parse_bios_table(const struct dmi_header *dm) +{ + int bios_extern; + char *dmi_data = (char *)dm; + + bios_extern = *(dmi_data + SMBIOS_BIOSEXTERN_OFFSET); + b_info.bios_size = *(dmi_data + SMBIOS_BIOSSIZE_OFFSET); + + if (bios_extern & LOONGSON_EFI_ENABLE) + set_bit(EFI_BOOT, &efi.flags); + else + clear_bit(EFI_BOOT, &efi.flags); +} + +static void __init find_tokens(const struct dmi_header *dm, void *dummy) +{ + switch (dm->type) { + case 0x0: /* Extern BIOS */ + parse_bios_table(dm); + break; + case 0x4: /* Calling interface */ + parse_cpu_table(dm); + break; + } +} +static void __init smbios_parse(void) +{ + b_info.bios_vendor = (void *)dmi_get_system_info(DMI_BIOS_VENDOR); + b_info.bios_version = (void *)dmi_get_system_info(DMI_BIOS_VERSION); + b_info.bios_release_date = (void *)dmi_get_system_info(DMI_BIOS_DATE); + b_info.board_vendor = (void *)dmi_get_system_info(DMI_BOARD_VENDOR); + b_info.board_name = (void *)dmi_get_system_info(DMI_BOARD_NAME); + dmi_walk(find_tokens, NULL); +} + +void __init early_init(void) +{ + fw_init_cmdline(); + fw_init_environ(); + early_memblock_init(); +} + +void __init platform_init(void) +{ + efi_init(); +#ifdef CONFIG_ACPI_TABLE_UPGRADE + acpi_table_upgrade(); +#endif +#ifdef CONFIG_ACPI + acpi_gbl_use_default_register_widths = false; + acpi_boot_table_init(); + acpi_boot_init(); +#endif + + fw_init_memory(); + dmi_setup(); + smbios_parse(); + pr_info("The BIOS Version: %s\n", b_info.bios_version); + + efi_runtime_init(); +} diff --git a/arch/loongarch/loongson64/irq.c b/arch/loongarch/loongson64/irq.c new file mode 100644 index 000000000000..7ba15255e5d6 --- /dev/null +++ b/arch/loongarch/loongson64/irq.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct acpi_madt_lio_pic *acpi_liointc; +struct acpi_madt_eio_pic *acpi_eiointc[MAX_IO_PICS]; + +struct acpi_madt_ht_pic *acpi_htintc; +struct acpi_madt_lpc_pic *acpi_pchlpc; +struct acpi_madt_msi_pic *acpi_pchmsi[MAX_IO_PICS]; +struct acpi_madt_bio_pic *acpi_pchpic[MAX_IO_PICS]; + +struct irq_domain *cpu_domain; +struct irq_domain *liointc_domain; +struct irq_domain *pch_lpc_domain; +struct irq_domain *pch_msi_domain[MAX_IO_PICS]; +struct irq_domain *pch_pic_domain[MAX_IO_PICS]; + +int find_pch_pic(u32 gsi) +{ + int i, start, end; + + /* Find the PCH_PIC that manages this GSI. */ + for (i = 0; i < loongson_sysconf.nr_io_pics; i++) { + struct acpi_madt_bio_pic *irq_cfg = acpi_pchpic[i]; + + start = irq_cfg->gsi_base; + end = irq_cfg->gsi_base + irq_cfg->size; + if (gsi >= start && gsi < end) + return i; + } + + pr_err("ERROR: Unable to locate PCH_PIC for GSI %d\n", gsi); + return -1; +} + +void __init setup_IRQ(void) +{ + int i; + struct irq_domain *parent_domain; + + if (!acpi_eiointc[0]) + cpu_data[0].options &= ~LOONGARCH_CPU_EXTIOI; + + cpu_domain = loongarch_cpu_irq_init(); + liointc_domain = liointc_acpi_init(cpu_domain, acpi_liointc); + + if (cpu_has_extioi) { + pr_info("Using EIOINTC interrupt mode\n"); + for (i = 0; i < loongson_sysconf.nr_io_pics; i++) { + parent_domain = eiointc_acpi_init(cpu_domain, acpi_eiointc[i]); + pch_pic_domain[i] = pch_pic_acpi_init(parent_domain, acpi_pchpic[i]); + pch_msi_domain[i] = pch_msi_acpi_init(parent_domain, acpi_pchmsi[i]); + } + } else { + pr_info("Using HTVECINTC interrupt mode\n"); + parent_domain = htvec_acpi_init(liointc_domain, acpi_htintc); + pch_pic_domain[0] = pch_pic_acpi_init(parent_domain, acpi_pchpic[0]); + pch_msi_domain[0] = pch_msi_acpi_init(parent_domain, acpi_pchmsi[0]); + } + + irq_set_default_host(pch_pic_domain[0]); + pch_lpc_domain = pch_lpc_acpi_init(pch_pic_domain[0], acpi_pchlpc); +} + +void __init arch_init_irq(void) +{ + clear_csr_ecfg(ECFG0_IM); + clear_csr_estat(ESTATF_IP); + + setup_IRQ(); + + set_csr_ecfg(ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | ECFGF_IPI | ECFGF_PMC); +} diff --git a/arch/loongarch/loongson64/mem.c b/arch/loongarch/loongson64/mem.c new file mode 100644 index 000000000000..1b500546f7eb --- /dev/null +++ b/arch/loongarch/loongson64/mem.c @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#include +#include +#include + +#include +#include + +#include +#include + +void __init early_memblock_init(void) +{ + int i; + u32 mem_type; + u64 mem_start, mem_end, mem_size; + + /* Parse memory information */ + for (i = 0; i < loongson_mem_map->map_count; i++) { + mem_type = loongson_mem_map->map[i].mem_type; + mem_start = loongson_mem_map->map[i].mem_start; + mem_size = loongson_mem_map->map[i].mem_size; + mem_end = mem_start + mem_size; + + switch (mem_type) { + case ADDRESS_TYPE_SYSRAM: + memblock_add(mem_start, mem_size); + if (max_low_pfn < (mem_end >> PAGE_SHIFT)) + max_low_pfn = mem_end >> PAGE_SHIFT; + break; + } + } + memblock_set_current_limit(PFN_PHYS(max_low_pfn)); + memblock_set_node(0, PHYS_ADDR_MAX, &memblock.memory, 0); + + /* Reserve the first 2MB */ + memblock_reserve(PHYS_OFFSET, 0x200000); + + /* Reserve the kernel text/data/bss */ + memblock_reserve(__pa_symbol(&_text), + __pa_symbol(&_end) - __pa_symbol(&_text)); +} + +void __init fw_init_memory(void) +{ + int i; + u32 mem_type; + u64 mem_start, mem_end, mem_size; + unsigned long start_pfn, end_pfn; + static unsigned long num_physpages; + + /* Parse memory information */ + for (i = 0; i < loongson_mem_map->map_count; i++) { + mem_type = loongson_mem_map->map[i].mem_type; + mem_start = loongson_mem_map->map[i].mem_start; + mem_size = loongson_mem_map->map[i].mem_size; + mem_end = mem_start + mem_size; + + switch (mem_type) { + case ADDRESS_TYPE_SYSRAM: + mem_start = PFN_ALIGN(mem_start); + mem_end = PFN_ALIGN(mem_end - PAGE_SIZE + 1); + num_physpages += (mem_size >> PAGE_SHIFT); + memblock_set_node(mem_start, mem_size, &memblock.memory, 0); + break; + case ADDRESS_TYPE_ACPI: + mem_start = PFN_ALIGN(mem_start); + mem_end = PFN_ALIGN(mem_end - PAGE_SIZE + 1); + num_physpages += (mem_size >> PAGE_SHIFT); + memblock_add(mem_start, mem_size); + memblock_set_node(mem_start, mem_size, &memblock.memory, 0); + fallthrough; + case ADDRESS_TYPE_RESERVED: + memblock_reserve(mem_start, mem_size); + break; + } + } + + get_pfn_range_for_nid(0, &start_pfn, &end_pfn); + pr_info("start_pfn=0x%lx, end_pfn=0x%lx, num_physpages:0x%lx\n", + start_pfn, end_pfn, num_physpages); +} diff --git a/arch/loongarch/loongson64/reset.c b/arch/loongarch/loongson64/reset.c new file mode 100644 index 000000000000..08e67368c94e --- /dev/null +++ b/arch/loongarch/loongson64/reset.c @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Author: Huacai Chen, chenhuacai@loongson.cn + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void loongson_restart(void) +{ +#ifdef CONFIG_EFI + if (efi_capsule_pending(NULL)) + efi_reboot(REBOOT_WARM, NULL); + else + efi_reboot(REBOOT_COLD, NULL); +#endif + if (!acpi_disabled) + acpi_reboot(); + + while (1) { + __arch_cpu_idle(); + } +} + +static void loongson_poweroff(void) +{ +#ifdef CONFIG_EFI + efi.reset_system(EFI_RESET_SHUTDOWN, EFI_SUCCESS, 0, NULL); +#endif + while (1) { + __arch_cpu_idle(); + } +} + +static int __init loongarch_reboot_setup(void) +{ + pm_restart = loongson_restart; + pm_power_off = loongson_poweroff; + + return 0; +} + +arch_initcall(loongarch_reboot_setup); diff --git a/arch/loongarch/loongson64/rtc.c b/arch/loongarch/loongson64/rtc.c new file mode 100644 index 000000000000..2472f3a70f52 --- /dev/null +++ b/arch/loongarch/loongson64/rtc.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ + +#include +#include +#include +#include + +#define RTC_TOYREAD0 0x2C +#define RTC_YEAR 0x30 + +unsigned long loongson_get_rtc_time(void) +{ + unsigned int year, mon, day, hour, min, sec; + unsigned int value; + + value = ls7a_readl(LS7A_RTC_REG_BASE + RTC_TOYREAD0); + sec = (value >> 4) & 0x3f; + min = (value >> 10) & 0x3f; + hour = (value >> 16) & 0x1f; + day = (value >> 21) & 0x1f; + mon = (value >> 26) & 0x3f; + year = ls7a_readl(LS7A_RTC_REG_BASE + RTC_YEAR); + + year = 1900 + year; + + return mktime64(year, mon, day, hour, min, sec); +} + +void read_persistent_clock64(struct timespec64 *ts) +{ + ts->tv_sec = loongson_get_rtc_time(); + ts->tv_nsec = 0; +} diff --git a/arch/loongarch/loongson64/setup.c b/arch/loongarch/loongson64/setup.c new file mode 100644 index 000000000000..701708769bc0 --- /dev/null +++ b/arch/loongarch/loongson64/setup.c @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Author: Huacai Chen + * Copyright (C) 2020 Loongson Technology Co., Ltd. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ +#include +#include +#include + +#ifdef CONFIG_VT +#include +#include +#include +#endif + +#include + +const char *get_system_type(void) +{ + return "generic-loongson-machine"; +} + +void __init plat_mem_setup(void) +{ +} + +static int __init register_gop_device(void) +{ + void *pd; + if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI) + return 0; + pd = platform_device_register_data(NULL, "efi-framebuffer", 0, + &screen_info, sizeof(screen_info)); + return PTR_ERR_OR_ZERO(pd); +} +subsys_initcall(register_gop_device); diff --git a/arch/loongarch/mm/Makefile b/arch/loongarch/mm/Makefile new file mode 100644 index 000000000000..a3a137709650 --- /dev/null +++ b/arch/loongarch/mm/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the Linux/LoongArch-specific parts of the memory manager. +# + +obj-y += init.o cache.o tlb.o tlbex.o extable.o \ + fault.o ioremap.o maccess.o mmap.o pgtable.o page.o + +obj-$(CONFIG_64BIT) += pgtable-64.o +obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o diff --git a/arch/loongarch/mm/cache.c b/arch/loongarch/mm/cache.c new file mode 100644 index 000000000000..286f7cd9ad59 --- /dev/null +++ b/arch/loongarch/mm/cache.c @@ -0,0 +1,151 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * LoongArch maintains ICache/DCache coherency by hardware, + * we just need "ibar" to avoid instruction hazard here. + */ +void local_flush_icache_range(unsigned long start, unsigned long end) +{ + asm volatile ("\tibar 0\n"::); +} + +void __update_cache(unsigned long address, pte_t pte) +{ + unsigned long pfn; + struct page *page; + + pfn = pte_pfn(pte); + if (unlikely(!pfn_valid(pfn))) + return; + page = pfn_to_page(pfn); + if (Page_dcache_dirty(page)) + ClearPageDcacheDirty(page); +} + +void cache_error_setup(void) +{ + extern char __weak except_vec_cex; + set_merr_handler(0x0, &except_vec_cex, 0x80); +} + +static unsigned long icache_size __read_mostly; +static unsigned long dcache_size __read_mostly; +static unsigned long vcache_size __read_mostly; +static unsigned long scache_size __read_mostly; + +static char *way_string[] = { NULL, "direct mapped", "2-way", + "3-way", "4-way", "5-way", "6-way", "7-way", "8-way", + "9-way", "10-way", "11-way", "12-way", + "13-way", "14-way", "15-way", "16-way", +}; + +static void probe_pcache(void) +{ + struct cpuinfo_loongarch *c = ¤t_cpu_data; + unsigned int lsize, sets, ways; + unsigned int config; + + config = read_cpucfg(LOONGARCH_CPUCFG17); + lsize = 1 << ((config & CPUCFG17_L1I_SIZE_M) >> CPUCFG17_L1I_SIZE); + sets = 1 << ((config & CPUCFG17_L1I_SETS_M) >> CPUCFG17_L1I_SETS); + ways = ((config & CPUCFG17_L1I_WAYS_M) >> CPUCFG17_L1I_WAYS) + 1; + + c->icache.linesz = lsize; + c->icache.sets = sets; + c->icache.ways = ways; + icache_size = sets * ways * lsize; + c->icache.waysize = icache_size / c->icache.ways; + + config = read_cpucfg(LOONGARCH_CPUCFG18); + lsize = 1 << ((config & CPUCFG18_L1D_SIZE_M) >> CPUCFG18_L1D_SIZE); + sets = 1 << ((config & CPUCFG18_L1D_SETS_M) >> CPUCFG18_L1D_SETS); + ways = ((config & CPUCFG18_L1D_WAYS_M) >> CPUCFG18_L1D_WAYS) + 1; + + c->dcache.linesz = lsize; + c->dcache.sets = sets; + c->dcache.ways = ways; + dcache_size = sets * ways * lsize; + c->dcache.waysize = dcache_size / c->dcache.ways; + + c->options |= LOONGARCH_CPU_PREFETCH; + + pr_info("Primary instruction cache %ldkB, %s, %s, linesize %d bytes.\n", + icache_size >> 10, way_string[c->icache.ways], "VIPT", c->icache.linesz); + + pr_info("Primary data cache %ldkB, %s, %s, %s, linesize %d bytes\n", + dcache_size >> 10, way_string[c->dcache.ways], "VIPT", "no aliases", c->dcache.linesz); +} + +static void probe_vcache(void) +{ + struct cpuinfo_loongarch *c = ¤t_cpu_data; + unsigned int lsize, sets, ways; + unsigned int config; + + config = read_cpucfg(LOONGARCH_CPUCFG19); + lsize = 1 << ((config & CPUCFG19_L2_SIZE_M) >> CPUCFG19_L2_SIZE); + sets = 1 << ((config & CPUCFG19_L2_SETS_M) >> CPUCFG19_L2_SETS); + ways = ((config & CPUCFG19_L2_WAYS_M) >> CPUCFG19_L2_WAYS) + 1; + + c->vcache.linesz = lsize; + c->vcache.sets = sets; + c->vcache.ways = ways; + vcache_size = lsize * sets * ways; + c->vcache.waysize = vcache_size / c->vcache.ways; + + pr_info("Unified victim cache %ldkB %s, linesize %d bytes.\n", + vcache_size >> 10, way_string[c->vcache.ways], c->vcache.linesz); +} + +static void probe_scache(void) +{ + struct cpuinfo_loongarch *c = ¤t_cpu_data; + unsigned int lsize, sets, ways; + unsigned int config; + + config = read_cpucfg(LOONGARCH_CPUCFG20); + lsize = 1 << ((config & CPUCFG20_L3_SIZE_M) >> CPUCFG20_L3_SIZE); + sets = 1 << ((config & CPUCFG20_L3_SETS_M) >> CPUCFG20_L3_SETS); + ways = ((config & CPUCFG20_L3_WAYS_M) >> CPUCFG20_L3_WAYS) + 1; + + c->scache.linesz = lsize; + c->scache.sets = sets; + c->scache.ways = ways; + /* 4 cores. scaches are shared */ + scache_size = lsize * sets * ways; + c->scache.waysize = scache_size / c->scache.ways; + + pr_info("Unified secondary cache %ldkB %s, linesize %d bytes.\n", + scache_size >> 10, way_string[c->scache.ways], c->scache.linesz); +} + +void cpu_cache_init(void) +{ + probe_pcache(); + probe_vcache(); + probe_scache(); + + shm_align_mask = PAGE_SIZE - 1; +} diff --git a/arch/loongarch/mm/extable.c b/arch/loongarch/mm/extable.c new file mode 100644 index 000000000000..f31aabb4b9c6 --- /dev/null +++ b/arch/loongarch/mm/extable.c @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include + +int fixup_exception(struct pt_regs *regs) +{ + const struct exception_table_entry *fixup; + + fixup = search_exception_tables(exception_era(regs)); + if (fixup) { + regs->csr_era = fixup->fixup; + + return 1; + } + + return 0; +} diff --git a/arch/loongarch/mm/fault.c b/arch/loongarch/mm/fault.c new file mode 100644 index 000000000000..605579b19a00 --- /dev/null +++ b/arch/loongarch/mm/fault.c @@ -0,0 +1,261 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + * + * Derived from MIPS: + * Copyright (C) 1995 - 2000 by Ralf Baechle + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +int show_unhandled_signals = 1; + +static void __kprobes no_context(struct pt_regs *regs, unsigned long address) +{ + const int field = sizeof(unsigned long) * 2; + + /* Are we prepared to handle this kernel fault? */ + if (fixup_exception(regs)) + return; + + /* + * Oops. The kernel tried to access some bad page. We'll have to + * terminate things with extreme prejudice. + */ + bust_spinlocks(1); + + pr_alert("CPU %d Unable to handle kernel paging request at " + "virtual address %0*lx, era == %0*lx, ra == %0*lx\n", + raw_smp_processor_id(), field, address, field, regs->csr_era, + field, regs->regs[1]); + die("Oops", regs); +} + +static void __kprobes do_out_of_memory(struct pt_regs *regs, unsigned long address) +{ + /* + * We ran out of memory, call the OOM killer, and return the userspace + * (which will retry the fault, or kill us if we got oom-killed). + */ + if (!user_mode(regs)) { + no_context(regs, address); + return; + } + pagefault_out_of_memory(); +} + +static void __kprobes do_sigbus(struct pt_regs *regs, + unsigned long write, unsigned long address, int si_code) +{ + /* Kernel mode? Handle exceptions or die */ + if (!user_mode(regs)) { + no_context(regs, address); + return; + } + + /* + * Send a sigbus, regardless of whether we were in kernel + * or user mode. + */ + current->thread.csr_badvaddr = address; + current->thread.trap_nr = read_csr_excode(); + force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address); +} + +static void __kprobes do_sigsegv(struct pt_regs *regs, + unsigned long write, unsigned long address, int si_code) +{ + const int field = sizeof(unsigned long) * 2; + static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10); + + /* Kernel mode? Handle exceptions or die */ + if (!user_mode(regs)) { + no_context(regs, address); + return; + } + + /* User mode accesses just cause a SIGSEGV */ + current->thread.csr_badvaddr = address; + if (!write) + current->thread.error_code = 1; + else + current->thread.error_code = 2; + current->thread.trap_nr = read_csr_excode(); + + if (show_unhandled_signals && + unhandled_signal(current, SIGSEGV) && __ratelimit(&ratelimit_state)) { + pr_info("do_page_fault(): sending SIGSEGV to %s for invalid %s %0*lx\n", + current->comm, + write ? "write access to" : "read access from", + field, address); + pr_info("era = %0*lx in", field, + (unsigned long) regs->csr_era); + print_vma_addr(KERN_CONT " ", regs->csr_era); + pr_cont("\n"); + pr_info("ra = %0*lx in", field, + (unsigned long) regs->regs[1]); + print_vma_addr(KERN_CONT " ", regs->regs[1]); + pr_cont("\n"); + } + force_sig_fault(SIGSEGV, si_code, (void __user *)address); +} + +/* + * This routine handles page faults. It determines the address, + * and the problem, and then passes it off to one of the appropriate + * routines. + */ +static void __kprobes __do_page_fault(struct pt_regs *regs, + unsigned long write, unsigned long address) +{ + int si_code = SEGV_MAPERR; + unsigned int flags = FAULT_FLAG_DEFAULT; + struct task_struct *tsk = current; + struct mm_struct *mm = tsk->mm; + struct vm_area_struct *vma = NULL; + vm_fault_t fault; + + /* + * We fault-in kernel-space virtual memory on-demand. The + * 'reference' page table is init_mm.pgd. + * + * NOTE! We MUST NOT take any locks for this case. We may + * be in an interrupt or a critical region, and should + * only copy the information from the master page table, + * nothing more. + */ + if (address & __UA_LIMIT) { + if (!user_mode(regs)) + no_context(regs, address); + else + do_sigsegv(regs, write, address, si_code); + return; + } + + /* + * If we're in an interrupt or have no user + * context, we must not take the fault.. + */ + if (faulthandler_disabled() || !mm) { + do_sigsegv(regs, write, address, si_code); + return; + } + + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; + + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); +retry: + mmap_read_lock(mm); + vma = find_vma(mm, address); + if (!vma) + goto bad_area; + if (vma->vm_start <= address) + goto good_area; + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; + if (!expand_stack(vma, address)) + goto good_area; +/* + * Something tried to access memory that isn't in our memory map.. + * Fix it, but check if it's kernel or user first.. + */ +bad_area: + mmap_read_unlock(mm); + do_sigsegv(regs, write, address, si_code); + return; + +/* + * Ok, we have a good vm_area for this memory access, so + * we can handle it.. + */ +good_area: + si_code = SEGV_ACCERR; + + if (write) { + flags |= FAULT_FLAG_WRITE; + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area; + } else { + if (!(vma->vm_flags & VM_READ) && address != exception_era(regs)) + goto bad_area; + if (!(vma->vm_flags & VM_EXEC) && address == exception_era(regs)) + goto bad_area; + } + + /* + * If for any reason at all we couldn't handle the fault, + * make sure we exit gracefully rather than endlessly redo + * the fault. + */ + fault = handle_mm_fault(vma, address, flags, regs); + + if (fault_signal_pending(fault, regs)) { + if (!user_mode(regs)) + no_context(regs, address); + return; + } + + if (unlikely(fault & VM_FAULT_RETRY)) { + flags |= FAULT_FLAG_TRIED; + + /* + * No need to mmap_read_unlock(mm) as we would + * have already released it in __lock_page_or_retry + * in mm/filemap.c. + */ + goto retry; + } + if (unlikely(fault & VM_FAULT_ERROR)) { + mmap_read_unlock(mm); + if (fault & VM_FAULT_OOM) { + do_out_of_memory(regs, address); + return; + } else if (fault & VM_FAULT_SIGSEGV) { + do_sigsegv(regs, write, address, si_code); + return; + } else if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) { + do_sigbus(regs, write, address, si_code); + return; + } + BUG(); + } + + mmap_read_unlock(mm); +} + +asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, + unsigned long write, unsigned long address) +{ + irqentry_state_t state = irqentry_enter(regs); + + /* Enable interrupt if enabled in parent context */ + if (likely(regs->csr_prmd & CSR_PRMD_PIE)) + local_irq_enable(); + + __do_page_fault(regs, write, address); + + local_irq_disable(); + + irqentry_exit(regs, state); +} diff --git a/arch/loongarch/mm/hugetlbpage.c b/arch/loongarch/mm/hugetlbpage.c new file mode 100644 index 000000000000..1a06411656a2 --- /dev/null +++ b/arch/loongarch/mm/hugetlbpage.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, + unsigned long sz) +{ + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pte_t *pte = NULL; + + pgd = pgd_offset(mm, addr); + p4d = p4d_alloc(mm, pgd, addr); + pud = pud_alloc(mm, p4d, addr); + if (pud) + pte = (pte_t *)pmd_alloc(mm, pud, addr); + + return pte; +} + +pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, + unsigned long sz) +{ + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd = NULL; + + pgd = pgd_offset(mm, addr); + if (pgd_present(*pgd)) { + p4d = p4d_offset(pgd, addr); + if (p4d_present(*p4d)) { + pud = pud_offset(p4d, addr); + if (pud_present(*pud)) + pmd = pmd_offset(pud, addr); + } + } + return (pte_t *) pmd; +} + +/* + * This function checks for proper alignment of input addr and len parameters. + */ +int is_aligned_hugepage_range(unsigned long addr, unsigned long len) +{ + if (len & ~HPAGE_MASK) + return -EINVAL; + if (addr & ~HPAGE_MASK) + return -EINVAL; + return 0; +} + +int pmd_huge(pmd_t pmd) +{ + return (pmd_val(pmd) & _PAGE_HUGE) != 0; +} + +int pud_huge(pud_t pud) +{ + return (pud_val(pud) & _PAGE_HUGE) != 0; +} + +uint64_t pmd_to_entrylo(unsigned long pmd_val) +{ + uint64_t val; + /* PMD as PTE. Must be huge page */ + if (!pmd_huge(__pmd(pmd_val))) + panic("%s", __func__); + + val = pmd_val ^ _PAGE_HUGE; + val |= ((val & _PAGE_HGLOBAL) >> + (_PAGE_HGLOBAL_SHIFT - _PAGE_GLOBAL_SHIFT)); + + return val; +} diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c new file mode 100644 index 000000000000..a34e2a3cd08b --- /dev/null +++ b/arch/loongarch/mm/init.c @@ -0,0 +1,177 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * We have up to 8 empty zeroed pages so we can map one of the right colour + * when needed. Since page is never written to after the initialization we + * don't have to care about aliases on other CPUs. + */ +unsigned long empty_zero_page, zero_page_mask; +EXPORT_SYMBOL_GPL(empty_zero_page); +EXPORT_SYMBOL(zero_page_mask); + +void setup_zero_pages(void) +{ + unsigned int order, i; + struct page *page; + + order = 0; + + empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); + if (!empty_zero_page) + panic("Oh boy, that early out of memory?"); + + page = virt_to_page((void *)empty_zero_page); + split_page(page, order); + for (i = 0; i < (1 << order); i++, page++) + mark_page_reserved(page); + + zero_page_mask = ((PAGE_SIZE << order) - 1) & PAGE_MASK; +} + +void copy_user_highpage(struct page *to, struct page *from, + unsigned long vaddr, struct vm_area_struct *vma) +{ + void *vfrom, *vto; + + vto = kmap_atomic(to); + vfrom = kmap_atomic(from); + copy_page(vto, vfrom); + kunmap_atomic(vfrom); + kunmap_atomic(vto); + /* Make sure this page is cleared on other CPU's too before using it */ + smp_wmb(); +} + +void copy_to_user_page(struct vm_area_struct *vma, + struct page *page, unsigned long vaddr, void *dst, const void *src, + unsigned long len) +{ + memcpy(dst, src, len); +} + +void copy_from_user_page(struct vm_area_struct *vma, + struct page *page, unsigned long vaddr, void *dst, const void *src, + unsigned long len) +{ + memcpy(dst, src, len); +} +EXPORT_SYMBOL_GPL(copy_from_user_page); + +void __init paging_init(void) +{ + unsigned long max_zone_pfns[MAX_NR_ZONES]; + +#ifdef CONFIG_ZONE_DMA + max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN; +#endif +#ifdef CONFIG_ZONE_DMA32 + max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; +#endif + max_zone_pfns[ZONE_NORMAL] = max_low_pfn; + + free_area_init(max_zone_pfns); +} + +void __init mem_init(void) +{ + max_mapnr = max_low_pfn; + high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); + + memblock_free_all(); + setup_zero_pages(); /* Setup zeroed pages. */ + mem_init_print_info(NULL); +} + +void __ref free_initmem(void) +{ + free_initmem_default(POISON_FREE_INITMEM); +} + +#ifdef CONFIG_MEMORY_HOTPLUG +int arch_add_memory(int nid, u64 start, u64 size, struct mhp_params *params) +{ + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; + int ret; + + ret = __add_pages(nid, start_pfn, nr_pages, params); + + if (ret) + printk("%s: Problem encountered in __add_pages() as ret=%d\n", + __func__, ret); + + return ret; +} + +#ifdef CONFIG_MEMORY_HOTREMOVE +void arch_remove_memory(int nid, u64 start, + u64 size, struct vmem_altmap *altmap) +{ + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; + struct page *page = pfn_to_page(start_pfn); + + /* With altmap the first mapped page is offset from @start */ + if (altmap) + page += vmem_altmap_offset(altmap); + __remove_pages(start_pfn, nr_pages, altmap); +} +#endif +#endif + +/* + * Align swapper_pg_dir in to 64K, allows its address to be loaded + * with a single LUI instruction in the TLB handlers. If we used + * __aligned(64K), its size would get rounded up to the alignment + * size, and waste space. So we place it in its own section and align + * it in the linker script. + */ +pgd_t swapper_pg_dir[_PTRS_PER_PGD] __section(".bss..swapper_pg_dir"); + +pgd_t invalid_pg_dir[_PTRS_PER_PGD] __page_aligned_bss; +#ifndef __PAGETABLE_PUD_FOLDED +pud_t invalid_pud_table[PTRS_PER_PUD] __page_aligned_bss; +#endif +#ifndef __PAGETABLE_PMD_FOLDED +pmd_t invalid_pmd_table[PTRS_PER_PMD] __page_aligned_bss; +EXPORT_SYMBOL_GPL(invalid_pmd_table); +#endif +pte_t invalid_pte_table[PTRS_PER_PTE] __page_aligned_bss; +EXPORT_SYMBOL(invalid_pte_table); diff --git a/arch/loongarch/mm/ioremap.c b/arch/loongarch/mm/ioremap.c new file mode 100644 index 000000000000..a2cb118238ac --- /dev/null +++ b/arch/loongarch/mm/ioremap.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Loongson Technology Co., Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include + +void __init __iomem *early_ioremap(u64 phys_addr, unsigned long size) +{ + return ((void __iomem *)TO_CAC(phys_addr)); +} + +void __init early_iounmap(void __iomem *addr, unsigned long size) +{ + +} + +void *early_memremap_ro(resource_size_t phys_addr, unsigned long size) +{ + return early_memremap(phys_addr, size); +} + +void *early_memremap_prot(resource_size_t phys_addr, unsigned long size, + unsigned long prot_val) +{ + return early_memremap(phys_addr, size); +} diff --git a/arch/loongarch/mm/maccess.c b/arch/loongarch/mm/maccess.c new file mode 100644 index 000000000000..58173842c6be --- /dev/null +++ b/arch/loongarch/mm/maccess.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include + +bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) +{ + /* highest bit set means kernel space */ + return (unsigned long)unsafe_src >> (BITS_PER_LONG - 1); +} diff --git a/arch/loongarch/mm/mmap.c b/arch/loongarch/mm/mmap.c new file mode 100644 index 000000000000..744f06902daa --- /dev/null +++ b/arch/loongarch/mm/mmap.c @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +unsigned long shm_align_mask = PAGE_SIZE - 1; /* Sane caches */ +EXPORT_SYMBOL(shm_align_mask); + +#define COLOUR_ALIGN(addr, pgoff) \ + ((((addr) + shm_align_mask) & ~shm_align_mask) + \ + (((pgoff) << PAGE_SHIFT) & shm_align_mask)) + +enum mmap_allocation_direction {UP, DOWN}; + +static unsigned long arch_get_unmapped_area_common(struct file *filp, + unsigned long addr0, unsigned long len, unsigned long pgoff, + unsigned long flags, enum mmap_allocation_direction dir) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned long addr = addr0; + int do_color_align; + struct vm_unmapped_area_info info; + + if (unlikely(len > TASK_SIZE)) + return -ENOMEM; + + if (flags & MAP_FIXED) { + /* Even MAP_FIXED mappings must reside within TASK_SIZE */ + if (TASK_SIZE - len < addr) + return -EINVAL; + + /* + * We do not accept a shared mapping if it would violate + * cache aliasing constraints. + */ + if ((flags & MAP_SHARED) && + ((addr - (pgoff << PAGE_SHIFT)) & shm_align_mask)) + return -EINVAL; + return addr; + } + + do_color_align = 0; + if (filp || (flags & MAP_SHARED)) + do_color_align = 1; + + /* requesting a specific address */ + if (addr) { + if (do_color_align) + addr = COLOUR_ALIGN(addr, pgoff); + else + addr = PAGE_ALIGN(addr); + + vma = find_vma(mm, addr); + if (TASK_SIZE - len >= addr && + (!vma || addr + len <= vm_start_gap(vma))) + return addr; + } + + info.length = len; + info.align_mask = do_color_align ? (PAGE_MASK & shm_align_mask) : 0; + info.align_offset = pgoff << PAGE_SHIFT; + + if (dir == DOWN) { + info.flags = VM_UNMAPPED_AREA_TOPDOWN; + info.low_limit = PAGE_SIZE; + info.high_limit = mm->mmap_base; + addr = vm_unmapped_area(&info); + + if (!(addr & ~PAGE_MASK)) + return addr; + + /* + * A failed mmap() very likely causes application failure, + * so fall back to the bottom-up function here. This scenario + * can happen with large stack limits and large mmap() + * allocations. + */ + } + + info.flags = 0; + info.low_limit = mm->mmap_base; + info.high_limit = TASK_SIZE; + return vm_unmapped_area(&info); +} + +unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr0, + unsigned long len, unsigned long pgoff, unsigned long flags) +{ + return arch_get_unmapped_area_common(filp, + addr0, len, pgoff, flags, UP); +} + +/* + * There is no need to export this but sched.h declares the function as + * extern so making it static here results in an error. + */ +unsigned long arch_get_unmapped_area_topdown(struct file *filp, + unsigned long addr0, unsigned long len, unsigned long pgoff, + unsigned long flags) +{ + return arch_get_unmapped_area_common(filp, + addr0, len, pgoff, flags, DOWN); +} + +int __virt_addr_valid(volatile void *kaddr) +{ + unsigned long vaddr = (unsigned long)kaddr; + + if ((vaddr < PAGE_OFFSET) || (vaddr >= vm_map_base)) + return 0; + + return pfn_valid(PFN_DOWN(virt_to_phys(kaddr))); +} +EXPORT_SYMBOL_GPL(__virt_addr_valid); diff --git a/arch/loongarch/mm/page.S b/arch/loongarch/mm/page.S new file mode 100644 index 000000000000..bf77d2815449 --- /dev/null +++ b/arch/loongarch/mm/page.S @@ -0,0 +1,84 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include +#include + + .align 5 +SYM_FUNC_START(clear_page) + lu12i.w t0, 1 << (PAGE_SHIFT - 12) + add.d t0, t0, a0 +1: + st.d zero, a0, 0 + st.d zero, a0, 8 + st.d zero, a0, 16 + st.d zero, a0, 24 + st.d zero, a0, 32 + st.d zero, a0, 40 + st.d zero, a0, 48 + st.d zero, a0, 56 + addi.d a0, a0, 128 + st.d zero, a0, -64 + st.d zero, a0, -56 + st.d zero, a0, -48 + st.d zero, a0, -40 + st.d zero, a0, -32 + st.d zero, a0, -24 + st.d zero, a0, -16 + st.d zero, a0, -8 + bne t0, a0, 1b + + jirl $r0, ra, 0 +SYM_FUNC_END(clear_page) +EXPORT_SYMBOL(clear_page) + +.align 5 +SYM_FUNC_START(copy_page) + lu12i.w t8, 1 << (PAGE_SHIFT - 12) + add.d t8, t8, a0 +1: + ld.d t0, a1, 0 + ld.d t1, a1, 8 + ld.d t2, a1, 16 + ld.d t3, a1, 24 + ld.d t4, a1, 32 + ld.d t5, a1, 40 + ld.d t6, a1, 48 + ld.d t7, a1, 56 + + st.d t0, a0, 0 + st.d t1, a0, 8 + ld.d t0, a1, 64 + ld.d t1, a1, 72 + st.d t2, a0, 16 + st.d t3, a0, 24 + ld.d t2, a1, 80 + ld.d t3, a1, 88 + st.d t4, a0, 32 + st.d t5, a0, 40 + ld.d t4, a1, 96 + ld.d t5, a1, 104 + st.d t6, a0, 48 + st.d t7, a0, 56 + ld.d t6, a1, 112 + ld.d t7, a1, 120 + addi.d a0, a0, 128 + addi.d a1, a1, 128 + + st.d t0, a0, -64 + st.d t1, a0, -56 + st.d t2, a0, -48 + st.d t3, a0, -40 + st.d t4, a0, -32 + st.d t5, a0, -24 + st.d t6, a0, -16 + st.d t7, a0, -8 + + bne t8, a0, 1b + jirl $r0, ra, 0 +SYM_FUNC_END(copy_page) +EXPORT_SYMBOL(copy_page) diff --git a/arch/loongarch/mm/pgtable-64.c b/arch/loongarch/mm/pgtable-64.c new file mode 100644 index 000000000000..7f3fbe3afc88 --- /dev/null +++ b/arch/loongarch/mm/pgtable-64.c @@ -0,0 +1,117 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include +#include +#include + +void pgd_init(unsigned long page) +{ + unsigned long *p, *end; + unsigned long entry; + +#if !defined(__PAGETABLE_PUD_FOLDED) + entry = (unsigned long)invalid_pud_table; +#elif !defined(__PAGETABLE_PMD_FOLDED) + entry = (unsigned long)invalid_pmd_table; +#else + entry = (unsigned long)invalid_pte_table; +#endif + + p = (unsigned long *) page; + end = p + PTRS_PER_PGD; + + do { + p[0] = entry; + p[1] = entry; + p[2] = entry; + p[3] = entry; + p[4] = entry; + p += 8; + p[-3] = entry; + p[-2] = entry; + p[-1] = entry; + } while (p != end); +} +EXPORT_SYMBOL_GPL(pgd_init); + +#ifndef __PAGETABLE_PMD_FOLDED +void pmd_init(unsigned long addr, unsigned long pagetable) +{ + unsigned long *p, *end; + + p = (unsigned long *) addr; + end = p + PTRS_PER_PMD; + + do { + p[0] = pagetable; + p[1] = pagetable; + p[2] = pagetable; + p[3] = pagetable; + p[4] = pagetable; + p += 8; + p[-3] = pagetable; + p[-2] = pagetable; + p[-1] = pagetable; + } while (p != end); +} +EXPORT_SYMBOL_GPL(pmd_init); +#endif + +#ifndef __PAGETABLE_PUD_FOLDED +void pud_init(unsigned long addr, unsigned long pagetable) +{ + unsigned long *p, *end; + + p = (unsigned long *)addr; + end = p + PTRS_PER_PUD; + + do { + p[0] = pagetable; + p[1] = pagetable; + p[2] = pagetable; + p[3] = pagetable; + p[4] = pagetable; + p += 8; + p[-3] = pagetable; + p[-2] = pagetable; + p[-1] = pagetable; + } while (p != end); +} +#endif + +pmd_t mk_pmd(struct page *page, pgprot_t prot) +{ + pmd_t pmd; + + pmd_val(pmd) = (page_to_pfn(page) << _PFN_SHIFT) | pgprot_val(prot); + + return pmd; +} + +void set_pmd_at(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t pmd) +{ + *pmdp = pmd; + flush_tlb_all(); +} + +void __init pagetable_init(void) +{ + pgd_t *pgd_base; + + /* Initialize the entire pgd. */ + pgd_init((unsigned long)swapper_pg_dir); + pgd_init((unsigned long)invalid_pg_dir); +#ifndef __PAGETABLE_PUD_FOLDED + pud_init((unsigned long)invalid_pud_table, (unsigned long)invalid_pmd_table); +#endif +#ifndef __PAGETABLE_PMD_FOLDED + pmd_init((unsigned long)invalid_pmd_table, (unsigned long)invalid_pte_table); +#endif + pgd_base = swapper_pg_dir; +} diff --git a/arch/loongarch/mm/pgtable.c b/arch/loongarch/mm/pgtable.c new file mode 100644 index 000000000000..648d8d59cea6 --- /dev/null +++ b/arch/loongarch/mm/pgtable.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include + +pgd_t *pgd_alloc(struct mm_struct *mm) +{ + pgd_t *ret, *init; + + ret = (pgd_t *) __get_free_pages(GFP_KERNEL, PGD_ORDER); + if (ret) { + init = pgd_offset(&init_mm, 0UL); + pgd_init((unsigned long)ret); + memcpy(ret + USER_PTRS_PER_PGD, init + USER_PTRS_PER_PGD, + (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); + } + + return ret; +} +EXPORT_SYMBOL_GPL(pgd_alloc); diff --git a/arch/loongarch/mm/tlb.c b/arch/loongarch/mm/tlb.c new file mode 100644 index 000000000000..9994987abe09 --- /dev/null +++ b/arch/loongarch/mm/tlb.c @@ -0,0 +1,285 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +void local_flush_tlb_all(void) +{ + invtlb_all(INVTLB_CURRENT_ALL, 0, 0); +} +EXPORT_SYMBOL(local_flush_tlb_all); + +void local_flush_tlb_user(void) +{ + invtlb_all(INVTLB_CURRENT_GFALSE, 0, 0); +} +EXPORT_SYMBOL(local_flush_tlb_user); + +void local_flush_tlb_kernel(void) +{ + invtlb_all(INVTLB_CURRENT_GTRUE, 0, 0); +} +EXPORT_SYMBOL(local_flush_tlb_kernel); + +/* All entries common to a mm share an asid. To effectively flush + these entries, we just bump the asid. */ +void local_flush_tlb_mm(struct mm_struct *mm) +{ + int cpu; + + preempt_disable(); + + cpu = smp_processor_id(); + + if (asid_valid(mm, cpu)) { + drop_mmu_context(mm, cpu); + } else { + cpumask_clear_cpu(cpu, mm_cpumask(mm)); + } + + preempt_enable(); +} + +void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + struct mm_struct *mm = vma->vm_mm; + int cpu = smp_processor_id(); + + if (asid_valid(mm, cpu)) { + unsigned long size, flags; + + local_irq_save(flags); + start = round_down(start, PAGE_SIZE << 1); + end = round_up(end, PAGE_SIZE << 1); + size = (end - start) >> (PAGE_SHIFT + 1); + if (size <= (current_cpu_data.tlbsizestlbsets ? + current_cpu_data.tlbsize / 8 : + current_cpu_data.tlbsize / 2)) { + + int asid = cpu_asid(cpu, mm); + while (start < end) { + invtlb(INVTLB_ADDR_GFALSE_AND_ASID, asid, start); + start += (PAGE_SIZE << 1); + } + } else { + drop_mmu_context(mm, cpu); + } + local_irq_restore(flags); + } else { + cpumask_clear_cpu(cpu, mm_cpumask(mm)); + } +} + +void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + unsigned long size, flags; + + local_irq_save(flags); + size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; + size = (size + 1) >> 1; + if (size <= (current_cpu_data.tlbsizestlbsets ? + current_cpu_data.tlbsize / 8 : + current_cpu_data.tlbsize / 2)) { + + start &= (PAGE_MASK << 1); + end += ((PAGE_SIZE << 1) - 1); + end &= (PAGE_MASK << 1); + + while (start < end) { + invtlb_addr(INVTLB_ADDR_GTRUE_OR_ASID, 0, start); + start += (PAGE_SIZE << 1); + } + } else { + local_flush_tlb_kernel(); + } + local_irq_restore(flags); +} + +void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) +{ + int cpu = smp_processor_id(); + + if (asid_valid(vma->vm_mm, cpu)) { + int newpid; + + newpid = cpu_asid(cpu, vma->vm_mm); + page &= (PAGE_MASK << 1); + invtlb(INVTLB_ADDR_GFALSE_AND_ASID, newpid, page); + } else { + cpumask_clear_cpu(cpu, mm_cpumask(vma->vm_mm)); + } +} + +/* + * This one is only used for pages with the global bit set so we don't care + * much about the ASID. + */ +void local_flush_tlb_one(unsigned long page) +{ + page &= (PAGE_MASK << 1); + invtlb_addr(INVTLB_ADDR_GTRUE_OR_ASID, 0, page); +} + +static void __update_hugetlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) +{ +#ifdef CONFIG_HUGETLB_PAGE + int idx; + unsigned long lo; + unsigned long flags; + + local_irq_save(flags); + + address &= (PAGE_MASK << 1); + write_csr_entryhi(address); + tlb_probe(); + idx = read_csr_tlbidx(); + write_csr_pagesize(PS_HUGE_SIZE); + lo = pmd_to_entrylo(pte_val(*ptep)); + write_csr_entrylo0(lo); + write_csr_entrylo1(lo + (HPAGE_SIZE >> 1)); + + if (idx < 0) + tlb_write_random(); + else + tlb_write_indexed(); + write_csr_pagesize(PS_DEFAULT_SIZE); + + local_irq_restore(flags); +#endif +} + +void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) +{ + int idx; + unsigned long flags; + + /* + * Handle debugger faulting in for debugee. + */ + if (current->active_mm != vma->vm_mm) + return; + + if (pte_val(*ptep) & _PAGE_HUGE) { + __update_hugetlb(vma, address, ptep); + return; + } + + local_irq_save(flags); + + if ((unsigned long)ptep & sizeof(pte_t)) + ptep--; + + address &= (PAGE_MASK << 1); + write_csr_entryhi(address); + tlb_probe(); + idx = read_csr_tlbidx(); + write_csr_pagesize(PS_DEFAULT_SIZE); + write_csr_entrylo0(pte_val(*ptep++)); + write_csr_entrylo1(pte_val(*ptep)); + if (idx < 0) + tlb_write_random(); + else + tlb_write_indexed(); + + local_irq_restore(flags); +} + +static void setup_pw(void) +{ + unsigned long pgd_i, pgd_w; +#ifndef __PAGETABLE_PMD_FOLDED + unsigned long pmd_i, pmd_w; +#endif + unsigned long pte_i, pte_w; + + pgd_i = PGDIR_SHIFT; /* 1st level PGD */ +#ifndef __PAGETABLE_PMD_FOLDED + pgd_w = PGDIR_SHIFT - PMD_SHIFT + PGD_ORDER; + pmd_i = PMD_SHIFT; /* 2nd level PMD */ + pmd_w = PMD_SHIFT - PAGE_SHIFT; +#else + pgd_w = PGDIR_SHIFT - PAGE_SHIFT + PGD_ORDER; +#endif + pte_i = PAGE_SHIFT; /* 3rd level PTE */ + pte_w = PAGE_SHIFT - 3; + +#ifndef __PAGETABLE_PMD_FOLDED + csr_writeq(pte_i | pte_w << 5 | pmd_i << 10 | pmd_w << 15, LOONGARCH_CSR_PWCTL0); + csr_writeq(pgd_i | pgd_w << 6, LOONGARCH_CSR_PWCTL1); +#else + csr_writeq(pte_i | pte_w << 5, LOONGARCH_CSR_PWCTL0); + csr_writeq(pgd_i | pgd_w << 6, LOONGARCH_CSR_PWCTL1); +#endif + csr_writeq((long)swapper_pg_dir, LOONGARCH_CSR_PGDH); + csr_writeq((long)invalid_pg_dir, LOONGARCH_CSR_PGDL); + csr_writeq((long)smp_processor_id(), LOONGARCH_CSR_TMID); +} + +static void output_pgtable_bits_defines(void) +{ +#define pr_define(fmt, ...) \ + pr_debug("#define " fmt, ##__VA_ARGS__) + + pr_debug("#include \n"); + pr_debug("#include \n"); + pr_debug("\n"); + + pr_define("_PAGE_VALID_SHIFT %d\n", _PAGE_VALID_SHIFT); + pr_define("_PAGE_DIRTY_SHIFT %d\n", _PAGE_DIRTY_SHIFT); + pr_define("_PAGE_HUGE_SHIFT %d\n", _PAGE_HUGE_SHIFT); + pr_define("_PAGE_GLOBAL_SHIFT %d\n", _PAGE_GLOBAL_SHIFT); + pr_define("_PAGE_PRESENT_SHIFT %d\n", _PAGE_PRESENT_SHIFT); + pr_define("_PAGE_WRITE_SHIFT %d\n", _PAGE_WRITE_SHIFT); + pr_define("_PAGE_NO_READ_SHIFT %d\n", _PAGE_NO_READ_SHIFT); + pr_define("_PAGE_NO_EXEC_SHIFT %d\n", _PAGE_NO_EXEC_SHIFT); + pr_define("_PFN_SHIFT %d\n", _PFN_SHIFT); + pr_debug("\n"); +} + +void setup_tlb_handler(void) +{ + static int run_once; + + setup_pw(); + output_pgtable_bits_defines(); + + /* The tlb handlers are generated only once */ + if (!run_once) { + memcpy((void *)tlbrentry, handle_tlb_refill, 0x80); + local_flush_icache_range(tlbrentry, tlbrentry + 0x80); + set_handler(EXCCODE_TLBI * VECSIZE, handle_tlb_load, VECSIZE); + set_handler(EXCCODE_TLBL * VECSIZE, handle_tlb_load, VECSIZE); + set_handler(EXCCODE_TLBS * VECSIZE, handle_tlb_store, VECSIZE); + set_handler(EXCCODE_TLBM * VECSIZE, handle_tlb_modify, VECSIZE); + set_handler(EXCCODE_TLBNR * VECSIZE, handle_tlb_protect, VECSIZE); + set_handler(EXCCODE_TLBNX * VECSIZE, handle_tlb_protect, VECSIZE); + set_handler(EXCCODE_TLBPE * VECSIZE, handle_tlb_protect, VECSIZE); + run_once++; + } +} + +void tlb_init(void) +{ + write_csr_pagesize(PS_DEFAULT_SIZE); + write_csr_stlbpgsize(PS_DEFAULT_SIZE); + write_csr_tlbrefill_pagesize(PS_DEFAULT_SIZE); + + if (read_csr_pagesize() != PS_DEFAULT_SIZE) + panic("MMU doesn't support PAGE_SIZE=0x%lx", PAGE_SIZE); + + setup_tlb_handler(); + local_flush_tlb_all(); +} diff --git a/arch/loongarch/mm/tlbex.S b/arch/loongarch/mm/tlbex.S new file mode 100644 index 000000000000..0336ba72c2d1 --- /dev/null +++ b/arch/loongarch/mm/tlbex.S @@ -0,0 +1,478 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_64BIT +#include +#endif + + .macro tlb_do_page_fault, write + SYM_FUNC_START(tlb_do_page_fault_\write) + SAVE_ALL + csrrd a2, LOONGARCH_CSR_BADV + move a0, sp + REG_S a2, sp, PT_BVADDR + li.w a1, \write + la.abs t0, do_page_fault + jirl ra, t0, 0 + RESTORE_ALL_AND_RET + SYM_FUNC_END(tlb_do_page_fault_\write) + .endm + + tlb_do_page_fault 0 + tlb_do_page_fault 1 + +SYM_FUNC_START(handle_tlb_protect) + BACKUP_T0T1 + SAVE_ALL + move a0, sp + move a1, zero + csrrd a2, LOONGARCH_CSR_BADV + REG_S a2, sp, PT_BVADDR + la.abs t0, do_page_fault + jirl ra, t0, 0 + RESTORE_ALL_AND_RET +SYM_FUNC_END(handle_tlb_protect) + +SYM_FUNC_START(handle_tlb_load) + csrwr t0, EXCEPTION_KS0 + csrwr t1, EXCEPTION_KS1 + csrwr ra, EXCEPTION_KS2 + + /* + * The vmalloc handling is not in the hotpath. + */ + csrrd t0, LOONGARCH_CSR_BADV + blt t0, $r0, vmalloc_load + csrrd t1, LOONGARCH_CSR_PGDL + +vmalloc_done_load: + /* Get PGD offset in bytes */ + srli.d t0, t0, PGDIR_SHIFT + andi t0, t0, (PTRS_PER_PGD - 1) + slli.d t0, t0, 3 + add.d t1, t1, t0 +#if CONFIG_PGTABLE_LEVELS > 3 + csrrd t0, LOONGARCH_CSR_BADV + ld.d t1, t1, 0 + srli.d t0, t0, PUD_SHIFT + andi t0, t0, (PTRS_PER_PUD - 1) + slli.d t0, t0, 3 + add.d t1, t1, t0 +#endif +#if CONFIG_PGTABLE_LEVELS > 2 + csrrd t0, LOONGARCH_CSR_BADV + ld.d t1, t1, 0 + srli.d t0, t0, PMD_SHIFT + andi t0, t0, (PTRS_PER_PMD - 1) + slli.d t0, t0, 3 + add.d t1, t1, t0 +#endif + ld.d ra, t1, 0 + + /* + * For huge tlb entries, pmde doesn't contain an address but + * instead contains the tlb pte. Check the PAGE_HUGE bit and + * see if we need to jump to huge tlb processing. + */ + andi t0, ra, _PAGE_HUGE + bne t0, $r0, tlb_huge_update_load + + csrrd t0, LOONGARCH_CSR_BADV + srli.d t0, t0, (PAGE_SHIFT + PTE_ORDER) + andi t0, t0, (PTRS_PER_PTE - 1) + slli.d t0, t0, _PTE_T_LOG2 + add.d t1, ra, t0 + + ld.d t0, t1, 0 + tlbsrch + + srli.d ra, t0, _PAGE_PRESENT_SHIFT + andi ra, ra, 1 + beq ra, $r0, nopage_tlb_load + + ori t0, t0, _PAGE_VALID + st.d t0, t1, 0 + ori t1, t1, 8 + xori t1, t1, 8 + ld.d t0, t1, 0 + ld.d t1, t1, 8 + csrwr t0, LOONGARCH_CSR_TLBELO0 + csrwr t1, LOONGARCH_CSR_TLBELO1 + tlbwr +leave_load: + csrrd t0, EXCEPTION_KS0 + csrrd t1, EXCEPTION_KS1 + csrrd ra, EXCEPTION_KS2 + ertn +#ifdef CONFIG_64BIT +vmalloc_load: + la.abs t1, swapper_pg_dir + b vmalloc_done_load +#endif + + /* + * This is the entry point when build_tlbchange_handler_head + * spots a huge page. + */ +tlb_huge_update_load: + ld.d t0, t1, 0 + srli.d ra, t0, _PAGE_PRESENT_SHIFT + andi ra, ra, 1 + beq ra, $r0, nopage_tlb_load + tlbsrch + + ori t0, t0, _PAGE_VALID + st.d t0, t1, 0 + addu16i.d t1, $r0, -(CSR_TLBIDX_EHINV >> 16) + addi.d ra, t1, 0 + csrxchg ra, t1, LOONGARCH_CSR_TLBIDX + tlbwr + + csrxchg $r0, t1, LOONGARCH_CSR_TLBIDX + + /* + * A huge PTE describes an area the size of the + * configured huge page size. This is twice the + * of the large TLB entry size we intend to use. + * A TLB entry half the size of the configured + * huge page size is configured into entrylo0 + * and entrylo1 to cover the contiguous huge PTE + * address space. + */ + /* Huge page: Move Global bit */ + xori t0, t0, _PAGE_HUGE + lu12i.w t1, _PAGE_HGLOBAL >> 12 + and t1, t0, t1 + srli.d t1, t1, (_PAGE_HGLOBAL_SHIFT - _PAGE_GLOBAL_SHIFT) + or t0, t0, t1 + + addi.d ra, t0, 0 + csrwr t0, LOONGARCH_CSR_TLBELO0 + addi.d t0, ra, 0 + + /* Convert to entrylo1 */ + addi.d t1, $r0, 1 + slli.d t1, t1, (HPAGE_SHIFT - 1) + add.d t0, t0, t1 + csrwr t0, LOONGARCH_CSR_TLBELO1 + + /* Set huge page tlb entry size */ + addu16i.d t0, $r0, (PS_MASK >> 16) + addu16i.d t1, $r0, (PS_HUGE_SIZE << (PS_SHIFT - 16)) + csrxchg t1, t0, LOONGARCH_CSR_TLBIDX + + tlbfill + + addu16i.d t0, $r0, (PS_MASK >> 16) + addu16i.d t1, $r0, (PS_DEFAULT_SIZE << (PS_SHIFT - 16)) + csrxchg t1, t0, LOONGARCH_CSR_TLBIDX + +nopage_tlb_load: + csrrd ra, EXCEPTION_KS2 + la.abs t0, tlb_do_page_fault_0 + jirl $r0, t0, 0 +SYM_FUNC_END(handle_tlb_load) + +SYM_FUNC_START(handle_tlb_store) + csrwr t0, EXCEPTION_KS0 + csrwr t1, EXCEPTION_KS1 + csrwr ra, EXCEPTION_KS2 + + /* + * The vmalloc handling is not in the hotpath. + */ + csrrd t0, LOONGARCH_CSR_BADV + blt t0, $r0, vmalloc_store + csrrd t1, LOONGARCH_CSR_PGDL + +vmalloc_done_store: + /* Get PGD offset in bytes */ + srli.d t0, t0, PGDIR_SHIFT + andi t0, t0, (PTRS_PER_PGD - 1) + slli.d t0, t0, 3 + add.d t1, t1, t0 + +#if CONFIG_PGTABLE_LEVELS > 3 + csrrd t0, LOONGARCH_CSR_BADV + ld.d t1, t1, 0 + srli.d t0, t0, PUD_SHIFT + andi t0, t0, (PTRS_PER_PUD - 1) + slli.d t0, t0, 3 + add.d t1, t1, t0 +#endif +#if CONFIG_PGTABLE_LEVELS > 2 + csrrd t0, LOONGARCH_CSR_BADV + ld.d t1, t1, 0 + srli.d t0, t0, PMD_SHIFT + andi t0, t0, (PTRS_PER_PMD - 1) + slli.d t0, t0, 3 + add.d t1, t1, t0 +#endif + ld.d ra, t1, 0 + + /* + * For huge tlb entries, pmde doesn't contain an address but + * instead contains the tlb pte. Check the PAGE_HUGE bit and + * see if we need to jump to huge tlb processing. + */ + andi t0, ra, _PAGE_HUGE + bne t0, $r0, tlb_huge_update_store + + csrrd t0, LOONGARCH_CSR_BADV + srli.d t0, t0, (PAGE_SHIFT + PTE_ORDER) + andi t0, t0, (PTRS_PER_PTE - 1) + slli.d t0, t0, _PTE_T_LOG2 + add.d t1, ra, t0 + + ld.d t0, t1, 0 + tlbsrch + + srli.d ra, t0, _PAGE_PRESENT_SHIFT + andi ra, ra, ((_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT) + xori ra, ra, ((_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT) + bne ra, $r0, nopage_tlb_store + + ori t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) + st.d t0, t1, 0 + + ori t1, t1, 8 + xori t1, t1, 8 + ld.d t0, t1, 0 + ld.d t1, t1, 8 + csrwr t0, LOONGARCH_CSR_TLBELO0 + csrwr t1, LOONGARCH_CSR_TLBELO1 + tlbwr +leave_store: + csrrd t0, EXCEPTION_KS0 + csrrd t1, EXCEPTION_KS1 + csrrd ra, EXCEPTION_KS2 + ertn +#ifdef CONFIG_64BIT +vmalloc_store: + la.abs t1, swapper_pg_dir + b vmalloc_done_store +#endif + + /* + * This is the entry point when build_tlbchange_handler_head + * spots a huge page. + */ +tlb_huge_update_store: + ld.d t0, t1, 0 + srli.d ra, t0, _PAGE_PRESENT_SHIFT + andi ra, ra, ((_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT) + xori ra, ra, ((_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT) + bne ra, $r0, nopage_tlb_store + + tlbsrch + ori t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) + + st.d t0, t1, 0 + addu16i.d t1, $r0, -(CSR_TLBIDX_EHINV >> 16) + addi.d ra, t1, 0 + csrxchg ra, t1, LOONGARCH_CSR_TLBIDX + tlbwr + + csrxchg $r0, t1, LOONGARCH_CSR_TLBIDX + /* + * A huge PTE describes an area the size of the + * configured huge page size. This is twice the + * of the large TLB entry size we intend to use. + * A TLB entry half the size of the configured + * huge page size is configured into entrylo0 + * and entrylo1 to cover the contiguous huge PTE + * address space. + */ + /* Huge page: Move Global bit */ + xori t0, t0, _PAGE_HUGE + lu12i.w t1, _PAGE_HGLOBAL >> 12 + and t1, t0, t1 + srli.d t1, t1, (_PAGE_HGLOBAL_SHIFT - _PAGE_GLOBAL_SHIFT) + or t0, t0, t1 + + addi.d ra, t0, 0 + csrwr t0, LOONGARCH_CSR_TLBELO0 + addi.d t0, ra, 0 + + /* Convert to entrylo1 */ + addi.d t1, $r0, 1 + slli.d t1, t1, (HPAGE_SHIFT - 1) + add.d t0, t0, t1 + csrwr t0, LOONGARCH_CSR_TLBELO1 + + /* Set huge page tlb entry size */ + addu16i.d t0, $r0, (PS_MASK >> 16) + addu16i.d t1, $r0, (PS_HUGE_SIZE << (PS_SHIFT - 16)) + csrxchg t1, t0, LOONGARCH_CSR_TLBIDX + + tlbfill + + /* Reset default page size */ + addu16i.d t0, $r0, (PS_MASK >> 16) + addu16i.d t1, $r0, (PS_DEFAULT_SIZE << (PS_SHIFT - 16)) + csrxchg t1, t0, LOONGARCH_CSR_TLBIDX + +nopage_tlb_store: + csrrd ra, EXCEPTION_KS2 + la.abs t0, tlb_do_page_fault_1 + jirl $r0, t0, 0 +SYM_FUNC_END(handle_tlb_store) + +SYM_FUNC_START(handle_tlb_modify) + csrwr t0, EXCEPTION_KS0 + csrwr t1, EXCEPTION_KS1 + csrwr ra, EXCEPTION_KS2 + + /* + * The vmalloc handling is not in the hotpath. + */ + csrrd t0, LOONGARCH_CSR_BADV + blt t0, $r0, vmalloc_modify + csrrd t1, LOONGARCH_CSR_PGDL + +vmalloc_done_modify: + /* Get PGD offset in bytes */ + srli.d t0, t0, PGDIR_SHIFT + andi t0, t0, (PTRS_PER_PGD - 1) + slli.d t0, t0, 3 + add.d t1, t1, t0 +#if CONFIG_PGTABLE_LEVELS > 3 + csrrd t0, LOONGARCH_CSR_BADV + ld.d t1, t1, 0 + srli.d t0, t0, PUD_SHIFT + andi t0, t0, (PTRS_PER_PUD - 1) + slli.d t0, t0, 3 + add.d t1, t1, t0 +#endif +#if CONFIG_PGTABLE_LEVELS > 2 + csrrd t0, LOONGARCH_CSR_BADV + ld.d t1, t1, 0 + srli.d t0, t0, PMD_SHIFT + andi t0, t0, (PTRS_PER_PMD - 1) + slli.d t0, t0, 3 + add.d t1, t1, t0 +#endif + ld.d ra, t1, 0 + + /* + * For huge tlb entries, pmde doesn't contain an address but + * instead contains the tlb pte. Check the PAGE_HUGE bit and + * see if we need to jump to huge tlb processing. + */ + andi t0, ra, _PAGE_HUGE + bne t0, $r0, tlb_huge_update_modify + + csrrd t0, LOONGARCH_CSR_BADV + srli.d t0, t0, (PAGE_SHIFT + PTE_ORDER) + andi t0, t0, (PTRS_PER_PTE - 1) + slli.d t0, t0, _PTE_T_LOG2 + add.d t1, ra, t0 + + ld.d t0, t1, 0 + tlbsrch + + srli.d ra, t0, _PAGE_WRITE_SHIFT + andi ra, ra, 1 + beq ra, $r0, nopage_tlb_modify + + ori t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) + st.d t0, t1, 0 + ori t1, t1, 8 + xori t1, t1, 8 + ld.d t0, t1, 0 + ld.d t1, t1, 8 + csrwr t0, LOONGARCH_CSR_TLBELO0 + csrwr t1, LOONGARCH_CSR_TLBELO1 + tlbwr +leave_modify: + csrrd t0, EXCEPTION_KS0 + csrrd t1, EXCEPTION_KS1 + csrrd ra, EXCEPTION_KS2 + ertn +#ifdef CONFIG_64BIT +vmalloc_modify: + la.abs t1, swapper_pg_dir + b vmalloc_done_modify +#endif + + /* + * This is the entry point when + * build_tlbchange_handler_head spots a huge page. + */ +tlb_huge_update_modify: + ld.d t0, t1, 0 + + srli.d ra, t0, _PAGE_WRITE_SHIFT + andi ra, ra, 1 + beq ra, $r0, nopage_tlb_modify + + tlbsrch + ori t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) + + st.d t0, t1, 0 + /* + * A huge PTE describes an area the size of the + * configured huge page size. This is twice the + * of the large TLB entry size we intend to use. + * A TLB entry half the size of the configured + * huge page size is configured into entrylo0 + * and entrylo1 to cover the contiguous huge PTE + * address space. + */ + /* Huge page: Move Global bit */ + xori t0, t0, _PAGE_HUGE + lu12i.w t1, _PAGE_HGLOBAL >> 12 + and t1, t0, t1 + srli.d t1, t1, (_PAGE_HGLOBAL_SHIFT - _PAGE_GLOBAL_SHIFT) + or t0, t0, t1 + + addi.d ra, t0, 0 + csrwr t0, LOONGARCH_CSR_TLBELO0 + addi.d t0, ra, 0 + + /* Convert to entrylo1 */ + addi.d t1, $r0, 1 + slli.d t1, t1, (HPAGE_SHIFT - 1) + add.d t0, t0, t1 + csrwr t0, LOONGARCH_CSR_TLBELO1 + + /* Set huge page tlb entry size */ + addu16i.d t0, $r0, (PS_MASK >> 16) + addu16i.d t1, $r0, (PS_HUGE_SIZE << (PS_SHIFT - 16)) + csrxchg t1, t0, LOONGARCH_CSR_TLBIDX + + tlbwr + + /* Reset default page size */ + addu16i.d t0, $r0, (PS_MASK >> 16) + addu16i.d t1, $r0, (PS_DEFAULT_SIZE << (PS_SHIFT - 16)) + csrxchg t1, t0, LOONGARCH_CSR_TLBIDX + +nopage_tlb_modify: + csrrd ra, EXCEPTION_KS2 + la.abs t0, tlb_do_page_fault_1 + jirl $r0, t0, 0 +SYM_FUNC_END(handle_tlb_modify) + +SYM_FUNC_START(handle_tlb_refill) + csrwr t0, LOONGARCH_CSR_TLBRSAVE + csrrd t0, LOONGARCH_CSR_PGD + lddir t0, t0, 3 +#if CONFIG_PGTABLE_LEVELS > 2 + lddir t0, t0, 1 +#endif + ldpte t0, 0 + ldpte t0, 1 + tlbfill + csrrd t0, LOONGARCH_CSR_TLBRSAVE + ertn +SYM_FUNC_END(handle_tlb_refill) diff --git a/arch/loongarch/pci/Makefile b/arch/loongarch/pci/Makefile new file mode 100644 index 000000000000..8101ef3df71c --- /dev/null +++ b/arch/loongarch/pci/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the PCI specific kernel interface routines under Linux. +# + +obj-y += pci.o +obj-$(CONFIG_ACPI) += acpi.o diff --git a/arch/loongarch/pci/acpi.c b/arch/loongarch/pci/acpi.c new file mode 100644 index 000000000000..28d82f2fe031 --- /dev/null +++ b/arch/loongarch/pci/acpi.c @@ -0,0 +1,319 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + * + */ +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +struct pci_root_info { + struct acpi_pci_root_info common; + struct pci_config_window *cfg; +}; + +void pcibios_add_bus(struct pci_bus *bus) +{ + acpi_pci_add_bus(bus); +} + +int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge) +{ + struct pci_config_window *cfg = bridge->bus->sysdata; + struct acpi_device *adev = to_acpi_device(cfg->parent); + + ACPI_COMPANION_SET(&bridge->dev, adev); + + return 0; +} + +int acpi_pci_bus_find_domain_nr(struct pci_bus *bus) +{ + struct pci_config_window *cfg = bus->sysdata; + struct acpi_device *adev = to_acpi_device(cfg->parent); + struct acpi_pci_root *root = acpi_driver_data(adev); + + return root->segment; +} + +static void acpi_release_root_info(struct acpi_pci_root_info *ci) +{ + struct pci_root_info *info; + + info = container_of(ci, struct pci_root_info, common); + pci_ecam_free(info->cfg); + kfree(ci->ops); + kfree(info); +} + +static void arch_pci_root_validate_resources(struct device *dev, + struct list_head *resources, + unsigned long type) +{ + LIST_HEAD(list); + struct resource *res1, *res2, *root = NULL; + struct resource_entry *tmp, *entry, *entry2; + + BUG_ON((type & (IORESOURCE_MEM | IORESOURCE_IO)) == 0); + root = (type & IORESOURCE_MEM) ? &iomem_resource : &ioport_resource; + + list_splice_init(resources, &list); + resource_list_for_each_entry_safe(entry, tmp, &list) { + bool free = false; + resource_size_t end; + + res1 = entry->res; + if (!(res1->flags & type)) + goto next; + + /* Exclude non-addressable range or non-addressable portion */ + end = min(res1->end, root->end); + if (end <= res1->start) { + dev_info(dev, "host bridge window %pR (ignored, not CPU addressable)\n", + res1); + free = true; + goto next; + } else if (res1->end != end) { + dev_info(dev, "host bridge window %pR ([%#llx-%#llx] ignored, not CPU addressable)\n", + res1, (unsigned long long)end + 1, + (unsigned long long)res1->end); + res1->end = end; + } + + resource_list_for_each_entry(entry2, resources) { + res2 = entry2->res; + if (!(res2->flags & type)) + continue; + + /* + * I don't like throwing away windows because then + * our resources no longer match the ACPI _CRS, but + * the kernel resource tree doesn't allow overlaps. + */ + if (resource_overlaps(res1, res2)) { + res2->start = min(res1->start, res2->start); + res2->end = max(res1->end, res2->end); + dev_info(dev, "host bridge window expanded to %pR; %pR ignored\n", + res2, res1); + free = true; + goto next; + } + } + +next: + resource_list_del(entry); + if (free) + resource_list_free_entry(entry); + else + resource_list_add_tail(entry, resources); + } +} +static void arch_pci_root_remap_iospace(struct fwnode_handle *fwnode, + struct resource_entry *entry) +{ + struct resource *res = entry->res; + resource_size_t cpu_addr = res->start; + resource_size_t pci_addr = cpu_addr - entry->offset; + resource_size_t length = resource_size(res); + unsigned long port; + if (pci_register_io_range(fwnode, cpu_addr, length)) { + res->start += ISA_IOSIZE; + cpu_addr = res->start; + pci_addr = cpu_addr - entry->offset; + length = resource_size(res); + if (pci_register_io_range(fwnode, cpu_addr, length)) + goto err; + } + + port = pci_address_to_pio(cpu_addr); + if (port == (unsigned long)-1) + goto err; + + res->start = port; + res->end = port + length - 1; + entry->offset = port - pci_addr; + + if (pci_remap_iospace(res, cpu_addr) < 0) + goto err; + + pr_info("Remapped I/O %pa to %pR\n", &cpu_addr, res); + return; +err: + res->flags |= IORESOURCE_DISABLED; +} + +static int arch_pci_probe_root_resources(struct acpi_pci_root_info *info) +{ + int ret; + struct list_head *list = &info->resources; + struct acpi_device *device = info->bridge; + struct resource_entry *entry, *tmp; + unsigned long flags; + struct resource *res; + + flags = IORESOURCE_IO | IORESOURCE_MEM | IORESOURCE_MEM_8AND16BIT; + ret = acpi_dev_get_resources(device, list, + acpi_dev_filter_resource_type_cb, + (void *)flags); + if (ret < 0) + dev_warn(&device->dev, + "failed to parse _CRS method, error code %d\n", ret); + else if (ret == 0) + dev_dbg(&device->dev, + "no IO and memory resources present in _CRS\n"); + else { + resource_list_for_each_entry_safe(entry, tmp, list) { + if (entry->res->flags & IORESOURCE_IO) { + res = entry->res; + res->start = PFN_ALIGN(res->start); + res->end += 1; + res->end = PFN_ALIGN(res->end); + res->end -= 1; + if (!entry->offset) { + entry->offset = LOONGSON_LIO_BASE; + res->start |= LOONGSON_LIO_BASE; + res->end |= LOONGSON_LIO_BASE; + } + arch_pci_root_remap_iospace(&device->fwnode, + entry); + } + if (entry->res->flags & IORESOURCE_DISABLED) + resource_list_destroy_entry(entry); + else + entry->res->name = info->name; + } + arch_pci_root_validate_resources(&device->dev, list, + IORESOURCE_MEM); + arch_pci_root_validate_resources(&device->dev, list, + IORESOURCE_IO); + } + + return ret; +} + +static int acpi_prepare_root_resources(struct acpi_pci_root_info *ci) +{ + struct acpi_device *device = ci->bridge; + struct resource_entry *entry, *tmp; + int status; + + status = arch_pci_probe_root_resources(ci); + if (status > 0) { + resource_list_for_each_entry_safe(entry, tmp, &ci->resources) { + if (entry->res->flags & IORESOURCE_MEM) { + entry->offset = ci->root->mcfg_addr & GENMASK_ULL(63, 40); + entry->res->start |= entry->offset; + entry->res->end |= entry->offset; + } + } + return status; + } + + resource_list_for_each_entry_safe(entry, tmp, &ci->resources) { + dev_printk(KERN_DEBUG, &device->dev, + "host bridge window %pR (ignored)\n", entry->res); + resource_list_destroy_entry(entry); + } + + return 0; +} + +/* + * Lookup the bus range for the domain in MCFG, and set up config space + * mapping. + */ +static struct pci_config_window * +pci_acpi_setup_ecam_mapping(struct acpi_pci_root *root) +{ + int ret, bus_shift; + u16 seg = root->segment; + struct device *dev = &root->device->dev; + struct resource cfgres; + struct resource *bus_res = &root->secondary; + struct pci_config_window *cfg; + const struct pci_ecam_ops *ecam_ops; + + ret = pci_mcfg_lookup(root, &cfgres, &ecam_ops); + if (ret < 0) { + dev_err(dev, "%04x:%pR ECAM region not found, use default value\n", seg, bus_res); + ecam_ops = &loongson_pci_ecam_ops; + root->mcfg_addr = mcfg_addr_init(0); + } + + bus_shift = ecam_ops->bus_shift ? : 20; + + cfgres.start = root->mcfg_addr + (bus_res->start << bus_shift); + cfgres.end = cfgres.start + (resource_size(bus_res) << bus_shift) - 1; + cfgres.flags = IORESOURCE_MEM; + + cfg = pci_ecam_create(dev, &cfgres, bus_res, ecam_ops); + if (IS_ERR(cfg)) { + dev_err(dev, "%04x:%pR error %ld mapping ECAM\n", seg, bus_res, PTR_ERR(cfg)); + return NULL; + } + + return cfg; +} + +struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) +{ + struct pci_bus *bus; + struct pci_root_info *info; + struct acpi_pci_root_ops *root_ops; + int domain = root->segment; + int busnum = root->secondary.start; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) { + printk(KERN_WARNING "pci_bus %04x:%02x: " + "ignored (out of memory)\n", domain, busnum); + return NULL; + } + + root_ops = kzalloc(sizeof(*root_ops), GFP_KERNEL); + if (!root_ops) { + kfree(info); + return NULL; + } + + info->cfg = pci_acpi_setup_ecam_mapping(root); + if (!info->cfg) { + kfree(info); + kfree(root_ops); + return NULL; + } + + root_ops->release_info = acpi_release_root_info; + root_ops->prepare_resources = acpi_prepare_root_resources; + root_ops->pci_ops = (struct pci_ops *)&info->cfg->ops->pci_ops; + + bus = pci_find_bus(domain, busnum); + if (bus) { + memcpy(bus->sysdata, info->cfg, sizeof(struct pci_config_window)); + kfree(info); + } else { + struct pci_bus *child; + + bus = acpi_pci_root_create(root, root_ops, + &info->common, info->cfg); + if (!bus) { + kfree(info); + kfree(root_ops); + return NULL; + } + + pci_bus_size_bridges(bus); + pci_bus_assign_resources(bus); + list_for_each_entry(child, &bus->children, node) + pcie_bus_configure_settings(child); + } + + return bus; +} diff --git a/arch/loongarch/pci/pci.c b/arch/loongarch/pci/pci.c new file mode 100644 index 000000000000..9cf6b184829b --- /dev/null +++ b/arch/loongarch/pci/pci.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#define PCI_DEVICE_ID_LOONGSON_HOSTBRIDGE 0x7a00 + +int raw_pci_read(unsigned int domain, unsigned int bus, unsigned int devfn, int reg, int len, u32 *val) +{ + struct pci_bus *bus_tmp = pci_find_bus(domain, bus); + + if (bus_tmp) + return bus_tmp->ops->read(bus_tmp, devfn, reg, len, val); + return -EINVAL; +} + +int raw_pci_write(unsigned int domain, unsigned int bus, unsigned int devfn, + int reg, int len, u32 val) +{ + struct pci_bus *bus_tmp = pci_find_bus(domain, bus); + + if (bus_tmp) + return bus_tmp->ops->write(bus_tmp, devfn, reg, len, val); + return -EINVAL; +} + +phys_addr_t mcfg_addr_init(int node) +{ + return (((u64)node << 44) | MCFG_EXT_PCICFG_BASE); +} + +static int __init pcibios_init(void) +{ + unsigned int lsize; + + /* + * Set PCI cacheline size to that of the highest level in the + * cache hierarchy. + */ + lsize = cpu_dcache_line_size(); + lsize = cpu_vcache_line_size() ? : lsize; + lsize = cpu_scache_line_size() ? : lsize; + + BUG_ON(!lsize); + + pci_dfl_cache_line_size = lsize >> 2; + + pr_debug("PCI: pci_cache_line_size set to %d bytes\n", lsize); + + return 0; +} + +subsys_initcall(pcibios_init); + +int pcibios_add_device(struct pci_dev *dev) +{ + int id = pci_domain_nr(dev->bus); + + dev_set_msi_domain(&dev->dev, pch_msi_domain[id]); + + return 0; +} + +int pcibios_alloc_irq(struct pci_dev *dev) +{ + if (acpi_disabled) + return 0; + if (pci_dev_msi_enabled(dev)) + return 0; + return acpi_pci_irq_enable(dev); +} + +static void pci_fixup_vgadev(struct pci_dev *pdev) +{ + struct pci_dev *devp = NULL; + + while ((devp = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, devp))) { + if (devp->vendor != PCI_VENDOR_ID_LOONGSON) { + vga_set_default_device(devp); + dev_info(&pdev->dev, + "Overriding boot device as %X:%X\n", + devp->vendor, devp->device); + } + } +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, PCI_DEVICE_ID_LOONGSON_DC1, pci_fixup_vgadev); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, PCI_DEVICE_ID_LOONGSON_DC2, pci_fixup_vgadev); diff --git a/arch/loongarch/vdso/Makefile b/arch/loongarch/vdso/Makefile new file mode 100644 index 000000000000..6b6e16732c60 --- /dev/null +++ b/arch/loongarch/vdso/Makefile @@ -0,0 +1,96 @@ +# SPDX-License-Identifier: GPL-2.0 +# Objects to go into the VDSO. + +# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before +# the inclusion of generic Makefile. +ARCH_REL_TYPE_ABS := R_LARCH_32|R_LARCH_64|R_LARCH_MARK_LA|R_LARCH_JUMP_SLOT +include $(srctree)/lib/vdso/Makefile + +obj-vdso-y := elf.o vgettimeofday.o sigreturn.o + +# Common compiler flags between ABIs. +ccflags-vdso := \ + $(filter -I%,$(KBUILD_CFLAGS)) \ + $(filter -E%,$(KBUILD_CFLAGS)) \ + $(filter -march=%,$(KBUILD_CFLAGS)) \ + $(filter -m%-float,$(KBUILD_CFLAGS)) \ + -D__VDSO__ + +ifeq ($(cc-name),clang) +ccflags-vdso += $(filter --target=%,$(KBUILD_CFLAGS)) +endif + +cflags-vdso := $(ccflags-vdso) \ + $(filter -W%,$(filter-out -Wa$(comma)%,$(KBUILD_CFLAGS))) \ + -O2 -g -fno-strict-aliasing -fno-common -fno-builtin -G0 \ + -fno-stack-protector -fno-jump-tables -DDISABLE_BRANCH_PROFILING \ + $(call cc-option, -fno-asynchronous-unwind-tables) \ + $(call cc-option, -fno-stack-protector) +aflags-vdso := $(ccflags-vdso) \ + -D__ASSEMBLY__ -Wa,-gdwarf-2 + +ifneq ($(c-gettimeofday-y),) + CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y) +endif + +# VDSO linker flags. +ldflags-y := -Bsymbolic --no-undefined -soname=linux-vdso.so.1 \ + $(filter -E%,$(KBUILD_CFLAGS)) -nostdlib -shared \ + --hash-style=sysv --build-id -T + +GCOV_PROFILE := n + +# +# Shared build commands. +# + +quiet_cmd_vdsold_and_vdso_check = LD $@ + cmd_vdsold_and_vdso_check = $(cmd_ld); $(cmd_vdso_check) + +quiet_cmd_vdsoas_o_S = AS $@ + cmd_vdsoas_o_S = $(CC) $(a_flags) -c -o $@ $< + +# Generate VDSO offsets using helper script +gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh +quiet_cmd_vdsosym = VDSOSYM $@ + cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ + +include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE + $(call if_changed,vdsosym) + +# +# Build native VDSO. +# + +native-abi := $(filter -mabi=%,$(KBUILD_CFLAGS)) + +targets += $(obj-vdso-y) +targets += vdso.lds vdso.so.dbg vdso.so + +obj-vdso := $(obj-vdso-y:%.o=$(obj)/%.o) + +$(obj-vdso): KBUILD_CFLAGS := $(cflags-vdso) $(native-abi) +$(obj-vdso): KBUILD_AFLAGS := $(aflags-vdso) $(native-abi) + +$(obj)/vdso.lds: KBUILD_CPPFLAGS := $(ccflags-vdso) $(native-abi) + +$(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE + $(call if_changed,vdsold_and_vdso_check) + +$(obj)/vdso.so: OBJCOPYFLAGS := -S +$(obj)/vdso.so: $(obj)/vdso.so.dbg FORCE + $(call if_changed,objcopy) + +obj-y += vdso.o + +$(obj)/vdso.o : $(obj)/vdso.so + +# install commands for the unstripped file +quiet_cmd_vdso_install = INSTALL $@ + cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ + +vdso.so: $(obj)/vdso.so.dbg + @mkdir -p $(MODLIB)/vdso + $(call cmd,vdso_install) + +vdso_install: vdso.so diff --git a/arch/loongarch/vdso/elf.S b/arch/loongarch/vdso/elf.S new file mode 100644 index 000000000000..d5f16a6e3c72 --- /dev/null +++ b/arch/loongarch/vdso/elf.S @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + * Author: Huacai Chen + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include + +#include +#include + +ELFNOTE_START(Linux, 0, "a") + .long LINUX_VERSION_CODE +ELFNOTE_END diff --git a/arch/loongarch/vdso/gen_vdso_offsets.sh b/arch/loongarch/vdso/gen_vdso_offsets.sh new file mode 100755 index 000000000000..1bb4e12642ff --- /dev/null +++ b/arch/loongarch/vdso/gen_vdso_offsets.sh @@ -0,0 +1,13 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +# +# Derived from RISC-V and ARM64: +# Author: Will Deacon +# +# Match symbols in the DSO that look like VDSO_*; produce a header file +# of constant offsets into the shared object. +# + +LC_ALL=C sed -n -e 's/^00*/0/' -e \ +'s/^\([0-9a-fA-F]*\) . VDSO_\([a-zA-Z0-9_]*\)$/\#define vdso_offset_\2\t0x\1/p' diff --git a/arch/loongarch/vdso/sigreturn.S b/arch/loongarch/vdso/sigreturn.S new file mode 100644 index 000000000000..98f765b20ba9 --- /dev/null +++ b/arch/loongarch/vdso/sigreturn.S @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015 Imagination Technologies + * Author: Alex Smith + * Copyright (C) 2020 Loongson Technology Corporation Limited + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include + +#include +#include + +#include +#include + + .section .text + .cfi_sections .debug_frame + +SYM_FUNC_START(__vdso_rt_sigreturn) + + li.w a7, __NR_rt_sigreturn + syscall 0 + +SYM_FUNC_END(__vdso_rt_sigreturn) diff --git a/arch/loongarch/vdso/vdso.S b/arch/loongarch/vdso/vdso.S new file mode 100644 index 000000000000..41951232ef45 --- /dev/null +++ b/arch/loongarch/vdso/vdso.S @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020-2021 Loongson Technology Corporation Limited + * + * Derived from RISC-V: + * Copyright (C) 2014 Regents of the University of California + */ + +#include +#include +#include + + __PAGE_ALIGNED_DATA + + .globl vdso_start, vdso_end + .balign PAGE_SIZE +vdso_start: + .incbin "arch/loongarch/vdso/vdso.so" + .balign PAGE_SIZE +vdso_end: + + .previous diff --git a/arch/loongarch/vdso/vdso.lds.S b/arch/loongarch/vdso/vdso.lds.S new file mode 100644 index 000000000000..d8d2a3f025be --- /dev/null +++ b/arch/loongarch/vdso/vdso.lds.S @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Author: Huacai Chen + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ + +OUTPUT_FORMAT("elf64-loongarch", "elf64-loongarch", "elf64-loongarch") + +OUTPUT_ARCH(loongarch) + +SECTIONS +{ + PROVIDE(_start = .); + . = SIZEOF_HEADERS; + + .hash : { *(.hash) } :text + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + .note : { *(.note.*) } :text :note + + .text : { *(.text*) } :text + PROVIDE (__etext = .); + PROVIDE (_etext = .); + PROVIDE (etext = .); + + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + + .dynamic : { *(.dynamic) } :text :dynamic + + .rodata : { *(.rodata*) } :text + + _end = .; + PROVIDE(end = .); + + /DISCARD/ : { + *(.gnu.attributes) + *(.note.GNU-stack) + *(.data .data.* .gnu.linkonce.d.* .sdata*) + *(.bss .sbss .dynbss .dynsbss) + } +} + +PHDRS +{ + text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + note PT_NOTE FLAGS(4); /* PF_R */ + eh_frame_hdr PT_GNU_EH_FRAME; +} + +VERSION +{ + LINUX_4.4 { + global: + __vdso_clock_getres; + __vdso_clock_gettime; + __vdso_gettimeofday; + __vdso_rt_sigreturn; + local: *; + }; +} + +/* + * Make the sigreturn code visible to the kernel. + */ +VDSO_sigreturn = __vdso_rt_sigreturn; diff --git a/arch/loongarch/vdso/vgettimeofday.c b/arch/loongarch/vdso/vgettimeofday.c new file mode 100644 index 000000000000..c54489a630cf --- /dev/null +++ b/arch/loongarch/vdso/vgettimeofday.c @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * LoongArch userspace implementations of gettimeofday() and similar. + * + * Copyright (C) 2020 Loongson Technologies + * + */ +#include +#include + +int __vdso_clock_gettime(clockid_t clock, + struct __kernel_timespec *ts) +{ + return __cvdso_clock_gettime(clock, ts); +} + +int __vdso_gettimeofday(struct __kernel_old_timeval *tv, + struct timezone *tz) +{ + return __cvdso_gettimeofday(tv, tz); +} + +int __vdso_clock_getres(clockid_t clock_id, + struct __kernel_timespec *res) +{ + return __cvdso_clock_getres(clock_id, res); +} diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c index 3f310cfc9e7d..a35891c8f86e 100644 --- a/drivers/acpi/pci_mcfg.c +++ b/drivers/acpi/pci_mcfg.c @@ -41,6 +41,8 @@ struct mcfg_fixup { static struct mcfg_fixup mcfg_quirks[] = { /* { OEM_ID, OEM_TABLE_ID, REV, SEGMENT, BUS_RANGE, ops, cfgres }, */ +#ifdef CONFIG_ARM64 + #define AL_ECAM(table_id, rev, seg, ops) \ { "AMAZON", table_id, rev, seg, MCFG_BUS_ANY, ops } @@ -166,6 +168,17 @@ static struct mcfg_fixup mcfg_quirks[] = { ALTRA_ECAM_QUIRK(1, 13), ALTRA_ECAM_QUIRK(1, 14), ALTRA_ECAM_QUIRK(1, 15), +#endif /* ARM64 */ + +#ifdef CONFIG_LOONGARCH +#define LOONGSON_ECAM_MCFG(table_id, seg) \ + { "LOONGS", table_id, 1, seg, MCFG_BUS_ANY, &loongson_pci_ecam_ops } + + LOONGSON_ECAM_MCFG("\0", 0), + LOONGSON_ECAM_MCFG("LOONGSON", 0), + LOONGSON_ECAM_MCFG("\0", 1), + LOONGSON_ECAM_MCFG("LOONGSON", 1), +#endif /* LOONGARCH */ }; static char mcfg_oem_id[ACPI_OEM_ID_SIZE]; diff --git a/drivers/pci/controller/Kconfig b/drivers/pci/controller/Kconfig index 64e2f5e379aa..5c4e184ac9dc 100644 --- a/drivers/pci/controller/Kconfig +++ b/drivers/pci/controller/Kconfig @@ -291,7 +291,7 @@ config PCI_HYPERV_INTERFACE config PCI_LOONGSON bool "LOONGSON PCI Controller" depends on MACH_LOONGSON64 || COMPILE_TEST - depends on OF + depends on OF || ACPI depends on PCI_QUIRKS default MACH_LOONGSON64 help diff --git a/drivers/pci/controller/pci-loongson.c b/drivers/pci/controller/pci-loongson.c index 4f6755b343b4..b621cff5ea60 100644 --- a/drivers/pci/controller/pci-loongson.c +++ b/drivers/pci/controller/pci-loongson.c @@ -9,6 +9,8 @@ #include #include #include +#include +#include #include "../pci.h" @@ -16,13 +18,30 @@ #define FLAG_CFG1 BIT(1) #define FLAG_DEV_FIX BIT(2) +struct pci_controller_data { + u32 flags; + struct pci_ops *ops; +}; + struct loongson_pci { void __iomem *cfg0_base; void __iomem *cfg1_base; struct platform_device *pdev; - u32 flags; + struct pci_controller_data *data; }; +static struct loongson_pci *pci_bus_to_loongson_pci(struct pci_bus *bus) +{ + struct pci_config_window *cfg; + + if (acpi_disabled) + return (struct loongson_pci *)(bus->sysdata); + else { + cfg = bus->sysdata; + return (struct loongson_pci *)(cfg->priv); + } +} + static void __iomem *cfg1_map(struct loongson_pci *priv, int bus, unsigned int devfn, int where) { @@ -50,16 +69,23 @@ static void __iomem *pci_loongson_map_bus(struct pci_bus *bus, unsigned int devf int where) { unsigned char busnum = bus->number; - struct pci_host_bridge *bridge = pci_find_host_bridge(bus); - struct loongson_pci *priv = pci_host_bridge_priv(bridge); + unsigned int device = PCI_SLOT(devfn); + unsigned int function = PCI_FUNC(devfn); + struct loongson_pci *priv = pci_bus_to_loongson_pci(bus); + + if (pci_is_root_bus(bus)) + busnum = 0; /* * Do not read more than one device on the bus other than * the host bus. For our hardware the root bus is always bus 0. */ - if (priv->flags & FLAG_DEV_FIX && busnum != 0 && - PCI_SLOT(devfn) > 0) - return NULL; + if ((priv->data->flags & FLAG_DEV_FIX) && bus->self) { + if ((busnum != 0) && (device > 0)) + return NULL; + if ((busnum == 0) && (device >= 9 && device <= 20 && function > 0)) + return NULL; + } /* CFG0 can only access standard space */ if (where < PCI_CFG_SPACE_SIZE && priv->cfg0_base) @@ -72,6 +98,8 @@ static void __iomem *pci_loongson_map_bus(struct pci_bus *bus, unsigned int devf return NULL; } +#ifdef CONFIG_OF + static int loongson_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { int irq; @@ -90,27 +118,42 @@ static int loongson_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) return val; } -/* LS2K/LS7A accept 8/16/32-bit PCI operations */ +/* LS2K/LS7A accept 8/16/32-bit PCI config operations */ static struct pci_ops loongson_pci_ops = { .map_bus = pci_loongson_map_bus, .read = pci_generic_config_read, .write = pci_generic_config_write, }; -/* RS780/SR5690 only accept 32-bit PCI operations */ +/* RS780/SR5690 only accept 32-bit PCI config operations */ static struct pci_ops loongson_pci_ops32 = { .map_bus = pci_loongson_map_bus, .read = pci_generic_config_read32, .write = pci_generic_config_write32, }; +static const struct pci_controller_data ls2k_pci_data = { + .flags = FLAG_CFG1 | FLAG_DEV_FIX, + .ops = &loongson_pci_ops, +}; + +static const struct pci_controller_data ls7a_pci_data = { + .flags = FLAG_CFG1 | FLAG_DEV_FIX, + .ops = &loongson_pci_ops, +}; + +static const struct pci_controller_data rs780e_pci_data = { + .flags = FLAG_CFG0, + .ops = &loongson_pci_ops32, +}; + static const struct of_device_id loongson_pci_of_match[] = { { .compatible = "loongson,ls2k-pci", - .data = (void *)(FLAG_CFG0 | FLAG_CFG1 | FLAG_DEV_FIX), }, + .data = (void *)&ls2k_pci_data, }, { .compatible = "loongson,ls7a-pci", - .data = (void *)(FLAG_CFG0 | FLAG_CFG1 | FLAG_DEV_FIX), }, + .data = (void *)&ls7a_pci_data, }, { .compatible = "loongson,rs780e-pci", - .data = (void *)(FLAG_CFG0), }, + .data = (void *)&rs780e_pci_data, }, {} }; @@ -131,20 +174,20 @@ static int loongson_pci_probe(struct platform_device *pdev) priv = pci_host_bridge_priv(bridge); priv->pdev = pdev; - priv->flags = (unsigned long)of_device_get_match_data(dev); + priv->data = (struct pci_controller_data *)of_device_get_match_data(dev); - regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!regs) { - dev_err(dev, "missing mem resources for cfg0\n"); - return -EINVAL; + if (priv->data->flags & FLAG_CFG0) { + regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!regs) + dev_err(dev, "missing mem resources for cfg0\n"); + else { + priv->cfg0_base = devm_pci_remap_cfg_resource(dev, regs); + if (IS_ERR(priv->cfg0_base)) + return PTR_ERR(priv->cfg0_base); + } } - priv->cfg0_base = devm_pci_remap_cfg_resource(dev, regs); - if (IS_ERR(priv->cfg0_base)) - return PTR_ERR(priv->cfg0_base); - - /* CFG1 is optional */ - if (priv->flags & FLAG_CFG1) { + if (priv->data->flags & FLAG_CFG1) { regs = platform_get_resource(pdev, IORESOURCE_MEM, 1); if (!regs) dev_info(dev, "missing mem resource for cfg1\n"); @@ -156,11 +199,8 @@ static int loongson_pci_probe(struct platform_device *pdev) } bridge->sysdata = priv; + bridge->ops = priv->data->ops; bridge->map_irq = loongson_map_irq; - if (!of_device_is_compatible(node, "loongson,rs780e-pci")) - bridge->ops = &loongson_pci_ops; - else - bridge->ops = &loongson_pci_ops32; return pci_host_probe(bridge); } @@ -173,3 +213,41 @@ static struct platform_driver loongson_pci_driver = { .probe = loongson_pci_probe, }; builtin_platform_driver(loongson_pci_driver); + +#endif + +#ifdef CONFIG_ACPI + +static int loongson_pci_ecam_init(struct pci_config_window *cfg) +{ + struct device *dev = cfg->parent; + struct loongson_pci *priv; + struct pci_controller_data *data; + + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + cfg->priv = priv; + data->flags = FLAG_CFG1 | FLAG_DEV_FIX, + priv->data = data; + priv->cfg1_base = cfg->win - (cfg->busr.start << 16); + + return 0; +} + +const struct pci_ecam_ops loongson_pci_ecam_ops = { + .bus_shift = 16, + .init = loongson_pci_ecam_init, + .pci_ops = { + .map_bus = pci_loongson_map_bus, + .read = pci_generic_config_read, + .write = pci_generic_config_write, + } +}; + +#endif diff --git a/include/linux/pci-ecam.h b/include/linux/pci-ecam.h index 033ce74f02e8..3e28f275a889 100644 --- a/include/linux/pci-ecam.h +++ b/include/linux/pci-ecam.h @@ -58,6 +58,7 @@ extern const struct pci_ecam_ops pci_thunder_ecam_ops; /* Cavium ThunderX 1.x */ extern const struct pci_ecam_ops xgene_v1_pcie_ecam_ops; /* APM X-Gene PCIe v1 */ extern const struct pci_ecam_ops xgene_v2_pcie_ecam_ops; /* APM X-Gene PCIe v2.x */ extern const struct pci_ecam_ops al_pcie_ops; /* Amazon Annapurna Labs PCIe */ +extern const struct pci_ecam_ops loongson_pci_ecam_ops; /* Loongson PCIe */ #endif #if IS_ENABLED(CONFIG_PCI_HOST_COMMON) diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index f8da3669b6b0..1eb125f5d12b 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -149,6 +149,17 @@ /* Vendors and devices. Sort key: vendor first, device next. */ #define PCI_VENDOR_ID_LOONGSON 0x0014 +#define PCI_DEVICE_ID_LOONGSON_APB 0x7a02 +#define PCI_DEVICE_ID_LOONGSON_GMAC 0x7a03 +#define PCI_DEVICE_ID_LOONGSON_DC1 0x7a06 +#define PCI_DEVICE_ID_LOONGSON_DC2 0x7a36 +#define PCI_DEVICE_ID_LOONGSON_HDA 0x7a07 +#define PCI_DEVICE_ID_LOONGSON_GPU 0x7a15 +#define PCI_DEVICE_ID_LOONGSON_AHCI 0x7a08 +#define PCI_DEVICE_ID_LOONGSON_EHCI 0x7a14 +#define PCI_DEVICE_ID_LOONGSON_OHCI 0x7a24 +#define PCI_DEVICE_ID_LOONGSON_LPC 0x7a0c +#define PCI_DEVICE_ID_LOONGSON_DMA 0x7a0f #define PCI_VENDOR_ID_TTTECH 0x0357 #define PCI_DEVICE_ID_TTTECH_MC322 0x000a diff --git a/include/uapi/asm-generic/signal.h b/include/uapi/asm-generic/signal.h index 5c716a952cbe..5105b0691ed1 100644 --- a/include/uapi/asm-generic/signal.h +++ b/include/uapi/asm-generic/signal.h @@ -4,7 +4,11 @@ #include +#ifndef __loongarch__ #define _NSIG 64 +#else +#define _NSIG 128 +#endif #define _NSIG_BPW __BITS_PER_LONG #define _NSIG_WORDS (_NSIG / _NSIG_BPW) diff --git a/scripts/subarch.include b/scripts/subarch.include index 650682821126..c79e0d0b1a19 100644 --- a/scripts/subarch.include +++ b/scripts/subarch.include @@ -10,4 +10,4 @@ SUBARCH := $(shell uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/ \ -e s/s390x/s390/ -e s/parisc64/parisc/ \ -e s/ppc.*/powerpc/ -e s/mips.*/mips/ \ -e s/sh[234].*/sh/ -e s/aarch64.*/arm64/ \ - -e s/riscv.*/riscv/) + -e s/riscv.*/riscv/ -e s/loongarch.*/loongarch/) -- Gitee From 40c30ce95aaa1a9408e9b3565643db9992bed946 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 1 Jun 2021 08:50:39 +0800 Subject: [PATCH 23/59] LoongArch: Add basic SMP support Change-Id: I026ec93484dd404ec4d49a080c62616280ad6cac Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 80 ++- arch/loongarch/include/asm/atomic.h | 4 + arch/loongarch/include/asm/barrier.h | 108 ++++ arch/loongarch/include/asm/cmpxchg.h | 1 + arch/loongarch/include/asm/futex.h | 1 + arch/loongarch/include/asm/hardirq.h | 2 + .../include/asm/mach-loongson64/irq.h | 5 + arch/loongarch/include/asm/percpu.h | 202 +++++++ arch/loongarch/include/asm/pgtable.h | 21 + arch/loongarch/include/asm/smp.h | 140 +++++ arch/loongarch/include/asm/stackframe.h | 17 +- arch/loongarch/include/asm/tlbflush.h | 24 +- arch/loongarch/include/asm/topology.h | 7 +- arch/loongarch/kernel/Makefile | 2 + arch/loongarch/kernel/acpi.c | 70 ++- arch/loongarch/kernel/asm-offsets.c | 8 + arch/loongarch/kernel/cmpxchg.c | 3 + arch/loongarch/kernel/head.S | 30 + arch/loongarch/kernel/irq.c | 1 + arch/loongarch/kernel/proc.c | 5 + arch/loongarch/kernel/reset.c | 12 + arch/loongarch/kernel/setup.c | 26 + arch/loongarch/kernel/smp.c | 416 ++++++++++++++ arch/loongarch/kernel/topology.c | 43 +- arch/loongarch/kernel/vmlinux.lds.S | 3 + arch/loongarch/loongson64/Makefile | 2 + arch/loongarch/loongson64/init.c | 3 + arch/loongarch/loongson64/irq.c | 10 + arch/loongarch/loongson64/smp.c | 520 ++++++++++++++++++ arch/loongarch/mm/tlbex.S | 69 +++ include/linux/cpuhotplug.h | 1 + 31 files changed, 1819 insertions(+), 17 deletions(-) create mode 100644 arch/loongarch/include/asm/smp.h create mode 100644 arch/loongarch/kernel/smp.c create mode 100644 arch/loongarch/loongson64/smp.c diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 0d64f21d09b3..2016b0ed06e8 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -59,6 +59,7 @@ config LOONGARCH select GENERIC_LIB_UCMPDI2 select GENERIC_PCI_IOMAP select GENERIC_SCHED_CLOCK + select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL select HANDLE_DOMAIN_IRQ select HAVE_ARCH_AUDITSYSCALL @@ -89,7 +90,7 @@ config LOONGARCH select HAVE_RSEQ select HAVE_SYSCALL_TRACEPOINTS select HAVE_TIF_NOHZ - select HAVE_VIRT_CPU_ACCOUNTING_GEN if 64BIT + select HAVE_VIRT_CPU_ACCOUNTING_GEN if 64BIT || !SMP select IRQ_FORCED_THREADING select MODULES_USE_ELF_RELA if MODULES && 64BIT select MODULES_USE_ELF_REL if MODULES @@ -117,6 +118,8 @@ config MACH_LOONGSON64 select NR_CPUS_DEFAULT_4 select SPARSE_IRQ select SYS_HAS_CPU_LOONGSON64 + select SYS_SUPPORTS_SMP + select SYS_SUPPORTS_HOTPLUG_CPU select SYS_SUPPORTS_64BIT_KERNEL select ZONE_DMA32 help @@ -140,6 +143,9 @@ config SCHED_OMIT_FRAME_POINTER bool default y +config SYS_SUPPORTS_HOTPLUG_CPU + bool + config GENERIC_CSUM def_bool y @@ -359,6 +365,78 @@ config EFI resultant kernel should continue to boot on existing non-EFI platforms. +config SMP + bool "Multi-Processing support" + depends on SYS_SUPPORTS_SMP + help + This enables support for systems with more than one CPU. If you have + a system with only one CPU, say N. If you have a system with more + than one CPU, say Y. + + If you say N here, the kernel will run on uni- and multiprocessor + machines, but will use only one CPU of a multiprocessor machine. If + you say Y here, the kernel will run on many, but not all, + uniprocessor machines. On a uniprocessor machine, the kernel + will run faster if you say N here. + + People using multiprocessor machines who say Y here should also say + Y to "Enhanced Real Time Clock Support", below. + + See also the SMP-HOWTO available at + . + + If you don't know what to do here, say N. + +config HOTPLUG_CPU + bool "Support for hot-pluggable CPUs" + select GENERIC_IRQ_MIGRATION + depends on SMP && SYS_SUPPORTS_HOTPLUG_CPU + help + Say Y here to allow turning CPUs off and on. CPUs can be + controlled through /sys/devices/system/cpu. + (Note: power management support will enable this option + automatically on SMP systems. ) + Say N if you want to disable CPU hotplug. + +config SYS_SUPPORTS_SMP + bool + +config NR_CPUS_DEFAULT_4 + bool + +config NR_CPUS_DEFAULT_8 + bool + +config NR_CPUS_DEFAULT_16 + bool + +config NR_CPUS_DEFAULT_32 + bool + +config NR_CPUS_DEFAULT_64 + bool + +config NR_CPUS + int "Maximum number of CPUs (2-256)" + range 2 256 + depends on SMP + default "4" if NR_CPUS_DEFAULT_4 + default "8" if NR_CPUS_DEFAULT_8 + default "16" if NR_CPUS_DEFAULT_16 + default "32" if NR_CPUS_DEFAULT_32 + default "64" if NR_CPUS_DEFAULT_64 + help + This allows you to specify the maximum number of CPUs which this + kernel will support. The maximum supported value is 32 for 32-bit + kernel and 64 for 64-bit kernels; the minimum value which makes + sense is 1 for Qemu (useful only for kernel debugging purposes) + and 2 for all others. + + This is purely to save memory - each supported CPU adds + approximately eight kilobytes to the kernel image. For best + performance should round up your number of processors to the next + power of two. + source "kernel/Kconfig.hz" config SECCOMP diff --git a/arch/loongarch/include/asm/atomic.h b/arch/loongarch/include/asm/atomic.h index d2a4f29f9a5e..13b954b8140c 100644 --- a/arch/loongarch/include/asm/atomic.h +++ b/arch/loongarch/include/asm/atomic.h @@ -171,6 +171,7 @@ static inline int atomic_sub_if_positive(int i, atomic_t *v) " sc.w %1, %2 \n" " beq $zero, %1, 1b \n" "2: \n" + __WEAK_LLSC_MB : "=&r" (result), "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter) : "I" (-i)); @@ -183,6 +184,7 @@ static inline int atomic_sub_if_positive(int i, atomic_t *v) " sc.w %1, %2 \n" " beq $zero, %1, 1b \n" "2: \n" + __WEAK_LLSC_MB : "=&r" (result), "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter) : "r" (i)); @@ -333,6 +335,7 @@ static inline long atomic64_sub_if_positive(long i, atomic64_t *v) " sc.d %1, %2 \n" " beq %1, $zero, 1b \n" "2: \n" + __WEAK_LLSC_MB : "=&r" (result), "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter) : "I" (-i)); @@ -345,6 +348,7 @@ static inline long atomic64_sub_if_positive(long i, atomic64_t *v) " sc.d %1, %2 \n" " beq %1, $zero, 1b \n" "2: \n" + __WEAK_LLSC_MB : "=&r" (result), "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter) : "r" (i)); diff --git a/arch/loongarch/include/asm/barrier.h b/arch/loongarch/include/asm/barrier.h index 3ed771ed63a8..894dca9bbcc3 100644 --- a/arch/loongarch/include/asm/barrier.h +++ b/arch/loongarch/include/asm/barrier.h @@ -20,6 +20,19 @@ #define mb() fast_mb() #define iob() fast_iob() +#define __smp_mb() __asm__ __volatile__("dbar 0" : : : "memory") +#define __smp_rmb() __asm__ __volatile__("dbar 0" : : : "memory") +#define __smp_wmb() __asm__ __volatile__("dbar 0" : : : "memory") + +#ifdef CONFIG_SMP +#define __WEAK_LLSC_MB " dbar 0 \n" +#else +#define __WEAK_LLSC_MB " \n" +#endif + +#define __smp_mb__before_atomic() barrier() +#define __smp_mb__after_atomic() barrier() + /** * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise * @index: array element index @@ -48,6 +61,101 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, return mask; } +#define __smp_load_acquire(p) \ +({ \ + union { typeof(*p) __val; char __c[1]; } __u; \ + unsigned long __tmp = 0; \ + compiletime_assert_atomic_type(*p); \ + switch (sizeof(*p)) { \ + case 1: \ + *(__u8 *)__u.__c = *(volatile __u8 *)p; \ + __smp_mb(); \ + break; \ + case 2: \ + *(__u16 *)__u.__c = *(volatile __u16 *)p; \ + __smp_mb(); \ + break; \ + case 4: \ + __asm__ __volatile__( \ + "amor_db.w %[val], %[tmp], %[mem] \n" \ + : [val] "=&r" (*(__u32 *)__u.__c) \ + : [mem] "ZB" (*(u32 *) p), [tmp] "r" (__tmp) \ + : "memory"); \ + break; \ + case 8: \ + __asm__ __volatile__( \ + "amor_db.d %[val], %[tmp], %[mem] \n" \ + : [val] "=&r" (*(__u64 *)__u.__c) \ + : [mem] "ZB" (*(u64 *) p), [tmp] "r" (__tmp) \ + : "memory"); \ + break; \ + } \ + (typeof(*p))__u.__val; \ +}) + +#define __smp_store_release(p, v) \ +do { \ + union { typeof(*p) __val; char __c[1]; } __u = \ + { .__val = (__force typeof(*p)) (v) }; \ + unsigned long __tmp; \ + compiletime_assert_atomic_type(*p); \ + switch (sizeof(*p)) { \ + case 1: \ + __smp_mb(); \ + *(volatile __u8 *)p = *(__u8 *)__u.__c; \ + break; \ + case 2: \ + __smp_mb(); \ + *(volatile __u16 *)p = *(__u16 *)__u.__c; \ + break; \ + case 4: \ + __asm__ __volatile__( \ + "amswap_db.w %[tmp], %[val], %[mem] \n" \ + : [mem] "+ZB" (*(u32 *)p), [tmp] "=&r" (__tmp) \ + : [val] "r" (*(__u32 *)__u.__c) \ + : ); \ + break; \ + case 8: \ + __asm__ __volatile__( \ + "amswap_db.d %[tmp], %[val], %[mem] \n" \ + : [mem] "+ZB" (*(u64 *)p), [tmp] "=&r" (__tmp) \ + : [val] "r" (*(__u64 *)__u.__c) \ + : ); \ + break; \ + } \ +} while (0) + +#define __smp_store_mb(p, v) \ +do { \ + union { typeof(p) __val; char __c[1]; } __u = \ + { .__val = (__force typeof(p)) (v) }; \ + unsigned long __tmp; \ + switch (sizeof(p)) { \ + case 1: \ + *(volatile __u8 *)&p = *(__u8 *)__u.__c; \ + __smp_mb(); \ + break; \ + case 2: \ + *(volatile __u16 *)&p = *(__u16 *)__u.__c; \ + __smp_mb(); \ + break; \ + case 4: \ + __asm__ __volatile__( \ + "amswap_db.w %[tmp], %[val], %[mem] \n" \ + : [mem] "+ZB" (*(u32 *)&p), [tmp] "=&r" (__tmp) \ + : [val] "r" (*(__u32 *)__u.__c) \ + : ); \ + break; \ + case 8: \ + __asm__ __volatile__( \ + "amswap_db.d %[tmp], %[val], %[mem] \n" \ + : [mem] "+ZB" (*(u64 *)&p), [tmp] "=&r" (__tmp) \ + : [val] "r" (*(__u64 *)__u.__c) \ + : ); \ + break; \ + } \ +} while (0) + #include #endif /* __ASM_BARRIER_H */ diff --git a/arch/loongarch/include/asm/cmpxchg.h b/arch/loongarch/include/asm/cmpxchg.h index 7233176d751b..97108fad2d4b 100644 --- a/arch/loongarch/include/asm/cmpxchg.h +++ b/arch/loongarch/include/asm/cmpxchg.h @@ -65,6 +65,7 @@ static inline unsigned long __xchg(volatile void *ptr, unsigned long x, " " st " $t0, %1 \n" \ " beq $zero, $t0, 1b \n" \ "2: \n" \ + __WEAK_LLSC_MB \ : "=&r" (__ret), "=ZB"(*m) \ : "ZB"(*m), "Jr" (old), "Jr" (new) \ : "t0", "memory"); \ diff --git a/arch/loongarch/include/asm/futex.h b/arch/loongarch/include/asm/futex.h index fa0ccf9ea024..04e2893cf77d 100644 --- a/arch/loongarch/include/asm/futex.h +++ b/arch/loongarch/include/asm/futex.h @@ -86,6 +86,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newv "2: sc.w $t0, %2 \n" " beq $zero, $t0, 1b \n" "3: \n" + __WEAK_LLSC_MB " .section .fixup,\"ax\" \n" "4: li.d %0, %6 \n" " b 3b \n" diff --git a/arch/loongarch/include/asm/hardirq.h b/arch/loongarch/include/asm/hardirq.h index 5effe88fce3e..a16d425a0367 100644 --- a/arch/loongarch/include/asm/hardirq.h +++ b/arch/loongarch/include/asm/hardirq.h @@ -21,4 +21,6 @@ typedef struct { #include /* Standard mappings for irq_cpustat_t above */ +#define __ARCH_IRQ_STAT + #endif /* _ASM_HARDIRQ_H */ diff --git a/arch/loongarch/include/asm/mach-loongson64/irq.h b/arch/loongarch/include/asm/mach-loongson64/irq.h index 78c1921f523b..b4dc6c667f78 100644 --- a/arch/loongarch/include/asm/mach-loongson64/irq.h +++ b/arch/loongarch/include/asm/mach-loongson64/irq.h @@ -2,6 +2,8 @@ #ifndef __ASM_MACH_LOONGSON64_IRQ_H_ #define __ASM_MACH_LOONGSON64_IRQ_H_ +#include + #define MAX_IO_PICS 2 #define NR_IRQS (64 + (256 * MAX_IO_PICS)) @@ -80,4 +82,7 @@ extern struct irq_domain *pch_lpc_domain; extern struct irq_domain *pch_msi_domain[MAX_IO_PICS]; extern struct irq_domain *pch_pic_domain[MAX_IO_PICS]; +extern void fixup_irqs(void); +extern irqreturn_t loongson3_ipi_interrupt(int irq, void *dev); + #endif /* __ASM_MACH_LOONGSON64_IRQ_H_ */ diff --git a/arch/loongarch/include/asm/percpu.h b/arch/loongarch/include/asm/percpu.h index 06dbdd47145c..d4f42840e158 100644 --- a/arch/loongarch/include/asm/percpu.h +++ b/arch/loongarch/include/asm/percpu.h @@ -5,6 +5,8 @@ #ifndef __ASM_PERCPU_H #define __ASM_PERCPU_H +#include + /* Use r21 for fast access */ register unsigned long __my_cpu_offset __asm__("$r21"); @@ -15,6 +17,206 @@ static inline void set_my_cpu_offset(unsigned long off) } #define __my_cpu_offset __my_cpu_offset +#define PERCPU_OP(op, asm_op, c_op) \ +static inline unsigned long __percpu_##op(void *ptr, \ + unsigned long val, int size) \ +{ \ + unsigned long ret; \ + \ + switch (size) { \ + case 4: \ + __asm__ __volatile__( \ + "am"#asm_op".w" " %[ret], %[val], %[ptr] \n" \ + : [ret] "=&r" (ret), [ptr] "+ZB"(*(u32 *)ptr) \ + : [val] "r" (val)); \ + break; \ + case 8: \ + __asm__ __volatile__( \ + "am"#asm_op".d" " %[ret], %[val], %[ptr] \n" \ + : [ret] "=&r" (ret), [ptr] "+ZB"(*(u64 *)ptr) \ + : [val] "r" (val)); \ + break; \ + default: \ + ret = 0; \ + BUILD_BUG(); \ + } \ + \ + return ret c_op val; \ +} + +PERCPU_OP(add, add, +) +PERCPU_OP(and, and, &) +PERCPU_OP(or, or, |) +#undef PERCPU_OP + +static inline unsigned long __percpu_read(void *ptr, int size) +{ + unsigned long ret; + + switch (size) { + case 1: + __asm__ __volatile__ ("ldx.b %[ret], $r21, %[ptr] \n" + : [ret] "=&r"(ret) + : [ptr] "r"(ptr) + : "memory"); + break; + case 2: + __asm__ __volatile__ ("ldx.h %[ret], $r21, %[ptr] \n" + : [ret] "=&r"(ret) + : [ptr] "r"(ptr) + : "memory"); + break; + case 4: + __asm__ __volatile__ ("ldx.w %[ret], $r21, %[ptr] \n" + : [ret] "=&r"(ret) + : [ptr] "r"(ptr) + : "memory"); + break; + case 8: + __asm__ __volatile__ ("ldx.d %[ret], $r21, %[ptr] \n" + : [ret] "=&r"(ret) + : [ptr] "r"(ptr) + : "memory"); + break; + default: + ret = 0; + BUILD_BUG(); + } + + return ret; +} + +static inline void __percpu_write(void *ptr, unsigned long val, int size) +{ + switch (size) { + case 1: + __asm__ __volatile__("stx.b %[val], $r21, %[ptr] \n" + : + : [val] "r" (val), [ptr] "r" (ptr) + : "memory"); + break; + case 2: + __asm__ __volatile__("stx.h %[val], $r21, %[ptr] \n" + : + : [val] "r" (val), [ptr] "r" (ptr) + : "memory"); + break; + case 4: + __asm__ __volatile__("stx.w %[val], $r21, %[ptr] \n" + : + : [val] "r" (val), [ptr] "r" (ptr) + : "memory"); + break; + case 8: + __asm__ __volatile__("stx.d %[val], $r21, %[ptr] \n" + : + : [val] "r" (val), [ptr] "r" (ptr) + : "memory"); + break; + default: + BUILD_BUG(); + } +} + +static inline unsigned long __percpu_xchg(void *ptr, unsigned long val, + int size) +{ + switch (size) { + case 1: + case 2: + return __xchg_small((volatile void *)ptr, val, size); + + case 4: + return __xchg_asm("amswap.w", (volatile u32 *)ptr, (u32)val); + + case 8: + return __xchg_asm("amswap.d", (volatile u64 *)ptr, (u64)val); + + default: + BUILD_BUG(); + } + + return 0; +} + +/* this_cpu_cmpxchg */ +#define _protect_cmpxchg_local(pcp, o, n) \ +({ \ + typeof(*raw_cpu_ptr(&(pcp))) __ret; \ + preempt_disable_notrace(); \ + __ret = cmpxchg_local(raw_cpu_ptr(&(pcp)), o, n); \ + preempt_enable_notrace(); \ + __ret; \ +}) + +#define _percpu_read(pcp) \ +({ \ + typeof(pcp) __retval; \ + __retval = (typeof(pcp))__percpu_read(&(pcp), sizeof(pcp)); \ + __retval; \ +}) + +#define _percpu_write(pcp, val) \ +do { \ + __percpu_write(&(pcp), (unsigned long)(val), sizeof(pcp)); \ +} while (0) \ + +#define _pcp_protect(operation, pcp, val) \ +({ \ + typeof(pcp) __retval; \ + preempt_disable_notrace(); \ + __retval = (typeof(pcp))operation(raw_cpu_ptr(&(pcp)), \ + (val), sizeof(pcp)); \ + preempt_enable_notrace(); \ + __retval; \ +}) + +#define _percpu_add(pcp, val) \ + _pcp_protect(__percpu_add, pcp, val) + +#define _percpu_add_return(pcp, val) _percpu_add(pcp, val) + +#define _percpu_and(pcp, val) \ + _pcp_protect(__percpu_and, pcp, val) + +#define _percpu_or(pcp, val) \ + _pcp_protect(__percpu_or, pcp, val) + +#define _percpu_xchg(pcp, val) ((typeof(pcp)) \ + _pcp_protect(__percpu_xchg, pcp, (unsigned long)(val))) + +#define this_cpu_add_4(pcp, val) _percpu_add(pcp, val) +#define this_cpu_add_8(pcp, val) _percpu_add(pcp, val) + +#define this_cpu_add_return_4(pcp, val) _percpu_add_return(pcp, val) +#define this_cpu_add_return_8(pcp, val) _percpu_add_return(pcp, val) + +#define this_cpu_and_4(pcp, val) _percpu_and(pcp, val) +#define this_cpu_and_8(pcp, val) _percpu_and(pcp, val) + +#define this_cpu_or_4(pcp, val) _percpu_or(pcp, val) +#define this_cpu_or_8(pcp, val) _percpu_or(pcp, val) + +#define this_cpu_read_1(pcp) _percpu_read(pcp) +#define this_cpu_read_2(pcp) _percpu_read(pcp) +#define this_cpu_read_4(pcp) _percpu_read(pcp) +#define this_cpu_read_8(pcp) _percpu_read(pcp) + +#define this_cpu_write_1(pcp, val) _percpu_write(pcp, val) +#define this_cpu_write_2(pcp, val) _percpu_write(pcp, val) +#define this_cpu_write_4(pcp, val) _percpu_write(pcp, val) +#define this_cpu_write_8(pcp, val) _percpu_write(pcp, val) + +#define this_cpu_xchg_1(pcp, val) _percpu_xchg(pcp, val) +#define this_cpu_xchg_2(pcp, val) _percpu_xchg(pcp, val) +#define this_cpu_xchg_4(pcp, val) _percpu_xchg(pcp, val) +#define this_cpu_xchg_8(pcp, val) _percpu_xchg(pcp, val) + +#define this_cpu_cmpxchg_1(ptr, o, n) _protect_cmpxchg_local(ptr, o, n) +#define this_cpu_cmpxchg_2(ptr, o, n) _protect_cmpxchg_local(ptr, o, n) +#define this_cpu_cmpxchg_4(ptr, o, n) _protect_cmpxchg_local(ptr, o, n) +#define this_cpu_cmpxchg_8(ptr, o, n) _protect_cmpxchg_local(ptr, o, n) + #include #endif /* __ASM_PERCPU_H */ diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index f2b8341ec03e..eb054b7ffd51 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -57,8 +57,29 @@ static inline void set_pte(pte_t *ptep, pte_t pteval) * Make sure the buddy is global too (if it's !none, * it better already be global) */ +#ifdef CONFIG_SMP + /* + * For SMP, multiple CPUs can race, so we need to do + * this atomically. + */ + unsigned long page_global = _PAGE_GLOBAL; + unsigned long tmp; + + __asm__ __volatile__ ( + "1:" __LL "%[tmp], %[buddy] \n" + " bnez %[tmp], 2f \n" + " or %[tmp], %[tmp], %[global] \n" + __SC "%[tmp], %[buddy] \n" + " beqz %[tmp], 1b \n" + " nop \n" + "2: \n" + __WEAK_LLSC_MB + : [buddy] "+m" (buddy->pte), [tmp] "=&r" (tmp) + : [global] "r" (page_global)); +#else /* !CONFIG_SMP */ if (pte_none(*buddy)) pte_val(*buddy) = pte_val(*buddy) | _PAGE_GLOBAL; +#endif /* CONFIG_SMP */ } } diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h new file mode 100644 index 000000000000..d638b88e8744 --- /dev/null +++ b/arch/loongarch/include/asm/smp.h @@ -0,0 +1,140 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Author: Huacai Chen + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef __ASM_SMP_H +#define __ASM_SMP_H + +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_SMP + +struct task_struct; + +struct plat_smp_ops { + void (*send_ipi_single)(int cpu, unsigned int action); + void (*send_ipi_mask)(const struct cpumask *mask, unsigned int action); + void (*smp_setup)(void); + void (*prepare_cpus)(unsigned int max_cpus); + int (*boot_secondary)(int cpu, struct task_struct *idle); + void (*init_secondary)(void); + void (*smp_finish)(void); +#ifdef CONFIG_HOTPLUG_CPU + int (*cpu_disable)(void); + void (*cpu_die)(unsigned int cpu); +#endif +}; + +extern const struct plat_smp_ops *mp_ops; +void register_smp_ops(const struct plat_smp_ops *ops); + +static inline void plat_smp_setup(void) +{ + mp_ops->smp_setup(); +} + +#else /* !CONFIG_SMP */ + +struct plat_smp_ops; + +static inline void plat_smp_setup(void) +{ + /* UP, nothing to do ... */ +} + +static inline void register_smp_ops(const struct plat_smp_ops *ops) +{ +} + +#endif /* !CONFIG_SMP */ + +extern int smp_num_siblings; +extern int num_processors; +extern int disabled_cpus; +extern cpumask_t cpu_sibling_map[]; +extern cpumask_t cpu_core_map[]; +extern cpumask_t cpu_foreign_map[]; + +static inline int raw_smp_processor_id(void) +{ +#if defined(__VDSO__) + extern int vdso_smp_processor_id(void) + __compiletime_error("VDSO should not call smp_processor_id()"); + return vdso_smp_processor_id(); +#else + return current_thread_info()->cpu; +#endif +} +#define raw_smp_processor_id raw_smp_processor_id + +/* Map from cpu id to sequential logical cpu number. This will only + not be idempotent when cpus failed to come on-line. */ +extern int __cpu_number_map[NR_CPUS]; +#define cpu_number_map(cpu) __cpu_number_map[cpu] + +/* The reverse map from sequential logical cpu number to cpu id. */ +extern int __cpu_logical_map[NR_CPUS]; +#define cpu_logical_map(cpu) __cpu_logical_map[cpu] + +#define cpu_physical_id(cpu) cpu_logical_map(cpu) + +#define SMP_BOOT_CPU 0x1 +#define SMP_RESCHEDULE 0x2 +#define SMP_CALL_FUNCTION 0x4 + +struct secondary_data { + unsigned long stack; + unsigned long thread_info; +}; +extern struct secondary_data cpuboot_data; + +extern asmlinkage void smpboot_entry(void); + +extern void calculate_cpu_foreign_map(void); + +/* + * Generate IPI list text + */ +extern void show_ipi_list(struct seq_file *p, int prec); + +/* + * this function sends a 'reschedule' IPI to another CPU. + * it goes straight through and wastes no time serializing + * anything. Worst case is that we lose a reschedule ... + */ +static inline void smp_send_reschedule(int cpu) +{ + mp_ops->send_ipi_single(cpu, SMP_RESCHEDULE); +} + +#ifdef CONFIG_HOTPLUG_CPU +static inline int __cpu_disable(void) +{ + return mp_ops->cpu_disable(); +} + +static inline void __cpu_die(unsigned int cpu) +{ + mp_ops->cpu_die(cpu); +} + +extern void play_dead(void); +#endif + +static inline void arch_send_call_function_single_ipi(int cpu) +{ + mp_ops->send_ipi_single(cpu, SMP_CALL_FUNCTION); +} + +static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask) +{ + mp_ops->send_ipi_mask(mask, SMP_CALL_FUNCTION); +} + +#endif /* __ASM_SMP_H */ diff --git a/arch/loongarch/include/asm/stackframe.h b/arch/loongarch/include/asm/stackframe.h index d5580e9de290..4f78e2dca625 100644 --- a/arch/loongarch/include/asm/stackframe.h +++ b/arch/loongarch/include/asm/stackframe.h @@ -77,17 +77,24 @@ * new value in sp. */ .macro get_saved_sp docfi=0 - la.abs t1, kernelsp - move t0, sp + la.abs t1, kernelsp +#ifdef CONFIG_SMP + csrrd t0, PERCPU_BASE_KS + LONG_ADDU t1, t1, t0 +#endif + move t0, sp .if \docfi .cfi_register sp, t0 .endif - LONG_L sp, t1, 0 + LONG_L sp, t1, 0 .endm .macro set_saved_sp stackp temp temp2 - la.abs \temp, kernelsp - LONG_S \stackp, \temp, 0 + la.abs \temp, kernelsp +#ifdef CONFIG_SMP + LONG_ADDU \temp, \temp, u0 +#endif + LONG_S \stackp, \temp, 0 .endm .macro SAVE_SOME docfi=0 diff --git a/arch/loongarch/include/asm/tlbflush.h b/arch/loongarch/include/asm/tlbflush.h index 67e08f8ad794..a7fa634de383 100644 --- a/arch/loongarch/include/asm/tlbflush.h +++ b/arch/loongarch/include/asm/tlbflush.h @@ -20,19 +20,29 @@ extern void local_flush_tlb_all(void); extern void local_flush_tlb_user(void); extern void local_flush_tlb_kernel(void); extern void local_flush_tlb_mm(struct mm_struct *mm); -extern void local_flush_tlb_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end); -extern void local_flush_tlb_kernel_range(unsigned long start, - unsigned long end); -extern void local_flush_tlb_page(struct vm_area_struct *vma, - unsigned long page); +extern void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); +extern void local_flush_tlb_kernel_range(unsigned long start, unsigned long end); +extern void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page); extern void local_flush_tlb_one(unsigned long vaddr); +#ifdef CONFIG_SMP + +extern void flush_tlb_all(void); +extern void flush_tlb_mm(struct mm_struct *); +extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long, unsigned long); +extern void flush_tlb_kernel_range(unsigned long, unsigned long); +extern void flush_tlb_page(struct vm_area_struct *, unsigned long); +extern void flush_tlb_one(unsigned long vaddr); + +#else /* CONFIG_SMP */ + #define flush_tlb_all() local_flush_tlb_all() #define flush_tlb_mm(mm) local_flush_tlb_mm(mm) #define flush_tlb_range(vma, vmaddr, end) local_flush_tlb_range(vma, vmaddr, end) -#define flush_tlb_kernel_range(vmaddr,end) local_flush_tlb_kernel_range(vmaddr, end) +#define flush_tlb_kernel_range(vmaddr, end) local_flush_tlb_kernel_range(vmaddr, end) #define flush_tlb_page(vma, page) local_flush_tlb_page(vma, page) #define flush_tlb_one(vaddr) local_flush_tlb_one(vaddr) +#endif /* CONFIG_SMP */ + #endif /* __ASM_TLBFLUSH_H */ diff --git a/arch/loongarch/include/asm/topology.h b/arch/loongarch/include/asm/topology.h index 51feaa64339a..b5eb43f57e25 100644 --- a/arch/loongarch/include/asm/topology.h +++ b/arch/loongarch/include/asm/topology.h @@ -7,7 +7,12 @@ #include -#define cpu_logical_map(cpu) 0 +#ifdef CONFIG_SMP +#define topology_physical_package_id(cpu) (cpu_data[cpu].package) +#define topology_core_id(cpu) (cpu_core(&cpu_data[cpu])) +#define topology_core_cpumask(cpu) (&cpu_core_map[cpu]) +#define topology_sibling_cpumask(cpu) (&cpu_sibling_map[cpu]) +#endif #include diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index de13ac4fd2cb..4fc3909158cd 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -17,6 +17,8 @@ obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_CPU_HAS_FPU) += fpu.o +obj-$(CONFIG_SMP) += smp.o + obj-$(CONFIG_PROC_FS) += proc.o CPPFLAGS_vmlinux.lds := $(KBUILD_CFLAGS) diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c index 5172581441da..04b28a10498a 100644 --- a/arch/loongarch/kernel/acpi.c +++ b/arch/loongarch/kernel/acpi.c @@ -159,6 +159,35 @@ void __init acpi_boot_table_init(void) } } +static int set_processor_mask(u32 id, u32 flags) +{ + + int cpu, cpuid = id; + + if (num_processors >= nr_cpu_ids) { + pr_warn("acpi: nr_cpus/possible_cpus limit of %i reached." + " processor 0x%x ignored.\n", nr_cpu_ids, cpuid); + + return -ENODEV; + + } + if (cpuid == loongson_sysconf.boot_cpu_id) + cpu = 0; + else + cpu = cpumask_next_zero(-1, cpu_present_mask); + + if (flags & ACPI_MADT_ENABLED) { + num_processors++; + set_cpu_possible(cpu, true); + set_cpu_present(cpu, true); + __cpu_number_map[cpuid] = cpu; + __cpu_logical_map[cpu] = cpuid; + } else + disabled_cpus++; + + return cpu; +} + static int __init acpi_parse_cpuintc(union acpi_subtable_headers *header, const unsigned long end) { @@ -169,6 +198,7 @@ acpi_parse_cpuintc(union acpi_subtable_headers *header, const unsigned long end) return -EINVAL; acpi_table_print_madt_entry(&header->common); + set_processor_mask(processor->core_id, processor->flags); return 0; } @@ -432,7 +462,12 @@ static int legacy_madt_table_init(void) static void __init acpi_process_madt(void) { - int error; + int i, error; + + for (i = 0; i < NR_CPUS; i++) { + __cpu_number_map[i] = -1; + __cpu_logical_map[i] = -1; + } if (loongson_sysconf.bpi_ver <= BPI_VERSION_V1) { error = legacy_madt_table_init(); @@ -527,3 +562,36 @@ void __init arch_reserve_mem_area(acpi_physical_address addr, size_t size) { memblock_reserve(addr, size); } + +#ifdef CONFIG_ACPI_HOTPLUG_CPU + +#include + +int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id, int *pcpu) +{ + int cpu; + + cpu = set_processor_mask(physid, ACPI_MADT_ENABLED); + if (cpu < 0) { + pr_info(PREFIX "Unable to map lapic to logical cpu number\n"); + return cpu; + } + + *pcpu = cpu; + + return 0; +} +EXPORT_SYMBOL(acpi_map_cpu); + +int acpi_unmap_cpu(int cpu) +{ + set_cpu_present(cpu, false); + num_processors--; + + pr_info("cpu%d hot remove!\n", cpu); + + return 0; +} +EXPORT_SYMBOL(acpi_unmap_cpu); + +#endif /* CONFIG_ACPI_HOTPLUG_CPU */ diff --git a/arch/loongarch/kernel/asm-offsets.c b/arch/loongarch/kernel/asm-offsets.c index 776051030a98..f86210b17b4f 100644 --- a/arch/loongarch/kernel/asm-offsets.c +++ b/arch/loongarch/kernel/asm-offsets.c @@ -253,3 +253,11 @@ void output_signal_defines(void) DEFINE(_SIGXFSZ, SIGXFSZ); BLANK(); } + +void output_smpboot_defines(void) +{ + COMMENT("Linux smp cpu boot offsets."); + OFFSET(CPU_BOOT_STACK, secondary_data, stack); + OFFSET(CPU_BOOT_TINFO, secondary_data, thread_info); + BLANK(); +} diff --git a/arch/loongarch/kernel/cmpxchg.c b/arch/loongarch/kernel/cmpxchg.c index aac3547c1779..cf3a4f5c1987 100644 --- a/arch/loongarch/kernel/cmpxchg.c +++ b/arch/loongarch/kernel/cmpxchg.c @@ -91,7 +91,10 @@ unsigned long __cmpxchg_small(volatile void *ptr, unsigned long old, " or %1, %1, %6 \n" " sc.w %1, %2 \n" " beqz %1, 1b \n" + " b 3f \n" "2: \n" + __WEAK_LLSC_MB + "3: \n" : "=&r" (old32), "=&r" (temp), "=" GCC_OFF_SMALL_ASM() (*ptr32) : GCC_OFF_SMALL_ASM() (*ptr32), "Jr" (~mask), "Jr" (old), "Jr" (new) : "memory"); diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S index b4223842f690..6e816b5369c0 100644 --- a/arch/loongarch/kernel/head.S +++ b/arch/loongarch/kernel/head.S @@ -69,4 +69,34 @@ SYM_CODE_START(kernel_entry) # kernel entry point SYM_CODE_END(kernel_entry) +#ifdef CONFIG_SMP + +/* + * SMP slave cpus entry point. Board specific code for bootstrap calls this + * function after setting up the stack and tp registers. + */ +SYM_CODE_START(smpboot_entry) + li.d t0, CSR_DMW0_INIT # UC, PLV0 + csrwr t0, LOONGARCH_CSR_DMWIN0 + li.d t0, CSR_DMW1_INIT # CA, PLV0 + csrwr t0, LOONGARCH_CSR_DMWIN1 + li.w t0, 0xb0 # PLV=0, IE=0, PG=1 + csrwr t0, LOONGARCH_CSR_CRMD + li.w t0, 0x04 # PLV=0, PIE=1, PWE=0 + csrwr t0, LOONGARCH_CSR_PRMD + li.w t0, 0x00 # FPE=0, SXE=0, ASXE=0, BTE=0 + csrwr t0, LOONGARCH_CSR_EUEN + + la.abs t0, cpuboot_data + ld.d sp, t0, CPU_BOOT_STACK + ld.d tp, t0, CPU_BOOT_TINFO + + la.abs t0, 0f + jirl zero, t0, 0 +0: + bl start_secondary +SYM_CODE_END(smpboot_entry) + +#endif /* CONFIG_SMP */ + SYM_ENTRY(kernel_entry_end, SYM_L_GLOBAL, SYM_A_NONE) diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c index 3c97cbeac9f2..9446710ad586 100644 --- a/arch/loongarch/kernel/irq.c +++ b/arch/loongarch/kernel/irq.c @@ -34,6 +34,7 @@ atomic_t irq_err_count; int arch_show_interrupts(struct seq_file *p, int prec) { + show_ipi_list(p, prec); seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count)); return 0; } diff --git a/arch/loongarch/kernel/proc.c b/arch/loongarch/kernel/proc.c index 23de61e565c9..8cae30881faa 100644 --- a/arch/loongarch/kernel/proc.c +++ b/arch/loongarch/kernel/proc.c @@ -35,6 +35,11 @@ static int show_cpuinfo(struct seq_file *m, void *v) unsigned int fp_version = cpu_data[n].fpu_vers; struct proc_cpuinfo_notifier_args proc_cpuinfo_notifier_args; +#ifdef CONFIG_SMP + if (!cpu_online(n)) + return 0; +#endif + /* * For the first processor also print the system type */ diff --git a/arch/loongarch/kernel/reset.c b/arch/loongarch/kernel/reset.c index f39d911254a2..d22de0669b4b 100644 --- a/arch/loongarch/kernel/reset.c +++ b/arch/loongarch/kernel/reset.c @@ -36,16 +36,28 @@ EXPORT_SYMBOL(pm_power_off); void machine_halt(void) { +#ifdef CONFIG_SMP + preempt_disable(); + smp_send_stop(); +#endif machine_hang(); } void machine_power_off(void) { +#ifdef CONFIG_SMP + preempt_disable(); + smp_send_stop(); +#endif pm_power_off(); } void machine_restart(char *command) { +#ifdef CONFIG_SMP + preempt_disable(); + smp_send_stop(); +#endif do_kernel_restart(command); pm_restart(); } diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index 5b3b8e5fb8c0..e53844bc0e33 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -24,6 +24,7 @@ #include #include #include +#include DEFINE_PER_CPU(unsigned long, kernelsp); unsigned long fw_arg0, fw_arg1, fw_arg2, fw_arg3; @@ -373,6 +374,29 @@ static int __init reserve_memblock_reserved_regions(void) } arch_initcall(reserve_memblock_reserved_regions); +#ifdef CONFIG_SMP +static void __init prefill_possible_map(void) +{ + int i, possible; + + possible = num_processors + disabled_cpus; + if (possible > nr_cpu_ids) + possible = nr_cpu_ids; + + pr_info("SMP: Allowing %d CPUs, %d hotplug CPUs\n", + possible, max((possible - num_processors), 0)); + + for (i = 0; i < possible; i++) + set_cpu_possible(i, true); + for (; i < NR_CPUS; i++) + set_cpu_possible(i, false); + + nr_cpu_ids = possible; +} +#else +static inline void prefill_possible_map(void) {} +#endif + void __init setup_arch(char **cmdline_p) { cpu_probe(); @@ -387,6 +411,8 @@ void __init setup_arch(char **cmdline_p) arch_mem_init(cmdline_p); resource_init(); + plat_smp_setup(); + prefill_possible_map(); paging_init(); } diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c new file mode 100644 index 000000000000..b4746805334d --- /dev/null +++ b/arch/loongarch/kernel/smp.c @@ -0,0 +1,416 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +int __cpu_number_map[NR_CPUS]; /* Map physical to logical */ +EXPORT_SYMBOL(__cpu_number_map); + +int __cpu_logical_map[NR_CPUS]; /* Map logical to physical */ +EXPORT_SYMBOL(__cpu_logical_map); + +/* Number of threads (siblings) per CPU core */ +int smp_num_siblings = 1; +EXPORT_SYMBOL(smp_num_siblings); + +/* Representing the threads (siblings) of each logical CPU */ +cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; +EXPORT_SYMBOL(cpu_sibling_map); + +/* Representing the core map of multi-core chips of each logical CPU */ +cpumask_t cpu_core_map[NR_CPUS] __read_mostly; +EXPORT_SYMBOL(cpu_core_map); + +static DECLARE_COMPLETION(cpu_starting); +static DECLARE_COMPLETION(cpu_running); + +/* + * A logcal cpu mask containing only one VPE per core to + * reduce the number of IPIs on large MT systems. + */ +cpumask_t cpu_foreign_map[NR_CPUS] __read_mostly; +EXPORT_SYMBOL(cpu_foreign_map); + +/* representing cpus for which sibling maps can be computed */ +static cpumask_t cpu_sibling_setup_map; + +/* representing cpus for which core maps can be computed */ +static cpumask_t cpu_core_setup_map; + +static inline void set_cpu_sibling_map(int cpu) +{ + int i; + + cpumask_set_cpu(cpu, &cpu_sibling_setup_map); + + if (smp_num_siblings > 1) { + for_each_cpu(i, &cpu_sibling_setup_map) { + if (cpus_are_siblings(cpu, i)) { + cpumask_set_cpu(i, &cpu_sibling_map[cpu]); + cpumask_set_cpu(cpu, &cpu_sibling_map[i]); + } + } + } else + cpumask_set_cpu(cpu, &cpu_sibling_map[cpu]); +} + +static inline void set_cpu_core_map(int cpu) +{ + int i; + + cpumask_set_cpu(cpu, &cpu_core_setup_map); + + for_each_cpu(i, &cpu_core_setup_map) { + if (cpu_data[cpu].package == cpu_data[i].package) { + cpumask_set_cpu(i, &cpu_core_map[cpu]); + cpumask_set_cpu(cpu, &cpu_core_map[i]); + } + } +} + +/* + * Calculate a new cpu_foreign_map mask whenever a + * new cpu appears or disappears. + */ +void calculate_cpu_foreign_map(void) +{ + int i, k, core_present; + cpumask_t temp_foreign_map; + + /* Re-calculate the mask */ + cpumask_clear(&temp_foreign_map); + for_each_online_cpu(i) { + core_present = 0; + for_each_cpu(k, &temp_foreign_map) + if (cpus_are_siblings(i, k)) + core_present = 1; + if (!core_present) + cpumask_set_cpu(i, &temp_foreign_map); + } + + for_each_online_cpu(i) + cpumask_andnot(&cpu_foreign_map[i], + &temp_foreign_map, &cpu_sibling_map[i]); +} + +const struct plat_smp_ops *mp_ops; +EXPORT_SYMBOL(mp_ops); + +void register_smp_ops(const struct plat_smp_ops *ops) +{ + if (mp_ops) + printk(KERN_WARNING "Overriding previously set SMP ops\n"); + + mp_ops = ops; +} + +/* + * First C code run on the secondary CPUs after being started up by + * the master. + */ +asmlinkage void start_secondary(void) +{ + unsigned int cpu; + + sync_counter(); + cpu = smp_processor_id(); + set_my_cpu_offset(per_cpu_offset(cpu)); + + cpu_probe(); + constant_clockevent_init(); + mp_ops->init_secondary(); + + set_cpu_sibling_map(cpu); + set_cpu_core_map(cpu); + + notify_cpu_starting(cpu); + + /* Notify boot CPU that we're starting */ + complete(&cpu_starting); + + /* The CPU is running, now mark it online */ + set_cpu_online(cpu, true); + + calculate_cpu_foreign_map(); + + /* + * Notify boot CPU that we're up & online and it can safely return + * from __cpu_up + */ + complete(&cpu_running); + + /* + * irq will be enabled in ->smp_finish(), enabling it too early + * is dangerous. + */ + WARN_ON_ONCE(!irqs_disabled()); + mp_ops->smp_finish(); + + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); +} + +static void stop_this_cpu(void *dummy) +{ + /* + * Remove this CPU: + */ + + set_cpu_online(smp_processor_id(), false); + calculate_cpu_foreign_map(); + local_irq_disable(); + while (1); +} + +void smp_send_stop(void) +{ + smp_call_function(stop_this_cpu, NULL, 0); +} + +void __init smp_cpus_done(unsigned int max_cpus) +{ +} + +/* called from main before smp_init() */ +void __init smp_prepare_cpus(unsigned int max_cpus) +{ + init_new_context(current, &init_mm); + current_thread_info()->cpu = 0; + mp_ops->prepare_cpus(max_cpus); + set_cpu_sibling_map(0); + set_cpu_core_map(0); + calculate_cpu_foreign_map(); +#ifndef CONFIG_HOTPLUG_CPU + init_cpu_present(cpu_possible_mask); +#endif +} + +/* Preload SMP state for boot cpu */ +void smp_prepare_boot_cpu(void) +{ + unsigned int cpu; + + set_cpu_possible(0, true); + set_cpu_online(0, true); + set_my_cpu_offset(per_cpu_offset(0)); + + for_each_possible_cpu(cpu) + set_cpu_numa_node(cpu, 0); +} + +int __cpu_up(unsigned int cpu, struct task_struct *tidle) +{ + int err; + + err = mp_ops->boot_secondary(cpu, tidle); + if (err) + return err; + + /* Wait for CPU to start and be ready to sync counters */ + if (!wait_for_completion_timeout(&cpu_starting, + msecs_to_jiffies(2000))) { + pr_crit("CPU%u: failed to start\n", cpu); + return -EIO; + } + + /* Wait for CPU to finish startup & mark itself online before return */ + wait_for_completion(&cpu_running); + return 0; +} + +/* Not really SMP stuff ... */ +int setup_profiling_timer(unsigned int multiplier) +{ + return 0; +} + +static void flush_tlb_all_ipi(void *info) +{ + local_flush_tlb_all(); +} + +void flush_tlb_all(void) +{ + on_each_cpu(flush_tlb_all_ipi, NULL, 1); +} + +static void flush_tlb_mm_ipi(void *mm) +{ + local_flush_tlb_mm((struct mm_struct *)mm); +} + +/* + * Special Variant of smp_call_function for use by TLB functions: + * + * o No return value + * o collapses to normal function call on UP kernels + * o collapses to normal function call on systems with a single shared + * primary cache. + */ +static inline void smp_on_other_tlbs(void (*func) (void *info), void *info) +{ + smp_call_function(func, info, 1); +} + +static inline void smp_on_each_tlb(void (*func) (void *info), void *info) +{ + preempt_disable(); + + smp_on_other_tlbs(func, info); + func(info); + + preempt_enable(); +} + +/* + * The following tlb flush calls are invoked when old translations are + * being torn down, or pte attributes are changing. For single threaded + * address spaces, a new context is obtained on the current cpu, and tlb + * context on other cpus are invalidated to force a new context allocation + * at switch_mm time, should the mm ever be used on other cpus. For + * multithreaded address spaces, intercpu interrupts have to be sent. + * Another case where intercpu interrupts are required is when the target + * mm might be active on another cpu (eg debuggers doing the flushes on + * behalf of debugees, kswapd stealing pages from another process etc). + */ + +void flush_tlb_mm(struct mm_struct *mm) +{ + preempt_disable(); + + if ((atomic_read(&mm->mm_users) != 1) || (current->mm != mm)) { + on_each_cpu_mask(mm_cpumask(mm), flush_tlb_mm_ipi, mm, 1); + } else { + unsigned int cpu; + + for_each_online_cpu(cpu) { + if (cpu != smp_processor_id() && cpu_context(cpu, mm)) + cpu_context(cpu, mm) = 0; + } + local_flush_tlb_mm(mm); + } + + preempt_enable(); +} + +struct flush_tlb_data { + struct vm_area_struct *vma; + unsigned long addr1; + unsigned long addr2; +}; + +static void flush_tlb_range_ipi(void *info) +{ + struct flush_tlb_data *fd = info; + + local_flush_tlb_range(fd->vma, fd->addr1, fd->addr2); +} + +void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) +{ + struct mm_struct *mm = vma->vm_mm; + + preempt_disable(); + if ((atomic_read(&mm->mm_users) != 1) || (current->mm != mm)) { + struct flush_tlb_data fd = { + .vma = vma, + .addr1 = start, + .addr2 = end, + }; + + on_each_cpu_mask(mm_cpumask(mm), flush_tlb_range_ipi, &fd, 1); + } else { + unsigned int cpu; + + for_each_online_cpu(cpu) { + if (cpu != smp_processor_id() && cpu_context(cpu, mm)) + cpu_context(cpu, mm) = 0; + } + local_flush_tlb_range(vma, start, end); + } + preempt_enable(); +} + +static void flush_tlb_kernel_range_ipi(void *info) +{ + struct flush_tlb_data *fd = info; + + local_flush_tlb_kernel_range(fd->addr1, fd->addr2); +} + +void flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + struct flush_tlb_data fd = { + .addr1 = start, + .addr2 = end, + }; + + on_each_cpu(flush_tlb_kernel_range_ipi, &fd, 1); +} + +static void flush_tlb_page_ipi(void *info) +{ + struct flush_tlb_data *fd = info; + + local_flush_tlb_page(fd->vma, fd->addr1); +} + +void flush_tlb_page(struct vm_area_struct *vma, unsigned long page) +{ + preempt_disable(); + if ((atomic_read(&vma->vm_mm->mm_users) != 1) || (current->mm != vma->vm_mm)) { + struct flush_tlb_data fd = { + .vma = vma, + .addr1 = page, + }; + + on_each_cpu_mask(mm_cpumask(vma->vm_mm), flush_tlb_page_ipi, &fd, 1); + } else { + unsigned int cpu; + + for_each_online_cpu(cpu) { + if (cpu != smp_processor_id() && cpu_context(cpu, vma->vm_mm)) + cpu_context(cpu, vma->vm_mm) = 0; + } + local_flush_tlb_page(vma, page); + } + preempt_enable(); +} + +static void flush_tlb_one_ipi(void *info) +{ + unsigned long vaddr = (unsigned long) info; + + local_flush_tlb_one(vaddr); +} + +void flush_tlb_one(unsigned long vaddr) +{ + smp_on_each_tlb(flush_tlb_one_ipi, (void *) vaddr); +} + +EXPORT_SYMBOL(flush_tlb_page); +EXPORT_SYMBOL(flush_tlb_one); diff --git a/arch/loongarch/kernel/topology.c b/arch/loongarch/kernel/topology.c index 3b2cbb95875b..ab1a75c0b5a6 100644 --- a/arch/loongarch/kernel/topology.c +++ b/arch/loongarch/kernel/topology.c @@ -1,13 +1,52 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include #include +#include +#include #include -static struct cpu cpu_device; +static DEFINE_PER_CPU(struct cpu, cpu_devices); + +#ifdef CONFIG_HOTPLUG_CPU +int arch_register_cpu(int cpu) +{ + int ret; + struct cpu *c = &per_cpu(cpu_devices, cpu); + + c->hotpluggable = 1; + ret = register_cpu(c, cpu); + if (ret < 0) + pr_warn("register_cpu %d failed (%d)\n", cpu, ret); + + return ret; +} +EXPORT_SYMBOL(arch_register_cpu); + +void arch_unregister_cpu(int cpu) +{ + struct cpu *c = &per_cpu(cpu_devices, cpu); + + c->hotpluggable = 0; + unregister_cpu(c); +} +EXPORT_SYMBOL(arch_unregister_cpu); +#endif static int __init topology_init(void) { - return register_cpu(&cpu_device, 0); + int i, ret; + + for_each_present_cpu(i) { + struct cpu *c = &per_cpu(cpu_devices, i); + + c->hotpluggable = !!i; + ret = register_cpu(c, i); + if (ret < 0) + pr_warn("topology_init: register_cpu %d failed (%d)\n", i, ret); + } + + return 0; } subsys_initcall(topology_init); diff --git a/arch/loongarch/kernel/vmlinux.lds.S b/arch/loongarch/kernel/vmlinux.lds.S index 3ae212100de5..9a919f512d06 100644 --- a/arch/loongarch/kernel/vmlinux.lds.S +++ b/arch/loongarch/kernel/vmlinux.lds.S @@ -70,6 +70,9 @@ SECTIONS .exit.data : { EXIT_DATA } +#ifdef CONFIG_SMP + PERCPU_SECTION(1 << CONFIG_L1_CACHE_SHIFT) +#endif /* * Align to 64K in attempt to eliminate holes before the diff --git a/arch/loongarch/loongson64/Makefile b/arch/loongarch/loongson64/Makefile index d89d3c8fe382..134ebebaf5fb 100644 --- a/arch/loongarch/loongson64/Makefile +++ b/arch/loongarch/loongson64/Makefile @@ -3,3 +3,5 @@ # obj-y += setup.o init.o env.o reset.o irq.o mem.o rtc.o + +obj-$(CONFIG_SMP) += smp.o diff --git a/arch/loongarch/loongson64/init.c b/arch/loongarch/loongson64/init.c index 5f65558425e0..5ecbb2a75caa 100644 --- a/arch/loongarch/loongson64/init.c +++ b/arch/loongarch/loongson64/init.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -130,4 +131,6 @@ void __init platform_init(void) pr_info("The BIOS Version: %s\n", b_info.bios_version); efi_runtime_init(); + + register_smp_ops(&loongson3_smp_ops); } diff --git a/arch/loongarch/loongson64/irq.c b/arch/loongarch/loongson64/irq.c index 7ba15255e5d6..be01ce1e9b32 100644 --- a/arch/loongarch/loongson64/irq.c +++ b/arch/loongarch/loongson64/irq.c @@ -79,10 +79,20 @@ void __init setup_IRQ(void) void __init arch_init_irq(void) { + int r, ipi_irq; + static int ipi_dummy_dev; + clear_csr_ecfg(ECFG0_IM); clear_csr_estat(ESTATF_IP); setup_IRQ(); +#ifdef CONFIG_SMP + ipi_irq = get_ipi_irq(); + irq_set_percpu_devid(ipi_irq); + r = request_percpu_irq(ipi_irq, loongson3_ipi_interrupt, "IPI", &ipi_dummy_dev); + if (r < 0) + panic("IPI IRQ request failed\n"); +#endif set_csr_ecfg(ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | ECFGF_IPI | ECFGF_PMC); } diff --git a/arch/loongarch/loongson64/smp.c b/arch/loongarch/loongson64/smp.c new file mode 100644 index 000000000000..44218d69f872 --- /dev/null +++ b/arch/loongarch/loongson64/smp.c @@ -0,0 +1,520 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Author: Huacai Chen + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct secondary_data cpuboot_data; + +static DEFINE_PER_CPU(int, cpu_state); +DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); +EXPORT_PER_CPU_SYMBOL(irq_stat); + +#define MAX_CPUS 64 + +#define STATUS 0x00 +#define EN 0x04 +#define SET 0x08 +#define CLEAR 0x0c +#define MBUF 0x20 + +extern unsigned long long smp_group[MAX_PACKAGES]; +static u32 core_offsets[4] = {0x000, 0x100, 0x200, 0x300}; + +static volatile void *ipi_set_regs[MAX_CPUS]; +static volatile void *ipi_clear_regs[MAX_CPUS]; +static volatile void *ipi_status_regs[MAX_CPUS]; +static volatile void *ipi_en_regs[MAX_CPUS]; +static volatile void *ipi_mailbox_buf[MAX_CPUS]; + +static u32 (*ipi_read_clear)(int cpu); +static void (*ipi_write_action)(int cpu, u32 action); + +enum ipi_msg_type { + IPI_RESCHEDULE, + IPI_CALL_FUNCTION, +}; + +static const char *ipi_types[NR_IPI] __tracepoint_string = { + [IPI_RESCHEDULE] = "Rescheduling interrupts", + [IPI_CALL_FUNCTION] = "Call Function interrupts", +}; + +void show_ipi_list(struct seq_file *p, int prec) +{ + unsigned int cpu, i; + + for (i = 0; i < NR_IPI; i++) { + seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i, prec >= 4 ? " " : ""); + for_each_online_cpu(cpu) + seq_printf(p, "%10u ", per_cpu(irq_stat, cpu).ipi_irqs[i]); + seq_printf(p, " LoongArch %d %s\n", i + 1, ipi_types[i]); + } +} + +/* Send mail buffer via Mail_Send */ +static void csr_mail_send(uint64_t data, int cpu, int mailbox) +{ + uint64_t val; + + /* Send high 32 bits */ + val = IOCSR_MBUF_SEND_BLOCKING; + val |= (IOCSR_MBUF_SEND_BOX_HI(mailbox) << IOCSR_MBUF_SEND_BOX_SHIFT); + val |= (cpu << IOCSR_MBUF_SEND_CPU_SHIFT); + val |= (data & IOCSR_MBUF_SEND_H32_MASK); + iocsr_writeq(val, LOONGARCH_IOCSR_MBUF_SEND); + + /* Send low 32 bits */ + val = IOCSR_MBUF_SEND_BLOCKING; + val |= (IOCSR_MBUF_SEND_BOX_LO(mailbox) << IOCSR_MBUF_SEND_BOX_SHIFT); + val |= (cpu << IOCSR_MBUF_SEND_CPU_SHIFT); + val |= (data << IOCSR_MBUF_SEND_BUF_SHIFT); + iocsr_writeq(val, LOONGARCH_IOCSR_MBUF_SEND); +}; + +static u32 csr_ipi_read_clear(int cpu) +{ + u32 action; + + /* Load the ipi register to figure out what we're supposed to do */ + action = iocsr_readl(LOONGARCH_IOCSR_IPI_STATUS); + /* Clear the ipi register to clear the interrupt */ + iocsr_writel(action, LOONGARCH_IOCSR_IPI_CLEAR); + + return action; +} + +static void csr_ipi_write_action(int cpu, u32 action) +{ + unsigned int irq = 0; + + while ((irq = ffs(action))) { + uint32_t val = IOCSR_IPI_SEND_BLOCKING; + val |= (irq - 1); + val |= (cpu << IOCSR_IPI_SEND_CPU_SHIFT); + iocsr_writel(val, LOONGARCH_IOCSR_IPI_SEND); + action &= ~BIT(irq - 1); + } +} + +static u32 legacy_ipi_read_clear(int cpu) +{ + u32 action; + + /* Load the ipi register to figure out what we're supposed to do */ + action = xconf_readl(ipi_status_regs[cpu]); + /* Clear the ipi register to clear the interrupt */ + xconf_writel(action, ipi_clear_regs[cpu]); + + return action; +} + +static void legacy_ipi_write_action(int cpu, u32 action) +{ + xconf_writel((u32)action, ipi_set_regs[cpu]); +} + +static void ipi_method_init(void) +{ + if (cpu_has_csripi) { + ipi_read_clear = csr_ipi_read_clear; + ipi_write_action = csr_ipi_write_action; + } else { + ipi_read_clear = legacy_ipi_read_clear; + ipi_write_action = legacy_ipi_write_action; + } +} + +static void ipi_regaddrs_init(void) +{ + int i, node, core; + + for (i = 0; i < MAX_CPUS; i++) { + node = i / 4; + core = i % 4; + ipi_set_regs[i] = (void *) + (smp_group[node] + core_offsets[core] + SET); + ipi_clear_regs[i] = (void *) + (smp_group[node] + core_offsets[core] + CLEAR); + ipi_status_regs[i] = (void *) + (smp_group[node] + core_offsets[core] + STATUS); + ipi_en_regs[i] = (void *) + (smp_group[node] + core_offsets[core] + EN); + ipi_mailbox_buf[i] = (void *) + (smp_group[node] + core_offsets[core] + MBUF); + } +} + +/* + * Simple enough, just poke the appropriate ipi register + */ +static void loongson3_send_ipi_single(int cpu, unsigned int action) +{ + ipi_write_action(cpu_logical_map(cpu), (u32)action); +} + +static void +loongson3_send_ipi_mask(const struct cpumask *mask, unsigned int action) +{ + unsigned int i; + + for_each_cpu(i, mask) + ipi_write_action(cpu_logical_map(i), (u32)action); +} + +irqreturn_t loongson3_ipi_interrupt(int irq, void *dev) +{ + unsigned int action; + unsigned int cpu = smp_processor_id(); + + action = ipi_read_clear(cpu_logical_map(cpu)); + + smp_mb(); + + if (action & SMP_RESCHEDULE) { + scheduler_ipi(); + per_cpu(irq_stat, cpu).ipi_irqs[IPI_RESCHEDULE]++; + } + + if (action & SMP_CALL_FUNCTION) { + generic_smp_call_function_interrupt(); + per_cpu(irq_stat, cpu).ipi_irqs[IPI_CALL_FUNCTION]++; + } + + return IRQ_HANDLED; +} + +/* + * SMP init and finish on secondary CPUs + */ +static void loongson3_init_secondary(void) +{ + unsigned int cpu = smp_processor_id(); + unsigned int imask = ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | + ECFGF_IPI | ECFGF_PMC | ECFGF_TIMER; + + change_csr_ecfg(ECFG0_IM, imask); + + if (cpu_has_csripi) + iocsr_writel(0xffffffff, LOONGARCH_IOCSR_IPI_EN); + else + xconf_writel(0xffffffff, ipi_en_regs[cpu_logical_map(cpu)]); + + per_cpu(cpu_state, cpu) = CPU_ONLINE; + cpu_set_core(&cpu_data[cpu], + cpu_logical_map(cpu) % loongson_sysconf.cores_per_package); + cpu_set_cluster(&cpu_data[cpu], + cpu_logical_map(cpu) / loongson_sysconf.cores_per_package); + cpu_data[cpu].package = + cpu_logical_map(cpu) / loongson_sysconf.cores_per_package; +} + +static void loongson3_smp_finish(void) +{ + int cpu = smp_processor_id(); + + local_irq_enable(); + + if (cpu_has_csripi) + iocsr_writeq(0, LOONGARCH_IOCSR_MBUF0); + else + xconf_writeq(0, ipi_mailbox_buf[cpu_logical_map(cpu)]+0x0); + + pr_info("CPU#%d finished\n", smp_processor_id()); +} + +static void __init loongson3_smp_setup(void) +{ + ipi_method_init(); + ipi_regaddrs_init(); + + if (cpu_has_csripi) + iocsr_writel(0xffffffff, LOONGARCH_IOCSR_IPI_EN); + else + xconf_writel(0xffffffff, ipi_en_regs[cpu_logical_map(0)]); + + pr_info("Detected %i available CPU(s)\n", loongson_sysconf.nr_cpus); + + cpu_set_core(&cpu_data[0], + cpu_logical_map(0) % loongson_sysconf.cores_per_package); + cpu_set_cluster(&cpu_data[0], + cpu_logical_map(0) / loongson_sysconf.cores_per_package); + cpu_data[0].package = cpu_logical_map(0) / loongson_sysconf.cores_per_package; +} + +static void __init loongson3_prepare_cpus(unsigned int max_cpus) +{ + int i = 0; + + for (i = 0; i < loongson_sysconf.nr_cpus; i++) { + set_cpu_present(i, true); + + if (cpu_has_csripi) + csr_mail_send(0, __cpu_logical_map[i], 0); + else + xconf_writeq(0, ipi_mailbox_buf[__cpu_logical_map[i]]+0x0); + } + + per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; +} + +/* + * Setup the PC, SP, and TP of a secondary processor and start it runing! + */ +static int loongson3_boot_secondary(int cpu, struct task_struct *idle) +{ + unsigned long entry; + + pr_info("Booting CPU#%d...\n", cpu); + + entry = __pa_symbol(&smpboot_entry); + + if (loongson_sysconf.bpi_ver < BPI_VERSION_V3) + entry = TO_CAC(entry); + + cpuboot_data.stack = (unsigned long)__KSTK_TOS(idle); + cpuboot_data.thread_info = (unsigned long)task_thread_info(idle); + + if (cpu_has_csripi) + csr_mail_send(entry, cpu_logical_map(cpu), 0); + else + xconf_writeq(entry, ipi_mailbox_buf[cpu_logical_map(cpu)]+0x0); + + loongson3_send_ipi_single(cpu, SMP_BOOT_CPU); + + return 0; +} + +#ifdef CONFIG_HOTPLUG_CPU + +static bool io_master(int cpu) +{ + int i, node, master; + + if (cpu == 0) + return true; + + for (i = 1; i < loongson_sysconf.nr_io_pics; i++) { + node = eiointc_get_node(i); + master = cpu_number_map(node * CORES_PER_EIO_NODE); + if (cpu == master) + return true; + } + + return false; +} + +static int loongson3_cpu_disable(void) +{ + unsigned long flags; + unsigned int cpu = smp_processor_id(); + + if (io_master(cpu)) + return -EBUSY; + + set_cpu_online(cpu, false); + calculate_cpu_foreign_map(); + local_irq_save(flags); + irq_migrate_all_off_this_cpu(); + clear_csr_ecfg(ECFG0_IM); + local_irq_restore(flags); + local_flush_tlb_all(); + + return 0; +} + + +static void loongson3_cpu_die(unsigned int cpu) +{ + while (per_cpu(cpu_state, cpu) != CPU_DEAD) + cpu_relax(); + + mb(); +} + +/* To shutdown a core in Loongson 3, the target core should go to XKPRANGE + * and flush all L1 entries at first. Then, another core (usually Core 0) + * can safely disable the clock of the target core. loongson3_play_dead() + * is called via XKPRANGE (uncached and unmmaped) */ +static void loongson3_play_dead(int *state_addr) +{ + register int val; + register long cpuid, core, node, count; + register void *addr, *base, *initfunc; + + __asm__ __volatile__( + " li.d %[addr], 0x8000000000000000\n" + "1: cacop 0x8, %[addr], 0 \n" /* flush L1 ICache */ + " cacop 0x8, %[addr], 1 \n" + " cacop 0x8, %[addr], 2 \n" + " cacop 0x8, %[addr], 3 \n" + " cacop 0x9, %[addr], 0 \n" /* flush L1 DCache */ + " cacop 0x9, %[addr], 1 \n" + " cacop 0x9, %[addr], 2 \n" + " cacop 0x9, %[addr], 3 \n" + " addi.w %[sets], %[sets], -1 \n" + " addi.d %[addr], %[addr], 0x40 \n" + " bnez %[sets], 1b \n" + " li.d %[addr], 0x8000000000000000\n" + "2: cacop 0xa, %[addr], 0 \n" /* flush L1 VCache */ + " cacop 0xa, %[addr], 1 \n" + " cacop 0xa, %[addr], 2 \n" + " cacop 0xa, %[addr], 3 \n" + " cacop 0xa, %[addr], 4 \n" + " cacop 0xa, %[addr], 5 \n" + " cacop 0xa, %[addr], 6 \n" + " cacop 0xa, %[addr], 7 \n" + " cacop 0xa, %[addr], 8 \n" + " cacop 0xa, %[addr], 9 \n" + " cacop 0xa, %[addr], 10 \n" + " cacop 0xa, %[addr], 11 \n" + " cacop 0xa, %[addr], 12 \n" + " cacop 0xa, %[addr], 13 \n" + " cacop 0xa, %[addr], 14 \n" + " cacop 0xa, %[addr], 15 \n" + " addi.w %[vsets], %[vsets], -1 \n" + " addi.d %[addr], %[addr], 0x40 \n" + " bnez %[vsets], 2b \n" + " li.w %[val], 0x7 \n" /* *state_addr = CPU_DEAD; */ + " st.w %[val], %[state_addr], 0 \n" + " dbar 0 \n" + " cacop 0x11, %[state_addr], 0 \n" /* flush entry of *state_addr */ + : [addr] "=&r" (addr), [val] "=&r" (val) + : [state_addr] "r" (state_addr), + [sets] "r" (cpu_data[smp_processor_id()].dcache.sets), + [vsets] "r" (cpu_data[smp_processor_id()].vcache.sets)); + + local_irq_enable(); + change_csr_ecfg(ECFG0_IM, ECFGF_IPI); + + __asm__ __volatile__( + " idle 0 \n" + " csrrd %[cpuid], 0x20 \n" + " andi %[cpuid], %[cpuid], 0x1ff \n" + " li.d %[base], 0x800000001fe01000 \n" + " andi %[core], %[cpuid], 0x3 \n" + " slli.w %[core], %[core], 8 \n" /* get core id */ + " or %[base], %[base], %[core] \n" + " andi %[node], %[cpuid], 0x3c \n" + " slli.d %[node], %[node], 42 \n" /* get node id */ + " or %[base], %[base], %[node] \n" + " ld.d %[initfunc], %[base], 0x20 \n" /* Get init PC via */ + " jirl $zero, %[initfunc], 0 \n" /* jump to initial PC */ + " nop \n" + : [core] "=&r" (core), [node] "=&r" (node), + [base] "=&r" (base), [cpuid] "=&r" (cpuid), + [count] "=&r" (count), [initfunc] "=&r" (initfunc) + : /* No Input */ + : "a1"); + + unreachable(); +} + +void play_dead(void) +{ + int *state_addr; + unsigned int cpu = smp_processor_id(); + void (*play_dead_uncached)(int *); + + idle_task_exit(); + play_dead_uncached = (void *)TO_UNCAC(__pa((unsigned long)loongson3_play_dead)); + state_addr = &per_cpu(cpu_state, cpu); + mb(); + play_dead_uncached(state_addr); +} + +static int loongson3_disable_clock(unsigned int cpu) +{ + uint64_t core_id = cpu_core(&cpu_data[cpu]); + uint64_t package_id = cpu_data[cpu].package; + + LOONGSON_FREQCTRL(package_id) &= ~(1 << (core_id * 4 + 3)); + + return 0; +} + +static int loongson3_enable_clock(unsigned int cpu) +{ + uint64_t core_id = cpu_core(&cpu_data[cpu]); + uint64_t package_id = cpu_data[cpu].package; + + LOONGSON_FREQCTRL(package_id) |= 1 << (core_id * 4 + 3); + + return 0; +} + +static int register_loongson3_notifier(void) +{ + return cpuhp_setup_state_nocalls(CPUHP_LOONGARCH_SOC_PREPARE, + "loongarch/loongson:prepare", + loongson3_enable_clock, + loongson3_disable_clock); +} +early_initcall(register_loongson3_notifier); + +#endif + +const struct plat_smp_ops loongson3_smp_ops = { + .send_ipi_single = loongson3_send_ipi_single, + .send_ipi_mask = loongson3_send_ipi_mask, + .smp_setup = loongson3_smp_setup, + .prepare_cpus = loongson3_prepare_cpus, + .boot_secondary = loongson3_boot_secondary, + .init_secondary = loongson3_init_secondary, + .smp_finish = loongson3_smp_finish, +#ifdef CONFIG_HOTPLUG_CPU + .cpu_disable = loongson3_cpu_disable, + .cpu_die = loongson3_cpu_die, +#endif +}; + +/* + * Power management + */ +#ifdef CONFIG_PM + +static int loongson3_ipi_suspend(void) +{ + return 0; +} + +static void loongson3_ipi_resume(void) +{ + if (cpu_has_csripi) + iocsr_writel(0xffffffff, LOONGARCH_IOCSR_IPI_EN); + else + xconf_writel(0xffffffff, ipi_en_regs[cpu_logical_map(0)]); +} + +static struct syscore_ops loongson3_ipi_syscore_ops = { + .resume = loongson3_ipi_resume, + .suspend = loongson3_ipi_suspend, +}; + +/* + * Enable boot cpu ipi before enabling nonboot cpus + * during syscore_resume. + * */ +static int __init ipi_pm_init(void) +{ + register_syscore_ops(&loongson3_ipi_syscore_ops); + return 0; +} + +core_initcall(ipi_pm_init); +#endif diff --git a/arch/loongarch/mm/tlbex.S b/arch/loongarch/mm/tlbex.S index 0336ba72c2d1..c35f26d34f7b 100644 --- a/arch/loongarch/mm/tlbex.S +++ b/arch/loongarch/mm/tlbex.S @@ -92,7 +92,14 @@ vmalloc_done_load: slli.d t0, t0, _PTE_T_LOG2 add.d t1, ra, t0 +#ifdef CONFIG_SMP +smp_pgtable_change_load: +#endif +#ifdef CONFIG_SMP + ll.d t0, t1, 0 +#else ld.d t0, t1, 0 +#endif tlbsrch srli.d ra, t0, _PAGE_PRESENT_SHIFT @@ -100,7 +107,12 @@ vmalloc_done_load: beq ra, $r0, nopage_tlb_load ori t0, t0, _PAGE_VALID +#ifdef CONFIG_SMP + sc.d t0, t1, 0 + beq t0, $r0, smp_pgtable_change_load +#else st.d t0, t1, 0 +#endif ori t1, t1, 8 xori t1, t1, 8 ld.d t0, t1, 0 @@ -124,14 +136,24 @@ vmalloc_load: * spots a huge page. */ tlb_huge_update_load: +#ifdef CONFIG_SMP + ll.d t0, t1, 0 +#else ld.d t0, t1, 0 +#endif srli.d ra, t0, _PAGE_PRESENT_SHIFT andi ra, ra, 1 beq ra, $r0, nopage_tlb_load tlbsrch ori t0, t0, _PAGE_VALID +#ifdef CONFIG_SMP + sc.d t0, t1, 0 + beq t0, $r0, tlb_huge_update_load + ld.d t0, t1, 0 +#else st.d t0, t1, 0 +#endif addu16i.d t1, $r0, -(CSR_TLBIDX_EHINV >> 16) addi.d ra, t1, 0 csrxchg ra, t1, LOONGARCH_CSR_TLBIDX @@ -177,6 +199,7 @@ tlb_huge_update_load: csrxchg t1, t0, LOONGARCH_CSR_TLBIDX nopage_tlb_load: + dbar 0 csrrd ra, EXCEPTION_KS2 la.abs t0, tlb_do_page_fault_0 jirl $r0, t0, 0 @@ -233,7 +256,14 @@ vmalloc_done_store: slli.d t0, t0, _PTE_T_LOG2 add.d t1, ra, t0 +#ifdef CONFIG_SMP +smp_pgtable_change_store: +#endif +#ifdef CONFIG_SMP + ll.d t0, t1, 0 +#else ld.d t0, t1, 0 +#endif tlbsrch srli.d ra, t0, _PAGE_PRESENT_SHIFT @@ -242,7 +272,12 @@ vmalloc_done_store: bne ra, $r0, nopage_tlb_store ori t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) +#ifdef CONFIG_SMP + sc.d t0, t1, 0 + beq t0, $r0, smp_pgtable_change_store +#else st.d t0, t1, 0 +#endif ori t1, t1, 8 xori t1, t1, 8 @@ -267,7 +302,11 @@ vmalloc_store: * spots a huge page. */ tlb_huge_update_store: +#ifdef CONFIG_SMP + ll.d t0, t1, 0 +#else ld.d t0, t1, 0 +#endif srli.d ra, t0, _PAGE_PRESENT_SHIFT andi ra, ra, ((_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT) xori ra, ra, ((_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT) @@ -276,7 +315,13 @@ tlb_huge_update_store: tlbsrch ori t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) +#ifdef CONFIG_SMP + sc.d t0, t1, 0 + beq t0, $r0, tlb_huge_update_store + ld.d t0, t1, 0 +#else st.d t0, t1, 0 +#endif addu16i.d t1, $r0, -(CSR_TLBIDX_EHINV >> 16) addi.d ra, t1, 0 csrxchg ra, t1, LOONGARCH_CSR_TLBIDX @@ -322,6 +367,7 @@ tlb_huge_update_store: csrxchg t1, t0, LOONGARCH_CSR_TLBIDX nopage_tlb_store: + dbar 0 csrrd ra, EXCEPTION_KS2 la.abs t0, tlb_do_page_fault_1 jirl $r0, t0, 0 @@ -377,7 +423,14 @@ vmalloc_done_modify: slli.d t0, t0, _PTE_T_LOG2 add.d t1, ra, t0 +#ifdef CONFIG_SMP +smp_pgtable_change_modify: +#endif +#ifdef CONFIG_SMP + ll.d t0, t1, 0 +#else ld.d t0, t1, 0 +#endif tlbsrch srli.d ra, t0, _PAGE_WRITE_SHIFT @@ -385,7 +438,12 @@ vmalloc_done_modify: beq ra, $r0, nopage_tlb_modify ori t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) +#ifdef CONFIG_SMP + sc.d t0, t1, 0 + beq t0, $r0, smp_pgtable_change_modify +#else st.d t0, t1, 0 +#endif ori t1, t1, 8 xori t1, t1, 8 ld.d t0, t1, 0 @@ -409,7 +467,11 @@ vmalloc_modify: * build_tlbchange_handler_head spots a huge page. */ tlb_huge_update_modify: +#ifdef CONFIG_SMP + ll.d t0, t1, 0 +#else ld.d t0, t1, 0 +#endif srli.d ra, t0, _PAGE_WRITE_SHIFT andi ra, ra, 1 @@ -418,7 +480,13 @@ tlb_huge_update_modify: tlbsrch ori t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) +#ifdef CONFIG_SMP + sc.d t0, t1, 0 + beq t0, $r0, tlb_huge_update_modify + ld.d t0, t1, 0 +#else st.d t0, t1, 0 +#endif /* * A huge PTE describes an area the size of the * configured huge page size. This is twice the @@ -458,6 +526,7 @@ tlb_huge_update_modify: csrxchg t1, t0, LOONGARCH_CSR_TLBIDX nopage_tlb_modify: + dbar 0 csrrd ra, EXCEPTION_KS2 la.abs t0, tlb_do_page_fault_1 jirl $r0, t0, 0 diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index d71e1c144aad..fbe96b098999 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -91,6 +91,7 @@ enum cpuhp_state { CPUHP_ZCOMP_PREPARE, CPUHP_TIMERS_PREPARE, CPUHP_MIPS_SOC_PREPARE, + CPUHP_LOONGARCH_SOC_PREPARE, CPUHP_BP_PREPARE_DYN, CPUHP_BP_PREPARE_DYN_END = CPUHP_BP_PREPARE_DYN + 20, CPUHP_BRINGUP_CPU, -- Gitee From 78c074c74418591e3e84107b736ea5967583ab8b Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 31 Dec 2020 15:13:33 +0800 Subject: [PATCH 24/59] LoongArch: Add basic NUMA support Change-Id: Id0d178b820de46a05cc608c630c2d4b31a992218 Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 43 ++ arch/loongarch/include/asm/bootinfo.h | 1 + .../include/asm/mach-loongson64/topology.h | 26 + arch/loongarch/include/asm/mmzone.h | 18 + arch/loongarch/include/asm/numa.h | 71 +++ arch/loongarch/include/asm/pgtable.h | 12 + arch/loongarch/include/asm/topology.h | 1 + arch/loongarch/kernel/acpi.c | 97 ++++ arch/loongarch/kernel/module.c | 1 + arch/loongarch/kernel/setup.c | 2 +- arch/loongarch/kernel/smp.c | 29 +- arch/loongarch/kernel/topology.c | 5 + arch/loongarch/loongson64/Makefile | 4 +- arch/loongarch/loongson64/dma.c | 46 ++ arch/loongarch/loongson64/init.c | 4 + arch/loongarch/loongson64/irq.c | 1 + arch/loongarch/loongson64/numa.c | 472 ++++++++++++++++++ arch/loongarch/loongson64/smp.c | 6 + arch/loongarch/mm/init.c | 30 ++ arch/loongarch/pci/acpi.c | 3 + 20 files changed, 867 insertions(+), 5 deletions(-) create mode 100644 arch/loongarch/include/asm/mach-loongson64/topology.h create mode 100644 arch/loongarch/include/asm/mmzone.h create mode 100644 arch/loongarch/include/asm/numa.h create mode 100644 arch/loongarch/loongson64/dma.c create mode 100644 arch/loongarch/loongson64/numa.c diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 2016b0ed06e8..d001fe6e8476 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -35,6 +35,7 @@ config LOONGARCH select ARCH_INLINE_SPIN_UNLOCK_IRQ if !PREEMPTION select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE if !PREEMPTION select ARCH_SUPPORTS_ACPI + select ARCH_SUPPORTS_NUMA_BALANCING select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF if 64BIT select ARCH_USE_QUEUED_RWLOCKS @@ -120,6 +121,7 @@ config MACH_LOONGSON64 select SYS_HAS_CPU_LOONGSON64 select SYS_SUPPORTS_SMP select SYS_SUPPORTS_HOTPLUG_CPU + select SYS_SUPPORTS_NUMA select SYS_SUPPORTS_64BIT_KERNEL select ZONE_DMA32 help @@ -165,6 +167,7 @@ choice config CPU_LOONGSON64 bool "Loongson 64-bit CPU" depends on SYS_HAS_CPU_LOONGSON64 + select ARCH_HAS_PHYS_TO_DMA select CPU_SUPPORTS_64BIT_KERNEL select GPIOLIB select SWIOTLB @@ -331,6 +334,7 @@ config ARCH_SELECT_MEMORY_MODEL config ARCH_FLATMEM_ENABLE def_bool y + depends on !NUMA config ARCH_SPARSEMEM_ENABLE def_bool y @@ -340,6 +344,41 @@ config ARCH_SPARSEMEM_ENABLE or have huge holes in the physical address space for other reasons. See for more. +config NUMA + bool "NUMA Support" + depends on SYS_SUPPORTS_NUMA + select ACPI_NUMA if ACPI + help + Say Y to compile the kernel to support NUMA (Non-Uniform Memory + Access). This option improves performance on systems with more + than two nodes; on two node systems it is generally better to + leave it disabled; on single node systems disable this option + disabled. + +config SYS_SUPPORTS_NUMA + bool + +config NODES_SHIFT + int + default "6" + depends on NEED_MULTIPLE_NODES + +config USE_PERCPU_NUMA_NODE_ID + def_bool y + depends on NUMA + +config HAVE_SETUP_PER_CPU_AREA + def_bool y + depends on NUMA + +config NEED_PER_CPU_EMBED_FIRST_CHUNK + def_bool y + depends on NUMA + +config NEED_PER_CPU_PAGE_FIRST_CHUNK + def_bool y + depends on NUMA + config DMI bool "Enable DMI scanning" select DMI_SCAN_MACHINE_NON_EFI_FALLBACK @@ -474,6 +513,10 @@ config ARCH_MEMORY_PROBE def_bool y depends on MEMORY_HOTPLUG +config HAVE_ARCH_NODEDATA_EXTENSION + def_bool y + depends on NUMA && ARCH_ENABLE_MEMORY_HOTPLUG + config LOCKDEP_SUPPORT bool default y diff --git a/arch/loongarch/include/asm/bootinfo.h b/arch/loongarch/include/asm/bootinfo.h index 669d45d958be..90e35850cb26 100644 --- a/arch/loongarch/include/asm/bootinfo.h +++ b/arch/loongarch/include/asm/bootinfo.h @@ -19,6 +19,7 @@ extern void detect_memory_region(phys_addr_t start, phys_addr_t sz_min, phys_ad extern void early_init(void); extern void platform_init(void); +extern void plat_swiotlb_setup(void); /* * Initial kernel command line, usually setup by fw_init_cmdline() diff --git a/arch/loongarch/include/asm/mach-loongson64/topology.h b/arch/loongarch/include/asm/mach-loongson64/topology.h new file mode 100644 index 000000000000..2bfad6a6ce30 --- /dev/null +++ b/arch/loongarch/include/asm/mach-loongson64/topology.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_MACH_TOPOLOGY_H +#define _ASM_MACH_TOPOLOGY_H + +#ifdef CONFIG_NUMA + +extern cpumask_t cpus_on_node[]; + +#define cpumask_of_node(node) (&cpus_on_node[node]) + +struct pci_bus; +extern int pcibus_to_node(struct pci_bus *); + +#define cpumask_of_pcibus(bus) (cpu_online_mask) + +extern unsigned char __node_distances[MAX_NUMNODES][MAX_NUMNODES]; + +void numa_set_distance(int from, int to, int distance); + +#define node_distance(from, to) (__node_distances[(from)][(to)]) + +#else +#define pcibus_to_node(bus) 0 +#endif + +#endif /* _ASM_MACH_TOPOLOGY_H */ diff --git a/arch/loongarch/include/asm/mmzone.h b/arch/loongarch/include/asm/mmzone.h new file mode 100644 index 000000000000..db106f92dd9f --- /dev/null +++ b/arch/loongarch/include/asm/mmzone.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Written by Huacai Chen (chenhuacai@loongson.cn) + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_MMZONE_H_ +#define _ASM_MMZONE_H_ + +#include +#include + +extern struct pglist_data *node_data[]; + +#define NODE_DATA(nid) (node_data[(nid)]) + +extern void setup_zero_pages(void); + +#endif /* _ASM_MMZONE_H_ */ diff --git a/arch/loongarch/include/asm/numa.h b/arch/loongarch/include/asm/numa.h new file mode 100644 index 000000000000..a6cadb24846b --- /dev/null +++ b/arch/loongarch/include/asm/numa.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * LoongArch specific ACPICA environments and implementation + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + * Author: lvjianmin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _ASM_LOONGARCH_NUMA_H +#define _ASM_LOONGARCH_NUMA_H + +#include + +#define NODE_ADDRSPACE_SHIFT 44 + +#define pa_to_nid(addr) (((addr) & 0xf00000000000) >> NODE_ADDRSPACE_SHIFT) +#define nid_to_addrbase(nid) (_ULCAST_(nid) << NODE_ADDRSPACE_SHIFT) + +#ifdef CONFIG_NUMA + +extern int numa_off; +extern s16 __cpuid_to_node[CONFIG_NR_CPUS]; +extern nodemask_t numa_nodes_parsed __initdata; + +struct numa_memblk { + u64 start; + u64 end; + int nid; +}; + +#define NR_NODE_MEMBLKS (MAX_NUMNODES*2) +struct numa_meminfo { + int nr_blks; + struct numa_memblk blk[NR_NODE_MEMBLKS]; +}; + +extern int __init numa_add_memblk(int nodeid, u64 start, u64 end); + +extern void __init early_numa_add_cpu(int cpuid, s16 node); +extern void numa_add_cpu(unsigned int cpu); +extern void numa_remove_cpu(unsigned int cpu); + +static inline void numa_clear_node(int cpu) +{ +} + +static inline void set_cpuid_to_node(int cpuid, s16 node) +{ + __cpuid_to_node[cpuid] = node; +} + +extern int early_cpu_to_node(int cpu); + +#else + +static inline void early_numa_add_cpu(int cpuid, s16 node) { } +static inline void numa_add_cpu(unsigned int cpu) { } +static inline void numa_remove_cpu(unsigned int cpu) { } + +static inline int early_cpu_to_node(int cpu) +{ + return 0; +} + +#endif /* CONFIG_NUMA */ + +#endif /* _ASM_LOONGARCH_NUMA_H */ diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index eb054b7ffd51..27c7a775e9ca 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -349,6 +349,18 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +#ifdef CONFIG_NUMA_BALANCING +static inline long pte_protnone(pte_t pte) +{ + return (pte_val(pte) & _PAGE_PROTNONE); +} + +static inline long pmd_protnone(pmd_t pmd) +{ + return (pmd_val(pmd) & _PAGE_PROTNONE); +} +#endif /* CONFIG_NUMA_BALANCING */ + /* * We provide our own get_unmapped area to cope with the virtual aliasing * constraints placed on us by the cache architecture. diff --git a/arch/loongarch/include/asm/topology.h b/arch/loongarch/include/asm/topology.h index b5eb43f57e25..3e3c8c3b1990 100644 --- a/arch/loongarch/include/asm/topology.h +++ b/arch/loongarch/include/asm/topology.h @@ -5,6 +5,7 @@ #ifndef __ASM_TOPOLOGY_H #define __ASM_TOPOLOGY_H +#include #include #ifdef CONFIG_SMP diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c index 04b28a10498a..18b9153cfa6f 100644 --- a/arch/loongarch/kernel/acpi.c +++ b/arch/loongarch/kernel/acpi.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #define MSI_MSG_ADDRESS 0x2FF00000 @@ -558,6 +559,81 @@ int __init acpi_boot_init(void) return 0; } +#ifdef CONFIG_ACPI_NUMA + +static __init int setup_node(int pxm) +{ + return acpi_map_pxm_to_node(pxm); +} + +/* + * Callback for SLIT parsing. pxm_to_node() returns NUMA_NO_NODE for + * I/O localities since SRAT does not list them. I/O localities are + * not supported at this point. + */ +extern unsigned char __node_distances[MAX_NUMNODES][MAX_NUMNODES]; +unsigned int numa_distance_cnt; + +static inline unsigned int get_numa_distances_cnt(struct acpi_table_slit *slit) +{ + return slit->locality_count; +} + +void __init numa_set_distance(int from, int to, int distance) +{ + if ((u8)distance != distance || (from == to && distance != LOCAL_DISTANCE)) { + pr_warn_once("Warning: invalid distance parameter, from=%d to=%d distance=%d\n", + from, to, distance); + return; + } + + __node_distances[from][to] = distance; +} + +/* Callback for Proximity Domain -> CPUID mapping */ +void __init +acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) +{ + int pxm, node; + + if (srat_disabled()) + return; + if (pa->header.length != sizeof(struct acpi_srat_cpu_affinity)) { + bad_srat(); + return; + } + if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0) + return; + pxm = pa->proximity_domain_lo; + if (acpi_srat_revision >= 2) { + pxm |= (pa->proximity_domain_hi[0] << 8); + pxm |= (pa->proximity_domain_hi[1] << 16); + pxm |= (pa->proximity_domain_hi[2] << 24); + } + node = setup_node(pxm); + if (node < 0) { + printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm); + bad_srat(); + return; + } + + if (pa->apic_id >= CONFIG_NR_CPUS) { + printk(KERN_INFO "SRAT: PXM %u -> CPU 0x%02x -> Node %u skipped apicid that is too big\n", + pxm, pa->apic_id, node); + return; + } + + early_numa_add_cpu(pa->apic_id, node); + + set_cpuid_to_node(pa->apic_id, node); + node_set(node, numa_nodes_parsed); + printk(KERN_INFO "SRAT: PXM %u -> CPU 0x%02x -> Node %u\n", + pxm, pa->apic_id, node); +} + +void __init acpi_numa_arch_fixup(void) {} +#endif + void __init arch_reserve_mem_area(acpi_physical_address addr, size_t size) { memblock_reserve(addr, size); @@ -567,6 +643,22 @@ void __init arch_reserve_mem_area(acpi_physical_address addr, size_t size) #include +static int __ref acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) +{ +#ifdef CONFIG_ACPI_NUMA + int nid; + + nid = acpi_get_node(handle); + if (nid != NUMA_NO_NODE) { + set_cpuid_to_node(physid, nid); + node_set(nid, numa_nodes_parsed); + set_cpu_numa_node(cpu, nid); + cpumask_set_cpu(cpu, cpumask_of_node(nid)); + } +#endif + return 0; +} + int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id, int *pcpu) { int cpu; @@ -577,6 +669,8 @@ int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id, int *pcpu return cpu; } + acpi_map_cpu2node(handle, cpu, physid); + *pcpu = cpu; return 0; @@ -585,6 +679,9 @@ EXPORT_SYMBOL(acpi_map_cpu); int acpi_unmap_cpu(int cpu) { +#ifdef CONFIG_ACPI_NUMA + set_cpuid_to_node(cpu_logical_map(cpu), NUMA_NO_NODE); +#endif set_cpu_present(cpu, false); num_processors--; diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c index daef311069c9..f7bd627ec629 100644 --- a/arch/loongarch/kernel/module.c +++ b/arch/loongarch/kernel/module.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index e53844bc0e33..5f27b1fe88a0 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -292,7 +292,7 @@ static void __init arch_mem_init(char **cmdline_p) sparse_init(); memblock_set_bottom_up(true); - swiotlb_init(1); + plat_swiotlb_setup(); dma_contiguous_reserve(PFN_PHYS(max_low_pfn)); diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index b4746805334d..59d2736888c2 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -211,14 +212,36 @@ void __init smp_prepare_cpus(unsigned int max_cpus) /* Preload SMP state for boot cpu */ void smp_prepare_boot_cpu(void) { - unsigned int cpu; + unsigned int cpu, node, rr_node; set_cpu_possible(0, true); set_cpu_online(0, true); set_my_cpu_offset(per_cpu_offset(0)); - for_each_possible_cpu(cpu) - set_cpu_numa_node(cpu, 0); + rr_node = first_node(node_online_map); + for_each_possible_cpu(cpu) { + node = early_cpu_to_node(cpu); + + /* + * The mapping between present cpus and nodes has been + * built during MADT and SRAT parsing. + * + * If possible cpus = present cpus here, early_cpu_to_node + * will return valid node. + * + * If possible cpus > present cpus here (e.g. some possible + * cpus will be added by cpu-hotplug later), for possible but + * not present cpus, early_cpu_to_node will return NUMA_NO_NODE, + * and we just map them to online nodes in round-robin way. + * Once hotplugged, new correct mapping will be built for them. + * */ + if (node != NUMA_NO_NODE) + set_cpu_numa_node(cpu, node); + else { + set_cpu_numa_node(cpu, rr_node); + rr_node = next_node_in(rr_node, node_online_map); + } + } } int __cpu_up(unsigned int cpu, struct task_struct *tidle) diff --git a/arch/loongarch/kernel/topology.c b/arch/loongarch/kernel/topology.c index ab1a75c0b5a6..ae5f0e5414a3 100644 --- a/arch/loongarch/kernel/topology.c +++ b/arch/loongarch/kernel/topology.c @@ -37,6 +37,11 @@ static int __init topology_init(void) { int i, ret; +#ifdef CONFIG_NUMA + for_each_online_node(i) + register_one_node(i); +#endif /* CONFIG_NUMA */ + for_each_present_cpu(i) { struct cpu *c = &per_cpu(cpu_devices, i); diff --git a/arch/loongarch/loongson64/Makefile b/arch/loongarch/loongson64/Makefile index 134ebebaf5fb..3e54460d8d00 100644 --- a/arch/loongarch/loongson64/Makefile +++ b/arch/loongarch/loongson64/Makefile @@ -2,6 +2,8 @@ # All Loongson based systems # -obj-y += setup.o init.o env.o reset.o irq.o mem.o rtc.o +obj-y += setup.o init.o env.o reset.o irq.o mem.o dma.o rtc.o obj-$(CONFIG_SMP) += smp.o + +obj-$(CONFIG_NUMA) += numa.o diff --git a/arch/loongarch/loongson64/dma.c b/arch/loongarch/loongson64/dma.c new file mode 100644 index 000000000000..1743f3088de2 --- /dev/null +++ b/arch/loongarch/loongson64/dma.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +static int node_id_offset; + +dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) +{ + /* We extract 4bit node id (bit 44~47) from Loongson-3's + * 48bit address space and embed it into 40bit */ + long nid = (paddr >> 44) & 0xf; + return ((nid << 44) ^ paddr) | (nid << node_id_offset); +} + +phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) +{ + /* We extract 4bit node id (bit 44~47) from Loongson-3's + * 48bit address space and embed it into 40bit */ + long nid = (daddr >> node_id_offset) & 0xf; + return ((nid << node_id_offset) ^ daddr) | (nid << 44); +} + +void __init plat_swiotlb_setup(void) +{ + swiotlb_init(1); + node_id_offset = ((readl(LS7A_DMA_CFG) & LS7A_DMA_NODE_MASK) >> LS7A_DMA_NODE_SHF) + 36; +} diff --git a/arch/loongarch/loongson64/init.c b/arch/loongarch/loongson64/init.c index 5ecbb2a75caa..39b4162ccc07 100644 --- a/arch/loongarch/loongson64/init.c +++ b/arch/loongarch/loongson64/init.c @@ -125,7 +125,11 @@ void __init platform_init(void) acpi_boot_init(); #endif +#ifndef CONFIG_NUMA fw_init_memory(); +#else + fw_init_numa_memory(); +#endif dmi_setup(); smbios_parse(); pr_info("The BIOS Version: %s\n", b_info.bios_version); diff --git a/arch/loongarch/loongson64/irq.c b/arch/loongarch/loongson64/irq.c index be01ce1e9b32..468c80f0d1d4 100644 --- a/arch/loongarch/loongson64/irq.c +++ b/arch/loongarch/loongson64/irq.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/loongarch/loongson64/numa.c b/arch/loongarch/loongson64/numa.c new file mode 100644 index 000000000000..c3be1af68661 --- /dev/null +++ b/arch/loongarch/loongson64/numa.c @@ -0,0 +1,472 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Author: Xiang Gao + * Huacai Chen + * + * Copyright (C) 2020-2021 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +int numa_off; +struct pglist_data *node_data[MAX_NUMNODES]; +unsigned char __node_distances[MAX_NUMNODES][MAX_NUMNODES]; + +EXPORT_SYMBOL(node_data); +EXPORT_SYMBOL(__node_distances); + +static struct numa_meminfo numa_meminfo; +cpumask_t cpus_on_node[MAX_NUMNODES]; +cpumask_t phys_cpus_on_node[MAX_NUMNODES]; +EXPORT_SYMBOL(cpus_on_node); + +/* + * apicid, cpu, node mappings + */ +s16 __cpuid_to_node[CONFIG_NR_CPUS] = { + [0 ... CONFIG_NR_CPUS - 1] = NUMA_NO_NODE +}; +EXPORT_SYMBOL(__cpuid_to_node); + +nodemask_t numa_nodes_parsed __initdata; + +#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA +unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; +EXPORT_SYMBOL(__per_cpu_offset); + +static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) +{ + if (early_cpu_to_node(from) == early_cpu_to_node(to)) + return LOCAL_DISTANCE; + else + return REMOTE_DISTANCE; +} + +static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, + size_t align) +{ + return memblock_alloc_try_nid(size, align, __pa(MAX_DMA_ADDRESS), + MEMBLOCK_ALLOC_ACCESSIBLE, early_cpu_to_node(cpu)); +} + +static void __init pcpu_fc_free(void *ptr, size_t size) +{ + memblock_free_early(__pa(ptr), size); +} + +static void __init pcpu_populate_pte(unsigned long addr) +{ + pgd_t *pgd = pgd_offset_k(addr); + p4d_t *p4d = p4d_offset(pgd, addr); + pud_t *pud; + pmd_t *pmd; + + if (p4d_none(*p4d)) { + pud_t *new; + + new = memblock_alloc(PAGE_SIZE, PAGE_SIZE); + pgd_populate(&init_mm, pgd, new); +#ifndef __PAGETABLE_PUD_FOLDED + pud_init((unsigned long)new, (unsigned long)invalid_pmd_table); +#endif + } + + pud = pud_offset(p4d, addr); + if (pud_none(*pud)) { + pmd_t *new; + + new = memblock_alloc(PAGE_SIZE, PAGE_SIZE); + pud_populate(&init_mm, pud, new); +#ifndef __PAGETABLE_PMD_FOLDED + pmd_init((unsigned long)new, (unsigned long)invalid_pte_table); +#endif + } + + pmd = pmd_offset(pud, addr); + if (!pmd_present(*pmd)) { + pte_t *new; + + new = memblock_alloc(PAGE_SIZE, PAGE_SIZE); + pmd_populate_kernel(&init_mm, pmd, new); + } +} + +void __init setup_per_cpu_areas(void) +{ + unsigned long delta; + unsigned int cpu; + int rc = -EINVAL; + + if (pcpu_chosen_fc == PCPU_FC_AUTO) { + if (nr_node_ids >= 8) + pcpu_chosen_fc = PCPU_FC_PAGE; + else + pcpu_chosen_fc = PCPU_FC_EMBED; + } + + /* + * Always reserve area for module percpu variables. That's + * what the legacy allocator did. + */ + if (pcpu_chosen_fc != PCPU_FC_PAGE) { + rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE, + PERCPU_DYNAMIC_RESERVE, PMD_SIZE, + pcpu_cpu_distance, + pcpu_fc_alloc, pcpu_fc_free); + if (rc < 0) + pr_warn("%s allocator failed (%d), falling back to page size\n", + pcpu_fc_names[pcpu_chosen_fc], rc); + } + if (rc < 0) + rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, + pcpu_fc_alloc, pcpu_fc_free, + pcpu_populate_pte); + if (rc < 0) + panic("cannot initialize percpu area (err=%d)", rc); + + delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; + for_each_possible_cpu(cpu) + __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu]; +} +#endif + +/* + * Get nodeid by logical cpu number. + * __cpuid_to_node maps phyical cpu id to node, so we + * should use cpu_logical_map(cpu) to index it. + * + * This routine is only used in early phase during + * booting, after setup_per_cpu_areas calling and numa_node + * initialization, cpu_to_node will be used instead. + * */ +int early_cpu_to_node(int cpu) +{ + int physid = cpu_logical_map(cpu); + + if (physid < 0) + return NUMA_NO_NODE; + + return __cpuid_to_node[physid]; +} + +void __init early_numa_add_cpu(int cpuid, s16 node) +{ + int cpu = __cpu_number_map[cpuid]; + + if (cpu < 0) + return; + + cpumask_set_cpu(cpu, &cpus_on_node[node]); + cpumask_set_cpu(cpuid, &phys_cpus_on_node[node]); +} + +void numa_add_cpu(unsigned int cpu) +{ + int nid = cpu_to_node(cpu); + cpumask_set_cpu(cpu, &cpus_on_node[nid]); +} + +void numa_remove_cpu(unsigned int cpu) +{ + int nid = cpu_to_node(cpu); + cpumask_clear_cpu(cpu, &cpus_on_node[nid]); +} + +static int __init numa_add_memblk_to(int nid, u64 start, u64 end, + struct numa_meminfo *mi) +{ + /* ignore zero length blks */ + if (start == end) + return 0; + + /* whine about and ignore invalid blks */ + if (start > end || nid < 0 || nid >= MAX_NUMNODES) { + pr_warn("NUMA: Warning: invalid memblk node %d [mem %#010Lx-%#010Lx]\n", + nid, start, end - 1); + return 0; + } + + if (mi->nr_blks >= NR_NODE_MEMBLKS) { + pr_err("NUMA: too many memblk ranges\n"); + return -EINVAL; + } + + mi->blk[mi->nr_blks].start = PFN_ALIGN(start); + mi->blk[mi->nr_blks].end = PFN_ALIGN(end - PAGE_SIZE + 1); + mi->blk[mi->nr_blks].nid = nid; + mi->nr_blks++; + return 0; +} + +/** + * numa_add_memblk - Add one numa_memblk to numa_meminfo + * @nid: NUMA node ID of the new memblk + * @start: Start address of the new memblk + * @end: End address of the new memblk + * + * Add a new memblk to the default numa_meminfo. + * + * RETURNS: + * 0 on success, -errno on failure. + */ +int __init numa_add_memblk(int nid, u64 start, u64 end) +{ + return numa_add_memblk_to(nid, start, end, &numa_meminfo); +} + +static void __init alloc_node_data(int nid) +{ + void *nd; + unsigned long nd_pa; + size_t nd_sz = roundup(sizeof(pg_data_t), PAGE_SIZE); + + nd_pa = memblock_phys_alloc_try_nid(nd_sz, SMP_CACHE_BYTES, nid); + if (!nd_pa) { + pr_err("Cannot find %zu Byte for node_data (initial node: %d)\n", nd_sz, nid); + return; + } + + nd = __va(nd_pa); + + node_data[nid] = nd; + memset(nd, 0, sizeof(pg_data_t)); +} + +static void __init node_mem_init(unsigned int node) +{ + unsigned long start_pfn, end_pfn; + unsigned long node_addrspace_offset; + + node_addrspace_offset = nid_to_addrbase(node); + pr_info("Node%d's addrspace_offset is 0x%lx\n", + node, node_addrspace_offset); + + get_pfn_range_for_nid(node, &start_pfn, &end_pfn); + pr_info("Node%d: start_pfn=0x%lx, end_pfn=0x%lx\n", + node, start_pfn, end_pfn); + + alloc_node_data(node); +} + +#ifdef CONFIG_ACPI_NUMA + +/* + * Sanity check to catch more bad NUMA configurations (they are amazingly + * common). Make sure the nodes cover all memory. + */ +static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi) +{ + u64 numaram, biosram; + int i; + + numaram = 0; + for (i = 0; i < mi->nr_blks; i++) { + u64 s = mi->blk[i].start >> PAGE_SHIFT; + u64 e = mi->blk[i].end >> PAGE_SHIFT; + numaram += e - s; + numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e); + if ((s64)numaram < 0) + numaram = 0; + } + max_pfn = max_low_pfn; + biosram = max_pfn - absent_pages_in_range(0, max_pfn); + + BUG_ON((s64)(biosram - numaram) >= (1 << (20 - PAGE_SHIFT))); + return true; +} + +static void __init add_node_intersection(u32 node, u64 start, u64 size) +{ + static unsigned long num_physpages; + + num_physpages += (size >> PAGE_SHIFT); + pr_info("Node%d: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx Bytes\n", + node, ADDRESS_TYPE_SYSRAM, start, size); + pr_info(" start_pfn:0x%llx, end_pfn:0x%llx, num_physpages:0x%lx\n", + start >> PAGE_SHIFT, (start + size) >> PAGE_SHIFT, num_physpages); + memblock_set_node(start, size, &memblock.memory, node); +} + +/* + * add_numamem_region + * + * Add a uasable memory region described by BIOS. The + * routine gets each intersection between BIOS's region + * and node's region, and adds them into node's memblock + * pool. + * + */ +static void __init add_numamem_region(u64 start, u64 end) +{ + u32 i; + u64 ofs = start; + + if (start >= end) { + pr_debug("Invalid region: %016llx-%016llx\n", start, end); + return; + } + + for (i = 0; i < numa_meminfo.nr_blks; i++) { + struct numa_memblk *mb = &numa_meminfo.blk[i]; + + if (ofs > mb->end) + continue; + + if (end > mb->end) { + add_node_intersection(mb->nid, ofs, mb->end - ofs); + ofs = mb->end; + } else { + add_node_intersection(mb->nid, ofs, end - ofs); + break; + } + } +} + +static void __init init_node_memblock(void) +{ + u32 i, mem_type; + u64 mem_end, mem_start, mem_size; + + /* Parse memory information and activate */ + for (i = 0; i < loongson_mem_map->map_count; i++) { + mem_type = loongson_mem_map->map[i].mem_type; + mem_start = loongson_mem_map->map[i].mem_start; + mem_size = loongson_mem_map->map[i].mem_size; + mem_end = loongson_mem_map->map[i].mem_start + mem_size; + switch (mem_type) { + case ADDRESS_TYPE_SYSRAM: + mem_start = PFN_ALIGN(mem_start); + mem_end = PFN_ALIGN(mem_end - PAGE_SIZE + 1); + add_numamem_region(mem_start, mem_end); + break; + case ADDRESS_TYPE_ACPI: + mem_start = PFN_ALIGN(mem_start); + mem_end = PFN_ALIGN(mem_end - PAGE_SIZE + 1); + memblock_add(mem_start, mem_size); + add_numamem_region(mem_start, mem_end); + fallthrough; + case ADDRESS_TYPE_RESERVED: + pr_info("Resvd: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx Bytes\n", + mem_type, mem_start, mem_size); + memblock_reserve(mem_start, mem_size); + break; + } + } +} + +static void __init numa_default_distance(void) +{ + int row, col; + + for (row = 0; row < MAX_NUMNODES; row++) + for (col = 0; col < MAX_NUMNODES; col++) { + if (col == row) + __node_distances[row][col] = LOCAL_DISTANCE; + else + /* We assume that one node per package here! + * + * A SLIT should be used for multiple nodes per + * package to override default setting. + */ + __node_distances[row][col] = REMOTE_DISTANCE; + } +} + +static int __init numa_mem_init(int (*init_func)(void)) +{ + int i; + int ret; + int node; + + for (i = 0; i < NR_CPUS; i++) + set_cpuid_to_node(i, NUMA_NO_NODE); + + numa_default_distance(); + nodes_clear(numa_nodes_parsed); + nodes_clear(node_possible_map); + nodes_clear(node_online_map); + memset(&numa_meminfo, 0, sizeof(numa_meminfo)); + + /* Parse SRAT and SLIT if provided by firmware. */ + ret = init_func(); + if (ret < 0) + return ret; + + node_possible_map = numa_nodes_parsed; + if (WARN_ON(nodes_empty(node_possible_map))) + return -EINVAL; + + init_node_memblock(); + if (numa_meminfo_cover_memory(&numa_meminfo) == false) + return -EINVAL; + + for_each_node_mask(node, node_possible_map) { + node_mem_init(node); + node_set_online(node); + } + max_low_pfn = PHYS_PFN(memblock_end_of_DRAM()); + + return 0; +} +#endif +void __init paging_init(void) +{ + unsigned node; + unsigned long zones_size[MAX_NR_ZONES] = {0, }; + + for_each_online_node(node) { + unsigned long start_pfn, end_pfn; + + get_pfn_range_for_nid(node, &start_pfn, &end_pfn); + + if (end_pfn > max_low_pfn) + max_low_pfn = end_pfn; + } +#ifdef CONFIG_ZONE_DMA32 + zones_size[ZONE_DMA32] = MAX_DMA32_PFN; +#endif + zones_size[ZONE_NORMAL] = max_low_pfn; + free_area_init(zones_size); +} + +void __init mem_init(void) +{ + high_memory = (void *) __va(get_num_physpages() << PAGE_SHIFT); + memblock_free_all(); + setup_zero_pages(); /* This comes from node 0 */ + mem_init_print_info(NULL); +} + +int pcibus_to_node(struct pci_bus *bus) +{ + return dev_to_node(&bus->dev); +} +EXPORT_SYMBOL(pcibus_to_node); + +void __init fw_init_numa_memory(void) +{ + numa_mem_init(acpi_numa_init); + setup_nr_node_ids(); + loongson_sysconf.nr_nodes = nr_node_ids; + loongson_sysconf.cores_per_node = cpumask_weight(&phys_cpus_on_node[0]); +} +EXPORT_SYMBOL(fw_init_numa_memory); diff --git a/arch/loongarch/loongson64/smp.c b/arch/loongarch/loongson64/smp.c index 44218d69f872..d16f6112d3d5 100644 --- a/arch/loongarch/loongson64/smp.c +++ b/arch/loongarch/loongson64/smp.c @@ -215,6 +215,9 @@ static void loongson3_init_secondary(void) else xconf_writel(0xffffffff, ipi_en_regs[cpu_logical_map(cpu)]); +#ifdef CONFIG_NUMA + numa_add_cpu(cpu); +#endif per_cpu(cpu_state, cpu) = CPU_ONLINE; cpu_set_core(&cpu_data[cpu], cpu_logical_map(cpu) % loongson_sysconf.cores_per_package); @@ -327,6 +330,9 @@ static int loongson3_cpu_disable(void) if (io_master(cpu)) return -EBUSY; +#ifdef CONFIG_NUMA + numa_remove_cpu(cpu); +#endif set_cpu_online(cpu, false); calculate_cpu_foreign_map(); local_irq_save(flags); diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c index a34e2a3cd08b..d0b530001257 100644 --- a/arch/loongarch/mm/init.c +++ b/arch/loongarch/mm/init.c @@ -94,6 +94,7 @@ void copy_from_user_page(struct vm_area_struct *vma, } EXPORT_SYMBOL_GPL(copy_from_user_page); +#ifndef CONFIG_NEED_MULTIPLE_NODES void __init paging_init(void) { unsigned long max_zone_pfns[MAX_NR_ZONES]; @@ -118,6 +119,7 @@ void __init mem_init(void) setup_zero_pages(); /* Setup zeroed pages. */ mem_init_print_info(NULL); } +#endif /* !CONFIG_NEED_MULTIPLE_NODES */ void __ref free_initmem(void) { @@ -140,6 +142,34 @@ int arch_add_memory(int nid, u64 start, u64 size, struct mhp_params *params) return ret; } +#ifdef CONFIG_HAVE_ARCH_NODEDATA_EXTENSION +pg_data_t *arch_alloc_nodedata(int nid) +{ + return kzalloc(sizeof(pg_data_t), GFP_KERNEL); +} + +void arch_free_nodedata(pg_data_t *pgdat) +{ + kfree(pgdat); +} + +void arch_refresh_nodedata(int nid, pg_data_t *pgdat) +{ + BUG(); +} +#endif + +#ifdef CONFIG_NUMA +int memory_add_physaddr_to_nid(u64 start) +{ + int nid; + + nid = pa_to_nid(start); + return nid; +} +EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); +#endif + #ifdef CONFIG_MEMORY_HOTREMOVE void arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) diff --git a/arch/loongarch/pci/acpi.c b/arch/loongarch/pci/acpi.c index 28d82f2fe031..c390c069e2b7 100644 --- a/arch/loongarch/pci/acpi.c +++ b/arch/loongarch/pci/acpi.c @@ -11,6 +11,7 @@ #include #include +#include #include #include @@ -28,8 +29,10 @@ int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge) { struct pci_config_window *cfg = bridge->bus->sysdata; struct acpi_device *adev = to_acpi_device(cfg->parent); + struct device *bus_dev = &bridge->bus->dev; ACPI_COMPANION_SET(&bridge->dev, adev); + set_dev_node(bus_dev, pa_to_nid(cfg->res.start)); return 0; } -- Gitee From 54e9bde120e4b077519a6a47e561fcb13e38bc99 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 24 Jun 2021 12:16:11 +0800 Subject: [PATCH 25/59] LoongArch: Add sparse memory vmemmap support Change-Id: I73f485cb75c26b0e3ec964174a4f47331517ba75 Signed-off-by: Min Zhou Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 1 + arch/loongarch/include/asm/pgtable-64.h | 5 +- arch/loongarch/include/asm/sparsemem.h | 8 ++ arch/loongarch/mm/init.c | 184 +++++++++++++++++++++++- 4 files changed, 196 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index d001fe6e8476..e5c14ed6dc5b 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -338,6 +338,7 @@ config ARCH_FLATMEM_ENABLE config ARCH_SPARSEMEM_ENABLE def_bool y + select SPARSEMEM_VMEMMAP_ENABLE help Say Y to support efficient handling of sparse physical memory, for architectures which are either NUMA (Non-Uniform Memory Access) diff --git a/arch/loongarch/include/asm/pgtable-64.h b/arch/loongarch/include/asm/pgtable-64.h index b6cf99d47833..cc542a621758 100644 --- a/arch/loongarch/include/asm/pgtable-64.h +++ b/arch/loongarch/include/asm/pgtable-64.h @@ -72,7 +72,10 @@ #define VMALLOC_START MODULES_END #define VMALLOC_END \ (vm_map_base + \ - min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE) + min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE - VMEMMAP_SIZE) + +#define vmemmap ((struct page *)((VMALLOC_END + PMD_SIZE) & PMD_MASK)) +#define VMEMMAP_END ((unsigned long)vmemmap + VMEMMAP_SIZE - 1) #define pte_ERROR(e) \ printk("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e)) diff --git a/arch/loongarch/include/asm/sparsemem.h b/arch/loongarch/include/asm/sparsemem.h index 3d18cdf1b069..a1e440f6bec7 100644 --- a/arch/loongarch/include/asm/sparsemem.h +++ b/arch/loongarch/include/asm/sparsemem.h @@ -11,6 +11,14 @@ #define SECTION_SIZE_BITS 29 /* 2^29 = Largest Huge Page Size */ #define MAX_PHYSMEM_BITS 48 +#ifndef CONFIG_SPARSEMEM_VMEMMAP +#define VMEMMAP_SIZE 0 +#else +#define VMEMMAP_SIZE (sizeof(struct page) * (1UL << (cpu_pabits + 1 - PAGE_SHIFT))) +#endif + +#include + #endif /* CONFIG_SPARSEMEM */ #ifdef CONFIG_MEMORY_HOTPLUG diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c index d0b530001257..43116dd4af63 100644 --- a/arch/loongarch/mm/init.c +++ b/arch/loongarch/mm/init.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include @@ -186,6 +186,188 @@ void arch_remove_memory(int nid, u64 start, #endif #endif +#ifdef CONFIG_SPARSEMEM_VMEMMAP +void __meminit arch_vmemmap_verify(pte_t *pte, int node, + unsigned long start, unsigned long end) +{ + unsigned long pfn = pte_pfn(*pte); + int actual_node = early_pfn_to_nid(pfn); + + if (node_distance(actual_node, node) > LOCAL_DISTANCE) + pr_warn("[%lx-%lx] potential offnode page_structs\n", + start, end - 1); +} + +void * __meminit arch_vmemmap_alloc_block_zero(unsigned long size, int node) +{ + void *p = vmemmap_alloc_block(size, node); + + if (!p) + return NULL; + memset(p, 0, size); + + return p; +} + +pte_t * __meminit arch_vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node) +{ + pte_t *pte = pte_offset_kernel(pmd, addr); + if (pte_none(*pte)) { + pte_t entry; + void *p = arch_vmemmap_alloc_block_zero(PAGE_SIZE, node); + if (!p) + return NULL; + entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL); + set_pte_at(&init_mm, addr, pte, entry); + } + return pte; +} + +pmd_t * __meminit arch_vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node) +{ + pmd_t *pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) { + void *p = arch_vmemmap_alloc_block_zero(PAGE_SIZE, node); + if (!p) + return NULL; + pmd_populate_kernel(&init_mm, pmd, p); + } + return pmd; +} + +pud_t * __meminit arch_vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node) +{ + pud_t *pud = pud_offset(p4d, addr); + if (pud_none(*pud)) { + void *p = arch_vmemmap_alloc_block_zero(PAGE_SIZE, node); + if (!p) + return NULL; +#ifndef __PAGETABLE_PMD_FOLDED + pmd_init((unsigned long)p, (unsigned long)invalid_pte_table); +#endif + pud_populate(&init_mm, pud, p); + } + return pud; +} + +p4d_t * __meminit arch_vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node) +{ + p4d_t *p4d = p4d_offset(pgd, addr); + if (p4d_none(*p4d)) { + void *p = arch_vmemmap_alloc_block_zero(PAGE_SIZE, node); + if (!p) + return NULL; +#ifndef __PAGETABLE_PUD_FOLDED + pud_init((unsigned long)p, (unsigned long)invalid_pmd_table); +#endif + p4d_populate(&init_mm, p4d, p); + } + return p4d; +} + +pgd_t * __meminit arch_vmemmap_pgd_populate(unsigned long addr, int node) +{ + pgd_t *pgd = pgd_offset_k(addr); + if (pgd_none(*pgd)) { + void *p = arch_vmemmap_alloc_block_zero(PAGE_SIZE, node); + if (!p) + return NULL; + pgd_populate(&init_mm, pgd, p); + } + return pgd; +} + +int __meminit arch_vmemmap_populate_basepages(unsigned long start, + unsigned long end, int node) +{ + unsigned long addr = start; + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + for (; addr < end; addr += PAGE_SIZE) { + pgd = arch_vmemmap_pgd_populate(addr, node); + if (!pgd) + return -ENOMEM; + p4d = arch_vmemmap_p4d_populate(pgd, addr, node); + if (!p4d) + return -ENOMEM; + pud = arch_vmemmap_pud_populate(p4d, addr, node); + if (!pud) + return -ENOMEM; + pmd = arch_vmemmap_pmd_populate(pud, addr, node); + if (!pmd) + return -ENOMEM; + pte = arch_vmemmap_pte_populate(pmd, addr, node); + if (!pte) + return -ENOMEM; + arch_vmemmap_verify(pte, node, addr, addr + PAGE_SIZE); + } + + return 0; +} + +int __meminit arch_vmemmap_populate_hugepages(unsigned long start, + unsigned long end, int node) +{ + unsigned long addr = start; + unsigned long next; + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + + for (addr = start; addr < end; addr = next) { + next = pmd_addr_end(addr, end); + + pgd = arch_vmemmap_pgd_populate(addr, node); + if (!pgd) + return -ENOMEM; + p4d = arch_vmemmap_p4d_populate(pgd, addr, node); + if (!p4d) + return -ENOMEM; + pud = arch_vmemmap_pud_populate(p4d, addr, node); + if (!pud) + return -ENOMEM; + + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) { + void *p = NULL; + + p = arch_vmemmap_alloc_block_zero(PMD_SIZE, node); + if (p) { + pmd_t entry; + + entry = pfn_pmd(virt_to_pfn(p), PAGE_KERNEL); + entry = pmd_mkhuge(entry); + set_pmd_at(&init_mm, addr, pmd, entry); + + continue; + } + } else if (pmd_huge(*pmd)) { + arch_vmemmap_verify((pte_t *)pmd, node, addr, next); + continue; + } + if (arch_vmemmap_populate_basepages(addr, next, node)) + return -ENOMEM; + } + + return 0; +} + +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, + struct vmem_altmap *altmap) +{ + return arch_vmemmap_populate_hugepages(start, end, node); +} +void vmemmap_free(unsigned long start, unsigned long end, + struct vmem_altmap *altmap) +{ +} +#endif + /* * Align swapper_pg_dir in to 64K, allows its address to be loaded * with a single LUI instruction in the TLB handlers. If we used -- Gitee From dd43dc4b6412618906b35f273e2ed341efffbe9a Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 31 Dec 2020 15:13:33 +0800 Subject: [PATCH 26/59] LoongArch: Add vector extensions support Add LoongArch's vector extensions support, which including 128bit LSX (i.e., Loongson SIMD eXtension) and 256bit LASX (i.e., Loongson Advanced SIMD eXtension). Change-Id: I60cd571fadda8a13ee23749b520ee8fc02f21538 Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 32 ++ arch/loongarch/include/asm/asmmacro.h | 638 +++++++++++++++++++++++ arch/loongarch/include/asm/fpu.h | 221 +++++++- arch/loongarch/include/uapi/asm/ptrace.h | 10 + arch/loongarch/kernel/cpu-probe.c | 12 + arch/loongarch/kernel/fpu.S | 296 +++++++++++ arch/loongarch/kernel/process.c | 10 +- arch/loongarch/kernel/ptrace.c | 110 ++++ arch/loongarch/kernel/signal-common.h | 10 + arch/loongarch/kernel/signal.c | 150 ++++++ arch/loongarch/kernel/traps.c | 85 ++- 11 files changed, 1567 insertions(+), 7 deletions(-) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index e5c14ed6dc5b..941b08296e53 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -169,6 +169,8 @@ config CPU_LOONGSON64 depends on SYS_HAS_CPU_LOONGSON64 select ARCH_HAS_PHYS_TO_DMA select CPU_SUPPORTS_64BIT_KERNEL + select CPU_SUPPORTS_LSX + select CPU_SUPPORTS_LASX select GPIOLIB select SWIOTLB select ARCH_SUPPORTS_ATOMIC_RMW @@ -329,6 +331,36 @@ config CPU_HAS_FPU bool default y +config CPU_HAS_LSX + bool "Support for the Loongson SIMD Extension" + depends on CPU_SUPPORTS_LSX + depends on 64BIT + help + Loongson SIMD Extension (LSX) introduces 128 bit wide vector registers + and a set of SIMD instructions to operate on them. When this option + is enabled the kernel will support allocating & switching LSX + vector register contexts. If you know that your kernel will only be + running on CPUs which do not support LSX or that your userland will + not be making use of it then you may wish to say N here to reduce + the size & complexity of your kernel. + + If unsure, say Y. + +config CPU_HAS_LASX + bool "Support for the Loongson Advanced SIMD Extension" + depends on CPU_SUPPORTS_LASX + depends on 64BIT && CPU_HAS_LSX + help + Loongson Advanced SIMD Extension is 256 bit wide SIMD extension. + + If unsure, say Y. + +config CPU_SUPPORTS_LSX + bool + +config CPU_SUPPORTS_LASX + bool + config ARCH_SELECT_MEMORY_MODEL def_bool y diff --git a/arch/loongarch/include/asm/asmmacro.h b/arch/loongarch/include/asm/asmmacro.h index a88d05a271f4..cdb38c7a4bbe 100644 --- a/arch/loongarch/include/asm/asmmacro.h +++ b/arch/loongarch/include/asm/asmmacro.h @@ -124,6 +124,212 @@ .endif .endm + .macro parse_vr var vr + \var = -1 + .ifc \vr, $vr0 + \var = 0 + .endif + .ifc \vr, $vr1 + \var = 1 + .endif + .ifc \vr, $vr2 + \var = 2 + .endif + .ifc \vr, $vr3 + \var = 3 + .endif + .ifc \vr, $vr4 + \var = 4 + .endif + .ifc \vr, $vr5 + \var = 5 + .endif + .ifc \vr, $vr6 + \var = 6 + .endif + .ifc \vr, $vr7 + \var = 7 + .endif + .ifc \vr, $vr8 + \var = 8 + .endif + .ifc \vr, $vr9 + \var = 9 + .endif + .ifc \vr, $vr10 + \var = 10 + .endif + .ifc \vr, $vr11 + \var = 11 + .endif + .ifc \vr, $vr12 + \var = 12 + .endif + .ifc \vr, $vr13 + \var = 13 + .endif + .ifc \vr, $vr14 + \var = 14 + .endif + .ifc \vr, $vr15 + \var = 15 + .endif + .ifc \vr, $vr16 + \var = 16 + .endif + .ifc \vr, $vr17 + \var = 17 + .endif + .ifc \vr, $vr18 + \var = 18 + .endif + .ifc \vr, $vr19 + \var = 19 + .endif + .ifc \vr, $vr20 + \var = 20 + .endif + .ifc \vr, $vr21 + \var = 21 + .endif + .ifc \vr, $vr22 + \var = 22 + .endif + .ifc \vr, $vr23 + \var = 23 + .endif + .ifc \vr, $vr24 + \var = 24 + .endif + .ifc \vr, $vr25 + \var = 25 + .endif + .ifc \vr, $vr26 + \var = 26 + .endif + .ifc \vr, $vr27 + \var = 27 + .endif + .ifc \vr, $vr28 + \var = 28 + .endif + .ifc \vr, $vr29 + \var = 29 + .endif + .ifc \vr, $vr30 + \var = 30 + .endif + .ifc \vr, $vr31 + \var = 31 + .endif + .iflt \var + .error "Unable to parse register name \r" + .endif + .endm + + .macro parse_xr var xr + \var = -1 + .ifc \xr, $xr0 + \var = 0 + .endif + .ifc \xr, $xr1 + \var = 1 + .endif + .ifc \xr, $xr2 + \var = 2 + .endif + .ifc \xr, $xr3 + \var = 3 + .endif + .ifc \xr, $xr4 + \var = 4 + .endif + .ifc \xr, $xr5 + \var = 5 + .endif + .ifc \xr, $xr6 + \var = 6 + .endif + .ifc \xr, $xr7 + \var = 7 + .endif + .ifc \xr, $xr8 + \var = 8 + .endif + .ifc \xr, $xr9 + \var = 9 + .endif + .ifc \xr, $xr10 + \var = 10 + .endif + .ifc \xr, $xr11 + \var = 11 + .endif + .ifc \xr, $xr12 + \var = 12 + .endif + .ifc \xr, $xr13 + \var = 13 + .endif + .ifc \xr, $xr14 + \var = 14 + .endif + .ifc \xr, $xr15 + \var = 15 + .endif + .ifc \xr, $xr16 + \var = 16 + .endif + .ifc \xr, $xr17 + \var = 17 + .endif + .ifc \xr, $xr18 + \var = 18 + .endif + .ifc \xr, $xr19 + \var = 19 + .endif + .ifc \xr, $xr20 + \var = 20 + .endif + .ifc \xr, $xr21 + \var = 21 + .endif + .ifc \xr, $xr22 + \var = 22 + .endif + .ifc \xr, $xr23 + \var = 23 + .endif + .ifc \xr, $xr24 + \var = 24 + .endif + .ifc \xr, $xr25 + \var = 25 + .endif + .ifc \xr, $xr26 + \var = 26 + .endif + .ifc \xr, $xr27 + \var = 27 + .endif + .ifc \xr, $xr28 + \var = 28 + .endif + .ifc \xr, $xr29 + \var = 29 + .endif + .ifc \xr, $xr30 + \var = 30 + .endif + .ifc \xr, $xr31 + \var = 31 + .endif + .iflt \var + .error "Unable to parse register name \r" + .endif + .endm + .macro fpu_save_csr thread tmp movfcsr2gr \tmp, fcsr0 stptr.w \tmp, \thread, THREAD_FCSR @@ -134,6 +340,16 @@ movgr2fcsr fcsr0, \tmp .endm + .macro fpu_save_vcsr thread tmp + movfcsr2gr \tmp, vcsr16 + stptr.w \tmp, \thread, THREAD_VCSR + .endm + + .macro fpu_restore_vcsr thread tmp + ldptr.w \tmp, \thread, THREAD_VCSR + movgr2fcsr vcsr16, \tmp + .endm + .macro fpu_save_cc thread tmp0 tmp1 movcf2gr \tmp0, $fcc0 move \tmp1, \tmp0 @@ -248,6 +464,428 @@ fld.d $f31, \tmp, THREAD_FPR31 - THREAD_FPR0 .endm + .macro lsx_save_data thread tmp + parse_r __tmp, \tmp + li.w \tmp, THREAD_FPR0 + PTR_ADDU \tmp, \thread, \tmp + /* vst opcode is 0xb1 */ + .word (0xb1 << 22 | ((THREAD_FPR0-THREAD_FPR0) << 10) | __tmp << 5 | 0) + .word (0xb1 << 22 | ((THREAD_FPR1-THREAD_FPR0) << 10) | __tmp << 5 | 1) + .word (0xb1 << 22 | ((THREAD_FPR2-THREAD_FPR0) << 10) | __tmp << 5 | 2) + .word (0xb1 << 22 | ((THREAD_FPR3-THREAD_FPR0) << 10) | __tmp << 5 | 3) + .word (0xb1 << 22 | ((THREAD_FPR4-THREAD_FPR0) << 10) | __tmp << 5 | 4) + .word (0xb1 << 22 | ((THREAD_FPR5-THREAD_FPR0) << 10) | __tmp << 5 | 5) + .word (0xb1 << 22 | ((THREAD_FPR6-THREAD_FPR0) << 10) | __tmp << 5 | 6) + .word (0xb1 << 22 | ((THREAD_FPR7-THREAD_FPR0) << 10) | __tmp << 5 | 7) + .word (0xb1 << 22 | ((THREAD_FPR8-THREAD_FPR0) << 10) | __tmp << 5 | 8) + .word (0xb1 << 22 | ((THREAD_FPR9-THREAD_FPR0) << 10) | __tmp << 5 | 9) + .word (0xb1 << 22 | ((THREAD_FPR10-THREAD_FPR0) << 10) | __tmp << 5 | 10) + .word (0xb1 << 22 | ((THREAD_FPR11-THREAD_FPR0) << 10) | __tmp << 5 | 11) + .word (0xb1 << 22 | ((THREAD_FPR12-THREAD_FPR0) << 10) | __tmp << 5 | 12) + .word (0xb1 << 22 | ((THREAD_FPR13-THREAD_FPR0) << 10) | __tmp << 5 | 13) + .word (0xb1 << 22 | ((THREAD_FPR14-THREAD_FPR0) << 10) | __tmp << 5 | 14) + .word (0xb1 << 22 | ((THREAD_FPR15-THREAD_FPR0) << 10) | __tmp << 5 | 15) + .word (0xb1 << 22 | ((THREAD_FPR16-THREAD_FPR0) << 10) | __tmp << 5 | 16) + .word (0xb1 << 22 | ((THREAD_FPR17-THREAD_FPR0) << 10) | __tmp << 5 | 17) + .word (0xb1 << 22 | ((THREAD_FPR18-THREAD_FPR0) << 10) | __tmp << 5 | 18) + .word (0xb1 << 22 | ((THREAD_FPR19-THREAD_FPR0) << 10) | __tmp << 5 | 19) + .word (0xb1 << 22 | ((THREAD_FPR20-THREAD_FPR0) << 10) | __tmp << 5 | 20) + .word (0xb1 << 22 | ((THREAD_FPR21-THREAD_FPR0) << 10) | __tmp << 5 | 21) + .word (0xb1 << 22 | ((THREAD_FPR22-THREAD_FPR0) << 10) | __tmp << 5 | 22) + .word (0xb1 << 22 | ((THREAD_FPR23-THREAD_FPR0) << 10) | __tmp << 5 | 23) + .word (0xb1 << 22 | ((THREAD_FPR24-THREAD_FPR0) << 10) | __tmp << 5 | 24) + .word (0xb1 << 22 | ((THREAD_FPR25-THREAD_FPR0) << 10) | __tmp << 5 | 25) + .word (0xb1 << 22 | ((THREAD_FPR26-THREAD_FPR0) << 10) | __tmp << 5 | 26) + .word (0xb1 << 22 | ((THREAD_FPR27-THREAD_FPR0) << 10) | __tmp << 5 | 27) + .word (0xb1 << 22 | ((THREAD_FPR28-THREAD_FPR0) << 10) | __tmp << 5 | 28) + .word (0xb1 << 22 | ((THREAD_FPR29-THREAD_FPR0) << 10) | __tmp << 5 | 29) + .word (0xb1 << 22 | ((THREAD_FPR30-THREAD_FPR0) << 10) | __tmp << 5 | 30) + .word (0xb1 << 22 | ((THREAD_FPR31-THREAD_FPR0) << 10) | __tmp << 5 | 31) + .endm + + .macro lsx_restore_data thread tmp + parse_r __tmp, \tmp + li.w \tmp, THREAD_FPR0 + PTR_ADDU \tmp, \thread, \tmp + /* vld opcode is 0xb0 */ + .word (0xb0 << 22 | ((THREAD_FPR0-THREAD_FPR0) << 10) | __tmp << 5 | 0) + .word (0xb0 << 22 | ((THREAD_FPR1-THREAD_FPR0) << 10) | __tmp << 5 | 1) + .word (0xb0 << 22 | ((THREAD_FPR2-THREAD_FPR0) << 10) | __tmp << 5 | 2) + .word (0xb0 << 22 | ((THREAD_FPR3-THREAD_FPR0) << 10) | __tmp << 5 | 3) + .word (0xb0 << 22 | ((THREAD_FPR4-THREAD_FPR0) << 10) | __tmp << 5 | 4) + .word (0xb0 << 22 | ((THREAD_FPR5-THREAD_FPR0) << 10) | __tmp << 5 | 5) + .word (0xb0 << 22 | ((THREAD_FPR6-THREAD_FPR0) << 10) | __tmp << 5 | 6) + .word (0xb0 << 22 | ((THREAD_FPR7-THREAD_FPR0) << 10) | __tmp << 5 | 7) + .word (0xb0 << 22 | ((THREAD_FPR8-THREAD_FPR0) << 10) | __tmp << 5 | 8) + .word (0xb0 << 22 | ((THREAD_FPR9-THREAD_FPR0) << 10) | __tmp << 5 | 9) + .word (0xb0 << 22 | ((THREAD_FPR10-THREAD_FPR0) << 10) | __tmp << 5 | 10) + .word (0xb0 << 22 | ((THREAD_FPR11-THREAD_FPR0) << 10) | __tmp << 5 | 11) + .word (0xb0 << 22 | ((THREAD_FPR12-THREAD_FPR0) << 10) | __tmp << 5 | 12) + .word (0xb0 << 22 | ((THREAD_FPR13-THREAD_FPR0) << 10) | __tmp << 5 | 13) + .word (0xb0 << 22 | ((THREAD_FPR14-THREAD_FPR0) << 10) | __tmp << 5 | 14) + .word (0xb0 << 22 | ((THREAD_FPR15-THREAD_FPR0) << 10) | __tmp << 5 | 15) + .word (0xb0 << 22 | ((THREAD_FPR16-THREAD_FPR0) << 10) | __tmp << 5 | 16) + .word (0xb0 << 22 | ((THREAD_FPR17-THREAD_FPR0) << 10) | __tmp << 5 | 17) + .word (0xb0 << 22 | ((THREAD_FPR18-THREAD_FPR0) << 10) | __tmp << 5 | 18) + .word (0xb0 << 22 | ((THREAD_FPR19-THREAD_FPR0) << 10) | __tmp << 5 | 19) + .word (0xb0 << 22 | ((THREAD_FPR20-THREAD_FPR0) << 10) | __tmp << 5 | 20) + .word (0xb0 << 22 | ((THREAD_FPR21-THREAD_FPR0) << 10) | __tmp << 5 | 21) + .word (0xb0 << 22 | ((THREAD_FPR22-THREAD_FPR0) << 10) | __tmp << 5 | 22) + .word (0xb0 << 22 | ((THREAD_FPR23-THREAD_FPR0) << 10) | __tmp << 5 | 23) + .word (0xb0 << 22 | ((THREAD_FPR24-THREAD_FPR0) << 10) | __tmp << 5 | 24) + .word (0xb0 << 22 | ((THREAD_FPR25-THREAD_FPR0) << 10) | __tmp << 5 | 25) + .word (0xb0 << 22 | ((THREAD_FPR26-THREAD_FPR0) << 10) | __tmp << 5 | 26) + .word (0xb0 << 22 | ((THREAD_FPR27-THREAD_FPR0) << 10) | __tmp << 5 | 27) + .word (0xb0 << 22 | ((THREAD_FPR28-THREAD_FPR0) << 10) | __tmp << 5 | 28) + .word (0xb0 << 22 | ((THREAD_FPR29-THREAD_FPR0) << 10) | __tmp << 5 | 29) + .word (0xb0 << 22 | ((THREAD_FPR30-THREAD_FPR0) << 10) | __tmp << 5 | 30) + .word (0xb0 << 22 | ((THREAD_FPR31-THREAD_FPR0) << 10) | __tmp << 5 | 31) + .endm + + .macro lsx_save_all thread tmp0 tmp1 + fpu_save_cc \thread, \tmp0, \tmp1 + fpu_save_csr \thread, \tmp0 + fpu_save_vcsr \thread, \tmp0 + lsx_save_data \thread, \tmp0 + .endm + + .macro lsx_restore_all thread tmp0 tmp1 + lsx_restore_data \thread, \tmp0 + fpu_restore_cc \thread, \tmp0, \tmp1 + fpu_restore_csr \thread, \tmp0 + fpu_restore_vcsr \thread, \tmp0 + .endm + + .macro lsx_save_upper vd base tmp off + parse_vr __vd, \vd + parse_r __tmp, \tmp + /* vpickve2gr opcode is 0xe5dfe */ + .word (0xe5dfe << 11 | 1 << 10 | __vd << 5 | __tmp) + st.d \tmp, \base, (\off+8) + .endm + + .macro lsx_save_all_upper thread base tmp + li.w \tmp, THREAD_FPR0 + PTR_ADDU \base, \thread, \tmp + lsx_save_upper $vr0, \base, \tmp, (THREAD_FPR0-THREAD_FPR0) + lsx_save_upper $vr1, \base, \tmp, (THREAD_FPR1-THREAD_FPR0) + lsx_save_upper $vr2, \base, \tmp, (THREAD_FPR2-THREAD_FPR0) + lsx_save_upper $vr3, \base, \tmp, (THREAD_FPR3-THREAD_FPR0) + lsx_save_upper $vr4, \base, \tmp, (THREAD_FPR4-THREAD_FPR0) + lsx_save_upper $vr5, \base, \tmp, (THREAD_FPR5-THREAD_FPR0) + lsx_save_upper $vr6, \base, \tmp, (THREAD_FPR6-THREAD_FPR0) + lsx_save_upper $vr7, \base, \tmp, (THREAD_FPR7-THREAD_FPR0) + lsx_save_upper $vr8, \base, \tmp, (THREAD_FPR8-THREAD_FPR0) + lsx_save_upper $vr9, \base, \tmp, (THREAD_FPR9-THREAD_FPR0) + lsx_save_upper $vr10, \base, \tmp, (THREAD_FPR10-THREAD_FPR0) + lsx_save_upper $vr11, \base, \tmp, (THREAD_FPR11-THREAD_FPR0) + lsx_save_upper $vr12, \base, \tmp, (THREAD_FPR12-THREAD_FPR0) + lsx_save_upper $vr13, \base, \tmp, (THREAD_FPR13-THREAD_FPR0) + lsx_save_upper $vr14, \base, \tmp, (THREAD_FPR14-THREAD_FPR0) + lsx_save_upper $vr15, \base, \tmp, (THREAD_FPR15-THREAD_FPR0) + lsx_save_upper $vr16, \base, \tmp, (THREAD_FPR16-THREAD_FPR0) + lsx_save_upper $vr17, \base, \tmp, (THREAD_FPR17-THREAD_FPR0) + lsx_save_upper $vr18, \base, \tmp, (THREAD_FPR18-THREAD_FPR0) + lsx_save_upper $vr19, \base, \tmp, (THREAD_FPR19-THREAD_FPR0) + lsx_save_upper $vr20, \base, \tmp, (THREAD_FPR20-THREAD_FPR0) + lsx_save_upper $vr21, \base, \tmp, (THREAD_FPR21-THREAD_FPR0) + lsx_save_upper $vr22, \base, \tmp, (THREAD_FPR22-THREAD_FPR0) + lsx_save_upper $vr23, \base, \tmp, (THREAD_FPR23-THREAD_FPR0) + lsx_save_upper $vr24, \base, \tmp, (THREAD_FPR24-THREAD_FPR0) + lsx_save_upper $vr25, \base, \tmp, (THREAD_FPR25-THREAD_FPR0) + lsx_save_upper $vr26, \base, \tmp, (THREAD_FPR26-THREAD_FPR0) + lsx_save_upper $vr27, \base, \tmp, (THREAD_FPR27-THREAD_FPR0) + lsx_save_upper $vr28, \base, \tmp, (THREAD_FPR28-THREAD_FPR0) + lsx_save_upper $vr29, \base, \tmp, (THREAD_FPR29-THREAD_FPR0) + lsx_save_upper $vr30, \base, \tmp, (THREAD_FPR30-THREAD_FPR0) + lsx_save_upper $vr31, \base, \tmp, (THREAD_FPR31-THREAD_FPR0) + .endm + + .macro lsx_restore_upper vd base tmp off + parse_vr __vd, \vd + parse_r __tmp, \tmp + ld.d \tmp, \base, (\off+8) + /* vinsgr2vr opcode is 0xe5d7e */ + .word (0xe5d7e << 11 | 1 << 10 | __tmp << 5 | __vd) + .endm + + .macro lsx_restore_all_upper thread base tmp + li.w \tmp, THREAD_FPR0 + PTR_ADDU \base, \thread, \tmp + lsx_restore_upper $vr0, \base, \tmp, (THREAD_FPR0-THREAD_FPR0) + lsx_restore_upper $vr1, \base, \tmp, (THREAD_FPR1-THREAD_FPR0) + lsx_restore_upper $vr2, \base, \tmp, (THREAD_FPR2-THREAD_FPR0) + lsx_restore_upper $vr3, \base, \tmp, (THREAD_FPR3-THREAD_FPR0) + lsx_restore_upper $vr4, \base, \tmp, (THREAD_FPR4-THREAD_FPR0) + lsx_restore_upper $vr5, \base, \tmp, (THREAD_FPR5-THREAD_FPR0) + lsx_restore_upper $vr6, \base, \tmp, (THREAD_FPR6-THREAD_FPR0) + lsx_restore_upper $vr7, \base, \tmp, (THREAD_FPR7-THREAD_FPR0) + lsx_restore_upper $vr8, \base, \tmp, (THREAD_FPR8-THREAD_FPR0) + lsx_restore_upper $vr9, \base, \tmp, (THREAD_FPR9-THREAD_FPR0) + lsx_restore_upper $vr10, \base, \tmp, (THREAD_FPR10-THREAD_FPR0) + lsx_restore_upper $vr11, \base, \tmp, (THREAD_FPR11-THREAD_FPR0) + lsx_restore_upper $vr12, \base, \tmp, (THREAD_FPR12-THREAD_FPR0) + lsx_restore_upper $vr13, \base, \tmp, (THREAD_FPR13-THREAD_FPR0) + lsx_restore_upper $vr14, \base, \tmp, (THREAD_FPR14-THREAD_FPR0) + lsx_restore_upper $vr15, \base, \tmp, (THREAD_FPR15-THREAD_FPR0) + lsx_restore_upper $vr16, \base, \tmp, (THREAD_FPR16-THREAD_FPR0) + lsx_restore_upper $vr17, \base, \tmp, (THREAD_FPR17-THREAD_FPR0) + lsx_restore_upper $vr18, \base, \tmp, (THREAD_FPR18-THREAD_FPR0) + lsx_restore_upper $vr19, \base, \tmp, (THREAD_FPR19-THREAD_FPR0) + lsx_restore_upper $vr20, \base, \tmp, (THREAD_FPR20-THREAD_FPR0) + lsx_restore_upper $vr21, \base, \tmp, (THREAD_FPR21-THREAD_FPR0) + lsx_restore_upper $vr22, \base, \tmp, (THREAD_FPR22-THREAD_FPR0) + lsx_restore_upper $vr23, \base, \tmp, (THREAD_FPR23-THREAD_FPR0) + lsx_restore_upper $vr24, \base, \tmp, (THREAD_FPR24-THREAD_FPR0) + lsx_restore_upper $vr25, \base, \tmp, (THREAD_FPR25-THREAD_FPR0) + lsx_restore_upper $vr26, \base, \tmp, (THREAD_FPR26-THREAD_FPR0) + lsx_restore_upper $vr27, \base, \tmp, (THREAD_FPR27-THREAD_FPR0) + lsx_restore_upper $vr28, \base, \tmp, (THREAD_FPR28-THREAD_FPR0) + lsx_restore_upper $vr29, \base, \tmp, (THREAD_FPR29-THREAD_FPR0) + lsx_restore_upper $vr30, \base, \tmp, (THREAD_FPR30-THREAD_FPR0) + lsx_restore_upper $vr31, \base, \tmp, (THREAD_FPR31-THREAD_FPR0) + .endm + + .macro lsx_init_upper vd tmp + parse_vr __vd, \vd + parse_r __tmp, \tmp + /* vinsgr2vr opcode is 0xe5d7e */ + .word (0xe5d7e << 11 | 1 << 10 | __tmp << 5 | __vd) + .endm + + .macro lsx_init_all_upper tmp + not \tmp, zero + lsx_init_upper $vr0 \tmp + lsx_init_upper $vr1 \tmp + lsx_init_upper $vr2 \tmp + lsx_init_upper $vr3 \tmp + lsx_init_upper $vr4 \tmp + lsx_init_upper $vr5 \tmp + lsx_init_upper $vr6 \tmp + lsx_init_upper $vr7 \tmp + lsx_init_upper $vr8 \tmp + lsx_init_upper $vr9 \tmp + lsx_init_upper $vr10 \tmp + lsx_init_upper $vr11 \tmp + lsx_init_upper $vr12 \tmp + lsx_init_upper $vr13 \tmp + lsx_init_upper $vr14 \tmp + lsx_init_upper $vr15 \tmp + lsx_init_upper $vr16 \tmp + lsx_init_upper $vr17 \tmp + lsx_init_upper $vr18 \tmp + lsx_init_upper $vr19 \tmp + lsx_init_upper $vr20 \tmp + lsx_init_upper $vr21 \tmp + lsx_init_upper $vr22 \tmp + lsx_init_upper $vr23 \tmp + lsx_init_upper $vr24 \tmp + lsx_init_upper $vr25 \tmp + lsx_init_upper $vr26 \tmp + lsx_init_upper $vr27 \tmp + lsx_init_upper $vr28 \tmp + lsx_init_upper $vr29 \tmp + lsx_init_upper $vr30 \tmp + lsx_init_upper $vr31 \tmp + .endm + + .macro lasx_save_data thread tmp + parse_r __tmp, \tmp + li.w \tmp, THREAD_FPR0 + PTR_ADDU \tmp, \thread, \tmp + /* xvst opcode is 0xb3 */ + .word (0xb3 << 22 | ((THREAD_FPR0-THREAD_FPR0) << 10) | __tmp << 5 | 0) + .word (0xb3 << 22 | ((THREAD_FPR1-THREAD_FPR0) << 10) | __tmp << 5 | 1) + .word (0xb3 << 22 | ((THREAD_FPR2-THREAD_FPR0) << 10) | __tmp << 5 | 2) + .word (0xb3 << 22 | ((THREAD_FPR3-THREAD_FPR0) << 10) | __tmp << 5 | 3) + .word (0xb3 << 22 | ((THREAD_FPR4-THREAD_FPR0) << 10) | __tmp << 5 | 4) + .word (0xb3 << 22 | ((THREAD_FPR5-THREAD_FPR0) << 10) | __tmp << 5 | 5) + .word (0xb3 << 22 | ((THREAD_FPR6-THREAD_FPR0) << 10) | __tmp << 5 | 6) + .word (0xb3 << 22 | ((THREAD_FPR7-THREAD_FPR0) << 10) | __tmp << 5 | 7) + .word (0xb3 << 22 | ((THREAD_FPR8-THREAD_FPR0) << 10) | __tmp << 5 | 8) + .word (0xb3 << 22 | ((THREAD_FPR9-THREAD_FPR0) << 10) | __tmp << 5 | 9) + .word (0xb3 << 22 | ((THREAD_FPR10-THREAD_FPR0) << 10) | __tmp << 5 | 10) + .word (0xb3 << 22 | ((THREAD_FPR11-THREAD_FPR0) << 10) | __tmp << 5 | 11) + .word (0xb3 << 22 | ((THREAD_FPR12-THREAD_FPR0) << 10) | __tmp << 5 | 12) + .word (0xb3 << 22 | ((THREAD_FPR13-THREAD_FPR0) << 10) | __tmp << 5 | 13) + .word (0xb3 << 22 | ((THREAD_FPR14-THREAD_FPR0) << 10) | __tmp << 5 | 14) + .word (0xb3 << 22 | ((THREAD_FPR15-THREAD_FPR0) << 10) | __tmp << 5 | 15) + .word (0xb3 << 22 | ((THREAD_FPR16-THREAD_FPR0) << 10) | __tmp << 5 | 16) + .word (0xb3 << 22 | ((THREAD_FPR17-THREAD_FPR0) << 10) | __tmp << 5 | 17) + .word (0xb3 << 22 | ((THREAD_FPR18-THREAD_FPR0) << 10) | __tmp << 5 | 18) + .word (0xb3 << 22 | ((THREAD_FPR19-THREAD_FPR0) << 10) | __tmp << 5 | 19) + .word (0xb3 << 22 | ((THREAD_FPR20-THREAD_FPR0) << 10) | __tmp << 5 | 20) + .word (0xb3 << 22 | ((THREAD_FPR21-THREAD_FPR0) << 10) | __tmp << 5 | 21) + .word (0xb3 << 22 | ((THREAD_FPR22-THREAD_FPR0) << 10) | __tmp << 5 | 22) + .word (0xb3 << 22 | ((THREAD_FPR23-THREAD_FPR0) << 10) | __tmp << 5 | 23) + .word (0xb3 << 22 | ((THREAD_FPR24-THREAD_FPR0) << 10) | __tmp << 5 | 24) + .word (0xb3 << 22 | ((THREAD_FPR25-THREAD_FPR0) << 10) | __tmp << 5 | 25) + .word (0xb3 << 22 | ((THREAD_FPR26-THREAD_FPR0) << 10) | __tmp << 5 | 26) + .word (0xb3 << 22 | ((THREAD_FPR27-THREAD_FPR0) << 10) | __tmp << 5 | 27) + .word (0xb3 << 22 | ((THREAD_FPR28-THREAD_FPR0) << 10) | __tmp << 5 | 28) + .word (0xb3 << 22 | ((THREAD_FPR29-THREAD_FPR0) << 10) | __tmp << 5 | 29) + .word (0xb3 << 22 | ((THREAD_FPR30-THREAD_FPR0) << 10) | __tmp << 5 | 30) + .word (0xb3 << 22 | ((THREAD_FPR31-THREAD_FPR0) << 10) | __tmp << 5 | 31) + .endm + + .macro lasx_restore_data thread tmp + parse_r __tmp, \tmp + li.w \tmp, THREAD_FPR0 + PTR_ADDU \tmp, \thread, \tmp + /* xvld opcode is 0xb2 */ + .word (0xb2 << 22 | ((THREAD_FPR0-THREAD_FPR0) << 10) | __tmp << 5 | 0) + .word (0xb2 << 22 | ((THREAD_FPR1-THREAD_FPR0) << 10) | __tmp << 5 | 1) + .word (0xb2 << 22 | ((THREAD_FPR2-THREAD_FPR0) << 10) | __tmp << 5 | 2) + .word (0xb2 << 22 | ((THREAD_FPR3-THREAD_FPR0) << 10) | __tmp << 5 | 3) + .word (0xb2 << 22 | ((THREAD_FPR4-THREAD_FPR0) << 10) | __tmp << 5 | 4) + .word (0xb2 << 22 | ((THREAD_FPR5-THREAD_FPR0) << 10) | __tmp << 5 | 5) + .word (0xb2 << 22 | ((THREAD_FPR6-THREAD_FPR0) << 10) | __tmp << 5 | 6) + .word (0xb2 << 22 | ((THREAD_FPR7-THREAD_FPR0) << 10) | __tmp << 5 | 7) + .word (0xb2 << 22 | ((THREAD_FPR8-THREAD_FPR0) << 10) | __tmp << 5 | 8) + .word (0xb2 << 22 | ((THREAD_FPR9-THREAD_FPR0) << 10) | __tmp << 5 | 9) + .word (0xb2 << 22 | ((THREAD_FPR10-THREAD_FPR0) << 10) | __tmp << 5 | 10) + .word (0xb2 << 22 | ((THREAD_FPR11-THREAD_FPR0) << 10) | __tmp << 5 | 11) + .word (0xb2 << 22 | ((THREAD_FPR12-THREAD_FPR0) << 10) | __tmp << 5 | 12) + .word (0xb2 << 22 | ((THREAD_FPR13-THREAD_FPR0) << 10) | __tmp << 5 | 13) + .word (0xb2 << 22 | ((THREAD_FPR14-THREAD_FPR0) << 10) | __tmp << 5 | 14) + .word (0xb2 << 22 | ((THREAD_FPR15-THREAD_FPR0) << 10) | __tmp << 5 | 15) + .word (0xb2 << 22 | ((THREAD_FPR16-THREAD_FPR0) << 10) | __tmp << 5 | 16) + .word (0xb2 << 22 | ((THREAD_FPR17-THREAD_FPR0) << 10) | __tmp << 5 | 17) + .word (0xb2 << 22 | ((THREAD_FPR18-THREAD_FPR0) << 10) | __tmp << 5 | 18) + .word (0xb2 << 22 | ((THREAD_FPR19-THREAD_FPR0) << 10) | __tmp << 5 | 19) + .word (0xb2 << 22 | ((THREAD_FPR20-THREAD_FPR0) << 10) | __tmp << 5 | 20) + .word (0xb2 << 22 | ((THREAD_FPR21-THREAD_FPR0) << 10) | __tmp << 5 | 21) + .word (0xb2 << 22 | ((THREAD_FPR22-THREAD_FPR0) << 10) | __tmp << 5 | 22) + .word (0xb2 << 22 | ((THREAD_FPR23-THREAD_FPR0) << 10) | __tmp << 5 | 23) + .word (0xb2 << 22 | ((THREAD_FPR24-THREAD_FPR0) << 10) | __tmp << 5 | 24) + .word (0xb2 << 22 | ((THREAD_FPR25-THREAD_FPR0) << 10) | __tmp << 5 | 25) + .word (0xb2 << 22 | ((THREAD_FPR26-THREAD_FPR0) << 10) | __tmp << 5 | 26) + .word (0xb2 << 22 | ((THREAD_FPR27-THREAD_FPR0) << 10) | __tmp << 5 | 27) + .word (0xb2 << 22 | ((THREAD_FPR28-THREAD_FPR0) << 10) | __tmp << 5 | 28) + .word (0xb2 << 22 | ((THREAD_FPR29-THREAD_FPR0) << 10) | __tmp << 5 | 29) + .word (0xb2 << 22 | ((THREAD_FPR30-THREAD_FPR0) << 10) | __tmp << 5 | 30) + .word (0xb2 << 22 | ((THREAD_FPR31-THREAD_FPR0) << 10) | __tmp << 5 | 31) + .endm + + .macro lasx_save_all thread tmp0 tmp1 + fpu_save_cc \thread, \tmp0, \tmp1 + fpu_save_csr \thread, \tmp0 + fpu_save_vcsr \thread, \tmp0 + lasx_save_data \thread, \tmp0 + .endm + + .macro lasx_restore_all thread tmp0 tmp1 + lasx_restore_data \thread, \tmp0 + fpu_restore_cc \thread, \tmp0, \tmp1 + fpu_restore_csr \thread, \tmp0 + fpu_restore_vcsr \thread, \tmp0 + .endm + + .macro lasx_save_upper xd base tmp off + /* Nothing */ + .endm + + .macro lasx_save_all_upper thread base tmp + /* Nothing */ + .endm + + .macro lasx_restore_upper xd base tmp off + parse_xr __xd, \xd + parse_xr __xt, \tmp + parse_r __base, \base + /* vld opcode is 0xb0 */ + .word (0xb0 << 22 | (\off+16) << 10 | __base << 5 | __xt) + /* xvpermi.q opcode is 0x1dfb */ + .word (0x1dfb << 18 | 0x2 << 10 | __xt << 5 | __xd) + .endm + + .macro lasx_restore_all_upper thread base tmp + li.w \tmp, THREAD_FPR0 + PTR_ADDU \base, \thread, \tmp + /* Save $vr31, xvpickve2gr opcode is 0x76efe */ + .word (0x76efe << 12 | 0 << 10 | 31 << 5 | 0x11) + .word (0x76efe << 12 | 1 << 10 | 31 << 5 | 0x12) + lasx_restore_upper $xr0, \base, $xr31, (THREAD_FPR0-THREAD_FPR0) + lasx_restore_upper $xr1, \base, $xr31, (THREAD_FPR1-THREAD_FPR0) + lasx_restore_upper $xr2, \base, $xr31, (THREAD_FPR2-THREAD_FPR0) + lasx_restore_upper $xr3, \base, $xr31, (THREAD_FPR3-THREAD_FPR0) + lasx_restore_upper $xr4, \base, $xr31, (THREAD_FPR4-THREAD_FPR0) + lasx_restore_upper $xr5, \base, $xr31, (THREAD_FPR5-THREAD_FPR0) + lasx_restore_upper $xr6, \base, $xr31, (THREAD_FPR6-THREAD_FPR0) + lasx_restore_upper $xr7, \base, $xr31, (THREAD_FPR7-THREAD_FPR0) + lasx_restore_upper $xr8, \base, $xr31, (THREAD_FPR8-THREAD_FPR0) + lasx_restore_upper $xr9, \base, $xr31, (THREAD_FPR9-THREAD_FPR0) + lasx_restore_upper $xr10, \base, $xr31, (THREAD_FPR10-THREAD_FPR0) + lasx_restore_upper $xr11, \base, $xr31, (THREAD_FPR11-THREAD_FPR0) + lasx_restore_upper $xr12, \base, $xr31, (THREAD_FPR12-THREAD_FPR0) + lasx_restore_upper $xr13, \base, $xr31, (THREAD_FPR13-THREAD_FPR0) + lasx_restore_upper $xr14, \base, $xr31, (THREAD_FPR14-THREAD_FPR0) + lasx_restore_upper $xr15, \base, $xr31, (THREAD_FPR15-THREAD_FPR0) + lasx_restore_upper $xr16, \base, $xr31, (THREAD_FPR16-THREAD_FPR0) + lasx_restore_upper $xr17, \base, $xr31, (THREAD_FPR17-THREAD_FPR0) + lasx_restore_upper $xr18, \base, $xr31, (THREAD_FPR18-THREAD_FPR0) + lasx_restore_upper $xr19, \base, $xr31, (THREAD_FPR19-THREAD_FPR0) + lasx_restore_upper $xr20, \base, $xr31, (THREAD_FPR20-THREAD_FPR0) + lasx_restore_upper $xr21, \base, $xr31, (THREAD_FPR21-THREAD_FPR0) + lasx_restore_upper $xr22, \base, $xr31, (THREAD_FPR22-THREAD_FPR0) + lasx_restore_upper $xr23, \base, $xr31, (THREAD_FPR23-THREAD_FPR0) + lasx_restore_upper $xr24, \base, $xr31, (THREAD_FPR24-THREAD_FPR0) + lasx_restore_upper $xr25, \base, $xr31, (THREAD_FPR25-THREAD_FPR0) + lasx_restore_upper $xr26, \base, $xr31, (THREAD_FPR26-THREAD_FPR0) + lasx_restore_upper $xr27, \base, $xr31, (THREAD_FPR27-THREAD_FPR0) + lasx_restore_upper $xr28, \base, $xr31, (THREAD_FPR28-THREAD_FPR0) + lasx_restore_upper $xr29, \base, $xr31, (THREAD_FPR29-THREAD_FPR0) + lasx_restore_upper $xr30, \base, $xr31, (THREAD_FPR30-THREAD_FPR0) + lasx_restore_upper $xr31, \base, $xr31, (THREAD_FPR31-THREAD_FPR0) + /* Restore $vr31, xvinsgr2vr opcode is 0x76ebe */ + .word (0x76ebe << 12 | 0 << 10 | 0x11 << 5 | 31) + .word (0x76ebe << 12 | 1 << 10 | 0x12 << 5 | 31) + .endm + + .macro lasx_init_upper xd tmp + parse_xr __xd, \xd + parse_r __tmp, \tmp + /* xvinsgr2vr opcode is 0x76ebe */ + .word (0x76ebe << 12 | 2 << 10 | __tmp << 5 | __xd) + .word (0x76ebe << 12 | 3 << 10 | __tmp << 5 | __xd) + .endm + + .macro lasx_init_all_upper tmp + not \tmp, zero + lasx_init_upper $xr0 \tmp + lasx_init_upper $xr1 \tmp + lasx_init_upper $xr2 \tmp + lasx_init_upper $xr3 \tmp + lasx_init_upper $xr4 \tmp + lasx_init_upper $xr5 \tmp + lasx_init_upper $xr6 \tmp + lasx_init_upper $xr7 \tmp + lasx_init_upper $xr8 \tmp + lasx_init_upper $xr9 \tmp + lasx_init_upper $xr10 \tmp + lasx_init_upper $xr11 \tmp + lasx_init_upper $xr12 \tmp + lasx_init_upper $xr13 \tmp + lasx_init_upper $xr14 \tmp + lasx_init_upper $xr15 \tmp + lasx_init_upper $xr16 \tmp + lasx_init_upper $xr17 \tmp + lasx_init_upper $xr18 \tmp + lasx_init_upper $xr19 \tmp + lasx_init_upper $xr20 \tmp + lasx_init_upper $xr21 \tmp + lasx_init_upper $xr22 \tmp + lasx_init_upper $xr23 \tmp + lasx_init_upper $xr24 \tmp + lasx_init_upper $xr25 \tmp + lasx_init_upper $xr26 \tmp + lasx_init_upper $xr27 \tmp + lasx_init_upper $xr28 \tmp + lasx_init_upper $xr29 \tmp + lasx_init_upper $xr30 \tmp + lasx_init_upper $xr31 \tmp + .endm + .macro jr dst jirl zero, \dst, 0 .endm diff --git a/arch/loongarch/include/asm/fpu.h b/arch/loongarch/include/asm/fpu.h index 7c71fc56602f..9cd3d93aa8d6 100644 --- a/arch/loongarch/include/asm/fpu.h +++ b/arch/loongarch/include/asm/fpu.h @@ -26,6 +26,26 @@ extern void _init_fpu(unsigned int); extern void _save_fp(struct loongarch_fpu *); extern void _restore_fp(struct loongarch_fpu *); +extern void _save_lsx(struct loongarch_fpu *fpu); +extern void _restore_lsx(struct loongarch_fpu *fpu); +extern void _init_lsx_upper(void); +extern void _restore_lsx_upper(struct loongarch_fpu *fpu); + +extern void _save_lasx(struct loongarch_fpu *fpu); +extern void _restore_lasx(struct loongarch_fpu *fpu); +extern void _init_lasx_upper(void); +extern void _restore_lasx_upper(struct loongarch_fpu *fpu); + +static inline void enable_lsx(void); +static inline void disable_lsx(void); +static inline void save_lsx(struct task_struct *t); +static inline void restore_lsx(struct task_struct *t); + +static inline void enable_lasx(void); +static inline void disable_lasx(void); +static inline void save_lasx(struct task_struct *t); +static inline void restore_lasx(struct task_struct *t); + /* * Mask the FCSR Cause bits according to the Enable bits, observing * that Unimplemented is always enabled. @@ -42,6 +62,29 @@ static inline int is_fp_enabled(void) 1 : 0; } +static inline int is_lsx_enabled(void) +{ + if (!cpu_has_lsx) + return 0; + + return (csr_readl(LOONGARCH_CSR_EUEN) & CSR_EUEN_LSXEN) ? + 1 : 0; +} + +static inline int is_lasx_enabled(void) +{ + if (!cpu_has_lasx) + return 0; + + return (csr_readl(LOONGARCH_CSR_EUEN) & CSR_EUEN_LASXEN) ? + 1 : 0; +} + +static inline int is_simd_enabled(void) +{ + return is_lsx_enabled() | is_lasx_enabled(); +} + #define enable_fpu() \ do { \ set_csr_euen(CSR_EUEN_FPEN); \ @@ -85,9 +128,22 @@ static inline void own_fpu(int restore) static inline void lose_fpu_inatomic(int save, struct task_struct *tsk) { if (is_fpu_owner()) { - if (save) - _save_fp(&tsk->thread.fpu); - disable_fpu(); + if (is_simd_enabled()) { + if (save) { + if (is_lasx_enabled()) + save_lasx(tsk); + else + save_lsx(tsk); + } + disable_fpu(); + disable_lsx(); + disable_lasx(); + clear_tsk_thread_flag(tsk, TIF_USEDSIMD); + } else { + if (save) + _save_fp(&tsk->thread.fpu); + disable_fpu(); + } clear_tsk_thread_flag(tsk, TIF_USEDFPU); } KSTK_EUEN(tsk) &= ~(CSR_EUEN_FPEN | CSR_EUEN_LSXEN | CSR_EUEN_LASXEN); @@ -133,4 +189,163 @@ static inline union fpureg *get_fpu_regs(struct task_struct *tsk) return tsk->thread.fpu.fpr; } +enum { + CTX_LSX = 1, + CTX_LASX = 2, +}; + +static inline int is_simd_owner(void) +{ + return test_thread_flag(TIF_USEDSIMD); +} + +#ifdef CONFIG_CPU_HAS_LSX + +static inline void enable_lsx(void) +{ + if (cpu_has_lsx) + csr_xchgl(CSR_EUEN_LSXEN, CSR_EUEN_LSXEN, LOONGARCH_CSR_EUEN); +} + +static inline void disable_lsx(void) +{ + if (cpu_has_lsx) + csr_xchgl(0, CSR_EUEN_LSXEN, LOONGARCH_CSR_EUEN); +} + +static inline void save_lsx(struct task_struct *t) +{ + if (cpu_has_lsx) + _save_lsx(&t->thread.fpu); +} + +static inline void restore_lsx(struct task_struct *t) +{ + if (cpu_has_lsx) + _restore_lsx(&t->thread.fpu); +} + +static inline void init_lsx_upper(void) +{ + /* + * Check cpu_has_lsx only if it's a constant. This will allow the + * compiler to optimise out code for CPUs without LSX without adding + * an extra redundant check for CPUs with LSX. + */ + if (__builtin_constant_p(cpu_has_lsx) && !cpu_has_lsx) + return; + + _init_lsx_upper(); +} + +static inline void restore_lsx_upper(struct task_struct *t) +{ + if (cpu_has_lsx) + _restore_lsx_upper(&t->thread.fpu); +} + +#else +static inline void enable_lsx(void) {} +static inline void disable_lsx(void) {} +static inline void save_lsx(struct task_struct *t) {} +static inline void restore_lsx(struct task_struct *t) {} +static inline void init_lsx_upper(void) {} +static inline void restore_lsx_upper(struct task_struct *t) {} +#endif + +#ifdef CONFIG_CPU_HAS_LASX + +static inline void enable_lasx(void) +{ + + if (cpu_has_lasx) + csr_xchgl(CSR_EUEN_LASXEN, CSR_EUEN_LASXEN, LOONGARCH_CSR_EUEN); +} + +static inline void disable_lasx(void) +{ + if (cpu_has_lasx) + csr_xchgl(0, CSR_EUEN_LASXEN, LOONGARCH_CSR_EUEN); +} + +static inline void save_lasx(struct task_struct *t) +{ + if (cpu_has_lasx) + _save_lasx(&t->thread.fpu); +} + +static inline void restore_lasx(struct task_struct *t) +{ + if (cpu_has_lasx) + _restore_lasx(&t->thread.fpu); +} + +static inline void init_lasx_upper(void) +{ + if (cpu_has_lasx) + _init_lasx_upper(); +} + +static inline void restore_lasx_upper(struct task_struct *t) +{ + if (cpu_has_lasx) + _restore_lasx_upper(&t->thread.fpu); +} + +#else +static inline void enable_lasx(void) {} +static inline void disable_lasx(void) {} +static inline void save_lasx(struct task_struct *t) {} +static inline void restore_lasx(struct task_struct *t) {} +static inline void init_lasx_upper(void) {} +static inline void restore_lasx_upper(struct task_struct *t) {} +#endif + +static inline int thread_lsx_context_live(void) +{ + int ret = 0; + + if (__builtin_constant_p(cpu_has_lsx) && !cpu_has_lsx) + goto out; + + ret = test_thread_flag(TIF_LSX_CTX_LIVE) ? CTX_LSX : 0; +out: + return ret; +} + +static inline int thread_lasx_context_live(void) +{ + int ret = 0; + + if (__builtin_constant_p(cpu_has_lasx) && !cpu_has_lasx) + goto out; + + ret = test_thread_flag(TIF_LASX_CTX_LIVE) ? CTX_LASX : 0; +out: + return ret; +} + +#define __BUILD_VCTL_REG(name, cs) \ +static inline unsigned int read_v##name(void) \ +{ \ + unsigned int reg; \ + __asm__ __volatile__( \ + " movfcsr2gr %0, $r" #cs "\n" \ + : "=r"(reg)); \ + return reg; \ +} \ + \ +static inline void write_v##name(unsigned int val) \ +{ \ + __asm__ __volatile__( \ + " movgr2fcsr $r" #cs ", %0\n" \ + : : "r"(val)); \ +} + +__BUILD_VCTL_REG(csr0, 0) +__BUILD_VCTL_REG(csr1, 1) +__BUILD_VCTL_REG(csr2, 2) +__BUILD_VCTL_REG(csr3, 3) +__BUILD_VCTL_REG(csr16, 16) + #endif /* _ASM_FPU_H */ diff --git a/arch/loongarch/include/uapi/asm/ptrace.h b/arch/loongarch/include/uapi/asm/ptrace.h index dc78bbeab3b0..a5870ed6f08f 100644 --- a/arch/loongarch/include/uapi/asm/ptrace.h +++ b/arch/loongarch/include/uapi/asm/ptrace.h @@ -42,6 +42,16 @@ struct user_fp_state { uint32_t fcsr; }; +struct user_lsx_state { + /* 32 registers, 128 bits width per register. */ + uint64_t regs[32*2]; +}; + +struct user_lasx_state { + /* 32 registers, 256 bits width per register. */ + uint64_t regs[32*4]; +}; + #define PTRACE_SYSEMU 0x1f #define PTRACE_SYSEMU_SINGLESTEP 0x20 diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c index dcf58e464dce..cba0eeb029c2 100644 --- a/arch/loongarch/kernel/cpu-probe.c +++ b/arch/loongarch/kernel/cpu-probe.c @@ -111,6 +111,18 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c) c->options |= LOONGARCH_CPU_FPU; elf_hwcap |= HWCAP_LOONGARCH_FPU; } +#ifdef CONFIG_CPU_HAS_LSX + if (config & CPUCFG2_LSX) { + c->options |= LOONGARCH_CPU_LSX; + elf_hwcap |= HWCAP_LOONGARCH_LSX; + } +#endif +#ifdef CONFIG_CPU_HAS_LASX + if (config & CPUCFG2_LASX) { + c->options |= LOONGARCH_CPU_LASX; + elf_hwcap |= HWCAP_LOONGARCH_LASX; + } +#endif if (config & CPUCFG2_COMPLEX) { c->options |= LOONGARCH_CPU_COMPLEX; elf_hwcap |= HWCAP_LOONGARCH_COMPLEX; diff --git a/arch/loongarch/kernel/fpu.S b/arch/loongarch/kernel/fpu.S index 52a956a1a64c..deded76cb864 100644 --- a/arch/loongarch/kernel/fpu.S +++ b/arch/loongarch/kernel/fpu.S @@ -32,6 +32,32 @@ .previous .endm + .macro EX_V insn, reg, src, offs + parse_v __insn, \insn + parse_v __offs, \offs + parse_r __src, \src + parse_vr __reg, \reg + +.ex\@: + .word __insn << 22 | __offs << 10 | __src << 5 | __reg + .section __ex_table,"a" + PTR .ex\@, fault + .previous + .endm + + .macro EX_XV insn, reg, src, offs + parse_v __insn, \insn + parse_v __offs, \offs + parse_r __src, \src + parse_xr __reg, \reg + +.ex\@: + .word __insn << 22 | __offs << 10 | __src << 5 | __reg + .section __ex_table,"a" + PTR .ex\@, fault + .previous + .endm + .macro sc_save_fp base EX fst.d $f0, \base, (0 * FPU_REG_WIDTH) EX fst.d $f1, \base, (1 * FPU_REG_WIDTH) @@ -162,6 +188,146 @@ movgr2fcsr vcsr16, \tmp0 .endm + .macro sc_save_lsx base + EX_V 0xb1 $vr0, \base, (0 * LSX_REG_WIDTH) + EX_V 0xb1 $vr1, \base, (1 * LSX_REG_WIDTH) + EX_V 0xb1 $vr2, \base, (2 * LSX_REG_WIDTH) + EX_V 0xb1 $vr3, \base, (3 * LSX_REG_WIDTH) + EX_V 0xb1 $vr4, \base, (4 * LSX_REG_WIDTH) + EX_V 0xb1 $vr5, \base, (5 * LSX_REG_WIDTH) + EX_V 0xb1 $vr6, \base, (6 * LSX_REG_WIDTH) + EX_V 0xb1 $vr7, \base, (7 * LSX_REG_WIDTH) + EX_V 0xb1 $vr8, \base, (8 * LSX_REG_WIDTH) + EX_V 0xb1 $vr9, \base, (9 * LSX_REG_WIDTH) + EX_V 0xb1 $vr10, \base, (10 * LSX_REG_WIDTH) + EX_V 0xb1 $vr11, \base, (11 * LSX_REG_WIDTH) + EX_V 0xb1 $vr12, \base, (12 * LSX_REG_WIDTH) + EX_V 0xb1 $vr13, \base, (13 * LSX_REG_WIDTH) + EX_V 0xb1 $vr14, \base, (14 * LSX_REG_WIDTH) + EX_V 0xb1 $vr15, \base, (15 * LSX_REG_WIDTH) + EX_V 0xb1 $vr16, \base, (16 * LSX_REG_WIDTH) + EX_V 0xb1 $vr17, \base, (17 * LSX_REG_WIDTH) + EX_V 0xb1 $vr18, \base, (18 * LSX_REG_WIDTH) + EX_V 0xb1 $vr19, \base, (19 * LSX_REG_WIDTH) + EX_V 0xb1 $vr20, \base, (20 * LSX_REG_WIDTH) + EX_V 0xb1 $vr21, \base, (21 * LSX_REG_WIDTH) + EX_V 0xb1 $vr22, \base, (22 * LSX_REG_WIDTH) + EX_V 0xb1 $vr23, \base, (23 * LSX_REG_WIDTH) + EX_V 0xb1 $vr24, \base, (24 * LSX_REG_WIDTH) + EX_V 0xb1 $vr25, \base, (25 * LSX_REG_WIDTH) + EX_V 0xb1 $vr26, \base, (26 * LSX_REG_WIDTH) + EX_V 0xb1 $vr27, \base, (27 * LSX_REG_WIDTH) + EX_V 0xb1 $vr28, \base, (28 * LSX_REG_WIDTH) + EX_V 0xb1 $vr29, \base, (29 * LSX_REG_WIDTH) + EX_V 0xb1 $vr30, \base, (30 * LSX_REG_WIDTH) + EX_V 0xb1 $vr31, \base, (31 * LSX_REG_WIDTH) + .endm + + .macro sc_restore_lsx base + EX_V 0xb0 $vr0, \base, (0 * LSX_REG_WIDTH) + EX_V 0xb0 $vr1, \base, (1 * LSX_REG_WIDTH) + EX_V 0xb0 $vr2, \base, (2 * LSX_REG_WIDTH) + EX_V 0xb0 $vr3, \base, (3 * LSX_REG_WIDTH) + EX_V 0xb0 $vr4, \base, (4 * LSX_REG_WIDTH) + EX_V 0xb0 $vr5, \base, (5 * LSX_REG_WIDTH) + EX_V 0xb0 $vr6, \base, (6 * LSX_REG_WIDTH) + EX_V 0xb0 $vr7, \base, (7 * LSX_REG_WIDTH) + EX_V 0xb0 $vr8, \base, (8 * LSX_REG_WIDTH) + EX_V 0xb0 $vr9, \base, (9 * LSX_REG_WIDTH) + EX_V 0xb0 $vr10, \base, (10 * LSX_REG_WIDTH) + EX_V 0xb0 $vr11, \base, (11 * LSX_REG_WIDTH) + EX_V 0xb0 $vr12, \base, (12 * LSX_REG_WIDTH) + EX_V 0xb0 $vr13, \base, (13 * LSX_REG_WIDTH) + EX_V 0xb0 $vr14, \base, (14 * LSX_REG_WIDTH) + EX_V 0xb0 $vr15, \base, (15 * LSX_REG_WIDTH) + EX_V 0xb0 $vr16, \base, (16 * LSX_REG_WIDTH) + EX_V 0xb0 $vr17, \base, (17 * LSX_REG_WIDTH) + EX_V 0xb0 $vr18, \base, (18 * LSX_REG_WIDTH) + EX_V 0xb0 $vr19, \base, (19 * LSX_REG_WIDTH) + EX_V 0xb0 $vr20, \base, (20 * LSX_REG_WIDTH) + EX_V 0xb0 $vr21, \base, (21 * LSX_REG_WIDTH) + EX_V 0xb0 $vr22, \base, (22 * LSX_REG_WIDTH) + EX_V 0xb0 $vr23, \base, (23 * LSX_REG_WIDTH) + EX_V 0xb0 $vr24, \base, (24 * LSX_REG_WIDTH) + EX_V 0xb0 $vr25, \base, (25 * LSX_REG_WIDTH) + EX_V 0xb0 $vr26, \base, (26 * LSX_REG_WIDTH) + EX_V 0xb0 $vr27, \base, (27 * LSX_REG_WIDTH) + EX_V 0xb0 $vr28, \base, (28 * LSX_REG_WIDTH) + EX_V 0xb0 $vr29, \base, (29 * LSX_REG_WIDTH) + EX_V 0xb0 $vr30, \base, (30 * LSX_REG_WIDTH) + EX_V 0xb0 $vr31, \base, (31 * LSX_REG_WIDTH) + .endm + + .macro sc_save_lasx base + EX_XV 0xb3 $xr0, \base, (0 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr1, \base, (1 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr2, \base, (2 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr3, \base, (3 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr4, \base, (4 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr5, \base, (5 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr6, \base, (6 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr7, \base, (7 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr8, \base, (8 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr9, \base, (9 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr10, \base, (10 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr11, \base, (11 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr12, \base, (12 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr13, \base, (13 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr14, \base, (14 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr15, \base, (15 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr16, \base, (16 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr17, \base, (17 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr18, \base, (18 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr19, \base, (19 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr20, \base, (20 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr21, \base, (21 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr22, \base, (22 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr23, \base, (23 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr24, \base, (24 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr25, \base, (25 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr26, \base, (26 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr27, \base, (27 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr28, \base, (28 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr29, \base, (29 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr30, \base, (30 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr31, \base, (31 * LASX_REG_WIDTH) + .endm + + .macro sc_restore_lasx base + EX_XV 0xb2 $xr0, \base, (0 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr1, \base, (1 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr2, \base, (2 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr3, \base, (3 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr4, \base, (4 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr5, \base, (5 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr6, \base, (6 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr7, \base, (7 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr8, \base, (8 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr9, \base, (9 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr10, \base, (10 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr11, \base, (11 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr12, \base, (12 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr13, \base, (13 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr14, \base, (14 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr15, \base, (15 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr16, \base, (16 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr17, \base, (17 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr18, \base, (18 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr19, \base, (19 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr20, \base, (20 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr21, \base, (21 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr22, \base, (22 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr23, \base, (23 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr24, \base, (24 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr25, \base, (25 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr26, \base, (26 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr27, \base, (27 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr28, \base, (28 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr29, \base, (29 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr30, \base, (30 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr31, \base, (31 * LASX_REG_WIDTH) + .endm + /* * Save a thread's fp context. */ @@ -183,6 +349,76 @@ SYM_FUNC_START(_restore_fp) jirl zero, ra, 0 SYM_FUNC_END(_restore_fp) +#ifdef CONFIG_CPU_HAS_LSX + +/* + * Save a thread's LSX vector context. + */ +SYM_FUNC_START(_save_lsx) + lsx_save_all a0 t1 t2 + jirl zero, ra, 0 +SYM_FUNC_END(_save_lsx) +EXPORT_SYMBOL(_save_lsx) + +/* + * Restore a thread's LSX vector context. + */ +SYM_FUNC_START(_restore_lsx) + lsx_restore_all a0 t1 t2 + jirl zero, ra, 0 +SYM_FUNC_END(_restore_lsx) + +SYM_FUNC_START(_save_lsx_upper) + lsx_save_all_upper a0 t0 t1 + jirl zero, ra, 0 +SYM_FUNC_END(_save_lsx_upper) + +SYM_FUNC_START(_restore_lsx_upper) + lsx_restore_all_upper a0 t0 t1 + jirl zero, ra, 0 +SYM_FUNC_END(_restore_lsx_upper) + +SYM_FUNC_START(_init_lsx_upper) + lsx_init_all_upper t1 + jirl zero, ra, 0 +SYM_FUNC_END(_init_lsx_upper) +#endif + +#ifdef CONFIG_CPU_HAS_LASX + +/* + * Save a thread's LASX vector context. + */ +SYM_FUNC_START(_save_lasx) + lasx_save_all a0 t1 t2 + jirl zero, ra, 0 +SYM_FUNC_END(_save_lasx) +EXPORT_SYMBOL(_save_lasx) + +/* + * Restore a thread's LASX vector context. + */ +SYM_FUNC_START(_restore_lasx) + lasx_restore_all a0 t1 t2 + jirl zero, ra, 0 +SYM_FUNC_END(_restore_lasx) + +SYM_FUNC_START(_save_lasx_upper) + lasx_save_all_upper a0 t0 t1 + jirl zero, ra, 0 +SYM_FUNC_END(_save_lasx_upper) + +SYM_FUNC_START(_restore_lasx_upper) + lasx_restore_all_upper a0 t0 t1 + jirl zero, ra, 0 +SYM_FUNC_END(_restore_lasx_upper) + +SYM_FUNC_START(_init_lasx_upper) + lasx_init_all_upper t1 + jirl zero, ra, 0 +SYM_FUNC_END(_init_lasx_upper) +#endif + /* * Load the FPU with signalling NANS. This bit pattern we're using has * the property that no matter whether considered as single or as double @@ -261,6 +497,66 @@ SYM_FUNC_START(_restore_fp_context) jirl zero, ra, 0 SYM_FUNC_END(_restore_fp_context) +/* + * a0: fpregs + * a1: fcc + * a2: fcsr + * a3: vcsr + */ +SYM_FUNC_START(_save_lsx_context) + sc_save_fcc a1, t0, t1 + sc_save_fcsr a2, t0 + sc_save_vcsr a3, t0 + sc_save_lsx a0 + li.w a0, 0 # success + jirl zero, ra, 0 +SYM_FUNC_END(_save_lsx_context) + +/* + * a0: fpregs + * a1: fcc + * a2: fcsr + * a3: vcsr + */ +SYM_FUNC_START(_restore_lsx_context) + sc_restore_lsx a0 + sc_restore_fcc a1, t1, t2 + sc_restore_fcsr a2, t1 + sc_restore_vcsr a3, t1 + li.w a0, 0 # success + jirl zero, ra, 0 +SYM_FUNC_END(_restore_lsx_context) + +/* + * a0: fpregs + * a1: fcc + * a2: fcsr + * a3: vcsr + */ +SYM_FUNC_START(_save_lasx_context) + sc_save_fcc a1, t0, t1 + sc_save_fcsr a2, t0 + sc_save_vcsr a3, t0 + sc_save_lasx a0 + li.w a0, 0 # success + jirl zero, ra, 0 +SYM_FUNC_END(_save_lasx_context) + +/* + * a0: fpregs + * a1: fcc + * a2: fcsr + * a3: vcsr + */ +SYM_FUNC_START(_restore_lasx_context) + sc_restore_lasx a0 + sc_restore_fcc a1, t1, t2 + sc_restore_fcsr a2, t1 + sc_restore_vcsr a3, t1 + li.w a0, 0 # success + jirl zero, ra, 0 +SYM_FUNC_END(_restore_lasx_context) + SYM_FUNC_START(fault) li.w a0, -EFAULT # failure jirl zero, ra, 0 diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c index 15dec62bbe63..856e17894425 100644 --- a/arch/loongarch/kernel/process.c +++ b/arch/loongarch/kernel/process.c @@ -98,8 +98,14 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) */ preempt_disable(); - if (is_fpu_owner()) - save_fp(current); + if (is_fpu_owner()) { + if (is_lasx_enabled()) + save_lasx(current); + else if (is_lsx_enabled()) + save_lsx(current); + else + save_fp(current); + } preempt_enable(); diff --git a/arch/loongarch/kernel/ptrace.c b/arch/loongarch/kernel/ptrace.c index 5d1429d737af..01300f5ca88e 100644 --- a/arch/loongarch/kernel/ptrace.c +++ b/arch/loongarch/kernel/ptrace.c @@ -232,6 +232,90 @@ static int cfg_set(struct task_struct *target, return 0; } +#ifdef CONFIG_CPU_HAS_LSX + +static void copy_pad_fprs(struct task_struct *target, + const struct user_regset *regset, + struct membuf *to, unsigned int live_sz) +{ + int i, j; + unsigned long long fill = ~0ull; + unsigned int cp_sz, pad_sz; + + cp_sz = min(regset->size, live_sz); + pad_sz = regset->size - cp_sz; + WARN_ON(pad_sz % sizeof(fill)); + + for (i = 0; i < NUM_FPU_REGS; i++) { + membuf_write(to, &target->thread.fpu.fpr[i], cp_sz); + for (j = 0; j < (pad_sz / sizeof(fill)); j++) { + membuf_store(to, fill); + } + } +} + +static int simd_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + const unsigned int wr_size = NUM_FPU_REGS * regset->size; + + if (!tsk_used_math(target)) { + /* The task hasn't used FP or LSX, fill with 0xff */ + copy_pad_fprs(target, regset, &to, 0); + } else if (!test_tsk_thread_flag(target, TIF_LSX_CTX_LIVE)) { + /* Copy scalar FP context, fill the rest with 0xff */ + copy_pad_fprs(target, regset, &to, 8); +#ifdef CONFIG_CPU_HAS_LASX + } else if (!test_tsk_thread_flag(target, TIF_LASX_CTX_LIVE)) { + /* Copy LSX 128 Bit context, fill the rest with 0xff */ + copy_pad_fprs(target, regset, &to, 16); +#endif + } else if (sizeof(target->thread.fpu.fpr[0]) == regset->size) { + /* Trivially copy the vector registers */ + membuf_write(&to, &target->thread.fpu.fpr, wr_size); + } else { + /* Copy as much context as possible, fill the rest with 0xff */ + copy_pad_fprs(target, regset, &to, sizeof(target->thread.fpu.fpr[0])); + } + + return 0; +} + +static int simd_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + const unsigned int wr_size = NUM_FPU_REGS * regset->size; + unsigned int cp_sz; + int i, err, start; + + init_fp_ctx(target); + + if (sizeof(target->thread.fpu.fpr[0]) == regset->size) { + /* Trivially copy the vector registers */ + err = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.fpu.fpr, + 0, wr_size); + } else { + /* Copy as much context as possible */ + cp_sz = min_t(unsigned int, regset->size, + sizeof(target->thread.fpu.fpr[0])); + + i = start = err = 0; + for (; i < NUM_FPU_REGS; i++, start += regset->size) { + err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.fpu.fpr[i], + start, start + cp_sz); + } + } + + return err; +} + +#endif /* CONFIG_CPU_HAS_LSX */ + struct pt_regs_offset { const char *name; int offset; @@ -303,6 +387,12 @@ enum loongarch_regset { REGSET_GPR, REGSET_FPR, REGSET_CPUCFG, +#ifdef CONFIG_CPU_HAS_LSX + REGSET_LSX, +#endif +#ifdef CONFIG_CPU_HAS_LASX + REGSET_LASX, +#endif }; static const struct user_regset loongarch64_regsets[] = { @@ -330,6 +420,26 @@ static const struct user_regset loongarch64_regsets[] = { .regset_get = cfg_get, .set = cfg_set, }, +#ifdef CONFIG_CPU_HAS_LSX + [REGSET_LSX] = { + .core_note_type = NT_LOONGARCH_LSX, + .n = NUM_FPU_REGS, + .size = 16, + .align = 16, + .regset_get = simd_get, + .set = simd_set, + }, +#endif +#ifdef CONFIG_CPU_HAS_LASX + [REGSET_LASX] = { + .core_note_type = NT_LOONGARCH_LASX, + .n = NUM_FPU_REGS, + .size = 32, + .align = 32, + .regset_get = simd_get, + .set = simd_set, + }, +#endif }; static const struct user_regset_view user_loongarch64_view = { diff --git a/arch/loongarch/kernel/signal-common.h b/arch/loongarch/kernel/signal-common.h index 7cfb80e087cc..f949c9e0e5ca 100644 --- a/arch/loongarch/kernel/signal-common.h +++ b/arch/loongarch/kernel/signal-common.h @@ -33,5 +33,15 @@ extern asmlinkage int _save_fp_context(void __user *fpregs, void __user *fcc, void __user *csr); extern asmlinkage int _restore_fp_context(void __user *fpregs, void __user *fcc, void __user *csr); +extern asmlinkage int +_save_lsx_context(void __user *fpregs, void __user *fcc, void __user *fcsr, void __user *vcsr); +extern asmlinkage int +_restore_lsx_context(void __user *fpregs, void __user *fcc, void __user *fcsr, void __user *vcsr); +extern asmlinkage int +_save_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr, void __user *vcsr); +extern asmlinkage int +_restore_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr, void __user *vcsr); +extern asmlinkage int _save_lsx_all_upper(void __user *buf); +extern asmlinkage int _restore_lsx_all_upper(void __user *buf); #endif /* __SIGNAL_COMMON_H */ diff --git a/arch/loongarch/kernel/signal.c b/arch/loongarch/kernel/signal.c index 286291db7b36..c0f85af61644 100644 --- a/arch/loongarch/kernel/signal.c +++ b/arch/loongarch/kernel/signal.c @@ -67,6 +67,43 @@ static int copy_fp_to_sigcontext(struct sigcontext __user *sc) return err; } +static int copy_lsx_upper_to_sigcontext(struct sigcontext __user *sc) +{ + int i; + int err = 0; + int inc = 1; + uint32_t __user *vcsr = &sc->sc_vcsr; + uint64_t __user *fpregs = (uint64_t *)&sc->sc_fpregs; + + for (i = 0; i < NUM_FPU_REGS; i += inc) { + err |= + __put_user(get_fpr64(¤t->thread.fpu.fpr[i], 1), + &fpregs[4*i+1]); + } + err |= __put_user(current->thread.fpu.vcsr, vcsr); + + return err; +} + +static int copy_lasx_upper_to_sigcontext(struct sigcontext __user *sc) +{ + int i; + int err = 0; + int inc = 1; + uint64_t __user *fpregs = (uint64_t *)&sc->sc_fpregs; + + for (i = 0; i < NUM_FPU_REGS; i += inc) { + err |= + __put_user(get_fpr64(¤t->thread.fpu.fpr[i], 2), + &fpregs[4*i+2]); + err |= + __put_user(get_fpr64(¤t->thread.fpu.fpr[i], 3), + &fpregs[4*i+3]); + } + + return err; +} + static int copy_fp_from_sigcontext(struct sigcontext __user *sc) { int i; @@ -87,6 +124,42 @@ static int copy_fp_from_sigcontext(struct sigcontext __user *sc) return err; } +static int copy_lsx_upper_from_sigcontext(struct sigcontext __user *sc) +{ + int i; + int err = 0; + int inc = 1; + u64 fpr_val; + uint32_t __user *vcsr = &sc->sc_vcsr; + uint64_t __user *fpregs = (uint64_t *)&sc->sc_fpregs; + + for (i = 0; i < NUM_FPU_REGS; i += inc) { + err |= __get_user(fpr_val, &fpregs[4*i+1]); + set_fpr64(¤t->thread.fpu.fpr[i], 1, fpr_val); + } + err |= __get_user(current->thread.fpu.vcsr, vcsr); + + return err; +} + +static int copy_lasx_upper_from_sigcontext(struct sigcontext __user *sc) +{ + int i; + int err = 0; + int inc = 1; + u64 fpr_val; + uint64_t __user *fpregs = (uint64_t *)&sc->sc_fpregs; + + for (i = 0; i < NUM_FPU_REGS; i += inc) { + err |= __get_user(fpr_val, &fpregs[4*i+2]); + set_fpr64(¤t->thread.fpu.fpr[i], 2, fpr_val); + err |= __get_user(fpr_val, &fpregs[4*i+3]); + set_fpr64(¤t->thread.fpu.fpr[i], 3, fpr_val); + } + + return err; +} + /* * Wrappers for the assembly _{save,restore}_fp_context functions. */ @@ -108,6 +181,45 @@ static int restore_hw_fp_context(struct sigcontext __user *sc) return _restore_fp_context(fpregs, fcc, csr); } +static int save_lsx_context(struct sigcontext __user *sc) +{ + uint64_t __user *fcc = &sc->sc_fcc; + uint32_t __user *fcsr = &sc->sc_fcsr; + uint32_t __user *vcsr = &sc->sc_vcsr; + uint64_t __user *fpregs = (uint64_t *)&sc->sc_fpregs; + + return _save_lsx_context(fpregs, fcc, fcsr, vcsr); +} + +static int restore_lsx_context(struct sigcontext __user *sc) +{ + uint64_t __user *fcc = &sc->sc_fcc; + uint32_t __user *fcsr = &sc->sc_fcsr; + uint32_t __user *vcsr = &sc->sc_vcsr; + uint64_t __user *fpregs = (uint64_t *)&sc->sc_fpregs; + + return _restore_lsx_context(fpregs, fcc, fcsr, vcsr); +} + +static int save_lasx_context(struct sigcontext __user *sc) +{ + uint64_t __user *fcc = &sc->sc_fcc; + uint32_t __user *fcsr = &sc->sc_fcsr; + uint32_t __user *vcsr = &sc->sc_vcsr; + uint64_t __user *fpregs = (uint64_t *)&sc->sc_fpregs; + + return _save_lasx_context(fpregs, fcc, fcsr, vcsr); +} + +static int restore_lasx_context(struct sigcontext __user *sc) +{ + uint64_t __user *fcc = &sc->sc_fcc; + uint32_t __user *fcsr = &sc->sc_fcsr; + uint32_t __user *vcsr = &sc->sc_vcsr; + uint64_t __user *fpregs = (uint64_t *)&sc->sc_fpregs; + + return _restore_lasx_context(fpregs, fcc, fcsr, vcsr); +} /* * Helper routines @@ -128,6 +240,24 @@ static int protected_save_fp_context(struct sigcontext __user *sc) while (1) { lock_fpu_owner(); + if (thread_lasx_context_live()) { + if (is_lasx_enabled()) { + err = save_lasx_context(sc); + goto finish; + } else { + err |= copy_lasx_upper_to_sigcontext(sc); + /* LASX contains LSX */ + BUG_ON(!thread_lsx_context_live()); + } + } + if (thread_lsx_context_live()) { + if (is_lsx_enabled()) { + err = save_lsx_context(sc); + goto finish; + } else { + err |= copy_lsx_upper_to_sigcontext(sc); + } + } if (is_fpu_owner()) err = save_fp_context(sc); else @@ -184,6 +314,26 @@ static int protected_restore_fp_context(struct sigcontext __user *sc) while (1) { lock_fpu_owner(); + if (thread_lasx_context_live()) { + if (is_lasx_enabled()) { + err = restore_lasx_context(sc); + goto finish; + } else { + err |= copy_lasx_upper_from_sigcontext(sc); + /* LASX contains LSX */ + BUG_ON(!thread_lsx_context_live()); + } + } + if (thread_lsx_context_live()) { + if (is_lsx_enabled()) { + err = restore_lsx_context(sc); + goto finish; + } else { + err |= copy_lsx_upper_from_sigcontext(sc); + /* LSX contains FP */ + BUG_ON(!used_math()); + } + } if (is_fpu_owner()) err = restore_fp_context(sc); else diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index 1919db64eb3d..571498fcce5e 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -494,12 +494,68 @@ static void init_restore_fp(void) BUG_ON(!is_fp_enabled()); } +static void init_restore_lsx(void) +{ + enable_lsx(); + + if (!thread_lsx_context_live()) { + /* First time LSX context user */ + init_restore_fp(); + init_lsx_upper(); + set_thread_flag(TIF_LSX_CTX_LIVE); + } else { + if (!is_simd_owner()) { + if (is_fpu_owner()) { + restore_lsx_upper(current); + } else { + __own_fpu(); + restore_lsx(current); + } + } + } + + set_thread_flag(TIF_USEDSIMD); + write_vcsr16(current->thread.fpu.vcsr); + + BUG_ON(!is_fp_enabled()); + BUG_ON(!is_lsx_enabled()); +} + +static void init_restore_lasx(void) +{ + enable_lasx(); + + if (!thread_lasx_context_live()) { + /* First time LASX context user */ + init_restore_lsx(); + init_lasx_upper(); + set_thread_flag(TIF_LASX_CTX_LIVE); + } else { + if (is_fpu_owner() || is_simd_owner()) { + init_restore_lsx(); + restore_lasx_upper(current); + } else { + __own_fpu(); + enable_lsx(); + restore_lasx(current); + } + } + + set_thread_flag(TIF_USEDSIMD); + + BUG_ON(!is_fp_enabled()); + BUG_ON(!is_lsx_enabled()); + BUG_ON(!is_lasx_enabled()); +} + asmlinkage void noinstr do_fpu(struct pt_regs *regs) { irqentry_state_t state = irqentry_enter(regs); local_irq_enable(); die_if_kernel("do_fpu invoked from kernel context!", regs); + BUG_ON(is_lsx_enabled()); + BUG_ON(is_lasx_enabled()); preempt_disable(); init_restore_fp(); @@ -512,8 +568,21 @@ asmlinkage void noinstr do_fpu(struct pt_regs *regs) asmlinkage void noinstr do_lsx(struct pt_regs *regs) { irqentry_state_t state = irqentry_enter(regs); + local_irq_enable(); - force_sig(SIGILL); + if (!cpu_has_lsx) { + force_sig(SIGILL); + goto out; + } + + die_if_kernel("do_lsx invoked from kernel context!", regs); + BUG_ON(is_lasx_enabled()); + + preempt_disable(); + init_restore_lsx(); + preempt_enable(); + +out: local_irq_disable(); irqentry_exit(regs, state); } @@ -521,8 +590,20 @@ asmlinkage void noinstr do_lsx(struct pt_regs *regs) asmlinkage void noinstr do_lasx(struct pt_regs *regs) { irqentry_state_t state = irqentry_enter(regs); + local_irq_enable(); - force_sig(SIGILL); + if (!cpu_has_lasx) { + force_sig(SIGILL); + goto out; + } + + die_if_kernel("do_lasx invoked from kernel context!", regs); + + preempt_disable(); + init_restore_lasx(); + preempt_enable(); + +out: local_irq_disable(); irqentry_exit(regs, state); } -- Gitee From 7d32c5be29c45ab9bea416d4153d3e0d7afb3bca Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 31 Dec 2020 15:13:33 +0800 Subject: [PATCH 27/59] LoongArch: Add zboot (compressed kernel) support Change-Id: I9ea9f574ffe7938d6ec25a80d09d397330faaac0 Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 11 +++ arch/loongarch/Makefile | 42 +++++++- arch/loongarch/boot/Makefile | 54 +++++++++++ arch/loongarch/boot/compressed/Makefile | 81 ++++++++++++++++ .../boot/compressed/calc_vmlinuz_load_addr.c | 58 +++++++++++ arch/loongarch/boot/compressed/decompress.c | 97 +++++++++++++++++++ arch/loongarch/boot/compressed/dummy.c | 4 + arch/loongarch/boot/compressed/head.S | 50 ++++++++++ arch/loongarch/boot/compressed/ld.script | 51 ++++++++++ arch/loongarch/boot/compressed/string.c | 46 +++++++++ arch/loongarch/tools/Makefile | 5 + arch/loongarch/tools/elf-entry.c | 66 +++++++++++++ 12 files changed, 564 insertions(+), 1 deletion(-) create mode 100644 arch/loongarch/boot/Makefile create mode 100644 arch/loongarch/boot/compressed/Makefile create mode 100644 arch/loongarch/boot/compressed/calc_vmlinuz_load_addr.c create mode 100644 arch/loongarch/boot/compressed/decompress.c create mode 100644 arch/loongarch/boot/compressed/dummy.c create mode 100644 arch/loongarch/boot/compressed/head.S create mode 100644 arch/loongarch/boot/compressed/ld.script create mode 100644 arch/loongarch/boot/compressed/string.c create mode 100644 arch/loongarch/tools/Makefile create mode 100644 arch/loongarch/tools/elf-entry.c diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 941b08296e53..25bbca50bef9 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -120,6 +120,7 @@ config MACH_LOONGSON64 select SPARSE_IRQ select SYS_HAS_CPU_LOONGSON64 select SYS_SUPPORTS_SMP + select SYS_SUPPORTS_ZBOOT select SYS_SUPPORTS_HOTPLUG_CPU select SYS_SUPPORTS_NUMA select SYS_SUPPORTS_64BIT_KERNEL @@ -180,6 +181,16 @@ config CPU_LOONGSON64 endchoice +config SYS_SUPPORTS_ZBOOT + bool + select HAVE_KERNEL_GZIP + select HAVE_KERNEL_BZIP2 + select HAVE_KERNEL_LZ4 + select HAVE_KERNEL_LZMA + select HAVE_KERNEL_LZO + select HAVE_KERNEL_XZ + select HAVE_KERNEL_ZSTD + config SYS_HAS_CPU_LOONGSON64 bool diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index a6e609d392ff..0fd1884e07bc 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -3,6 +3,9 @@ # Author: Huacai Chen # Copyright (C) 2020 Loongson Technology Corporation Limited +archscripts: scripts_basic + $(Q)$(MAKE) $(build)=arch/loongarch/tools elf-entry + # # Select the object file format to substitute into the linker script. # @@ -31,6 +34,7 @@ cflags-y += -mabi=lp64 endif all-y := vmlinux +all-$(CONFIG_SYS_SUPPORTS_ZBOOT)+= vmlinuz # # GCC uses -G0 -mabicalls -fpic as default. We don't want PIC in the kernel @@ -48,6 +52,15 @@ KBUILD_CFLAGS_MODULE += -fno-plt -Wa,-mla-global-with-abs,-mla-local-with-abs cflags-y += -ffreestanding +# +# Kernel compression +# +ifdef CONFIG_SYS_SUPPORTS_ZBOOT +KBUILD_IMAGE = vmlinuz +else +KBUILD_IMAGE = vmlinux +endif + # # Board-dependent options and extra files # @@ -57,13 +70,15 @@ ifdef CONFIG_PHYSICAL_START load-y = $(CONFIG_PHYSICAL_START) endif +entry-y = $(shell $(objtree)/arch/loongarch/tools/elf-entry vmlinux) drivers-$(CONFIG_PCI) += arch/loongarch/pci/ KBUILD_AFLAGS += $(cflags-y) KBUILD_CFLAGS += $(cflags-y) KBUILD_CPPFLAGS += -DVMLINUX_LOAD_ADDRESS=$(load-y) -bootvars-y = VMLINUX_LOAD_ADDRESS=$(load-y) PLATFORM="$(platform-y)" +bootvars-y = VMLINUX_LOAD_ADDRESS=$(load-y) \ + VMLINUX_ENTRY_ADDRESS=$(entry-y) PLATFORM="$(platform-y)" ifdef CONFIG_64BIT bootvars-y += ADDR_BITS=64 @@ -102,6 +117,10 @@ vdso_install: # boot image targets (arch/loongarch/boot/) boot-y := vmlinux.bin +# compressed boot image targets (arch/loongarch/boot/compressed/) +bootz-y := vmlinuz +bootz-y += vmlinuz.bin + all: $(all-y) # boot @@ -109,15 +128,36 @@ $(boot-y): vmlinux FORCE $(Q)$(MAKE) $(build)=arch/loongarch/boot VMLINUX=vmlinux \ $(bootvars-y) arch/loongarch/boot/$@ +ifdef CONFIG_SYS_SUPPORTS_ZBOOT +# boot/compressed +$(bootz-y): vmlinux FORCE + $(Q)$(MAKE) $(build)=arch/loongarch/boot/compressed \ + $(bootvars-y) 64bit-bfd=$(64bit-bfd) $@ +else +vmlinuz: FORCE + @echo ' CONFIG_SYS_SUPPORTS_ZBOOT is not enabled' + /bin/false +endif + CLEAN_FILES += vmlinux install: $(Q)install -D -m 755 vmlinux $(INSTALL_PATH)/vmlinux-$(KERNELRELEASE) +ifdef CONFIG_SYS_SUPPORTS_ZBOOT + $(Q)install -D -m 755 vmlinuz $(INSTALL_PATH)/vmlinuz-$(KERNELRELEASE) +endif $(Q)install -D -m 644 .config $(INSTALL_PATH)/config-$(KERNELRELEASE) $(Q)install -D -m 644 System.map $(INSTALL_PATH)/System.map-$(KERNELRELEASE) +archclean: + $(Q)$(MAKE) $(clean)=arch/loongarch/boot + $(Q)$(MAKE) $(clean)=arch/loongarch/boot/compressed + $(Q)$(MAKE) $(clean)=arch/loongarch/tools + define archhelp echo ' install - install kernel into $(INSTALL_PATH)' echo ' vmlinux.bin - Raw binary boot image' + echo ' vmlinuz - Compressed boot(zboot) image' + echo ' vmlinuz.bin - Raw binary zboot image' echo endef diff --git a/arch/loongarch/boot/Makefile b/arch/loongarch/boot/Makefile new file mode 100644 index 000000000000..b09257a75756 --- /dev/null +++ b/arch/loongarch/boot/Makefile @@ -0,0 +1,54 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Author: Huacai Chen +# Copyright (C) 2020 Loongson Technology Corporation Limited + +# +# Drop some uninteresting sections in the kernel. +# This is only relevant for ELF kernels but doesn't hurt a.out +# +drop-sections := .comment .note .options +strip-flags := $(addprefix --remove-section=,$(drop-sections)) + +suffix-y := bin +suffix-$(CONFIG_KERNEL_GZIP) := gz +suffix-$(CONFIG_KERNEL_BZIP2) := bz2 +suffix-$(CONFIG_KERNEL_LZMA) := lzma +suffix-$(CONFIG_KERNEL_LZO) := lzo +suffix-$(CONFIG_KERNEL_LZ4) := lz4 +suffix-$(CONFIG_KERNEL_ZSTD) := zst + +targets += vmlinux.bin +quiet_cmd_bin = OBJCOPY $@ + cmd_bin = $(OBJCOPY) -O binary $(strip-flags) $(VMLINUX) $@ +$(obj)/vmlinux.bin: $(VMLINUX) FORCE + $(call if_changed,bin) + +# +# Compressed vmlinux images +# + +extra-y += vmlinux.bin.gz +extra-y += vmlinux.bin.bz2 +extra-y += vmlinux.bin.lzma +extra-y += vmlinux.bin.lzo +extra-y += vmlinux.bin.lz4 +extra-y += vmlinux.bin.zst + +$(obj)/vmlinux.bin.bz2: $(obj)/vmlinux.bin FORCE + $(call if_changed,bzip2) + +$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE + $(call if_changed,gzip) + +$(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin FORCE + $(call if_changed,lzma) + +$(obj)/vmlinux.bin.lzo: $(obj)/vmlinux.bin FORCE + $(call if_changed,lzo) + +$(obj)/vmlinux.bin.lz4: $(obj)/vmlinux.bin FORCE + $(call if_changed,lz4) + +$(obj)/vmlinux.bin.zst: $(obj)/vmlinux.bin FORCE + $(call if_changed,zst22) diff --git a/arch/loongarch/boot/compressed/Makefile b/arch/loongarch/boot/compressed/Makefile new file mode 100644 index 000000000000..e3ed0f54a264 --- /dev/null +++ b/arch/loongarch/boot/compressed/Makefile @@ -0,0 +1,81 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Author: Huacai Chen +# Copyright (C) 2020 Loongson Technology Corporation Limited + +include $(srctree)/arch/loongarch/Kbuild.platforms + +# set the default size of the mallocing area for decompressing +BOOT_HEAP_SIZE := 0x400000 + +# Disable Function Tracer +KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_FTRACE), $(KBUILD_CFLAGS)) + +KBUILD_CFLAGS := $(filter-out -fstack-protector, $(KBUILD_CFLAGS)) + +KBUILD_CFLAGS := $(KBUILD_CFLAGS) -D__KERNEL__ \ + -DBOOT_HEAP_SIZE=$(BOOT_HEAP_SIZE) -D"VMLINUX_LOAD_ADDRESS_ULL=$(VMLINUX_LOAD_ADDRESS)ull" + +KBUILD_AFLAGS := $(KBUILD_AFLAGS) -D__ASSEMBLY__ \ + -DBOOT_HEAP_SIZE=$(BOOT_HEAP_SIZE) \ + -DKERNEL_ENTRY=$(VMLINUX_ENTRY_ADDRESS) + +# decompressor objects (linked with vmlinuz) +vmlinuzobjs-y := $(obj)/head.o $(obj)/decompress.o $(obj)/string.o + +vmlinuzobjs-$(CONFIG_KERNEL_XZ) += $(obj)/ashldi3.o + +extra-y += ashldi3.c +$(obj)/ashldi3.c: $(obj)/%.c: $(srctree)/lib/%.c FORCE + $(call if_changed,shipped) + +targets := $(notdir $(vmlinuzobjs-y)) + +targets += vmlinux.bin +OBJCOPYFLAGS_vmlinux.bin := $(OBJCOPYFLAGS) -O binary -R .comment -S +$(obj)/vmlinux.bin: vmlinux FORCE + $(call if_changed,objcopy) + +tool_$(CONFIG_KERNEL_GZIP) = gzip +tool_$(CONFIG_KERNEL_BZIP2) = bzip2 +tool_$(CONFIG_KERNEL_LZ4) = lz4 +tool_$(CONFIG_KERNEL_LZMA) = lzma +tool_$(CONFIG_KERNEL_LZO) = lzo +tool_$(CONFIG_KERNEL_XZ) = xzkern +tool_$(CONFIG_KERNEL_ZSTD) = zstd22 + +targets += vmlinux.bin.z +$(obj)/vmlinux.bin.z: $(obj)/vmlinux.bin FORCE + $(call if_changed,$(tool_y)) + +targets += piggy.o dummy.o +OBJCOPYFLAGS_piggy.o := --add-section=.image=$(obj)/vmlinux.bin.z \ + --set-section-flags=.image=contents,alloc,load,readonly,data +$(obj)/piggy.o: $(obj)/dummy.o $(obj)/vmlinux.bin.z FORCE + $(call if_changed,objcopy) + +HOSTCFLAGS_calc_vmlinuz_load_addr.o += $(LINUXINCLUDE) + +# Calculate the load address of the compressed kernel image +hostprogs := calc_vmlinuz_load_addr + +ifneq ($(zload-y),) +VMLINUZ_LOAD_ADDRESS := $(zload-y) +else +VMLINUZ_LOAD_ADDRESS = $(shell $(obj)/calc_vmlinuz_load_addr \ + $(obj)/vmlinux.bin $(VMLINUX_LOAD_ADDRESS)) +endif +UIMAGE_LOADADDR = $(VMLINUZ_LOAD_ADDRESS) + +vmlinuzobjs-y += $(obj)/piggy.o + +quiet_cmd_zld = LD $@ + cmd_zld = $(LD) $(KBUILD_LDFLAGS) -Ttext $(VMLINUZ_LOAD_ADDRESS) -T $< $(vmlinuzobjs-y) -o $@ +quiet_cmd_strip = STRIP $@ + cmd_strip = $(STRIP) -s $@ +vmlinuz: $(src)/ld.script $(vmlinuzobjs-y) $(obj)/calc_vmlinuz_load_addr + $(call cmd,zld) + $(call cmd,strip) + +clean-files += $(objtree)/vmlinuz +clean-files += $(objtree)/vmlinuz.bin diff --git a/arch/loongarch/boot/compressed/calc_vmlinuz_load_addr.c b/arch/loongarch/boot/compressed/calc_vmlinuz_load_addr.c new file mode 100644 index 000000000000..d14f75ec8273 --- /dev/null +++ b/arch/loongarch/boot/compressed/calc_vmlinuz_load_addr.c @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2010 "Wu Zhangjin" + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include + +int main(int argc, char *argv[]) +{ + unsigned long long vmlinux_size, vmlinux_load_addr, vmlinuz_load_addr; + struct stat sb; + + if (argc != 3) { + fprintf(stderr, "Usage: %s \n", + argv[0]); + return EXIT_FAILURE; + } + + if (stat(argv[1], &sb) == -1) { + perror("stat"); + return EXIT_FAILURE; + } + + /* Convert hex characters to dec number */ + errno = 0; + if (sscanf(argv[2], "%llx", &vmlinux_load_addr) != 1) { + if (errno != 0) + perror("sscanf"); + else + fprintf(stderr, "No matching characters\n"); + + return EXIT_FAILURE; + } + + vmlinux_size = (uint64_t)sb.st_size; + vmlinuz_load_addr = vmlinux_load_addr + vmlinux_size; + + /* + * Align with 64KB: KEXEC needs load sections to be aligned to PAGE_SIZE, + * which may be as large as 64KB depending on the kernel configuration. + */ + + vmlinuz_load_addr += (SZ_64K - vmlinux_size % SZ_64K); + + printf("0x%llx\n", vmlinuz_load_addr); + + return EXIT_SUCCESS; +} diff --git a/arch/loongarch/boot/compressed/decompress.c b/arch/loongarch/boot/compressed/decompress.c new file mode 100644 index 000000000000..099aad621aba --- /dev/null +++ b/arch/loongarch/boot/compressed/decompress.c @@ -0,0 +1,97 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Author: Huacai Chen + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ + +#include +#include +#include +#include + +#include + +/* + * These two variables specify the free mem region + * that can be used for temporary malloc area + */ +unsigned long free_mem_ptr; +unsigned long free_mem_end_ptr; + +/* The linker tells us where the image is. */ +extern unsigned char __image_begin, __image_end; + +/* debug interfaces */ +#define puts(s) do {} while (0) +#define puthex(val) do {} while (0) + +void error(char *x) +{ + puts("\n\n"); + puts(x); + puts("\n\n -- System halted"); + + while (1) + ; /* Halt */ +} + +/* activate the code for pre-boot environment */ +#define STATIC static + +#ifdef CONFIG_KERNEL_GZIP +#include "../../../../lib/decompress_inflate.c" +#endif + +#ifdef CONFIG_KERNEL_BZIP2 +#include "../../../../lib/decompress_bunzip2.c" +#endif + +#ifdef CONFIG_KERNEL_LZ4 +#include "../../../../lib/decompress_unlz4.c" +#endif + +#ifdef CONFIG_KERNEL_LZMA +#include "../../../../lib/decompress_unlzma.c" +#endif + +#ifdef CONFIG_KERNEL_LZO +#include "../../../../lib/decompress_unlzo.c" +#endif + +#ifdef CONFIG_KERNEL_XZ +#include "../../../../lib/decompress_unxz.c" +#endif + +#ifdef CONFIG_KERNEL_ZSTD +#include "../../../../lib/decompress_unzstd.c" +#endif + +void decompress_kernel(unsigned long boot_heap_start) +{ + unsigned long zimage_start, zimage_size; + + zimage_start = (unsigned long)(&__image_begin); + zimage_size = (unsigned long)(&__image_end) - + (unsigned long)(&__image_begin); + + puts("zimage at: "); + puthex(zimage_start); + puts(" "); + puthex(zimage_size + zimage_start); + puts("\n"); + + /* This area are prepared for mallocing when decompressing */ + free_mem_ptr = boot_heap_start; + free_mem_end_ptr = boot_heap_start + BOOT_HEAP_SIZE; + + /* Display standard Linux/LoongArch boot prompt */ + puts("Uncompressing Linux at load address "); + puthex(VMLINUX_LOAD_ADDRESS_ULL); + puts("\n"); + + /* Decompress the kernel with according algorithm */ + __decompress((char *)zimage_start, zimage_size, 0, 0, + (void *)VMLINUX_LOAD_ADDRESS_ULL, 0, 0, error); + + puts("Now, booting the kernel...\n"); +} diff --git a/arch/loongarch/boot/compressed/dummy.c b/arch/loongarch/boot/compressed/dummy.c new file mode 100644 index 000000000000..31dbf45bf99c --- /dev/null +++ b/arch/loongarch/boot/compressed/dummy.c @@ -0,0 +1,4 @@ +int main(void) +{ + return 0; +} diff --git a/arch/loongarch/boot/compressed/head.S b/arch/loongarch/boot/compressed/head.S new file mode 100644 index 000000000000..eb41c7ec9f6f --- /dev/null +++ b/arch/loongarch/boot/compressed/head.S @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ + +#include +#include +#include +#include +#include + +SYM_CODE_START(start) + /* Save boot rom start args */ + move s0, a0 + move s1, a1 + move s2, a2 + move s3, a3 + + /* Config Direct Mapping */ + li.d t0, CSR_DMW0_INIT + csrwr t0, LOONGARCH_CSR_DMWIN0 + li.d t0, CSR_DMW1_INIT + csrwr t0, LOONGARCH_CSR_DMWIN1 + + /* Clear BSS */ + la.abs a0, _edata + la.abs a2, _end +1: st.d zero, a0, 0 + addi.d a0, a0, 8 + bne a2, a0, 1b + + la.abs a0, .heap /* heap address */ + la.abs sp, .stack + 8192 /* stack address */ + + la ra, 2f + la t4, decompress_kernel + jirl zero, t4, 0 +2: + move a0, s0 + move a1, s1 + move a2, s2 + move a3, s3 + PTR_LI t4, KERNEL_ENTRY + jirl zero, t4, 0 +3: + b 3b +SYM_CODE_END(start) + + .comm .heap,BOOT_HEAP_SIZE,4 + .comm .stack,4096*2,4 diff --git a/arch/loongarch/boot/compressed/ld.script b/arch/loongarch/boot/compressed/ld.script new file mode 100644 index 000000000000..2e32d8a72fdf --- /dev/null +++ b/arch/loongarch/boot/compressed/ld.script @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * ld.script for compressed kernel support of LoongArch + * + * Author: Huacai Chen + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ + +OUTPUT_ARCH(loongarch) +ENTRY(start) +PHDRS { + text PT_LOAD FLAGS(7); /* RWX */ +} +SECTIONS +{ + /* Text and read-only data */ + /* . = VMLINUZ_LOAD_ADDRESS; */ + .text : { + *(.text) + *(.rodata) + }: text + /* End of text section */ + + /* Writable data */ + .data : { + *(.data) + /* Put the compressed image here */ + __image_begin = .; + *(.image) + __image_end = .; + CONSTRUCTORS + . = ALIGN(16); + } + + _edata = .; + /* End of data section */ + + /* BSS */ + .bss : { + *(.bss) + } + . = ALIGN(16); + _end = .; + + /* Sections to be discarded */ + /DISCARD/ : { + *(.options) + *(.comment) + *(.note) + } +} diff --git a/arch/loongarch/boot/compressed/string.c b/arch/loongarch/boot/compressed/string.c new file mode 100644 index 000000000000..67e2355ae50e --- /dev/null +++ b/arch/loongarch/boot/compressed/string.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * arch/loongarch/boot/compressed/string.c + * + * Very small subset of simple string routines + */ + +#include + +void __weak *memset(void *s, int c, size_t n) +{ + int i; + char *ss = s; + + for (i = 0; i < n; i++) + ss[i] = c; + return s; +} + +void __weak *memcpy(void *dest, const void *src, size_t n) +{ + int i; + const char *s = src; + char *d = dest; + + for (i = 0; i < n; i++) + d[i] = s[i]; + return dest; +} + +void __weak *memmove(void *dest, const void *src, size_t n) +{ + int i; + const char *s = src; + char *d = dest; + + if (d < s) { + for (i = 0; i < n; i++) + d[i] = s[i]; + } else if (d > s) { + for (i = n - 1; i >= 0; i--) + d[i] = s[i]; + } + + return dest; +} diff --git a/arch/loongarch/tools/Makefile b/arch/loongarch/tools/Makefile new file mode 100644 index 000000000000..0fc5c25937ba --- /dev/null +++ b/arch/loongarch/tools/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0 +hostprogs := elf-entry +PHONY += elf-entry +elf-entry: $(obj)/elf-entry + @: diff --git a/arch/loongarch/tools/elf-entry.c b/arch/loongarch/tools/elf-entry.c new file mode 100644 index 000000000000..c80721e0dee1 --- /dev/null +++ b/arch/loongarch/tools/elf-entry.c @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include + +__attribute__((noreturn)) +static void die(const char *msg) +{ + fputs(msg, stderr); + exit(EXIT_FAILURE); +} + +int main(int argc, const char *argv[]) +{ + uint64_t entry; + size_t nread; + FILE *file; + union { + Elf32_Ehdr ehdr32; + Elf64_Ehdr ehdr64; + } hdr; + + if (argc != 2) + die("Usage: elf-entry \n"); + + file = fopen(argv[1], "r"); + if (!file) { + perror("Unable to open input file"); + return EXIT_FAILURE; + } + + nread = fread(&hdr, 1, sizeof(hdr), file); + if (nread != sizeof(hdr)) { + fclose(file); + perror("Unable to read input file"); + return EXIT_FAILURE; + } + + if (memcmp(hdr.ehdr32.e_ident, ELFMAG, SELFMAG)) { + fclose(file); + die("Input is not an ELF\n"); + } + + switch (hdr.ehdr32.e_ident[EI_CLASS]) { + case ELFCLASS32: + /* Sign extend to form a canonical address */ + entry = (int64_t)(int32_t)hdr.ehdr32.e_entry; + break; + + case ELFCLASS64: + entry = hdr.ehdr64.e_entry; + break; + + default: + fclose(file); + die("Invalid ELF class\n"); + } + + fclose(file); + printf("0x%016" PRIx64 "\n", entry); + + return EXIT_SUCCESS; +} -- Gitee From f0b5feeb2b54a77084840f4e8d9e2afe2ed3dca3 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sun, 30 May 2021 13:04:34 +0800 Subject: [PATCH 28/59] LoongArch: Add STACKTRACE and TRACE_IRQFLAGS support Change-Id: I22ff4860cf7af157c96d880de0142a7d7181658b Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 4 + arch/loongarch/Kconfig.debug | 25 +++ arch/loongarch/include/asm/inst.h | 12 ++ arch/loongarch/include/asm/processor.h | 9 ++ arch/loongarch/include/asm/stacktrace.h | 17 ++ arch/loongarch/include/asm/switch_to.h | 8 +- arch/loongarch/include/asm/unwind.h | 43 +++++ arch/loongarch/kernel/Makefile | 4 + arch/loongarch/kernel/asm-offsets.c | 2 + arch/loongarch/kernel/process.c | 118 +++++++++++++- arch/loongarch/kernel/stacktrace.c | 106 +++++++++++++ arch/loongarch/kernel/switch.S | 5 +- arch/loongarch/kernel/traps.c | 29 ++-- arch/loongarch/kernel/unwind_guess.c | 67 ++++++++ arch/loongarch/kernel/unwind_prologue.c | 198 ++++++++++++++++++++++++ 15 files changed, 631 insertions(+), 16 deletions(-) create mode 100644 arch/loongarch/include/asm/unwind.h create mode 100644 arch/loongarch/kernel/stacktrace.c create mode 100644 arch/loongarch/kernel/unwind_guess.c create mode 100644 arch/loongarch/kernel/unwind_prologue.c diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 25bbca50bef9..b9593d5eb1e6 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -565,6 +565,10 @@ config LOCKDEP_SUPPORT bool default y +config STACKTRACE_SUPPORT + bool + default y + config TRACE_IRQFLAGS_SUPPORT bool default y diff --git a/arch/loongarch/Kconfig.debug b/arch/loongarch/Kconfig.debug index e69de29bb2d1..8d90f57cc019 100644 --- a/arch/loongarch/Kconfig.debug +++ b/arch/loongarch/Kconfig.debug @@ -0,0 +1,25 @@ +choice + prompt "Choose kernel unwinder" + default UNWINDER_PROLOGUE if KALLSYMS + help + This determines which method will be used for unwinding kernel stack + traces for panics, oopses, bugs, warnings, perf, /proc//stack, + livepatch, lockdep, and more. + +config UNWINDER_GUESS + bool "Guess unwinder" + help + This option enables the "guess" unwinder for unwinding kernel stack + traces. It scans the stack and reports every kernel text address it + finds. Some of the addresses it reports may be incorrect. + +config UNWINDER_PROLOGUE + bool "Prologue unwinder" + depends on KALLSYMS + help + This option enables the "prologue" unwinder for unwinding kernel stack + traces. It unwind the stack frame based on prologue code analyze. Symbol + information is needed, at least the address and length of each function. + Some of the addresses it reports may be incorrect. + +endchoice diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h index d027c4ab1531..d72072584989 100644 --- a/arch/loongarch/include/asm/inst.h +++ b/arch/loongarch/include/asm/inst.h @@ -252,4 +252,16 @@ do { \ #define StoreW(addr, value, res) _StoreW(addr, value, res) #define StoreDW(addr, value, res) _StoreDW(addr, value, res) +static inline bool is_branch_insn(union loongarch_instruction insn) +{ + return insn.reg1i21_format.opcode >= beqz_op && + insn.reg1i21_format.opcode <= bgeu_op; +} + +static inline bool is_pc_insn(union loongarch_instruction insn) +{ + return insn.reg1i20_format.opcode >= pcaddi_op && + insn.reg1i20_format.opcode <= pcaddu18i_op; +} + #endif /* _ASM_INST_H */ diff --git a/arch/loongarch/include/asm/processor.h b/arch/loongarch/include/asm/processor.h index c8ce892e3aa8..6625401d8448 100644 --- a/arch/loongarch/include/asm/processor.h +++ b/arch/loongarch/include/asm/processor.h @@ -101,6 +101,10 @@ struct thread_struct { unsigned long reg23, reg24, reg25, reg26; /* s0-s3 */ unsigned long reg27, reg28, reg29, reg30, reg31; /* s4-s8 */ + /* __schedule() return address / call frame address */ + unsigned long sched_ra; + unsigned long sched_cfa; + /* CSR registers */ unsigned long csr_prmd; unsigned long csr_crmd; @@ -129,6 +133,9 @@ struct thread_struct { struct loongarch_fpu fpu FPU_ALIGN; }; +#define thread_saved_ra(tsk) (tsk->thread.sched_ra) +#define thread_saved_fp(tsk) (tsk->thread.sched_cfa) + #define INIT_THREAD { \ /* \ * Main processor registers \ @@ -145,6 +152,8 @@ struct thread_struct { .reg29 = 0, \ .reg30 = 0, \ .reg31 = 0, \ + .sched_ra = 0, \ + .sched_cfa = 0, \ .csr_crmd = 0, \ .csr_prmd = 0, \ .csr_euen = 0, \ diff --git a/arch/loongarch/include/asm/stacktrace.h b/arch/loongarch/include/asm/stacktrace.h index 07adab35ab8a..8263ab9bc27a 100644 --- a/arch/loongarch/include/asm/stacktrace.h +++ b/arch/loongarch/include/asm/stacktrace.h @@ -10,6 +10,23 @@ #include #include +enum stack_type { + STACK_TYPE_UNKNOWN, + STACK_TYPE_TASK, + STACK_TYPE_IRQ, +}; + +struct stack_info { + enum stack_type type; + unsigned long begin, end, next_sp; +}; + +bool in_task_stack(unsigned long stack, struct task_struct *task, + struct stack_info *info); +bool in_irq_stack(unsigned long stack, struct stack_info *info); +int get_stack_info(unsigned long stack, struct task_struct *task, + struct stack_info *info); + #define STR_LONG_L __stringify(LONG_L) #define STR_LONG_S __stringify(LONG_S) #define STR_LONGSIZE __stringify(LONGSIZE) diff --git a/arch/loongarch/include/asm/switch_to.h b/arch/loongarch/include/asm/switch_to.h index e2d4afefce24..0987ac30475b 100644 --- a/arch/loongarch/include/asm/switch_to.h +++ b/arch/loongarch/include/asm/switch_to.h @@ -15,12 +15,15 @@ struct task_struct; * @prev: The task previously executed. * @next: The task to begin executing. * @next_ti: task_thread_info(next). + * @sched_ra: __schedule return address. + * @sched_cfa: __schedule call frame address. * * This function is used whilst scheduling to save the context of prev & load * the context of next. Returns prev. */ extern asmlinkage struct task_struct *__switch_to(struct task_struct *prev, - struct task_struct *next, struct thread_info *next_ti); + struct task_struct *next, struct thread_info *next_ti, + void *sched_ra, void *sched_cfa); /* * For newly created kernel threads switch_to() will return to @@ -31,7 +34,8 @@ extern asmlinkage struct task_struct *__switch_to(struct task_struct *prev, #define switch_to(prev, next, last) \ do { \ lose_fpu_inatomic(1, prev); \ - (last) = __switch_to(prev, next, task_thread_info(next)); \ + (last) = __switch_to(prev, next, task_thread_info(next), \ + __builtin_return_address(0), __builtin_frame_address(0)); \ } while (0) #endif /* _ASM_SWITCH_TO_H */ diff --git a/arch/loongarch/include/asm/unwind.h b/arch/loongarch/include/asm/unwind.h new file mode 100644 index 000000000000..71b36ccf786d --- /dev/null +++ b/arch/loongarch/include/asm/unwind.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Most of this ideas comes from x86. + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_UNWIND_H +#define _ASM_UNWIND_H + +#include + +#include +#include + +struct unwind_state { + struct stack_info stack_info; + struct task_struct *task; +#ifdef CONFIG_UNWINDER_PROLOGUE + unsigned long sp, pc, ra; + bool enable; + bool first; +#else /* CONFIG_UNWINDER_GUESS */ + unsigned long sp, pc; + bool first; +#endif + bool error; +}; + +void unwind_start(struct unwind_state *state, struct task_struct *task, + struct pt_regs *regs); +bool unwind_next_frame(struct unwind_state *state); +unsigned long unwind_get_return_address(struct unwind_state *state); + +static inline bool unwind_done(struct unwind_state *state) +{ + return state->stack_info.type == STACK_TYPE_UNKNOWN; +} + +static inline bool unwind_error(struct unwind_state *state) +{ + return state->error; +} +#endif /* _ASM_UNWIND_H */ diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index 4fc3909158cd..dd1cf2d8e77b 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -14,6 +14,7 @@ obj-$(CONFIG_ACPI) += acpi.o obj-$(CONFIG_EFI) += efi.o obj-$(CONFIG_MODULES) += module.o +obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_CPU_HAS_FPU) += fpu.o @@ -21,4 +22,7 @@ obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_PROC_FS) += proc.o +obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o +obj-$(CONFIG_UNWINDER_PROLOGUE) += unwind_prologue.o + CPPFLAGS_vmlinux.lds := $(KBUILD_CFLAGS) diff --git a/arch/loongarch/kernel/asm-offsets.c b/arch/loongarch/kernel/asm-offsets.c index f86210b17b4f..a752679b6e53 100644 --- a/arch/loongarch/kernel/asm-offsets.c +++ b/arch/loongarch/kernel/asm-offsets.c @@ -104,6 +104,8 @@ void output_thread_defines(void) OFFSET(THREAD_REG29, task_struct, thread.reg29); OFFSET(THREAD_REG30, task_struct, thread.reg30); OFFSET(THREAD_REG31, task_struct, thread.reg31); + OFFSET(THREAD_SCHED_RA, task_struct, thread.sched_ra); + OFFSET(THREAD_SCHED_CFA, task_struct, thread.sched_cfa); OFFSET(THREAD_CSRCRMD, task_struct, thread.csr_crmd); OFFSET(THREAD_CSRPRMD, task_struct, diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c index 856e17894425..61a37573019c 100644 --- a/arch/loongarch/kernel/process.c +++ b/arch/loongarch/kernel/process.c @@ -37,7 +37,9 @@ #include #include #include +#include #include +#include #include /* @@ -132,6 +134,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, childregs = (struct pt_regs *) childksp - 1; /* Put the stack after the struct pt_regs. */ childksp = (unsigned long) childregs; + p->thread.sched_cfa = 0; p->thread.csr_euen = 0; p->thread.csr_crmd = csr_readl(LOONGARCH_CSR_CRMD); p->thread.csr_prmd = csr_readl(LOONGARCH_CSR_PRMD); @@ -142,6 +145,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, p->thread.reg24 = kthread_arg; p->thread.reg03 = childksp; p->thread.reg01 = (unsigned long) ret_from_kernel_thread; + p->thread.sched_ra = (unsigned long) ret_from_kernel_thread; memset(childregs, 0, sizeof(struct pt_regs)); childregs->csr_euen = p->thread.csr_euen; childregs->csr_crmd = p->thread.csr_crmd; @@ -158,6 +162,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, p->thread.reg03 = (unsigned long) childregs; p->thread.reg01 = (unsigned long) ret_from_fork; + p->thread.sched_ra = (unsigned long) ret_from_fork; /* * New tasks lose permission to use the fpu. This accelerates context @@ -176,9 +181,120 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, return 0; } +bool in_task_stack(unsigned long stack, struct task_struct *task, + struct stack_info *info) +{ + unsigned long begin = (unsigned long)task_stack_page(task); + unsigned long end = begin + THREAD_SIZE - 32; + + if (stack < begin || stack >= end) + return false; + + info->type = STACK_TYPE_TASK; + info->begin = begin; + info->end = end; + info->next_sp = 0; + + return true; +} + +bool in_irq_stack(unsigned long stack, struct stack_info *info) +{ + unsigned long nextsp; + unsigned long begin = (unsigned long)this_cpu_read(irq_stack); + unsigned long end = begin + IRQ_STACK_START; + + if (stack < begin || stack >= end) + return false; + + nextsp = *(unsigned long *)end; + if (nextsp & 0x7) + return false; + + info->type = STACK_TYPE_IRQ; + info->begin = begin; + info->end = end; + info->next_sp = nextsp; + + return true; +} + +int get_stack_info(unsigned long stack, struct task_struct *task, + struct stack_info *info) +{ + task = task ? : current; + + if (!stack || stack & 0x7) + goto unknown; + + if (in_task_stack(stack, task, info)) + return 0; + + if (task != current) + goto unknown; + + if (in_irq_stack(stack, info)) + return 0; + +unknown: + info->type = STACK_TYPE_UNKNOWN; + return -EINVAL; +} + unsigned long get_wchan(struct task_struct *task) { - return 0; + unsigned long stack_page; + unsigned long pc; + unsigned long stack_page_end __maybe_unused; + unsigned long frame __maybe_unused; + struct unwind_state state __maybe_unused; + + if (!task || task == current || task->state == TASK_RUNNING) + return 0; + + stack_page = (unsigned long)try_get_task_stack(task); + if (!stack_page) + return 0; + + pc = thread_saved_ra(task); + +#ifdef CONFIG_UNWINDER_GUESS + /* + * Use "guess" unwinder get wchan is not reliable. + * Just calculate a simpler wchan value here. + * Do nothing here. + */ +#else /* CONFIG_UNWINDER_PROLOGUE */ + stack_page_end = stack_page + THREAD_SIZE - 32; + frame = thread_saved_fp(task); + + if (frame >= stack_page_end || frame < stack_page) + goto out; + + memset(&state, 0, sizeof(state)); + state.task = task; + state.sp = frame; + state.stack_info.type = STACK_TYPE_TASK; + state.stack_info.begin = stack_page; + state.stack_info.end = stack_page_end; + state.stack_info.next_sp = 0; + state.enable = true; + state.first = true; + state.pc = pc; + + while (in_sched_functions(pc)) { + if (!unwind_next_frame(&state)) { + pc = 0; + goto out; + } + pc = unwind_get_return_address(&state); + } + +out: +#endif /* CONFIG_UNWINDER_GUESS */ + + put_task_stack(task); + return pc; } unsigned long stack_top(void) diff --git a/arch/loongarch/kernel/stacktrace.c b/arch/loongarch/kernel/stacktrace.c new file mode 100644 index 000000000000..8a13d1136288 --- /dev/null +++ b/arch/loongarch/kernel/stacktrace.c @@ -0,0 +1,106 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Stack trace management functions + * + * Copyright (C) 2006 Atsushi Nemoto + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#include +#include +#include +#include +#include + +#include +#include + +typedef bool (*stack_trace_consume_fn)(struct stack_trace *trace, + unsigned long addr); + +static bool consume_entry(struct stack_trace *trace, unsigned long addr) +{ + if (trace->nr_entries >= trace->max_entries) + return false; + + if (trace->skip > 0) { + trace->skip--; + return true; + } + + trace->entries[trace->nr_entries++] = addr; + return trace->nr_entries < trace->max_entries; +} + +static bool consume_entry_nosched(struct stack_trace *trace, + unsigned long addr) +{ + if (in_sched_functions(addr)) + return true; + return consume_entry(trace, addr); +} + +static void save_context_stack(struct task_struct *tsk, + struct stack_trace *trace, + struct pt_regs *regs, + stack_trace_consume_fn fn) +{ + struct pt_regs dummyregs; + struct unwind_state state; + unsigned long addr; + + regs = &dummyregs; + + if (tsk == current) { + regs->csr_era = (unsigned long)__builtin_return_address(0); + regs->regs[3] = (unsigned long)__builtin_frame_address(0); + } else { + regs->csr_era = thread_saved_ra(tsk); + regs->regs[3] = thread_saved_fp(tsk); + } + + regs->regs[1] = 0; + regs->regs[22] = 0; + + for (unwind_start(&state, tsk, regs); + !unwind_done(&state); unwind_next_frame(&state)) { + addr = unwind_get_return_address(&state); + if (!addr || !fn(trace, addr)) + return; + } +} + +/* + * Save stack-backtrace addresses into a stack_trace buffer. + */ +void save_stack_trace(struct stack_trace *trace) +{ + stack_trace_consume_fn consume = consume_entry; + + WARN_ON(trace->nr_entries || !trace->max_entries); + + save_context_stack(current, trace, NULL, consume); +} +EXPORT_SYMBOL_GPL(save_stack_trace); + +void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) +{ + stack_trace_consume_fn consume = consume_entry; + + /* We don't want this function nor the caller */ + trace->skip += 7; + WARN_ON(trace->nr_entries || !trace->max_entries); + + save_context_stack(current, trace, regs, consume); +} +EXPORT_SYMBOL_GPL(save_stack_trace_regs); + +void save_stack_trace_tsk(struct task_struct *tsk, + struct stack_trace *trace) +{ + stack_trace_consume_fn consume = consume_entry_nosched; + + WARN_ON(trace->nr_entries || !trace->max_entries); + + save_context_stack(tsk, trace, NULL, consume); +} +EXPORT_SYMBOL_GPL(save_stack_trace_tsk); diff --git a/arch/loongarch/kernel/switch.S b/arch/loongarch/kernel/switch.S index ac145f7c1583..df1c46b6cbe3 100644 --- a/arch/loongarch/kernel/switch.S +++ b/arch/loongarch/kernel/switch.S @@ -17,7 +17,8 @@ /* * task_struct *__switch_to(task_struct *prev, task_struct *next, - * struct thread_info *next_ti) + * struct thread_info *next_ti, + * void *sched_ra, void *sched_cfa) */ .align 5 SYM_FUNC_START(__switch_to) @@ -26,6 +27,8 @@ SYM_FUNC_START(__switch_to) cpu_save_nonscratch a0 stptr.d ra, a0, THREAD_REG01 + stptr.d a3, a0, THREAD_SCHED_RA + stptr.d a4, a0, THREAD_SCHED_CFA move tp, a2 cpu_restore_nonscratch a1 diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index 571498fcce5e..9cf02d619670 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -44,6 +44,7 @@ #include #include #include +#include #include "access-helper.h" @@ -64,20 +65,24 @@ extern asmlinkage void handle_vint(void); static void show_backtrace(struct task_struct *task, const struct pt_regs *regs, const char *loglvl, bool user) { - unsigned long addr; - unsigned long *sp = (unsigned long *)(regs->regs[3] & ~3); + unsigned long pc; + struct unwind_state state; + struct pt_regs *pregs = (struct pt_regs *)regs; - printk("%sCall Trace:", loglvl); -#ifdef CONFIG_KALLSYMS - printk("%s\n", loglvl); + if (!task) + task = current; + + unwind_start(&state, task, pregs); + +#ifdef CONFIG_UNWINDER_PROLOGUE + if (user_mode(regs)) + state.enable = false; #endif - while (!kstack_end(sp)) { - if (__get_addr(&addr, sp++, user)) { - printk("%s (Bad stack address)", loglvl); - break; - } - if (__kernel_text_address(addr)) - print_ip_sym(loglvl, addr); + + printk("%sCall Trace:\n", loglvl); + for (; !unwind_done(&state); unwind_next_frame(&state)) { + pc = unwind_get_return_address(&state); + print_ip_sym(loglvl, pc); } printk("%s\n", loglvl); } diff --git a/arch/loongarch/kernel/unwind_guess.c b/arch/loongarch/kernel/unwind_guess.c new file mode 100644 index 000000000000..a3c47a9e2d8c --- /dev/null +++ b/arch/loongarch/kernel/unwind_guess.c @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#include + +#include +#include + +unsigned long unwind_get_return_address(struct unwind_state *state) +{ + if (unwind_done(state)) + return 0; + + if (state->first) + return state->pc; + + return *(unsigned long *)(state->sp); +} +EXPORT_SYMBOL_GPL(unwind_get_return_address); + +bool unwind_next_frame(struct unwind_state *state) +{ + struct stack_info *info = &state->stack_info; + unsigned long addr; + + if (unwind_done(state)) + return false; + + if (state->first) + state->first = false; + + do { + for (state->sp += sizeof(unsigned long); + state->sp < info->end; + state->sp += sizeof(unsigned long)) { + addr = *(unsigned long *)(state->sp); + + if (__kernel_text_address(addr)) + return true; + } + + state->sp = info->next_sp; + + } while (!get_stack_info(state->sp, state->task, info)); + + return false; +} +EXPORT_SYMBOL_GPL(unwind_next_frame); + +void unwind_start(struct unwind_state *state, struct task_struct *task, + struct pt_regs *regs) +{ + memset(state, 0, sizeof(*state)); + + state->task = task; + + state->sp = regs->regs[3]; + state->pc = regs->csr_era; + state->first = true; + + get_stack_info(state->sp, state->task, &state->stack_info); + + if (!unwind_done(state) && !__kernel_text_address(state->pc)) + unwind_next_frame(state); +} +EXPORT_SYMBOL_GPL(unwind_start); diff --git a/arch/loongarch/kernel/unwind_prologue.c b/arch/loongarch/kernel/unwind_prologue.c new file mode 100644 index 000000000000..687644e0994f --- /dev/null +++ b/arch/loongarch/kernel/unwind_prologue.c @@ -0,0 +1,198 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#include + +#include +#include +#include + +unsigned long unwind_get_return_address(struct unwind_state *state) +{ + if (unwind_done(state)) + return 0; + + if (state->enable) { + return state->pc; + } else { + if (state->first) + return state->pc; + + return *(unsigned long *)(state->sp); + } +} +EXPORT_SYMBOL_GPL(unwind_get_return_address); + +static inline bool is_stack_open_ins(union loongarch_instruction *ip) +{ + /* addi.d $sp, $sp, -imm */ + return ip->reg2i12_format.opcode == addid_op && + ip->reg2i12_format.rj == 3 && + ip->reg2i12_format.rd == 3 && + ip->reg2i12_format.simmediate < 0; +} + +static inline bool is_ra_save_ins(union loongarch_instruction *ip) +{ + /* st.d $ra, $sp, offset */ + return ip->reg2i12_format.opcode == std_op && + ip->reg2i12_format.rj == 3 && + ip->reg2i12_format.rd == 1; +} + +static inline bool is_branch_ins(union loongarch_instruction *ip) +{ + return is_branch_insn(*ip); +} + +static bool unwind_by_prologue(struct unwind_state *state) +{ + struct stack_info *info = &state->stack_info; + union loongarch_instruction *ip, *ip_end; + unsigned long frame_size = 0, frame_ra = -1; + unsigned long size, offset, pc = state->pc; + + if (state->sp >= info->end || state->sp < info->begin) + return false; + + if (!kallsyms_lookup_size_offset(pc, &size, &offset)) + return false; + + ip = (union loongarch_instruction *)(pc - offset); + ip_end = (union loongarch_instruction *)pc; + + while (ip < ip_end) { + if (is_stack_open_ins(ip)) { + frame_size = -ip->reg2i12_format.simmediate; + ip++; + break; + } + ip++; + } + + if (!frame_size) { + if (state->first) + goto first; + + return false; + } + + while (ip < ip_end) { + if (is_ra_save_ins(ip)) { + frame_ra = ip->reg2i12_format.simmediate; + break; + } + if (is_branch_ins(ip)) + break; + ip++; + } + + if (frame_ra < 0) { + if (state->first) { + state->sp = state->sp + frame_size; + goto first; + } + return false; + } + + if (state->first) + state->first = false; + + state->pc = *(unsigned long *)(state->sp + frame_ra); + state->sp = state->sp + frame_size; + return !!__kernel_text_address(state->pc); + +first: + state->first = false; + if (state->pc == state->ra) + return false; + + state->pc = state->ra; + + return !!__kernel_text_address(state->ra); +} + +static bool unwind_by_guess(struct unwind_state *state) +{ + struct stack_info *info = &state->stack_info; + unsigned long addr; + + for (state->sp += sizeof(unsigned long); + state->sp < info->end; + state->sp += sizeof(unsigned long)) { + addr = *(unsigned long *)(state->sp); + + if (__kernel_text_address(addr)) + return true; + } + + return false; +} + +bool unwind_next_frame(struct unwind_state *state) +{ + struct stack_info *info = &state->stack_info; + struct pt_regs *regs; + unsigned long pc; + + if (unwind_done(state)) + return false; + + do { + if (state->enable) { + if (unwind_by_prologue(state)) + return true; + + if (info->type == STACK_TYPE_IRQ && + info->end == state->sp) { + regs = (struct pt_regs *)info->next_sp; + pc = regs->csr_era; + if (user_mode(regs) || !__kernel_text_address(pc)) + return false; + + state->pc = pc; + state->sp = regs->regs[3]; + state->ra = regs->regs[1]; + state->first = true; + get_stack_info(state->sp, state->task, info); + + return true; + } + } else { + if (state->first) + state->first = false; + + if (unwind_by_guess(state)) + return true; + } + + state->sp = info->next_sp; + + } while (!get_stack_info(state->sp, state->task, info)); + + return false; +} +EXPORT_SYMBOL_GPL(unwind_next_frame); + +void unwind_start(struct unwind_state *state, struct task_struct *task, + struct pt_regs *regs) +{ + memset(state, 0, sizeof(*state)); + + if (__kernel_text_address(regs->csr_era)) { + state->enable = true; + } + + state->task = task; + state->pc = regs->csr_era; + state->sp = regs->regs[3]; + state->ra = regs->regs[1]; + state->first = true; + + get_stack_info(state->sp, state->task, &state->stack_info); + + if (!unwind_done(state) && !__kernel_text_address(state->pc)) + unwind_next_frame(state); +} +EXPORT_SYMBOL_GPL(unwind_start); -- Gitee From c81072f34af8ba1e43009deff9c2e81141c62c79 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 31 Dec 2020 15:13:33 +0800 Subject: [PATCH 29/59] LoongArch: Add kenel hacking options support Add some kenel hacking options support for LoongArch, including: sysrq, builtin command line and zboot debugging. Change-Id: I70a27252d435cade03fc407cd809dac3899f5d5b Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig.debug | 68 +++++++++++++++++++++ arch/loongarch/boot/compressed/Makefile | 5 ++ arch/loongarch/boot/compressed/dbg.c | 37 +++++++++++ arch/loongarch/boot/compressed/decompress.c | 5 ++ arch/loongarch/boot/compressed/uart-16550.c | 40 ++++++++++++ arch/loongarch/kernel/Makefile | 1 + arch/loongarch/kernel/setup.c | 23 +++++++ arch/loongarch/kernel/sysrq.c | 65 ++++++++++++++++++++ 8 files changed, 244 insertions(+) create mode 100644 arch/loongarch/boot/compressed/dbg.c create mode 100644 arch/loongarch/boot/compressed/uart-16550.c create mode 100644 arch/loongarch/kernel/sysrq.c diff --git a/arch/loongarch/Kconfig.debug b/arch/loongarch/Kconfig.debug index 8d90f57cc019..3a0e6385eb32 100644 --- a/arch/loongarch/Kconfig.debug +++ b/arch/loongarch/Kconfig.debug @@ -1,3 +1,52 @@ +# SPDX-License-Identifier: GPL-2.0 + +config CMDLINE_BOOL + bool "Built-in kernel command line" + default n + help + For most systems, it is firmware or second stage bootloader that + by default specifies the kernel command line options. However, + it might be necessary or advantageous to either override the + default kernel command line or add a few extra options to it. + For such cases, this option allows you to hardcode your own + command line options directly into the kernel. For that, you + should choose 'Y' here, and fill in the extra boot arguments + in CONFIG_CMDLINE. + + The built-in options will be concatenated to the default command + line if CMDLINE_OVERRIDE is set to 'N'. Otherwise, the default + command line will be ignored and replaced by the built-in string. + + Most LoongArch systems will normally expect 'N' here and rely upon + the command line from the firmware or the second-stage bootloader. + +config CMDLINE + string "Default kernel command string" + depends on CMDLINE_BOOL + default "" + help + On some platforms, there is currently no way for the boot loader to + pass arguments to the kernel. For these platforms, and for the cases + when you want to add some extra options to the command line or ignore + the default command line, you can supply some command-line options at + build time by entering them here. In other cases you can specify + kernel args so that you don't have to set them up in board prom + initialization routines. + + For more information, see the CMDLINE_BOOL and CMDLINE_OVERRIDE + options. + +config CMDLINE_OVERRIDE + bool "Built-in command line overrides firmware arguments" + default n + depends on CMDLINE_BOOL + help + By setting this option to 'Y' you will have your kernel ignore + command line arguments from firmware or second stage bootloader. + Instead, the built-in command line will be used exclusively. + + Normally, you will choose 'N' here. + choice prompt "Choose kernel unwinder" default UNWINDER_PROLOGUE if KALLSYMS @@ -23,3 +72,22 @@ config UNWINDER_PROLOGUE Some of the addresses it reports may be incorrect. endchoice + +config DEBUG_ZBOOT + bool "Enable compressed kernel support debugging" + depends on DEBUG_KERNEL && SYS_SUPPORTS_ZBOOT + default n + help + If you want to add compressed kernel support to a new board, and the + board supports uart16550 compatible serial port, please select + SYS_SUPPORTS_ZBOOT_UART16550 for your board and enable this option to + debug it. + + If your board doesn't support uart16550 compatible serial port, you + can try to select SYS_SUPPORTS_ZBOOT and use the other methods to + debug it. for example, add a new serial port support just as + arch/loongarch/boot/compressed/uart-16550.c does. + + After the compressed kernel support works, please disable this option + to reduce the kernel image size and speed up the booting procedure a + little. diff --git a/arch/loongarch/boot/compressed/Makefile b/arch/loongarch/boot/compressed/Makefile index e3ed0f54a264..76c5b19cb0e0 100644 --- a/arch/loongarch/boot/compressed/Makefile +++ b/arch/loongarch/boot/compressed/Makefile @@ -23,6 +23,11 @@ KBUILD_AFLAGS := $(KBUILD_AFLAGS) -D__ASSEMBLY__ \ # decompressor objects (linked with vmlinuz) vmlinuzobjs-y := $(obj)/head.o $(obj)/decompress.o $(obj)/string.o +ifdef CONFIG_DEBUG_ZBOOT +vmlinuzobjs-$(CONFIG_DEBUG_ZBOOT) += $(obj)/dbg.o +vmlinuzobjs-$(CONFIG_DEBUG_ZBOOT) += $(obj)/uart-16550.o +endif + vmlinuzobjs-$(CONFIG_KERNEL_XZ) += $(obj)/ashldi3.o extra-y += ashldi3.c diff --git a/arch/loongarch/boot/compressed/dbg.c b/arch/loongarch/boot/compressed/dbg.c new file mode 100644 index 000000000000..329193f0b059 --- /dev/null +++ b/arch/loongarch/boot/compressed/dbg.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * LoongArch-specific debug support for pre-boot environment + * + * NOTE: putc() is board specific, if your board have a 16550 compatible uart, + * please select SYS_SUPPORTS_ZBOOT_UART16550 for your machine. othewise, you + * need to implement your own putc(). + */ +#include +#include + +void __weak putc(char c) +{ +} + +void puts(const char *s) +{ + char c; + while ((c = *s++) != '\0') { + putc(c); + if (c == '\n') + putc('\r'); + } +} + +void puthex(unsigned long long val) +{ + + unsigned char buf[10]; + int i; + for (i = 7; i >= 0; i--) { + buf[i] = "0123456789ABCDEF"[val & 0x0F]; + val >>= 4; + } + buf[8] = '\0'; + puts(buf); +} diff --git a/arch/loongarch/boot/compressed/decompress.c b/arch/loongarch/boot/compressed/decompress.c index 099aad621aba..e3076445d2f4 100644 --- a/arch/loongarch/boot/compressed/decompress.c +++ b/arch/loongarch/boot/compressed/decompress.c @@ -22,8 +22,13 @@ unsigned long free_mem_end_ptr; extern unsigned char __image_begin, __image_end; /* debug interfaces */ +#ifdef CONFIG_DEBUG_ZBOOT +extern void puts(const char *s); +extern void puthex(unsigned long long val); +#else #define puts(s) do {} while (0) #define puthex(val) do {} while (0) +#endif void error(char *x) { diff --git a/arch/loongarch/boot/compressed/uart-16550.c b/arch/loongarch/boot/compressed/uart-16550.c new file mode 100644 index 000000000000..98934132103f --- /dev/null +++ b/arch/loongarch/boot/compressed/uart-16550.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * 16550 compatible uart based serial debug support for zboot + */ + +#include +#include + +#include + +#define UART_BASE 0x1fe001e0 +#define PORT(offset) (TO_UNCAC(UART_BASE) + (offset)) + +#ifndef IOTYPE +#define IOTYPE char +#endif + +#ifndef PORT +#error please define the serial port address for your own machine +#endif + +static inline unsigned int serial_in(int offset) +{ + return *((volatile IOTYPE *)PORT(offset)) & 0xFF; +} + +static inline void serial_out(int offset, int value) +{ + *((volatile IOTYPE *)PORT(offset)) = value & 0xFF; +} + +void putc(char c) +{ + int timeout = 1000000; + + while (((serial_in(UART_LSR) & UART_LSR_THRE) == 0) && (timeout-- > 0)) + ; + + serial_out(UART_TX, c); +} diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index dd1cf2d8e77b..315b9d1d436a 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -21,6 +21,7 @@ obj-$(CONFIG_CPU_HAS_FPU) += fpu.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_PROC_FS) += proc.o +obj-$(CONFIG_MAGIC_SYSRQ) += sysrq.o obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o obj-$(CONFIG_UNWINDER_PROLOGUE) += unwind_prologue.o diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index 5f27b1fe88a0..c849dc8b7907 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -45,6 +45,12 @@ struct screen_info screen_info; char __initdata arcs_cmdline[COMMAND_LINE_SIZE]; static char __initdata command_line[COMMAND_LINE_SIZE]; +#ifdef CONFIG_CMDLINE_BOOL +static const char builtin_cmdline[] __initconst = CONFIG_CMDLINE; +#else +static const char builtin_cmdline[] __initconst = ""; +#endif + static int num_standard_resources; static struct resource *standard_resources; @@ -254,6 +260,16 @@ static void __init bootcmdline_append(const char *s, size_t max) static void __init bootcmdline_init(char **cmdline_p) { + /* + * If CMDLINE_OVERRIDE is enabled then initializing the command line is + * trivial - we simply use the built-in command line unconditionally & + * unmodified. + */ + if (IS_ENABLED(CONFIG_CMDLINE_OVERRIDE)) { + strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); + return; + } + boot_command_line[0] = 0; /* @@ -262,6 +278,13 @@ static void __init bootcmdline_init(char **cmdline_p) */ bootcmdline_append(arcs_cmdline, COMMAND_LINE_SIZE); + /* + * If the user specified a built-in command line & we didn't already + * prepend it, we append it to boot_command_line here. + */ + if (IS_ENABLED(CONFIG_CMDLINE_BOOL)) + bootcmdline_append(builtin_cmdline, COMMAND_LINE_SIZE); + strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); *cmdline_p = command_line; diff --git a/arch/loongarch/kernel/sysrq.c b/arch/loongarch/kernel/sysrq.c new file mode 100644 index 000000000000..1f449b1610ca --- /dev/null +++ b/arch/loongarch/kernel/sysrq.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * LoongArch specific sysrq operations. + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include +#include + +#include +#include + +/* + * Dump TLB entries on all CPUs. + */ + +static DEFINE_SPINLOCK(show_lock); + +static void sysrq_tlbdump_single(void *dummy) +{ + unsigned long flags; + + spin_lock_irqsave(&show_lock, flags); + + pr_info("CPU%d:\n", smp_processor_id()); + dump_tlb_regs(); + pr_info("\n"); + dump_tlb_all(); + pr_info("\n"); + + spin_unlock_irqrestore(&show_lock, flags); +} + +#ifdef CONFIG_SMP +static void sysrq_tlbdump_othercpus(struct work_struct *dummy) +{ + smp_call_function(sysrq_tlbdump_single, NULL, 0); +} + +static DECLARE_WORK(sysrq_tlbdump, sysrq_tlbdump_othercpus); +#endif + +static void sysrq_handle_tlbdump(int key) +{ + sysrq_tlbdump_single(NULL); +#ifdef CONFIG_SMP + schedule_work(&sysrq_tlbdump); +#endif +} + +static struct sysrq_key_op sysrq_tlbdump_op = { + .handler = sysrq_handle_tlbdump, + .help_msg = "show-tlbs(x)", + .action_msg = "Show TLB entries", + .enable_mask = SYSRQ_ENABLE_DUMP, +}; + +static int __init loongarch_sysrq_init(void) +{ + return register_sysrq_key('x', &sysrq_tlbdump_op); +} +arch_initcall(loongarch_sysrq_init); -- Gitee From b321f05a658b15a01e5d6ec6ea9270cd724df1da Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 31 Dec 2020 15:13:33 +0800 Subject: [PATCH 30/59] LoongArch: Add relocatable kernel support Change-Id: Ie1c4e7015301406db7dde0168a71f75ea591bc91 Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 61 +++ arch/loongarch/Makefile | 6 + arch/loongarch/Makefile.postlink | 36 ++ arch/loongarch/boot/tools/Makefile | 9 + arch/loongarch/boot/tools/relocs.c | 622 ++++++++++++++++++++++++ arch/loongarch/boot/tools/relocs.h | 34 ++ arch/loongarch/boot/tools/relocs_32.c | 18 + arch/loongarch/boot/tools/relocs_64.c | 18 + arch/loongarch/boot/tools/relocs_main.c | 85 ++++ arch/loongarch/kernel/Makefile | 2 + arch/loongarch/kernel/head.S | 18 + arch/loongarch/kernel/relocate.c | 348 +++++++++++++ arch/loongarch/kernel/vmlinux.lds.S | 20 + scripts/kallsyms.c | 3 +- scripts/link-vmlinux.sh | 2 + 15 files changed, 1281 insertions(+), 1 deletion(-) create mode 100644 arch/loongarch/Makefile.postlink create mode 100644 arch/loongarch/boot/tools/Makefile create mode 100644 arch/loongarch/boot/tools/relocs.c create mode 100644 arch/loongarch/boot/tools/relocs.h create mode 100644 arch/loongarch/boot/tools/relocs_32.c create mode 100644 arch/loongarch/boot/tools/relocs_64.c create mode 100644 arch/loongarch/boot/tools/relocs_main.c create mode 100644 arch/loongarch/kernel/relocate.c diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index b9593d5eb1e6..2d6f842e197c 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -124,6 +124,7 @@ config MACH_LOONGSON64 select SYS_SUPPORTS_HOTPLUG_CPU select SYS_SUPPORTS_NUMA select SYS_SUPPORTS_64BIT_KERNEL + select SYS_SUPPORTS_RELOCATABLE select ZONE_DMA32 help This enables the support of Loongson 64-bit family of machines. These @@ -152,6 +153,11 @@ config SYS_SUPPORTS_HOTPLUG_CPU config GENERIC_CSUM def_bool y +config SYS_SUPPORTS_RELOCATABLE + bool + help + Selected if the platform supports relocating the kernel. + config SYS_SUPPORTS_HUGETLBFS def_bool y @@ -402,6 +408,61 @@ config NUMA config SYS_SUPPORTS_NUMA bool +config RELOCATABLE + bool "Relocatable kernel" + depends on SYS_SUPPORTS_RELOCATABLE + help + This builds a kernel image that retains relocation information + so it can be loaded someplace besides the default 1MB. + The relocations make the kernel binary about 15% larger, + but are discarded at runtime + +config RELOCATION_TABLE_SIZE + hex "Relocation table size" + depends on RELOCATABLE + range 0x0 0x01000000 + default "0x00100000" + help + A table of relocation data will be appended to the kernel binary + and parsed at boot to fix up the relocated kernel. + + This option allows the amount of space reserved for the table to be + adjusted, although the default of 1Mb should be ok in most cases. + + The build will fail and a valid size suggested if this is too small. + + If unsure, leave at the default value. + +config RANDOMIZE_BASE + bool "Randomize the address of the kernel image" + depends on RELOCATABLE + help + Randomizes the physical and virtual address at which the + kernel image is loaded, as a security feature that + deters exploit attempts relying on knowledge of the location + of kernel internals. + + Entropy is generated using any coprocessor 0 registers available. + + The kernel will be offset by up to RANDOMIZE_BASE_MAX_OFFSET. + + If unsure, say N. + +config RANDOMIZE_BASE_MAX_OFFSET + hex "Maximum kASLR offset" if EXPERT + depends on RANDOMIZE_BASE + range 0x0 0x40000000 if 64BIT + range 0x0 0x08000000 + default "0x01000000" + help + When kASLR is active, this provides the maximum offset that will + be applied to the kernel image. It should be set according to the + amount of physical RAM available in the target system minus + PHYSICAL_START and must be a power of 2. + + This is limited by the size of KPRANGE1, 256Mb on 32-bit or 1Gb with + 64-bit. The default is 16Mb. + config NODES_SHIFT int default "6" diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index 0fd1884e07bc..d43920ae3bd1 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -5,6 +5,7 @@ archscripts: scripts_basic $(Q)$(MAKE) $(build)=arch/loongarch/tools elf-entry + $(Q)$(MAKE) $(build)=arch/loongarch/boot/tools relocs # # Select the object file format to substitute into the linker script. @@ -50,6 +51,10 @@ KBUILD_CFLAGS_KERNEL += -Wa,-mla-global-with-pcrel KBUILD_AFLAGS_MODULE += -Wa,-mla-global-with-abs KBUILD_CFLAGS_MODULE += -fno-plt -Wa,-mla-global-with-abs,-mla-local-with-abs +ifeq ($(CONFIG_RELOCATABLE),y) +LDFLAGS_vmlinux += --emit-relocs +endif + cflags-y += -ffreestanding # @@ -152,6 +157,7 @@ endif archclean: $(Q)$(MAKE) $(clean)=arch/loongarch/boot $(Q)$(MAKE) $(clean)=arch/loongarch/boot/compressed + $(Q)$(MAKE) $(clean)=arch/loongarch/boot/tools $(Q)$(MAKE) $(clean)=arch/loongarch/tools define archhelp diff --git a/arch/loongarch/Makefile.postlink b/arch/loongarch/Makefile.postlink new file mode 100644 index 000000000000..c01574f47846 --- /dev/null +++ b/arch/loongarch/Makefile.postlink @@ -0,0 +1,36 @@ +# SPDX-License-Identifier: GPL-2.0 +# =========================================================================== +# Post-link LoongArch pass +# =========================================================================== +# +# 1. Insert relocations into vmlinux + +PHONY := __archpost +__archpost: + +-include include/config/auto.conf +include scripts/Kbuild.include + +CMD_RELOCS = arch/loongarch/boot/tools/relocs +quiet_cmd_relocs = RELOCS $@ + cmd_relocs = $(CMD_RELOCS) $@ + +# `@true` prevents complaint when there is nothing to be done + +vmlinux: FORCE + @true +ifeq ($(CONFIG_RELOCATABLE),y) + $(call if_changed,relocs) +endif + +%.ko: FORCE + @true + +clean: + @true + +PHONY += FORCE clean + +FORCE: + +.PHONY: $(PHONY) diff --git a/arch/loongarch/boot/tools/Makefile b/arch/loongarch/boot/tools/Makefile new file mode 100644 index 000000000000..592e05a51a4a --- /dev/null +++ b/arch/loongarch/boot/tools/Makefile @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0 + +hostprogs += relocs +relocs-objs += relocs_32.o +relocs-objs += relocs_64.o +relocs-objs += relocs_main.o +PHONY += relocs +relocs: $(obj)/relocs + @: diff --git a/arch/loongarch/boot/tools/relocs.c b/arch/loongarch/boot/tools/relocs.c new file mode 100644 index 000000000000..6fa6f8a64af5 --- /dev/null +++ b/arch/loongarch/boot/tools/relocs.c @@ -0,0 +1,622 @@ +// SPDX-License-Identifier: GPL-2.0 +/* This is included from relocs_32/64.c */ + +#define ElfW(type) _ElfW(ELF_BITS, type) +#define _ElfW(bits, type) __ElfW(bits, type) +#define __ElfW(bits, type) Elf##bits##_##type + +#define Elf_Addr ElfW(Addr) +#define Elf_Ehdr ElfW(Ehdr) +#define Elf_Phdr ElfW(Phdr) +#define Elf_Shdr ElfW(Shdr) +#define Elf_Sym ElfW(Sym) + +static Elf_Ehdr ehdr; + +struct relocs { + uint32_t *offset; + unsigned long count; + unsigned long size; +}; + +static struct relocs relocs; + +struct section { + Elf_Shdr shdr; + struct section *link; + Elf_Sym *symtab; + Elf_Rel *reltab; + char *strtab; + long shdr_offset; +}; +static struct section *secs; + +static const char * const regex_sym_kernel = { +/* Symbols matching these regex's should never be relocated */ + "^(__crc_)", +}; + +static regex_t sym_regex_c; + +static int regex_skip_reloc(const char *sym_name) +{ + return !regexec(&sym_regex_c, sym_name, 0, NULL, 0); +} + +static void regex_init(void) +{ + char errbuf[128]; + int err; + + err = regcomp(&sym_regex_c, regex_sym_kernel, + REG_EXTENDED|REG_NOSUB); + + if (err) { + regerror(err, &sym_regex_c, errbuf, sizeof(errbuf)); + die("%s", errbuf); + } +} + +static const char *rel_type(unsigned type) +{ + static const char * const type_name[] = { +#define REL_TYPE(X)[X] = #X + REL_TYPE(R_LARCH_32), + REL_TYPE(R_LARCH_64), + REL_TYPE(R_LARCH_MARK_LA), +#undef REL_TYPE + }; + const char *name = "unknown type rel type name"; + + if (type < ARRAY_SIZE(type_name) && type_name[type]) + name = type_name[type]; + return name; +} + +static const char *sec_name(unsigned shndx) +{ + const char *sec_strtab; + const char *name; + + sec_strtab = secs[ehdr.e_shstrndx].strtab; + if (shndx < ehdr.e_shnum) + name = sec_strtab + secs[shndx].shdr.sh_name; + else if (shndx == SHN_ABS) + name = "ABSOLUTE"; + else if (shndx == SHN_COMMON) + name = "COMMON"; + else + name = ""; + return name; +} + +static struct section *sec_lookup(const char *secname) +{ + int i; + + for (i = 0; i < ehdr.e_shnum; i++) + if (strcmp(secname, sec_name(i)) == 0) + return &secs[i]; + + return NULL; +} + +static const char *sym_name(const char *sym_strtab, Elf_Sym *sym) +{ + const char *name; + + if (sym->st_name) + name = sym_strtab + sym->st_name; + else + name = sec_name(sym->st_shndx); + return name; +} + +#define le16_to_cpu(val) (val) +#define le32_to_cpu(val) (val) +#define le64_to_cpu(val) (val) +#define be16_to_cpu(val) bswap_16(val) +#define be32_to_cpu(val) bswap_32(val) +#define be64_to_cpu(val) bswap_64(val) + +#define cpu_to_le16(val) (val) +#define cpu_to_le32(val) (val) +#define cpu_to_le64(val) (val) +#define cpu_to_be16(val) bswap_16(val) +#define cpu_to_be32(val) bswap_32(val) +#define cpu_to_be64(val) bswap_64(val) + +static uint16_t elf16_to_cpu(uint16_t val) +{ + if (ehdr.e_ident[EI_DATA] == ELFDATA2LSB) + return le16_to_cpu(val); + else + return be16_to_cpu(val); +} + +static uint32_t elf32_to_cpu(uint32_t val) +{ + if (ehdr.e_ident[EI_DATA] == ELFDATA2LSB) + return le32_to_cpu(val); + else + return be32_to_cpu(val); +} + +static uint32_t cpu_to_elf32(uint32_t val) +{ + if (ehdr.e_ident[EI_DATA] == ELFDATA2LSB) + return cpu_to_le32(val); + else + return cpu_to_be32(val); +} + +#define elf_half_to_cpu(x) elf16_to_cpu(x) +#define elf_word_to_cpu(x) elf32_to_cpu(x) + +#if ELF_BITS == 64 +static uint64_t elf64_to_cpu(uint64_t val) +{ + if (ehdr.e_ident[EI_DATA] == ELFDATA2LSB) + return le64_to_cpu(val); + else + return be64_to_cpu(val); +} +#define elf_addr_to_cpu(x) elf64_to_cpu(x) +#define elf_off_to_cpu(x) elf64_to_cpu(x) +#define elf_xword_to_cpu(x) elf64_to_cpu(x) +#else +#define elf_addr_to_cpu(x) elf32_to_cpu(x) +#define elf_off_to_cpu(x) elf32_to_cpu(x) +#define elf_xword_to_cpu(x) elf32_to_cpu(x) +#endif + +static void read_ehdr(FILE *fp) +{ + if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1) + die("Cannot read ELF header: %s\n", strerror(errno)); + + if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0) + die("No ELF magic\n"); + + if (ehdr.e_ident[EI_CLASS] != ELF_CLASS) + die("Not a %d bit executable\n", ELF_BITS); + + if ((ehdr.e_ident[EI_DATA] != ELFDATA2LSB) && + (ehdr.e_ident[EI_DATA] != ELFDATA2MSB)) + die("Unknown ELF Endianness\n"); + + if (ehdr.e_ident[EI_VERSION] != EV_CURRENT) + die("Unknown ELF version\n"); + + /* Convert the fields to native endian */ + ehdr.e_type = elf_half_to_cpu(ehdr.e_type); + ehdr.e_machine = elf_half_to_cpu(ehdr.e_machine); + ehdr.e_version = elf_word_to_cpu(ehdr.e_version); + ehdr.e_entry = elf_addr_to_cpu(ehdr.e_entry); + ehdr.e_phoff = elf_off_to_cpu(ehdr.e_phoff); + ehdr.e_shoff = elf_off_to_cpu(ehdr.e_shoff); + ehdr.e_flags = elf_word_to_cpu(ehdr.e_flags); + ehdr.e_ehsize = elf_half_to_cpu(ehdr.e_ehsize); + ehdr.e_phentsize = elf_half_to_cpu(ehdr.e_phentsize); + ehdr.e_phnum = elf_half_to_cpu(ehdr.e_phnum); + ehdr.e_shentsize = elf_half_to_cpu(ehdr.e_shentsize); + ehdr.e_shnum = elf_half_to_cpu(ehdr.e_shnum); + ehdr.e_shstrndx = elf_half_to_cpu(ehdr.e_shstrndx); + + if ((ehdr.e_type != ET_EXEC) && (ehdr.e_type != ET_DYN)) + die("Unsupported ELF header type\n"); + + if (ehdr.e_machine != ELF_MACHINE) + die("Not for %s\n", ELF_MACHINE_NAME); + + if (ehdr.e_version != EV_CURRENT) + die("Unknown ELF version\n"); + + if (ehdr.e_ehsize != sizeof(Elf_Ehdr)) + die("Bad Elf header size\n"); + + if (ehdr.e_phentsize != sizeof(Elf_Phdr)) + die("Bad program header entry\n"); + + if (ehdr.e_shentsize != sizeof(Elf_Shdr)) + die("Bad section header entry\n"); + + if (ehdr.e_shstrndx >= ehdr.e_shnum) + die("String table index out of bounds\n"); +} + +static void read_shdrs(FILE *fp) +{ + int i; + Elf_Shdr shdr; + + secs = calloc(ehdr.e_shnum, sizeof(struct section)); + if (!secs) + die("Unable to allocate %d section headers\n", ehdr.e_shnum); + + if (fseek(fp, ehdr.e_shoff, SEEK_SET) < 0) + die("Seek to %d failed: %s\n", ehdr.e_shoff, strerror(errno)); + + for (i = 0; i < ehdr.e_shnum; i++) { + struct section *sec = &secs[i]; + + sec->shdr_offset = ftell(fp); + if (fread(&shdr, sizeof(shdr), 1, fp) != 1) + die("Cannot read ELF section headers %d/%d: %s\n", + i, ehdr.e_shnum, strerror(errno)); + sec->shdr.sh_name = elf_word_to_cpu(shdr.sh_name); + sec->shdr.sh_type = elf_word_to_cpu(shdr.sh_type); + sec->shdr.sh_flags = elf_xword_to_cpu(shdr.sh_flags); + sec->shdr.sh_addr = elf_addr_to_cpu(shdr.sh_addr); + sec->shdr.sh_offset = elf_off_to_cpu(shdr.sh_offset); + sec->shdr.sh_size = elf_xword_to_cpu(shdr.sh_size); + sec->shdr.sh_link = elf_word_to_cpu(shdr.sh_link); + sec->shdr.sh_info = elf_word_to_cpu(shdr.sh_info); + sec->shdr.sh_addralign = elf_xword_to_cpu(shdr.sh_addralign); + sec->shdr.sh_entsize = elf_xword_to_cpu(shdr.sh_entsize); + if (sec->shdr.sh_link < ehdr.e_shnum) + sec->link = &secs[sec->shdr.sh_link]; + } +} + +static void read_strtabs(FILE *fp) +{ + int i; + + for (i = 0; i < ehdr.e_shnum; i++) { + struct section *sec = &secs[i]; + + if (sec->shdr.sh_type != SHT_STRTAB) + continue; + + sec->strtab = malloc(sec->shdr.sh_size); + if (!sec->strtab) + die("malloc of %d bytes for strtab failed\n", + sec->shdr.sh_size); + + if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) + die("Seek to %d failed: %s\n", + sec->shdr.sh_offset, strerror(errno)); + + if (fread(sec->strtab, 1, sec->shdr.sh_size, fp) != + sec->shdr.sh_size) + die("Cannot read symbol table: %s\n", strerror(errno)); + } +} + +static void read_symtabs(FILE *fp) +{ + int i, j; + + for (i = 0; i < ehdr.e_shnum; i++) { + struct section *sec = &secs[i]; + if (sec->shdr.sh_type != SHT_SYMTAB) + continue; + + sec->symtab = malloc(sec->shdr.sh_size); + if (!sec->symtab) + die("malloc of %d bytes for symtab failed\n", + sec->shdr.sh_size); + + if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) + die("Seek to %d failed: %s\n", + sec->shdr.sh_offset, strerror(errno)); + + if (fread(sec->symtab, 1, sec->shdr.sh_size, fp) != + sec->shdr.sh_size) + die("Cannot read symbol table: %s\n", strerror(errno)); + + for (j = 0; j < sec->shdr.sh_size/sizeof(Elf_Sym); j++) { + Elf_Sym *sym = &sec->symtab[j]; + + sym->st_name = elf_word_to_cpu(sym->st_name); + sym->st_value = elf_addr_to_cpu(sym->st_value); + sym->st_size = elf_xword_to_cpu(sym->st_size); + sym->st_shndx = elf_half_to_cpu(sym->st_shndx); + } + } +} + +static void read_relocs(FILE *fp) +{ + static unsigned long base; + int i, j; + + if (!base) { + struct section *sec = sec_lookup(".text"); + + if (!sec) + die("Could not find .text section\n"); + + base = sec->shdr.sh_addr; + } + + for (i = 0; i < ehdr.e_shnum; i++) { + struct section *sec = &secs[i]; + + if (sec->shdr.sh_type != SHT_REL_TYPE) + continue; + + sec->reltab = malloc(sec->shdr.sh_size); + if (!sec->reltab) + die("malloc of %d bytes for relocs failed\n", + sec->shdr.sh_size); + + if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) + die("Seek to %d failed: %s\n", + sec->shdr.sh_offset, strerror(errno)); + + if (fread(sec->reltab, 1, sec->shdr.sh_size, fp) != + sec->shdr.sh_size) + die("Cannot read symbol table: %s\n", strerror(errno)); + + for (j = 0; j < sec->shdr.sh_size/sizeof(Elf_Rel); j++) { + Elf_Rel *rel = &sec->reltab[j]; + + rel->r_offset = elf_addr_to_cpu(rel->r_offset); + /* Set offset into kernel image */ + rel->r_offset -= base; +#if (ELF_BITS == 32) + rel->r_info = elf_xword_to_cpu(rel->r_info); +#else + /* Convert MIPS64 RELA format - only the symbol + * index needs converting to native endianness + */ + rel->r_info = rel->r_info; +#endif +#if (SHT_REL_TYPE == SHT_RELA) + rel->r_addend = elf_xword_to_cpu(rel->r_addend); +#endif + } + } +} + +static void remove_relocs(FILE *fp) +{ + int i; + Elf_Shdr shdr; + + for (i = 0; i < ehdr.e_shnum; i++) { + struct section *sec = &secs[i]; + + if (sec->shdr.sh_type != SHT_REL_TYPE) + continue; + + if (fseek(fp, sec->shdr_offset, SEEK_SET) < 0) + die("Seek to %d failed: %s\n", + sec->shdr_offset, strerror(errno)); + + if (fread(&shdr, sizeof(shdr), 1, fp) != 1) + die("Cannot read ELF section headers %d/%d: %s\n", + i, ehdr.e_shnum, strerror(errno)); + + /* Set relocation section size to 0, effectively removing it. + * This is necessary due to lack of support for relocations + * in objcopy when creating 32bit elf from 64bit elf. + */ + shdr.sh_size = 0; + + if (fseek(fp, sec->shdr_offset, SEEK_SET) < 0) + die("Seek to %d failed: %s\n", + sec->shdr_offset, strerror(errno)); + + if (fwrite(&shdr, sizeof(shdr), 1, fp) != 1) + die("Cannot write ELF section headers %d/%d: %s\n", + i, ehdr.e_shnum, strerror(errno)); + } +} + +static void add_reloc(struct relocs *r, uint32_t offset, unsigned type) +{ + /* Relocation representation in binary table: + * |76543210|76543210|76543210|76543210| + * | Type | offset from _text >> 2 | + */ + offset >>= 2; + if (offset > 0x00FFFFFF) + die("Kernel image exceeds maximum size for relocation!\n"); + + offset = (offset & 0x00FFFFFF) | ((type & 0xFF) << 24); + + if (r->count == r->size) { + unsigned long newsize = r->size + 50000; + void *mem = realloc(r->offset, newsize * sizeof(r->offset[0])); + + if (!mem) + die("realloc failed\n"); + + r->offset = mem; + r->size = newsize; + } + r->offset[r->count++] = offset; +} + +static void walk_relocs(int (*process)(struct section *sec, Elf_Rel *rel, + Elf_Sym *sym, const char *symname)) +{ + int i; + + /* Walk through the relocations */ + for (i = 0; i < ehdr.e_shnum; i++) { + char *sym_strtab; + Elf_Sym *sh_symtab; + struct section *sec_applies, *sec_symtab; + int j; + struct section *sec = &secs[i]; + + if (sec->shdr.sh_type != SHT_REL_TYPE) + continue; + + sec_symtab = sec->link; + sec_applies = &secs[sec->shdr.sh_info]; + if (!(sec_applies->shdr.sh_flags & SHF_ALLOC)) + continue; + + sh_symtab = sec_symtab->symtab; + sym_strtab = sec_symtab->link->strtab; + for (j = 0; j < sec->shdr.sh_size/sizeof(Elf_Rel); j++) { + Elf_Rel *rel = &sec->reltab[j]; + Elf_Sym *sym = &sh_symtab[ELF_R_SYM(rel->r_info)]; + const char *symname = sym_name(sym_strtab, sym); + + process(sec, rel, sym, symname); + } + } +} + +static int do_reloc(struct section *sec, Elf_Rel *rel, Elf_Sym *sym, + const char *symname) +{ + unsigned r_type = ELF_R_TYPE(rel->r_info); + unsigned bind = ELF_ST_BIND(sym->st_info); + + if ((bind == STB_WEAK) && (sym->st_value == 0)) { + /* Don't relocate weak symbols without a target */ + return 0; + } + + if (regex_skip_reloc(symname)) + return 0; + + switch (r_type) { + case R_LARCH_32: + case R_LARCH_64: + case R_LARCH_MARK_LA: + add_reloc(&relocs, rel->r_offset, r_type); + break; + } + + return 0; +} + +static int write_reloc_as_bin(uint32_t v, FILE *f) +{ + unsigned char buf[4]; + + v = cpu_to_elf32(v); + + memcpy(buf, &v, sizeof(uint32_t)); + return fwrite(buf, 1, 4, f); +} + +static int write_reloc_as_text(uint32_t v, FILE *f) +{ + int res; + + res = fprintf(f, "\t.long 0x%08"PRIx32"\n", v); + if (res < 0) + return res; + else + return sizeof(uint32_t); +} + +static void emit_relocs(int as_text, int as_bin, FILE *outf) +{ + int i; + int (*write_reloc)(uint32_t, FILE *) = write_reloc_as_bin; + int size = 0; + int size_reserved; + struct section *sec_reloc; + + sec_reloc = sec_lookup(".data.reloc"); + if (!sec_reloc) + die("Could not find relocation section\n"); + + size_reserved = sec_reloc->shdr.sh_size; + + /* Collect up the relocations */ + walk_relocs(do_reloc); + + /* Print the relocations */ + if (as_text) { + /* Print the relocations in a form suitable that + * gas will like. + */ + printf(".section \".data.reloc\",\"a\"\n"); + printf(".balign 4\n"); + /* Output text to stdout */ + write_reloc = write_reloc_as_text; + outf = stdout; + } else if (as_bin) { + /* Output raw binary to stdout */ + outf = stdout; + } else { + /* Seek to offset of the relocation section. + * Each relocation is then written into the + * vmlinux kernel image. + */ + if (fseek(outf, sec_reloc->shdr.sh_offset, SEEK_SET) < 0) { + die("Seek to %d failed: %s\n", + sec_reloc->shdr.sh_offset, strerror(errno)); + } + } + + for (i = 0; i < relocs.count; i++) + size += write_reloc(relocs.offset[i], outf); + + /* Print a stop, but only if we've actually written some relocs */ + if (size) + size += write_reloc(0, outf); + + if (size > size_reserved) + /* Die, but suggest a value for CONFIG_RELOCATION_TABLE_SIZE + * which will fix this problem and allow a bit of headroom + * if more kernel features are enabled + */ + die("Relocations overflow available space!\n" \ + "Please adjust CONFIG_RELOCATION_TABLE_SIZE " \ + "to at least 0x%08x\n", (size + 0x1000) & ~0xFFF); +} + +/* + * As an aid to debugging problems with different linkers + * print summary information about the relocs. + * Since different linkers tend to emit the sections in + * different orders we use the section names in the output. + */ +static int do_reloc_info(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym, + const char *symname) +{ + printf("%16s 0x%08x %16s %40s %16s\n", + sec_name(sec->shdr.sh_info), + (unsigned int)rel->r_offset, + rel_type(ELF_R_TYPE(rel->r_info)), + symname, + sec_name(sym->st_shndx)); + return 0; +} + +static void print_reloc_info(void) +{ + printf("%16s %10s %16s %40s %16s\n", + "reloc section", + "offset", + "reloc type", + "symbol", + "symbol section"); + walk_relocs(do_reloc_info); +} + +#if ELF_BITS == 64 +# define process process_64 +#else +# define process process_32 +#endif + +void process(FILE *fp, int as_text, int as_bin, + int show_reloc_info, int keep_relocs) +{ + regex_init(); + read_ehdr(fp); + read_shdrs(fp); + read_strtabs(fp); + read_symtabs(fp); + read_relocs(fp); + if (show_reloc_info) { + print_reloc_info(); + return; + } + emit_relocs(as_text, as_bin, fp); + if (!keep_relocs) + remove_relocs(fp); +} diff --git a/arch/loongarch/boot/tools/relocs.h b/arch/loongarch/boot/tools/relocs.h new file mode 100644 index 000000000000..1f8c4e46cbde --- /dev/null +++ b/arch/loongarch/boot/tools/relocs.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef RELOCS_H +#define RELOCS_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +void die(char *fmt, ...); + +#ifndef EM_LOONGARCH +#define EM_LOONGARCH 258 +#endif +#define R_LARCH_32 1 +#define R_LARCH_64 2 +#define R_LARCH_MARK_LA 20 + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +void process_32(FILE *fp, int as_text, int as_bin, + int show_reloc_info, int keep_relocs); +void process_64(FILE *fp, int as_text, int as_bin, + int show_reloc_info, int keep_relocs); + +#endif /* RELOCS_H */ diff --git a/arch/loongarch/boot/tools/relocs_32.c b/arch/loongarch/boot/tools/relocs_32.c new file mode 100644 index 000000000000..fa36eee6c5b7 --- /dev/null +++ b/arch/loongarch/boot/tools/relocs_32.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "relocs.h" + +#define ELF_BITS 32 + +#define ELF_MACHINE EM_LOONGARCH +#define ELF_MACHINE_NAME "LOONGARCH32" +#define SHT_REL_TYPE SHT_REL +#define Elf_Rel ElfW(Rel) + +#define ELF_CLASS ELFCLASS32 +#define ELF_R_SYM(val) ELF32_R_SYM(val) +#define ELF_R_TYPE(val) ELF32_R_TYPE(val) +#define ELF_ST_TYPE(o) ELF32_ST_TYPE(o) +#define ELF_ST_BIND(o) ELF32_ST_BIND(o) +#define ELF_ST_VISIBILITY(o) ELF32_ST_VISIBILITY(o) + +#include "relocs.c" diff --git a/arch/loongarch/boot/tools/relocs_64.c b/arch/loongarch/boot/tools/relocs_64.c new file mode 100644 index 000000000000..1024133af8ec --- /dev/null +++ b/arch/loongarch/boot/tools/relocs_64.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "relocs.h" + +#define ELF_BITS 64 + +#define ELF_MACHINE EM_LOONGARCH +#define ELF_MACHINE_NAME "LOONGARCH64" +#define SHT_REL_TYPE SHT_RELA +#define Elf_Rel Elf64_Rela + +#define ELF_CLASS ELFCLASS64 +#define ELF_R_SYM(val) ELF64_R_SYM(val) +#define ELF_R_TYPE(val) ELF64_R_TYPE(val) +#define ELF_ST_TYPE(o) ELF64_ST_TYPE(o) +#define ELF_ST_BIND(o) ELF64_ST_BIND(o) +#define ELF_ST_VISIBILITY(o) ELF64_ST_VISIBILITY(o) + +#include "relocs.c" diff --git a/arch/loongarch/boot/tools/relocs_main.c b/arch/loongarch/boot/tools/relocs_main.c new file mode 100644 index 000000000000..e2453a564b11 --- /dev/null +++ b/arch/loongarch/boot/tools/relocs_main.c @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "relocs.h" + +void die(char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + exit(1); +} + +static void usage(void) +{ + die("relocs [--reloc-info|--text|--bin|--keep] vmlinux\n"); +} + +int main(int argc, char **argv) +{ + int show_reloc_info, as_text, as_bin, keep_relocs; + const char *fname; + FILE *fp; + int i; + unsigned char e_ident[EI_NIDENT]; + + show_reloc_info = 0; + as_text = 0; + as_bin = 0; + keep_relocs = 0; + fname = NULL; + for (i = 1; i < argc; i++) { + char *arg = argv[i]; + + if (*arg == '-') { + if (strcmp(arg, "--reloc-info") == 0) { + show_reloc_info = 1; + continue; + } + if (strcmp(arg, "--text") == 0) { + as_text = 1; + continue; + } + if (strcmp(arg, "--bin") == 0) { + as_bin = 1; + continue; + } + if (strcmp(arg, "--keep") == 0) { + keep_relocs = 1; + continue; + } + } else if (!fname) { + fname = arg; + continue; + } + usage(); + } + if (!fname) + usage(); + + fp = fopen(fname, "r+"); + if (!fp) + die("Cannot open %s: %s\n", fname, strerror(errno)); + + if (fread(&e_ident, 1, EI_NIDENT, fp) != EI_NIDENT) + die("Cannot read %s: %s", fname, strerror(errno)); + + rewind(fp); + if (e_ident[EI_CLASS] == ELFCLASS64) + process_64(fp, as_text, as_bin, show_reloc_info, keep_relocs); + else + process_32(fp, as_text, as_bin, show_reloc_info, keep_relocs); + fclose(fp); + return 0; +} diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index 315b9d1d436a..97ccc933bf51 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -23,6 +23,8 @@ obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_MAGIC_SYSRQ) += sysrq.o +obj-$(CONFIG_RELOCATABLE) += relocate.o + obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o obj-$(CONFIG_UNWINDER_PROLOGUE) += unwind_prologue.o diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S index 6e816b5369c0..7c47592d1aa3 100644 --- a/arch/loongarch/kernel/head.S +++ b/arch/loongarch/kernel/head.S @@ -65,7 +65,25 @@ SYM_CODE_START(kernel_entry) # kernel entry point set_saved_sp sp, t0, t1 PTR_ADDIU sp, sp, -4 * SZREG # init stack pointer +#ifdef CONFIG_RELOCATABLE + /* Copy kernel and apply the relocations */ + bl relocate_kernel + + /* Repoint the sp into the new kernel image */ + PTR_LI sp, (_THREAD_SIZE - 32 - PT_SIZE) + PTR_ADDU sp, sp, tp + set_saved_sp sp, t0, t1 + PTR_ADDIU sp, sp, -4 * SZREG # init stack pointer + + /* + * relocate_kernel returns the entry point either + * in the relocated kernel or the original if for + * some reason relocation failed. + */ + jirl zero, v0, 0 +#else bl start_kernel +#endif SYM_CODE_END(kernel_entry) diff --git a/arch/loongarch/kernel/relocate.c b/arch/loongarch/kernel/relocate.c new file mode 100644 index 000000000000..63fc0b2ea52f --- /dev/null +++ b/arch/loongarch/kernel/relocate.c @@ -0,0 +1,348 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Support for Kernel relocation at boot time + * + * Copyright (C) 2020 Loongson Technology Co., Ltd. + * Authors: Huacai Chen (chenhuacai@loongson.cn) + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define RELOCATED(x) ((void *)((long)x + offset)) + +extern u32 _relocation_start[]; /* End kernel image / start relocation table */ +extern u32 _relocation_end[]; /* End relocation table */ + +extern long __start___ex_table; /* Start exception table */ +extern long __stop___ex_table; /* End exception table */ + +static int __init apply_r_loongarch_32_rel(u32 *loc_orig, u32 *loc_new, long offset) +{ + *loc_new += offset; + + return 0; +} + +static int __init apply_r_loongarch_64_rel(u32 *loc_orig, u32 *loc_new, long offset) +{ + *(u64 *)loc_new += offset; + + return 0; +} + +/* + * The details about la.abs $r1, x on LoongArch + * + * lu12i.w $r1, 0 + * ori $r1, $r1, 0x0 + * lu32i.d $r1, 0 + * lu52i.d $r1, $r1, 0 + * + * LoongArch use lu12i.w, ori, lu32i.d, lu52i.d to load a 64bit imm. + * lu12i.w load bit31~bit12, ori load bit12~bit0, + * lu32i.d load bit51~bit32, lu32i.d load bit63~bit52 + */ +#define ORI_IMMMSK 0xfff +#define ORI_IMMPOS 10 +#define LU12I_IMMMSK 0xfffff +#define LU12I_IMMPOS 5 +#define LU12I_OFFSET 12 +#define LU32I_IMMMSK 0xfffff +#define LU32I_IMMPOS 5 +#define LU32I_OFFSET 32 +#define LU52I_IMMMSK 0xfff +#define LU52I_IMMPOS 10 +#define LU52I_OFFSET 52 +#define RD_MASK 0x1f + +static int __init apply_r_loongarch_mark_la_rel(u32 *loc_orig, u32 *loc_new, long offset) +{ + unsigned int ins, rd; + unsigned long long dest; + + ins = loc_new[1]; + rd = ins & RD_MASK; + dest = (ins >> ORI_IMMPOS) & ORI_IMMMSK; + ins = loc_new[0]; + dest |= ((ins >> LU12I_IMMPOS) & LU12I_IMMMSK) << LU12I_OFFSET; + ins = loc_new[2]; + dest |= ((unsigned long long)((ins >> LU32I_IMMPOS) & LU32I_IMMMSK)) << LU32I_OFFSET; + ins = loc_new[3]; + dest |= ((unsigned long long)((ins >> LU52I_IMMPOS) & LU52I_IMMMSK)) << LU52I_OFFSET; + dest += offset; + + loc_new[0] = (loc_new[0] & ~(LU12I_IMMMSK << LU12I_IMMPOS)) | (((dest >> LU12I_OFFSET) & LU12I_IMMMSK) << LU12I_IMMPOS); + loc_new[1] = (loc_new[1] & ~(ORI_IMMMSK << ORI_IMMPOS)) | ((dest & ORI_IMMMSK) << ORI_IMMPOS); + loc_new[2] = (loc_new[2] & ~(LU32I_IMMMSK << LU32I_IMMPOS)) | (((dest >> LU32I_OFFSET) & LU32I_IMMMSK) << LU32I_IMMPOS); + loc_new[3] = (loc_new[3] & ~(LU52I_IMMMSK << LU52I_IMMPOS)) | (((dest >> LU52I_OFFSET) & LU52I_IMMMSK) << LU52I_IMMPOS); + + return 0; +} + +static int (*reloc_handlers_rel[]) (u32 *, u32 *, long) __initdata = { + [R_LARCH_32] = apply_r_loongarch_32_rel, + [R_LARCH_64] = apply_r_loongarch_64_rel, + [R_LARCH_MARK_LA] = apply_r_loongarch_mark_la_rel, +}; + +int __init do_relocations(void *kbase_old, void *kbase_new, long offset) +{ + u32 *r; + u32 *loc_orig; + u32 *loc_new; + int type; + int res; + + for (r = _relocation_start; r < _relocation_end; r++) { + /* Sentinel for last relocation */ + if (*r == 0) + break; + + type = (*r >> 24) & 0xff; + loc_orig = (void *)(kbase_old + ((*r & 0x00ffffff) << 2)); + loc_new = RELOCATED(loc_orig); + + if (reloc_handlers_rel[type] == NULL) { + /* Unsupported relocation */ + pr_err("Unhandled relocation type %d at 0x%pK\n", + type, loc_orig); + return -ENOEXEC; + } + + res = reloc_handlers_rel[type](loc_orig, loc_new, offset); + if (res) + return res; + } + + return 0; +} + +/* + * The exception table is filled in by the relocs tool after vmlinux is linked. + * It must be relocated separately since there will not be any relocation + * information for it filled in by the linker. + */ +static int __init relocate_exception_table(long offset) +{ +#ifdef CONFIG_BUILDTIME_TABLE_SORT + unsigned long *etable_start, *etable_end, *e; + + etable_start = RELOCATED(&__start___ex_table); + etable_end = RELOCATED(&__stop___ex_table); + + for (e = etable_start; e < etable_end; e++) + *e += offset; +#endif + return 0; +} + +#ifdef CONFIG_RANDOMIZE_BASE + +static inline __init unsigned long rotate_xor(unsigned long hash, + const void *area, size_t size) +{ + size_t i, diff; + const typeof(hash) *ptr = PTR_ALIGN(area, sizeof(hash)); + + diff = (void *)ptr - area; + if (unlikely(size < diff + sizeof(hash))) + return hash; + + size = ALIGN_DOWN(size - diff, sizeof(hash)); + + for (i = 0; i < size / sizeof(hash); i++) { + /* Rotate by odd number of bits and XOR. */ + hash = (hash << ((sizeof(hash) * 8) - 7)) | (hash >> 7); + hash ^= ptr[i]; + } + + return hash; +} + +static inline __init unsigned long get_random_boot(void) +{ + unsigned long entropy = random_get_entropy(); + unsigned long hash = 0; + + /* Attempt to create a simple but unpredictable starting entropy. */ + hash = rotate_xor(hash, linux_banner, strlen(linux_banner)); + + /* Add in any runtime entropy we can get */ + hash = rotate_xor(hash, &entropy, sizeof(entropy)); + + return hash; +} + +static inline __init bool kaslr_disabled(void) +{ + char *str; + +#if defined(CONFIG_CMDLINE_BOOL) + const char *builtin_cmdline = CONFIG_CMDLINE; + + str = strstr(builtin_cmdline, "nokaslr"); + if (str == builtin_cmdline || + (str > builtin_cmdline && *(str - 1) == ' ')) + return true; +#endif + str = strstr(arcs_cmdline, "nokaslr"); + if (str == arcs_cmdline || (str > arcs_cmdline && *(str - 1) == ' ')) + return true; + + return false; +} + +static inline void __init *determine_relocation_address(void) +{ + /* Choose a new address for the kernel */ + unsigned long kernel_length; + void *dest = &_text; + unsigned long offset; + + if (kaslr_disabled()) + return dest; + + kernel_length = (long)_end - (long)(&_text); + + offset = get_random_boot() << 16; + offset &= (CONFIG_RANDOMIZE_BASE_MAX_OFFSET - 1); + if (offset < kernel_length) + offset += ALIGN(kernel_length, 0xffff); + + return RELOCATED(dest); +} + +#else + +static inline void __init *determine_relocation_address(void) +{ + /* + * Choose a new address for the kernel + * For now we'll hard code the destination + */ + return (void *)(CAC_BASE + 0x02000000); +} + +#endif + +static inline int __init relocation_addr_valid(void *loc_new) +{ + if ((unsigned long)loc_new & 0x0000ffff) { + /* Inappropriately aligned new location */ + return 0; + } + if ((unsigned long)loc_new < (unsigned long)&_end) { + /* New location overlaps original kernel */ + return 0; + } + return 1; +} + +static inline void __init update_kaslr_offset(unsigned long *addr, long offset) +{ + unsigned long *new_addr = (unsigned long *)RELOCATED(addr); + + *new_addr = (unsigned long)offset; +} + +void *__init relocate_kernel(void) +{ + void *loc_new; + unsigned long kernel_length; + unsigned long bss_length; + long offset = 0; + int res = 1; + /* Default to original kernel entry point */ + void *kernel_entry = start_kernel; + + /* Get the command line */ + fw_init_cmdline(); + + kernel_length = (long)(&_relocation_start) - (long)(&_text); + bss_length = (long)&__bss_stop - (long)&__bss_start; + + loc_new = determine_relocation_address(); + + /* Sanity check relocation address */ + if (relocation_addr_valid(loc_new)) + offset = (unsigned long)loc_new - (unsigned long)(&_text); + + /* Reset the command line now so we don't end up with a duplicate */ + arcs_cmdline[0] = '\0'; + + if (offset) { + /* Copy the kernel to it's new location */ + memcpy(loc_new, &_text, kernel_length); + + /* Perform relocations on the new kernel */ + res = do_relocations(&_text, loc_new, offset); + if (res < 0) + goto out; + + /* Sync the caches ready for execution of new kernel */ + asm volatile ( + " ibar 0 \n" + " dbar 0 \n"); + + res = relocate_exception_table(offset); + if (res < 0) + goto out; + + /* + * The original .bss has already been cleared, and + * some variables such as command line parameters + * stored to it so make a copy in the new location. + */ + memcpy(RELOCATED(&__bss_start), &__bss_start, bss_length); + + /* The current thread is now within the relocated image */ + __current_thread_info = RELOCATED(__current_thread_info); + + /* Return the new kernel's entry point */ + kernel_entry = RELOCATED(start_kernel); + + update_kaslr_offset(&__kaslr_offset, offset); + } +out: + return kernel_entry; +} + +/* + * Show relocation information on panic. + */ +void show_kernel_relocation(const char *level) +{ + if (__kaslr_offset > 0) { + printk(level); + pr_cont("Kernel relocated by 0x%pK\n", (void *)__kaslr_offset); + pr_cont(" .text @ 0x%pK\n", _text); + pr_cont(" .data @ 0x%pK\n", _sdata); + pr_cont(" .bss @ 0x%pK\n", __bss_start); + } +} + +static int kernel_location_notifier_fn(struct notifier_block *self, + unsigned long v, void *p) +{ + show_kernel_relocation(KERN_EMERG); + return NOTIFY_DONE; +} + +static struct notifier_block kernel_location_notifier = { + .notifier_call = kernel_location_notifier_fn +}; + +static int __init register_kernel_offset_dumper(void) +{ + atomic_notifier_chain_register(&panic_notifier_list, + &kernel_location_notifier); + return 0; +} +__initcall(register_kernel_offset_dumper); diff --git a/arch/loongarch/kernel/vmlinux.lds.S b/arch/loongarch/kernel/vmlinux.lds.S index 9a919f512d06..8148cb7d7431 100644 --- a/arch/loongarch/kernel/vmlinux.lds.S +++ b/arch/loongarch/kernel/vmlinux.lds.S @@ -74,6 +74,26 @@ SECTIONS PERCPU_SECTION(1 << CONFIG_L1_CACHE_SHIFT) #endif +#ifdef CONFIG_RELOCATABLE + . = ALIGN(4); + + .data.reloc : { + _relocation_start = .; + /* + * Space for relocation table + * This needs to be filled so that the + * relocs tool can overwrite the content. + * An invalid value is left at the start of the + * section to abort relocation if the table + * has not been filled in. + */ + LONG(0xFFFFFFFF); + FILL(0); + . += CONFIG_RELOCATION_TABLE_SIZE - 4; + _relocation_end = .; + } +#endif + /* * Align to 64K in attempt to eliminate holes before the * .bss..swapper_pg_dir section at the start of .bss. This diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c index 54ad86d13784..dd700c587f5d 100644 --- a/scripts/kallsyms.c +++ b/scripts/kallsyms.c @@ -108,7 +108,8 @@ static bool is_ignored_symbol(const char *name, char type) /* Symbol names that begin with the following are ignored.*/ static const char * const ignored_prefixes[] = { "$", /* local symbols for ARM, MIPS, etc. */ - ".LASANPC", /* s390 kasan local symbols */ + "L0", /* LoongArch local symbols */ + ".L", /* LoongArch/S390 local symbols */ "__crc_", /* modversions */ "__efistub_", /* arm64 EFI stub namespace */ "__kvm_nvhe_", /* arm64 non-VHE KVM namespace */ diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 6eded325c837..3e9a8c2843c3 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -99,8 +99,10 @@ vmlinux_link() # The kallsyms linking does not need debug symbols included. if [ "$output" != "${output#.tmp_vmlinux.kallsyms}" ] ; then + if [ -z "CONFIG_LOONGARCH" ] ; then strip_debug=-Wl,--strip-debug fi + fi if [ "${SRCARCH}" != "um" ]; then objects="--whole-archive \ -- Gitee From 1689fa1e6db9755492c268a922a0a99e06750830 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 31 Dec 2020 15:13:33 +0800 Subject: [PATCH 31/59] LoongArch: Add kexec/kdump support Change-Id: I1d19f1c7fa40286b7e96487330c4288a9c258669 Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 40 +++++ arch/loongarch/include/asm/kexec.h | 52 ++++++ arch/loongarch/kernel/Makefile | 3 + arch/loongarch/kernel/crash.c | 105 ++++++++++++ arch/loongarch/kernel/crash_dump.c | 45 +++++ arch/loongarch/kernel/machine_kexec.c | 212 ++++++++++++++++++++++++ arch/loongarch/kernel/relocate_kernel.S | 212 ++++++++++++++++++++++++ arch/loongarch/kernel/setup.c | 176 ++++++++++++++++++++ arch/loongarch/kernel/traps.c | 4 + arch/loongarch/loongson64/reset.c | 106 ++++++++++++ 10 files changed, 955 insertions(+) create mode 100644 arch/loongarch/include/asm/kexec.h create mode 100644 arch/loongarch/kernel/crash.c create mode 100644 arch/loongarch/kernel/crash_dump.c create mode 100644 arch/loongarch/kernel/machine_kexec.c create mode 100644 arch/loongarch/kernel/relocate_kernel.S diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 2d6f842e197c..9bc93eb9ce81 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -583,6 +583,46 @@ config NR_CPUS source "kernel/Kconfig.hz" +config KEXEC + bool "Kexec system call" + select KEXEC_CORE + help + kexec is a system call that implements the ability to shutdown your + current kernel, and to start another kernel. It is like a reboot + but it is independent of the system firmware. And like a reboot + you can start any kernel with it, not just Linux. + + The name comes from the similarity to the exec system call. + + It is an ongoing process to be certain the hardware in a machine + is properly shutdown, so do not be surprised if this code does not + initially work for you. As of this writing the exact hardware + interface is strongly in flux, so no good recommendation can be + made. + +config CRASH_DUMP + bool "Kernel crash dumps" + help + Generate crash dump after being started by kexec. + This should be normally only set in special crash dump kernels + which are loaded in the main kernel with kexec-tools into + a specially reserved region and then later executed after + a crash by kdump/kexec. The crash dump kernel must be compiled + to a memory address not used by the main kernel or firmware using + PHYSICAL_START. + +config PHYSICAL_START + hex "Physical address where the kernel is loaded" + default "0xa4000000" if 32BIT + default "0x9000000004000000" if 64BIT + depends on CRASH_DUMP + help + This gives the KPRANGE1 or XKPRANGE address where the kernel is loaded. + If you plan to use kernel for capturing the crash dump change + this value to start of the reserved region (the "X" value as + specified in the "crashkernel=YM@XM" command line boot parameter + passed to the panic-ed kernel). + config SECCOMP bool "Enable seccomp to safely compute untrusted bytecode" depends on PROC_FS diff --git a/arch/loongarch/include/asm/kexec.h b/arch/loongarch/include/asm/kexec.h new file mode 100644 index 000000000000..e3986500d7c2 --- /dev/null +++ b/arch/loongarch/include/asm/kexec.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * kexec.h for kexec + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ + +#ifndef _ASM_KEXEC_H +#define _ASM_KEXEC_H + +#include + +/* Maximum physical address we can use pages from */ +#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL) +/* Maximum address we can reach in physical address mode */ +#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL) + /* Maximum address we can use for the control code buffer */ +#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL) + +/* Reserve 3*4096 bytes for board-specific info */ +#define KEXEC_CONTROL_PAGE_SIZE (4096 + 3*4096) + +/* The native architecture */ +#define KEXEC_ARCH KEXEC_ARCH_LOONGARCH +#define MAX_NOTE_BYTES 1024 + +static inline void crash_setup_regs(struct pt_regs *newregs, + struct pt_regs *oldregs) +{ + if (oldregs) + memcpy(newregs, oldregs, sizeof(*newregs)); + else + prepare_frametrace(newregs); +} + +#ifdef CONFIG_KEXEC +struct kimage; +extern unsigned long kexec_args[4]; +extern const size_t relocate_new_kernel_size; +extern int (*_machine_kexec_prepare)(struct kimage *); +extern void (*_machine_kexec_shutdown)(void); +extern void (*_machine_crash_shutdown)(struct pt_regs *regs); +extern void default_machine_crash_shutdown(struct pt_regs *regs); +extern void kexec_reboot(void); +#ifdef CONFIG_SMP +extern const unsigned char kexec_smp_wait[]; +extern unsigned long secondary_kexec_args[4]; +extern atomic_t kexec_ready_to_reboot; +extern void (*_crash_smp_send_stop)(void); +#endif +#endif + +#endif /* !_ASM_KEXEC_H */ diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index 97ccc933bf51..57b670f4a0d5 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -25,6 +25,9 @@ obj-$(CONFIG_MAGIC_SYSRQ) += sysrq.o obj-$(CONFIG_RELOCATABLE) += relocate.o +obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o +obj-$(CONFIG_CRASH_DUMP) += crash_dump.o + obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o obj-$(CONFIG_UNWINDER_PROLOGUE) += unwind_prologue.o diff --git a/arch/loongarch/kernel/crash.c b/arch/loongarch/kernel/crash.c new file mode 100644 index 000000000000..a515ef5f415f --- /dev/null +++ b/arch/loongarch/kernel/crash.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* This keeps a track of which one is crashing cpu. */ +static int crashing_cpu = -1; +static cpumask_t cpus_in_crash = CPU_MASK_NONE; + +#ifdef CONFIG_SMP +static void crash_shutdown_secondary(void *passed_regs) +{ + struct pt_regs *regs = passed_regs; + int cpu = smp_processor_id(); + + /* + * If we are passed registers, use those. Otherwise get the + * regs from the last interrupt, which should be correct, as + * we are in an interrupt. But if the regs are not there, + * pull them from the top of the stack. They are probably + * wrong, but we need something to keep from crashing again. + */ + if (!regs) + regs = get_irq_regs(); + if (!regs) + regs = task_pt_regs(current); + + if (!cpu_online(cpu)) + return; + + /* We won't be sent IPIs any more. */ + set_cpu_online(cpu, false); + + local_irq_disable(); + if (!cpumask_test_cpu(cpu, &cpus_in_crash)) + crash_save_cpu(regs, cpu); + cpumask_set_cpu(cpu, &cpus_in_crash); + + while (!atomic_read(&kexec_ready_to_reboot)) + cpu_relax(); + + kexec_reboot(); + + unreachable(); +} + +static void crash_kexec_prepare_cpus(void) +{ + static int cpus_stopped; + unsigned int msecs; + unsigned int ncpus; + + if (cpus_stopped) + return; + + ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */ + + smp_call_function(crash_shutdown_secondary, NULL, 0); + smp_wmb(); + + /* + * The crash CPU sends an IPI and wait for other CPUs to + * respond. Delay of at least 10 seconds. + */ + pr_emerg("Sending IPI to other cpus...\n"); + msecs = 10000; + while ((cpumask_weight(&cpus_in_crash) < ncpus) && (--msecs > 0)) { + cpu_relax(); + mdelay(1); + } + + cpus_stopped = 1; +} + +/* Override the weak function in kernel/panic.c */ +void crash_smp_send_stop(void) +{ + if (_crash_smp_send_stop) + _crash_smp_send_stop(); + + crash_kexec_prepare_cpus(); +} + +#else /* !defined(CONFIG_SMP) */ +static void crash_kexec_prepare_cpus(void) {} +#endif /* !defined(CONFIG_SMP) */ + +void default_machine_crash_shutdown(struct pt_regs *regs) +{ + local_irq_disable(); + crashing_cpu = smp_processor_id(); + crash_save_cpu(regs, crashing_cpu); + crash_kexec_prepare_cpus(); + cpumask_set_cpu(crashing_cpu, &cpus_in_crash); +} diff --git a/arch/loongarch/kernel/crash_dump.c b/arch/loongarch/kernel/crash_dump.c new file mode 100644 index 000000000000..0f3fac5b0777 --- /dev/null +++ b/arch/loongarch/kernel/crash_dump.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#include +#include + +/* + * copy_oldmem_page - copy one page from "oldmem" + * @pfn: page frame number to be copied + * @buf: target memory address for the copy; this can be in kernel address + * space or user address space (see @userbuf) + * @csize: number of bytes to copy + * @offset: offset in bytes into the page (based on pfn) to begin the copy + * @userbuf: if set, @buf is in user address space, use copy_to_user(), + * otherwise @buf is in kernel address space, use memcpy(). + * + * Copy a page from "oldmem". For this page, there is no pte mapped + * in the current kernel. + */ +ssize_t copy_oldmem_page(unsigned long pfn, char *buf, + size_t csize, unsigned long offset, int userbuf) +{ + void *vaddr; + + if (!csize) + return 0; + + vaddr = memremap(__pfn_to_phys(pfn), PAGE_SIZE, MEMREMAP_WB); + if (!vaddr) + return -ENOMEM; + + if (!userbuf) { + memcpy(buf, vaddr + offset, csize); + } else { + if (copy_to_user(buf, vaddr + offset, csize)) { + memunmap(vaddr); + csize = -EFAULT; + } + } + + memunmap(vaddr); + + return csize; +} diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c new file mode 100644 index 000000000000..9c627eb83008 --- /dev/null +++ b/arch/loongarch/kernel/machine_kexec.c @@ -0,0 +1,212 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * machine_kexec.c for kexec + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include + +#include +#include + +extern const unsigned char relocate_new_kernel[]; +extern const size_t relocate_new_kernel_size; + +extern unsigned long kexec_start_address; +extern unsigned long kexec_indirection_page; + +static unsigned long reboot_code_buffer; +int (*_machine_kexec_prepare)(struct kimage *) = NULL; +void (*_machine_kexec_shutdown)(void) = NULL; +void (*_machine_crash_shutdown)(struct pt_regs *regs) = NULL; +#ifdef CONFIG_SMP +void (*relocated_kexec_smp_wait) (void *); +void (*_crash_smp_send_stop)(void) = NULL; +atomic_t kexec_ready_to_reboot = ATOMIC_INIT(0); +#endif + +static void kexec_image_info(const struct kimage *kimage) +{ + unsigned long i; + + pr_debug("kexec kimage info:\n"); + pr_debug(" type: %d\n", kimage->type); + pr_debug(" start: %lx\n", kimage->start); + pr_debug(" head: %lx\n", kimage->head); + pr_debug(" nr_segments: %lu\n", kimage->nr_segments); + + for (i = 0; i < kimage->nr_segments; i++) { + pr_debug(" segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n", + i, + kimage->segment[i].mem, + kimage->segment[i].mem + kimage->segment[i].memsz, + (unsigned long)kimage->segment[i].memsz, + (unsigned long)kimage->segment[i].memsz / PAGE_SIZE); + } +} + +int +machine_kexec_prepare(struct kimage *kimage) +{ + kexec_image_info(kimage); + + if (_machine_kexec_prepare) + return _machine_kexec_prepare(kimage); + + return 0; +} + +void +machine_kexec_cleanup(struct kimage *kimage) +{ +} + +#ifdef CONFIG_SMP +static void kexec_shutdown_secondary(void *param) +{ + int cpu = smp_processor_id(); + + if (!cpu_online(cpu)) + return; + + /* We won't be sent IPIs any more. */ + set_cpu_online(cpu, false); + + local_irq_disable(); + while (!atomic_read(&kexec_ready_to_reboot)) + cpu_relax(); + + kexec_reboot(); + + unreachable(); +} +#endif + +void +machine_shutdown(void) +{ + if (_machine_kexec_shutdown) + _machine_kexec_shutdown(); + +#ifdef CONFIG_SMP + smp_call_function(kexec_shutdown_secondary, NULL, 0); + + while (num_online_cpus() > 1) { + cpu_relax(); + mdelay(1); + } +#endif +} + +void +machine_crash_shutdown(struct pt_regs *regs) +{ + if (_machine_crash_shutdown) + _machine_crash_shutdown(regs); + else + default_machine_crash_shutdown(regs); +} + +void kexec_reboot(void) +{ + void (*do_kexec)(void) __noreturn; + + /* + * We know we were online, and there will be no incoming IPIs at + * this point. Mark online again before rebooting so that the crash + * analysis tool will see us correctly. + */ + set_cpu_online(smp_processor_id(), true); + + /* Ensure remote CPUs observe that we're online before rebooting. */ + smp_mb(); + +#ifdef CONFIG_SMP + if (smp_processor_id() > 0) { + /* + * Instead of cpu_relax() or wait, this is needed for kexec + * smp reboot. Kdump usually doesn't require an smp new + * kernel, but kexec may do. + */ + local_flush_icache_range((unsigned long)relocated_kexec_smp_wait, + reboot_code_buffer + relocate_new_kernel_size); + + relocated_kexec_smp_wait(NULL); + + unreachable(); + } +#endif + + /* + * Make sure we get correct instructions written by the + * machine_kexec() CPU. + */ + local_flush_icache_range(reboot_code_buffer, + reboot_code_buffer + relocate_new_kernel_size); + + do_kexec = (void *)reboot_code_buffer; + do_kexec(); + + unreachable(); +} + +void +machine_kexec(struct kimage *image) +{ + unsigned long entry; + unsigned long *ptr; + + reboot_code_buffer = + (unsigned long)page_address(image->control_code_page); + + kexec_start_address = + (unsigned long) phys_to_virt(image->start); + + if (image->type == KEXEC_TYPE_DEFAULT) { + kexec_indirection_page = + (unsigned long) phys_to_virt(image->head & PAGE_MASK); + } else { + kexec_indirection_page = (unsigned long)&image->head; + } + + memcpy((void *)reboot_code_buffer, relocate_new_kernel, + relocate_new_kernel_size); + + /* + * The generic kexec code builds a page list with physical + * addresses. they are directly accessible through KPRANGEx + * (or XKPRANGE if on 64bit system), hence the + * phys_to_virt() call. + */ + for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); + ptr = (entry & IND_INDIRECTION) ? + phys_to_virt(entry & PAGE_MASK) : ptr + 1) { + if (*ptr & IND_SOURCE || *ptr & IND_INDIRECTION || + *ptr & IND_DESTINATION) + *ptr = (unsigned long) phys_to_virt(*ptr); + } + + /* Mark offline BEFORE disabling local irq. */ + set_cpu_online(smp_processor_id(), false); + + /* + * we do not want to be bothered. + */ + local_irq_disable(); + + printk("Will call new kernel at %08lx\n", image->start); + printk("Bye ...\n"); + /* Make reboot code buffer available to the boot CPU. */ + flush_cache_all(); +#ifdef CONFIG_SMP + /* All secondary cpus now may jump to kexec_wait cycle */ + relocated_kexec_smp_wait = reboot_code_buffer + + (void *)(kexec_smp_wait - relocate_new_kernel); + smp_wmb(); + atomic_set(&kexec_ready_to_reboot, 1); +#endif + kexec_reboot(); +} diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S new file mode 100644 index 000000000000..42f32bf484e0 --- /dev/null +++ b/arch/loongarch/kernel/relocate_kernel.S @@ -0,0 +1,212 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * relocate_kernel.S for kexec + * Created by on Thu Oct 12 17:49:57 2006 + * Copyright (C) 2020 Loongson Technology Co., Ltd. + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include +#include +#include +#include +#include +#include + +SYM_CODE_START(relocate_new_kernel) + la t0, arg0 + PTR_L a0, t0, 0 + la t0, arg1 + PTR_L a1, t0, 0 + la t0, arg2 + PTR_L a2, t0, 0 + la t0, arg3 + PTR_L a3, t0, 0 + + la t0, kexec_indirection_page + PTR_L s0, t0, 0 + la t0, kexec_start_address + PTR_L s1, t0, 0 + +process_entry: + PTR_L s2, s0, 0 + PTR_ADDIU s0, s0, SZREG + + /* + * In case of a kdump/crash kernel, the indirection page is not + * populated as the kernel is directly copied to a reserved location + */ + beqz s2, done + + /* destination page */ + andi s3, s2, 0x1 + beq s3, zero, 1f + li.w t0, ~0x1 + and s4, s2, t0 /* store destination addr in s4 */ + b process_entry + +1: + /* indirection page, update s0 */ + andi s3, s2, 0x2 + beq s3, zero, 1f + li.w t0, ~0x2 + and s0, s2, t0 + b process_entry + +1: + /* done page */ + andi s3, s2, 0x4 + beq s3, zero, 1f + b done +1: + /* source page */ + andi s3, s2, 0x8 + beq s3, zero, process_entry + li.w t0, ~0x8 + and s2, s2, t0 + li.w s6, (1 << _PAGE_SHIFT) / SZREG + +copy_word: + /* copy page word by word */ + REG_L s5, s2, 0 + REG_S s5, s4, 0 + PTR_ADDIU s4, s4, SZREG + PTR_ADDIU s2, s2, SZREG + LONG_ADDIU s6, s6, -1 + beq s6, zero, process_entry + b copy_word + b process_entry + +done: +#ifdef CONFIG_SMP + /* + * kexec_flag reset is signal to other CPUs what kernel + * was moved to it's location. Note - we need relocated + * address of kexec_flag. + */ + bl 1f + 1: move t1,ra; + la t2,1b + la t0,kexec_flag + PTR_SUBU t0,t0,t2; + PTR_ADDU t0,t1,t0; + LONG_S zero,t0,0 +#endif + + dbar 0 + /* jump to kexec_start_address */ + jr s1 +SYM_CODE_END(relocate_new_kernel) + +#ifdef CONFIG_SMP +/* + * Other CPUs should wait until code is relocated and + * then start at entry (?) point. + */ +SYM_CODE_START(kexec_smp_wait) + la t0, s_arg0 + PTR_L a0, t0, 0 + la t0, s_arg1 + PTR_L a1, t0, 0 + la t0, s_arg2 + PTR_L a2, t0, 0 + la t0, s_arg3 + PTR_L a3, t0, 0 + la t0, kexec_start_address + PTR_L s1, t0, 0 + + /* + * Non-relocated address works for args and kexec_start_address (old + * kernel is not overwritten). But we need relocated address of + * kexec_flag. + */ + bl 1f +1: move t1,ra; + la t2,1b + la t0,kexec_flag + PTR_SUBU t0,t0,t2; + PTR_ADDU t0,t1,t0; + +1: LONG_L s0, t0, 0 + bne s0, zero,1b + + dbar 0 + +#ifdef CONFIG_CPU_LOONGSON64 + /* + * s1:initfn + * a0:base t1:cpuid t2:node t3:core t4:count + */ + csrrd t1, LOONGARCH_CSR_CPUID + andi t1, t1, CSR_CPUID_COREID + andi t3, t1, 0x3 + slli.w t3, t3, 8 /* get core id */ + or a0, a0, t3 + andi t2, t1, 0x3c + slli.d t2, t2, 42 /* get node id */ + or a0, a0, t2 +1: li.w t4, 0x100 /* wait for init loop */ +2: addi.w t4, t4, -1 /* limit mailbox access */ + bnez t4, 2b + ld.w s1, a0, 0x20 /* check PC as an indicator */ + beqz s1, 1b + ld.d s1, a0, 0x20 /* get PC via mailbox */ + ld.d sp, a0, 0x28 /* get SP via mailbox */ + ld.d tp, a0, 0x30 /* get TP via mailbox */ + ld.d a1, a0, 0x38 +#endif + + jr s1 /* jump to initial PC */ +SYM_CODE_END(kexec_smp_wait) +#endif + +#ifdef __loongarch64 + /* all PTR's must be aligned to 8 byte in 64-bit mode */ + .align 3 +#endif + +/* + * All parameters to new kernel are passed in registers a0-a3. + * kexec_args[0..3] are used to prepare register values. + */ +SYM_DATA_START(kexec_args) +arg0: PTR 0x0 +arg1: PTR 0x0 +arg2: PTR 0x0 +arg3: PTR 0x0 +SYM_DATA_END(kexec_args) + +#ifdef CONFIG_SMP +/* + * Secondary CPUs may have different kernel parameters in + * their registers a0-a3. secondary_kexec_args[0..3] are used + * to prepare register values. + */ +SYM_DATA_START(secondary_kexec_args) +s_arg0: PTR 0x0 +s_arg1: PTR 0x0 +s_arg2: PTR 0x0 +s_arg3: PTR 0x0 +SYM_DATA_END(secondary_kexec_args) + +SYM_DATA_START(kexec_flag) + LONG 0x1 +SYM_DATA_END(kexec_flag) + +#endif + +SYM_DATA_START(kexec_start_address) + PTR 0x0 +SYM_DATA_END(kexec_start_address) + +SYM_DATA_START(kexec_indirection_page) + PTR 0 +SYM_DATA_END(kexec_indirection_page) + +relocate_new_kernel_end: + +SYM_DATA_START(relocate_new_kernel_size) + PTR relocate_new_kernel_end - relocate_new_kernel +SYM_DATA_END(relocate_new_kernel_size) diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index c849dc8b7907..bbae18d8a39d 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -11,9 +11,11 @@ #include #include #include +#include #include #include #include +#include #include #include @@ -36,6 +38,10 @@ EXPORT_SYMBOL(cpu_data); struct screen_info screen_info; #endif +#ifdef CONFIG_CRASH_DUMP +static phys_addr_t crashmem_start, crashmem_size; +#endif + /* * Setup information * @@ -197,6 +203,13 @@ static int __init early_parse_mem(char *p) memblock_add(start, size); +#ifdef CONFIG_CRASH_DUMP + if (start && size) { + crashmem_start = start; + crashmem_size = size; + } +#endif + return 0; } early_param("mem", early_parse_mem); @@ -236,6 +249,146 @@ static int __init early_parse_memmap(char *p) } early_param("memmap", early_parse_memmap); +#ifdef CONFIG_PROC_VMCORE +unsigned long setup_elfcorehdr, setup_elfcorehdr_size; +static int __init early_parse_elfcorehdr(char *p) +{ + u64 i; + phys_addr_t start, end; + + setup_elfcorehdr = memparse(p, &p); + + for_each_mem_range(i, &start, &end) { + if (setup_elfcorehdr >= start && setup_elfcorehdr < end) { + /* + * Reserve from the elf core header to the end of + * the memory segment, that should all be kdump + * reserved memory. + */ + setup_elfcorehdr_size = end - setup_elfcorehdr; + break; + } + } + /* + * If we don't find it in the memory map, then we shouldn't + * have to worry about it, as the new kernel won't use it. + */ + return 0; +} +early_param("elfcorehdr", early_parse_elfcorehdr); +#endif + +#ifdef CONFIG_KEXEC + +/* 64M alignment for crash kernel regions */ +#define CRASH_ALIGN SZ_64M +#define CRASH_ADDR_MAX SZ_512M + +static void __init loongarch_parse_crashkernel(void) +{ + unsigned long long total_mem; + unsigned long long crash_size, crash_base; + int ret; + + total_mem = memblock_phys_mem_size(); + ret = parse_crashkernel(boot_command_line, total_mem, + &crash_size, &crash_base); + if (ret != 0 || crash_size <= 0) + return; + + if (crash_base <= 0) { + crash_base = memblock_find_in_range(CRASH_ALIGN, CRASH_ADDR_MAX, + crash_size, CRASH_ALIGN); + if (!crash_base) { + pr_warn("crashkernel reservation failed - No suitable area found.\n"); + return; + } + } else { + unsigned long long start; + + start = memblock_find_in_range(crash_base, crash_base + crash_size, + crash_size, 1); + if (start != crash_base) { + pr_warn("Invalid memory region reserved for crash kernel\n"); + return; + } + } + + crashk_res.start = crash_base; + crashk_res.end = crash_base + crash_size - 1; +} + +static void __init request_crashkernel(struct resource *res) +{ + int ret; + + if (crashk_res.start == crashk_res.end) + return; + + ret = request_resource(res, &crashk_res); + if (!ret) + pr_info("Reserving %ldMB of memory at %ldMB for crashkernel\n", + (unsigned long)((crashk_res.end - + crashk_res.start + 1) >> 20), + (unsigned long)(crashk_res.start >> 20)); +} +#else /* !defined(CONFIG_KEXEC) */ +static void __init loongarch_parse_crashkernel(void) +{ +} + +static void __init request_crashkernel(struct resource *res) +{ +} +#endif /* !defined(CONFIG_KEXEC) */ + +/* Traditionally, LoongArch's contiguous low memory is 256M, so crashkernel=X@Y + * is unable to be large enough in some cases. Thus, if the total memory of a + * node is more than 1GB, we reserve the top 256MB for the capture kernel */ +static void reserve_crashm_region(int node, unsigned long s0, unsigned long e0) +{ +#ifdef CONFIG_KEXEC + if (crashk_res.start == crashk_res.end) + return; + + if ((e0 - s0) <= (SZ_1G >> PAGE_SHIFT)) + return; + + s0 = e0 - (SZ_256M >> PAGE_SHIFT); + + memblock_reserve(PFN_PHYS(s0), (e0 - s0) << PAGE_SHIFT); +#endif +} + +/* + * After the kdump operation is performed to enter the capture kernel, the + * memory area used by the previous production kernel should be reserved to + * avoid destroy to the captured data. + */ +static void reserve_oldmem_region(int node, unsigned long s0, unsigned long e0) +{ +#ifdef CONFIG_CRASH_DUMP + unsigned long s1, e1; + + if (!is_kdump_kernel()) + return; + + if ((e0 - s0) > (SZ_1G >> PAGE_SHIFT)) + e0 = e0 - (SZ_256M >> PAGE_SHIFT); + + /* crashmem_start is crashk_res reserved by primary kernel */ + s1 = PFN_UP(crashmem_start); + e1 = PFN_DOWN(crashmem_start + crashmem_size); + + if (node == 0) { + memblock_reserve(PFN_PHYS(s0), (s1 - s0) << PAGE_SHIFT); + memblock_reserve(PFN_PHYS(e1), (e0 - e1) << PAGE_SHIFT); + } else { + memblock_reserve(PFN_PHYS(s0), (e0 - s0) << PAGE_SHIFT); + } +#endif +} + static void __init check_kernel_sections_mem(void) { phys_addr_t start = __pa_symbol(&_text); @@ -296,6 +449,9 @@ static void __init bootcmdline_init(char **cmdline_p) */ static void __init arch_mem_init(char **cmdline_p) { + unsigned int node; + unsigned long start_pfn, end_pfn; + /* call board setup routine */ plat_mem_setup(); memblock_set_bottom_up(true); @@ -305,6 +461,25 @@ static void __init arch_mem_init(char **cmdline_p) check_kernel_sections_mem(); +#ifdef CONFIG_PROC_VMCORE + if (setup_elfcorehdr && setup_elfcorehdr_size) { + printk(KERN_INFO "kdump reserved memory at %lx-%lx\n", + setup_elfcorehdr, setup_elfcorehdr_size); + memblock_reserve(setup_elfcorehdr, setup_elfcorehdr_size); + } +#endif + + loongarch_parse_crashkernel(); +#ifdef CONFIG_KEXEC + if (crashk_res.start != crashk_res.end) + memblock_reserve(crashk_res.start, resource_size(&crashk_res)); +#endif + for_each_online_node(node) { + get_pfn_range_for_nid(node, &start_pfn, &end_pfn); + reserve_crashm_region(node, start_pfn, end_pfn); + reserve_oldmem_region(node, start_pfn, end_pfn); + } + /* * In order to reduce the possibility of kernel panic when failed to * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate @@ -366,6 +541,7 @@ static void __init resource_init(void) request_resource(res, &code_resource); request_resource(res, &data_resource); request_resource(res, &bss_resource); + request_crashkernel(res); } } diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index 9cf02d619670..4eff58f2d1df 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -250,6 +251,9 @@ void __noreturn die(const char *str, struct pt_regs *regs) oops_exit(); + if (regs && kexec_should_crash(current)) + crash_kexec(regs); + if (in_interrupt()) panic("Fatal exception in interrupt"); diff --git a/arch/loongarch/loongson64/reset.c b/arch/loongarch/loongson64/reset.c index 08e67368c94e..9b87260e1ab5 100644 --- a/arch/loongarch/loongson64/reset.c +++ b/arch/loongarch/loongson64/reset.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -43,11 +44,116 @@ static void loongson_poweroff(void) } } +#ifdef CONFIG_KEXEC + +/* 0X80000000~0X80200000 is safe */ +#define MAX_ARGS 64 +#define KEXEC_CTRL_CODE TO_CAC(0x100000UL) +#define KEXEC_ARGV_ADDR TO_CAC(0x108000UL) +#define KEXEC_ARGV_SIZE COMMAND_LINE_SIZE + +static int kexec_argc; +static int kdump_argc; +static void *kexec_argv; +static void *kdump_argv; + +static int loongson_kexec_prepare(struct kimage *image) +{ + int i, argc = 0; + unsigned long *argv; + char *str, *ptr, *bootloader = "kexec"; + + /* argv at offset 0, argv[] at offset KEXEC_ARGV_SIZE/2 */ + if (image->type == KEXEC_TYPE_DEFAULT) + argv = (unsigned long *)kexec_argv; + else + argv = (unsigned long *)kdump_argv; + + argv[argc++] = (unsigned long)(KEXEC_ARGV_ADDR + KEXEC_ARGV_SIZE/2); + + for (i = 0; i < image->nr_segments; i++) { + if (!strncmp(bootloader, (char *)image->segment[i].buf, + strlen(bootloader))) { + /* + * convert command line string to array + * of parameters (as bootloader does). + */ + long offt; + str = (char *)argv + KEXEC_ARGV_SIZE/2; + memcpy(str, image->segment[i].buf, KEXEC_ARGV_SIZE/2); + ptr = strchr(str, ' '); + + while (ptr && (argc < MAX_ARGS)) { + *ptr = '\0'; + if (ptr[1] != ' ') { + offt = (long)(ptr - str + 1); + argv[argc] = KEXEC_ARGV_ADDR + KEXEC_ARGV_SIZE/2 + offt; + argc++; + } + ptr = strchr(ptr + 1, ' '); + } + break; + } + } + + if (image->type == KEXEC_TYPE_DEFAULT) + kexec_argc = argc; + else + kdump_argc = argc; + + /* kexec/kdump need a safe page to save reboot_code_buffer */ + image->control_code_page = virt_to_page((void *)KEXEC_CTRL_CODE); + + return 0; +} + +static void loongson_kexec_shutdown(void) +{ +#ifdef CONFIG_SMP + int cpu; + + /* All CPUs go to reboot_code_buffer */ + for_each_possible_cpu(cpu) + if (!cpu_online(cpu)) + cpu_device_up(get_cpu_device(cpu)); + + secondary_kexec_args[0] = TO_UNCAC(0x1fe01000); +#endif + kexec_args[0] = kexec_argc; + kexec_args[1] = fw_arg1; + kexec_args[2] = fw_arg2; + memcpy((void *)fw_arg1, kexec_argv, KEXEC_ARGV_SIZE); +} + +static void loongson_crash_shutdown(struct pt_regs *regs) +{ + default_machine_crash_shutdown(regs); +#ifdef CONFIG_SMP + secondary_kexec_args[0] = TO_UNCAC(0x1fe01000); +#endif + kexec_args[0] = kdump_argc; + kexec_args[1] = fw_arg1; + kexec_args[2] = fw_arg2; + memcpy((void *)fw_arg1, kdump_argv, KEXEC_ARGV_SIZE); +} + +#endif + static int __init loongarch_reboot_setup(void) { pm_restart = loongson_restart; pm_power_off = loongson_poweroff; +#ifdef CONFIG_KEXEC + fw_arg1 = KEXEC_ARGV_ADDR; + kexec_argv = kmalloc(KEXEC_ARGV_SIZE, GFP_KERNEL); + kdump_argv = kmalloc(KEXEC_ARGV_SIZE, GFP_KERNEL); + + _machine_kexec_prepare = loongson_kexec_prepare; + _machine_kexec_shutdown = loongson_kexec_shutdown; + _machine_crash_shutdown = loongson_crash_shutdown; +#endif + return 0; } -- Gitee From 68642f6a45375d565ee392524e24fee700c87441 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 31 Dec 2020 15:13:33 +0800 Subject: [PATCH 32/59] LoongArch: Add perf events support Change-Id: I7f366b14ed3fcb5f1e1ec47425cd486a20ea0e5b Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 2 + arch/loongarch/include/uapi/asm/perf_regs.h | 40 + arch/loongarch/kernel/Makefile | 2 + arch/loongarch/kernel/perf_event.c | 907 ++++++++++++++++++++ arch/loongarch/kernel/perf_regs.c | 57 ++ 5 files changed, 1008 insertions(+) create mode 100644 arch/loongarch/include/uapi/asm/perf_regs.h create mode 100644 arch/loongarch/kernel/perf_event.c create mode 100644 arch/loongarch/kernel/perf_regs.c diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 9bc93eb9ce81..d32d21c547b5 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -87,6 +87,8 @@ config LOONGARCH select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI select HAVE_PERF_EVENTS + select HAVE_PERF_REGS + select HAVE_PERF_USER_STACK_DUMP select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RSEQ select HAVE_SYSCALL_TRACEPOINTS diff --git a/arch/loongarch/include/uapi/asm/perf_regs.h b/arch/loongarch/include/uapi/asm/perf_regs.h new file mode 100644 index 000000000000..9943d418e01d --- /dev/null +++ b/arch/loongarch/include/uapi/asm/perf_regs.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ASM_LOONGARCH_PERF_REGS_H +#define _ASM_LOONGARCH_PERF_REGS_H + +enum perf_event_loongarch_regs { + PERF_REG_LOONGARCH_PC, + PERF_REG_LOONGARCH_R1, + PERF_REG_LOONGARCH_R2, + PERF_REG_LOONGARCH_R3, + PERF_REG_LOONGARCH_R4, + PERF_REG_LOONGARCH_R5, + PERF_REG_LOONGARCH_R6, + PERF_REG_LOONGARCH_R7, + PERF_REG_LOONGARCH_R8, + PERF_REG_LOONGARCH_R9, + PERF_REG_LOONGARCH_R10, + PERF_REG_LOONGARCH_R11, + PERF_REG_LOONGARCH_R12, + PERF_REG_LOONGARCH_R13, + PERF_REG_LOONGARCH_R14, + PERF_REG_LOONGARCH_R15, + PERF_REG_LOONGARCH_R16, + PERF_REG_LOONGARCH_R17, + PERF_REG_LOONGARCH_R18, + PERF_REG_LOONGARCH_R19, + PERF_REG_LOONGARCH_R20, + PERF_REG_LOONGARCH_R21, + PERF_REG_LOONGARCH_R22, + PERF_REG_LOONGARCH_R23, + PERF_REG_LOONGARCH_R24, + PERF_REG_LOONGARCH_R25, + PERF_REG_LOONGARCH_R26, + PERF_REG_LOONGARCH_R27, + PERF_REG_LOONGARCH_R28, + PERF_REG_LOONGARCH_R29, + PERF_REG_LOONGARCH_R30, + PERF_REG_LOONGARCH_R31, + PERF_REG_LOONGARCH_MAX = PERF_REG_LOONGARCH_R31 + 1, +}; +#endif /* _ASM_LOONGARCH_PERF_REGS_H */ diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index 57b670f4a0d5..42ae7c716eca 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -31,4 +31,6 @@ obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o obj-$(CONFIG_UNWINDER_PROLOGUE) += unwind_prologue.o +obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_regs.o + CPPFLAGS_vmlinux.lds := $(KBUILD_CFLAGS) diff --git a/arch/loongarch/kernel/perf_event.c b/arch/loongarch/kernel/perf_event.c new file mode 100644 index 000000000000..e9d84478aaa5 --- /dev/null +++ b/arch/loongarch/kernel/perf_event.c @@ -0,0 +1,907 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Linux performance counter support for LoongArch. + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + * Author: Huacai Chen + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include /* For perf_irq */ +#include + +struct stackframe { + unsigned long fp; + unsigned long ra; +}; + +/* + * Get the return address for a single stackframe and return a pointer to the + * next frame tail. + */ +static unsigned long +user_backtrace(struct perf_callchain_entry_ctx *entry, unsigned long fp) +{ + struct stackframe buftail; + unsigned long err; + unsigned long __user *user_frame_tail = (unsigned long *)(fp - sizeof(struct stackframe)); + + /* Also check accessibility of one struct frame_tail beyond */ + if (!access_ok(user_frame_tail, sizeof(buftail))) + return 0; + + pagefault_disable(); + err = __copy_from_user_inatomic(&buftail, user_frame_tail, sizeof(buftail)); + pagefault_enable(); + + if (err || (unsigned long)user_frame_tail >= buftail.fp) + return 0; + + perf_callchain_store(entry, buftail.ra); + + return buftail.fp; +} + +void perf_callchain_user(struct perf_callchain_entry_ctx *entry, + struct pt_regs *regs) +{ + unsigned long fp; + + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + /* We don't support guest os callchain now */ + return; + } + + perf_callchain_store(entry, regs->csr_era); + + fp = regs->regs[22]; + + while (entry->nr < entry->max_stack && fp && !((unsigned long)fp & 0xf)) + fp = user_backtrace(entry, fp); +} + +void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, + struct pt_regs *regs) +{ + struct unwind_state state; + unsigned long addr; + + for (unwind_start(&state, current, regs); + !unwind_done(&state); unwind_next_frame(&state)) { + addr = unwind_get_return_address(&state); + if (!addr || perf_callchain_store(entry, addr)) + return; + } +} + +#define LOONGARCH_MAX_HWEVENTS 4 + +struct cpu_hw_events { + /* Array of events on this cpu. */ + struct perf_event *events[LOONGARCH_MAX_HWEVENTS]; + + /* + * Set the bit (indexed by the counter number) when the counter + * is used for an event. + */ + unsigned long used_mask[BITS_TO_LONGS(LOONGARCH_MAX_HWEVENTS)]; + + /* + * Software copy of the control register for each performance counter. + * LoongArch CPUs vary in performance counters. They use this differently, + * and even may not use it. + */ + unsigned int saved_ctrl[LOONGARCH_MAX_HWEVENTS]; +}; +static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { + .saved_ctrl = {0}, +}; + +/* The description of LoongArch performance events. */ +struct loongarch_perf_event { + unsigned int event_id; +}; + +static struct loongarch_perf_event raw_event; +static DEFINE_MUTEX(raw_event_mutex); + +#define C(x) PERF_COUNT_HW_CACHE_##x + +struct loongarch_pmu { + u64 max_period; + u64 valid_count; + u64 overflow; + const char *name; + u64 (*read_counter)(unsigned int idx); + void (*write_counter)(unsigned int idx, u64 val); + const struct loongarch_perf_event *(*map_raw_event)(u64 config); + const struct loongarch_perf_event (*general_event_map)[PERF_COUNT_HW_MAX]; + const struct loongarch_perf_event (*cache_event_map) + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX]; + unsigned int num_counters; +}; + +static struct loongarch_pmu loongarch_pmu; + +#define M_PERFCTL_EVENT(event) (event & CSR_PERFCTRL_EVENT) + +#define M_PERFCTL_COUNT_EVENT_WHENEVER (CSR_PERFCTRL_PLV0 | \ + CSR_PERFCTRL_PLV1 | \ + CSR_PERFCTRL_PLV2 | \ + CSR_PERFCTRL_PLV3 | \ + CSR_PERFCTRL_IE) + +#define M_PERFCTL_CONFIG_MASK 0x1f0000 + +#define CNTR_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1)) + +static void resume_local_counters(void); +static void pause_local_counters(void); + +static u64 loongarch_pmu_read_counter(unsigned int idx) +{ + u64 val = -1; + + switch (idx) { + case 0: + val = read_csr_perfcntr0(); + break; + case 1: + val = read_csr_perfcntr1(); + break; + case 2: + val = read_csr_perfcntr2(); + break; + case 3: + val = read_csr_perfcntr3(); + break; + default: + WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx); + return 0; + } + + return val; +} + +static void loongarch_pmu_write_counter(unsigned int idx, u64 val) +{ + switch (idx) { + case 0: + write_csr_perfcntr0(val); + return; + case 1: + write_csr_perfcntr1(val); + return; + case 2: + write_csr_perfcntr2(val); + return; + case 3: + write_csr_perfcntr3(val); + return; + } +} + +static unsigned int loongarch_pmu_read_control(unsigned int idx) +{ + unsigned int val = -1; + + switch (idx) { + case 0: + val = read_csr_perfctrl0(); + break; + case 1: + val = read_csr_perfctrl1(); + break; + case 2: + val = read_csr_perfctrl2(); + break; + case 3: + val = read_csr_perfctrl3(); + break; + default: + WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx); + return 0; + } + + return val; +} + +static void loongarch_pmu_write_control(unsigned int idx, unsigned int val) +{ + switch (idx) { + case 0: + write_csr_perfctrl0(val); + return; + case 1: + write_csr_perfctrl1(val); + return; + case 2: + write_csr_perfctrl2(val); + return; + case 3: + write_csr_perfctrl3(val); + return; + } +} + +static int loongarch_pmu_alloc_counter(struct cpu_hw_events *cpuc, + struct hw_perf_event *hwc) +{ + int i; + + for (i = loongarch_pmu.num_counters - 1; i >= 0; i--) { + if (!test_and_set_bit(i, cpuc->used_mask)) + return i; + } + + return -EAGAIN; +} + +static void loongarch_pmu_enable_event(struct hw_perf_event *evt, int idx) +{ + struct perf_event *event = container_of(evt, struct perf_event, hw); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + unsigned int cpu; + + WARN_ON(idx < 0 || idx >= loongarch_pmu.num_counters); + + cpuc->saved_ctrl[idx] = M_PERFCTL_EVENT(evt->event_base & 0xff) | + (evt->config_base & M_PERFCTL_CONFIG_MASK) | + /* Make sure interrupt enabled. */ + CSR_PERFCTRL_IE; + + cpu = (event->cpu >= 0) ? event->cpu : smp_processor_id(); + + pr_debug("Enabling perf counter for CPU%d\n", cpu); + /* + * We do not actually let the counter run. Leave it until start(). + */ +} + +static void loongarch_pmu_disable_event(int idx) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + unsigned long flags; + + WARN_ON(idx < 0 || idx >= loongarch_pmu.num_counters); + + local_irq_save(flags); + cpuc->saved_ctrl[idx] = loongarch_pmu_read_control(idx) & + ~M_PERFCTL_COUNT_EVENT_WHENEVER; + loongarch_pmu_write_control(idx, cpuc->saved_ctrl[idx]); + local_irq_restore(flags); +} + +static int loongarch_pmu_event_set_period(struct perf_event *event, + struct hw_perf_event *hwc, + int idx) +{ + u64 left = local64_read(&hwc->period_left); + u64 period = hwc->sample_period; + int ret = 0; + + if (unlikely((left + period) & (1ULL << 63))) { + /* left underflowed by more than period. */ + left = period; + local64_set(&hwc->period_left, left); + hwc->last_period = period; + ret = 1; + } else if (unlikely((left + period) <= period)) { + /* left underflowed by less than period. */ + left += period; + local64_set(&hwc->period_left, left); + hwc->last_period = period; + ret = 1; + } + + if (left > loongarch_pmu.max_period) { + left = loongarch_pmu.max_period; + local64_set(&hwc->period_left, left); + } + + local64_set(&hwc->prev_count, loongarch_pmu.overflow - left); + + loongarch_pmu.write_counter(idx, loongarch_pmu.overflow - left); + + perf_event_update_userpage(event); + + return ret; +} + +static void loongarch_pmu_event_update(struct perf_event *event, + struct hw_perf_event *hwc, + int idx) +{ + u64 prev_raw_count, new_raw_count; + u64 delta; + +again: + prev_raw_count = local64_read(&hwc->prev_count); + new_raw_count = loongarch_pmu.read_counter(idx); + + if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, + new_raw_count) != prev_raw_count) + goto again; + + delta = new_raw_count - prev_raw_count; + + local64_add(delta, &event->count); + local64_sub(delta, &hwc->period_left); +} + +static void loongarch_pmu_start(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + + if (flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); + + hwc->state = 0; + + /* Set the period for the event. */ + loongarch_pmu_event_set_period(event, hwc, hwc->idx); + + /* Enable the event. */ + loongarch_pmu_enable_event(hwc, hwc->idx); +} + +static void loongarch_pmu_stop(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + + if (!(hwc->state & PERF_HES_STOPPED)) { + /* We are working on a local event. */ + loongarch_pmu_disable_event(hwc->idx); + barrier(); + loongarch_pmu_event_update(event, hwc, hwc->idx); + hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; + } +} + +static int loongarch_pmu_add(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + int idx; + int err = 0; + + perf_pmu_disable(event->pmu); + + /* To look for a free counter for this event. */ + idx = loongarch_pmu_alloc_counter(cpuc, hwc); + if (idx < 0) { + err = idx; + goto out; + } + + /* + * If there is an event in the counter we are going to use then + * make sure it is disabled. + */ + event->hw.idx = idx; + loongarch_pmu_disable_event(idx); + cpuc->events[idx] = event; + + hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + if (flags & PERF_EF_START) + loongarch_pmu_start(event, PERF_EF_RELOAD); + + /* Propagate our changes to the userspace mapping. */ + perf_event_update_userpage(event); + +out: + perf_pmu_enable(event->pmu); + return err; +} + +static void loongarch_pmu_del(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + WARN_ON(idx < 0 || idx >= loongarch_pmu.num_counters); + + loongarch_pmu_stop(event, PERF_EF_UPDATE); + cpuc->events[idx] = NULL; + clear_bit(idx, cpuc->used_mask); + + perf_event_update_userpage(event); +} + +static void loongarch_pmu_read(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + /* Don't read disabled counters! */ + if (hwc->idx < 0) + return; + + loongarch_pmu_event_update(event, hwc, hwc->idx); +} + +static void loongarch_pmu_enable(struct pmu *pmu) +{ + resume_local_counters(); +} + +static void loongarch_pmu_disable(struct pmu *pmu) +{ + pause_local_counters(); +} + +static atomic_t active_events = ATOMIC_INIT(0); +static DEFINE_MUTEX(pmu_reserve_mutex); + +static void reset_counters(void *arg); +static int __hw_perf_event_init(struct perf_event *event); + +static void hw_perf_event_destroy(struct perf_event *event) +{ + if (atomic_dec_and_mutex_lock(&active_events, + &pmu_reserve_mutex)) { + /* + * We must not call the destroy function with interrupts + * disabled. + */ + on_each_cpu(reset_counters, + (void *)(long)loongarch_pmu.num_counters, 1); + mutex_unlock(&pmu_reserve_mutex); + } +} + +/* This is needed by specific irq handlers in perf_event_*.c */ +static void handle_associated_event(struct cpu_hw_events *cpuc, + int idx, struct perf_sample_data *data, + struct pt_regs *regs) +{ + struct perf_event *event = cpuc->events[idx]; + struct hw_perf_event *hwc = &event->hw; + + loongarch_pmu_event_update(event, hwc, idx); + data->period = event->hw.last_period; + if (!loongarch_pmu_event_set_period(event, hwc, idx)) + return; + + if (perf_event_overflow(event, data, regs)) + loongarch_pmu_disable_event(idx); +} + +static irqreturn_t pmu_handle_irq(int irq, void *dev) +{ + int handled = IRQ_NONE; + unsigned int counters = loongarch_pmu.num_counters; + u64 counter; + struct pt_regs *regs; + struct perf_sample_data data; + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + /* + * First we pause the local counters, so that when we are locked + * here, the counters are all paused. When it gets locked due to + * perf_disable(), the timer interrupt handler will be delayed. + * + * See also loongarch_pmu_start(). + */ + pause_local_counters(); + + regs = get_irq_regs(); + + perf_sample_data_init(&data, 0, 0); + + switch (counters) { +#define HANDLE_COUNTER(n) \ + case n + 1: \ + if (test_bit(n, cpuc->used_mask)) { \ + counter = loongarch_pmu.read_counter(n); \ + if (counter & loongarch_pmu.overflow) { \ + handle_associated_event(cpuc, n, &data, regs); \ + handled = IRQ_HANDLED; \ + } \ + } \ + fallthrough; + HANDLE_COUNTER(3) + HANDLE_COUNTER(2) + HANDLE_COUNTER(1) + HANDLE_COUNTER(0) + } + + resume_local_counters(); + + /* + * Do all the work for the pending perf events. We can do this + * in here because the performance counter interrupt is a regular + * interrupt, not NMI. + */ + if (handled == IRQ_HANDLED) + irq_work_run(); + + return handled; +} + +static int loongarch_pmu_event_init(struct perf_event *event) +{ + int r, irq; + unsigned long flags; + + /* does not support taken branch sampling */ + if (has_branch_stack(event)) + return -EOPNOTSUPP; + + switch (event->attr.type) { + case PERF_TYPE_RAW: + case PERF_TYPE_HARDWARE: + case PERF_TYPE_HW_CACHE: + break; + + default: + /* Init it to avoid false validate_group */ + event->hw.event_base = 0xffffffff; + return -ENOENT; + } + + if (event->cpu >= 0 && !cpu_online(event->cpu)) + return -ENODEV; + + irq = get_pmc_irq(); + flags = IRQF_PERCPU | IRQF_NOBALANCING | IRQF_NO_THREAD | IRQF_NO_SUSPEND | IRQF_SHARED; + if (!atomic_inc_not_zero(&active_events)) { + mutex_lock(&pmu_reserve_mutex); + if (atomic_read(&active_events) == 0) { + r = request_irq(irq, pmu_handle_irq, + flags, "Perf_PMU", &loongarch_pmu); + if (r < 0) { + pr_warn("PMU IRQ request failed\n"); + return -ENODEV; + } + } + atomic_inc(&active_events); + mutex_unlock(&pmu_reserve_mutex); + } + + return __hw_perf_event_init(event); +} + +static struct pmu pmu = { + .pmu_enable = loongarch_pmu_enable, + .pmu_disable = loongarch_pmu_disable, + .event_init = loongarch_pmu_event_init, + .add = loongarch_pmu_add, + .del = loongarch_pmu_del, + .start = loongarch_pmu_start, + .stop = loongarch_pmu_stop, + .read = loongarch_pmu_read, +}; + +static unsigned int loongarch_pmu_perf_event_encode(const struct loongarch_perf_event *pev) +{ + return (pev->event_id & 0xff); +} + +static const struct loongarch_perf_event *loongarch_pmu_map_general_event(int idx) +{ + return &(*loongarch_pmu.general_event_map)[idx]; +} + +static const struct loongarch_perf_event *loongarch_pmu_map_cache_event(u64 config) +{ + unsigned int cache_type, cache_op, cache_result; + const struct loongarch_perf_event *pev; + + cache_type = (config >> 0) & 0xff; + if (cache_type >= PERF_COUNT_HW_CACHE_MAX) + return ERR_PTR(-EINVAL); + + cache_op = (config >> 8) & 0xff; + if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) + return ERR_PTR(-EINVAL); + + cache_result = (config >> 16) & 0xff; + if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) + return ERR_PTR(-EINVAL); + + pev = &((*loongarch_pmu.cache_event_map) + [cache_type] + [cache_op] + [cache_result]); + + return pev; +} + +static int validate_group(struct perf_event *event) +{ + struct perf_event *sibling, *leader = event->group_leader; + struct cpu_hw_events fake_cpuc; + + memset(&fake_cpuc, 0, sizeof(fake_cpuc)); + + if (loongarch_pmu_alloc_counter(&fake_cpuc, &leader->hw) < 0) + return -EINVAL; + + for_each_sibling_event(sibling, leader) { + if (loongarch_pmu_alloc_counter(&fake_cpuc, &sibling->hw) < 0) + return -EINVAL; + } + + if (loongarch_pmu_alloc_counter(&fake_cpuc, &event->hw) < 0) + return -EINVAL; + + return 0; +} + +static int n_counters(void) +{ + /* TODO: Read from csr */ + return 4; +} + +static void reset_counters(void *arg) +{ + int counters = (int)(long)arg; + + switch (counters) { + case 4: + loongarch_pmu_write_control(3, 0); + loongarch_pmu.write_counter(3, 0); + fallthrough; + case 3: + loongarch_pmu_write_control(2, 0); + loongarch_pmu.write_counter(2, 0); + fallthrough; + case 2: + loongarch_pmu_write_control(1, 0); + loongarch_pmu.write_counter(1, 0); + fallthrough; + case 1: + loongarch_pmu_write_control(0, 0); + loongarch_pmu.write_counter(0, 0); + } +} + +static const struct loongarch_perf_event loongson_new_event_map[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = { 0x00 }, + [PERF_COUNT_HW_INSTRUCTIONS] = { 0x01 }, + [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x08 }, + [PERF_COUNT_HW_CACHE_MISSES] = { 0x09 }, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x02 }, + [PERF_COUNT_HW_BRANCH_MISSES] = { 0x03 }, +}; + +static const struct loongarch_perf_event loongson_new_cache_map + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { +[C(L1D)] = { + /* + * Like some other architectures (e.g. ARM), the performance + * counters don't differentiate between read and write + * accesses/misses, so this isn't strictly correct, but it's the + * best we can do. Writes and reads get combined. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x8 }, + [C(RESULT_MISS)] = { 0x9 }, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = { 0xaa }, + [C(RESULT_MISS)] = { 0xa9 }, + }, +}, +[C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x6 }, + [C(RESULT_MISS)] = { 0x7 }, + }, +}, +[C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0xc }, + [C(RESULT_MISS)] = { 0xd }, + }, +}, +[C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_MISS)] = { 0x3b }, + }, +}, +[C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x4 }, + [C(RESULT_MISS)] = { 0x3c }, + }, +}, +[C(BPU)] = { + /* Using the same code for *HW_BRANCH* */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x02 }, + [C(RESULT_MISS)] = { 0x03 }, + }, +}, +}; + +static int __hw_perf_event_init(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + struct hw_perf_event *hwc = &event->hw; + const struct loongarch_perf_event *pev; + int err; + + /* Returning LoongArch event descriptor for generic perf event. */ + if (PERF_TYPE_HARDWARE == event->attr.type) { + if (event->attr.config >= PERF_COUNT_HW_MAX) + return -EINVAL; + pev = loongarch_pmu_map_general_event(event->attr.config); + } else if (PERF_TYPE_HW_CACHE == event->attr.type) { + pev = loongarch_pmu_map_cache_event(event->attr.config); + } else if (PERF_TYPE_RAW == event->attr.type) { + /* We are working on the global raw event. */ + mutex_lock(&raw_event_mutex); + pev = loongarch_pmu.map_raw_event(event->attr.config); + } else { + /* The event type is not (yet) supported. */ + return -EOPNOTSUPP; + } + + if (IS_ERR(pev)) { + if (PERF_TYPE_RAW == event->attr.type) + mutex_unlock(&raw_event_mutex); + return PTR_ERR(pev); + } + + /* + * We allow max flexibility on how each individual counter shared + * by the single CPU operates (the mode exclusion and the range). + */ + hwc->config_base = CSR_PERFCTRL_IE; + + hwc->event_base = loongarch_pmu_perf_event_encode(pev); + if (PERF_TYPE_RAW == event->attr.type) + mutex_unlock(&raw_event_mutex); + + if (!attr->exclude_user) { + hwc->config_base |= CSR_PERFCTRL_PLV3; + hwc->config_base |= CSR_PERFCTRL_PLV2; + } + if (!attr->exclude_kernel) { + hwc->config_base |= CSR_PERFCTRL_PLV0; + } + if (!attr->exclude_hv) { + hwc->config_base |= CSR_PERFCTRL_PLV1; + } + + hwc->config_base &= M_PERFCTL_CONFIG_MASK; + /* + * The event can belong to another cpu. We do not assign a local + * counter for it for now. + */ + hwc->idx = -1; + hwc->config = 0; + + if (!hwc->sample_period) { + hwc->sample_period = loongarch_pmu.max_period; + hwc->last_period = hwc->sample_period; + local64_set(&hwc->period_left, hwc->sample_period); + } + + err = 0; + if (event->group_leader != event) + err = validate_group(event); + + event->destroy = hw_perf_event_destroy; + + if (err) + event->destroy(event); + + return err; +} + +static void pause_local_counters(void) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + int ctr = loongarch_pmu.num_counters; + unsigned long flags; + + local_irq_save(flags); + do { + ctr--; + cpuc->saved_ctrl[ctr] = loongarch_pmu_read_control(ctr); + loongarch_pmu_write_control(ctr, cpuc->saved_ctrl[ctr] & + ~M_PERFCTL_COUNT_EVENT_WHENEVER); + } while (ctr > 0); + local_irq_restore(flags); +} + +static void resume_local_counters(void) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + int ctr = loongarch_pmu.num_counters; + + do { + ctr--; + loongarch_pmu_write_control(ctr, cpuc->saved_ctrl[ctr]); + } while (ctr > 0); +} + +static const struct loongarch_perf_event *loongarch_pmu_map_raw_event(u64 config) +{ + /* currently most cores have 7-bit event numbers */ + unsigned int raw_id = config & 0xff; + unsigned int base_id = raw_id & 0x7f; + + switch (current_cpu_data.cputype) { + case CPU_LOONGSON64: + base_id = raw_id; + break; + default: + panic("Unkown cpu type '0x%x\n", current_cpu_data.cputype); + } + + raw_event.event_id = base_id; + + return &raw_event; +} + +static int __init +init_hw_perf_events(void) +{ + int counters; + + if (!cpu_has_pmp) + return -ENODEV; + + pr_info("Performance counters: "); + + counters = n_counters(); + if (counters == 0) { + pr_cont("No available PMU.\n"); + return -ENODEV; + } + + loongarch_pmu.map_raw_event = loongarch_pmu_map_raw_event; + + switch (current_cpu_data.cputype) { + case CPU_LOONGSON64: + loongarch_pmu.name = "loongarch/loongson64"; + loongarch_pmu.general_event_map = &loongson_new_event_map; + loongarch_pmu.cache_event_map = &loongson_new_cache_map; + break; + default: + pr_cont("Either hardware does not support performance " + "counters, or not yet implemented.\n"); + return -ENODEV; + } + + loongarch_pmu.num_counters = counters; + + loongarch_pmu.max_period = (1ULL << 63) - 1; + loongarch_pmu.valid_count = (1ULL << 63) - 1; + loongarch_pmu.overflow = 1ULL << 63; + loongarch_pmu.read_counter = loongarch_pmu_read_counter; + loongarch_pmu.write_counter = loongarch_pmu_write_counter; + + on_each_cpu(reset_counters, (void *)(long)counters, 1); + + pr_cont("%s PMU enabled, %d %d-bit counters available to each " + "CPU.\n", loongarch_pmu.name, counters, 64); + + perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW); + + return 0; +} +early_initcall(init_hw_perf_events); diff --git a/arch/loongarch/kernel/perf_regs.c b/arch/loongarch/kernel/perf_regs.c new file mode 100644 index 000000000000..b13b1a4475b4 --- /dev/null +++ b/arch/loongarch/kernel/perf_regs.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Some parts derived from x86 version of this file. + * + * Copyright (C) 2013 Cavium, Inc. + * Copyright (C) 2021 Loongson Technology Corporation Limited + */ + +#include + +#include + +#ifdef CONFIG_32BIT +u64 perf_reg_abi(struct task_struct *tsk) +{ + return PERF_SAMPLE_REGS_ABI_32; +} +#else /* Must be CONFIG_64BIT */ +u64 perf_reg_abi(struct task_struct *tsk) +{ + if (test_tsk_thread_flag(tsk, TIF_32BIT_REGS)) + return PERF_SAMPLE_REGS_ABI_32; + else + return PERF_SAMPLE_REGS_ABI_64; +} +#endif /* CONFIG_32BIT */ + +int perf_reg_validate(u64 mask) +{ + if (!mask) + return -EINVAL; + if (mask & ~((1ull << PERF_REG_LOONGARCH_MAX) - 1)) + return -EINVAL; + return 0; +} + +u64 perf_reg_value(struct pt_regs *regs, int idx) +{ + if (WARN_ON_ONCE((u32)idx >= PERF_REG_LOONGARCH_MAX)) + return 0; + + if ((u32)idx == PERF_REG_LOONGARCH_PC) + return regs->csr_era; + + return regs->regs[idx]; +} + +void perf_get_regs_user(struct perf_regs *regs_user, + struct pt_regs *regs) +{ + regs_user->regs = task_pt_regs(current); + regs_user->abi = perf_reg_abi(current); +} -- Gitee From 24ae5a20fe123b4c8f13ea1176af3627b37af6be Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Mon, 21 Mar 2022 16:10:05 +0800 Subject: [PATCH 33/59] LoongArch: Add hardware watchpoint support Change-Id: I85d7e31647b05d252b8169cae9f560a9dfa53b89 Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 3 + arch/loongarch/include/asm/processor.h | 26 + arch/loongarch/include/asm/ptrace.h | 2 + arch/loongarch/include/asm/switch_to.h | 2 + arch/loongarch/include/asm/watch.h | 40 ++ arch/loongarch/include/uapi/asm/ptrace.h | 54 +++ arch/loongarch/kernel/Makefile | 1 + arch/loongarch/kernel/cpu-probe.c | 3 + arch/loongarch/kernel/ptrace.c | 135 ++++++ arch/loongarch/kernel/traps.c | 38 +- arch/loongarch/kernel/watch.c | 573 +++++++++++++++++++++++ 11 files changed, 876 insertions(+), 1 deletion(-) create mode 100644 arch/loongarch/include/asm/watch.h create mode 100644 arch/loongarch/kernel/watch.c diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index d32d21c547b5..899933dfa9df 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -212,6 +212,9 @@ config CPU_SUPPORTS_32BIT_KERNEL bool config CPU_SUPPORTS_64BIT_KERNEL bool +config HARDWARE_WATCHPOINTS + bool + default y menu "Kernel type" diff --git a/arch/loongarch/include/asm/processor.h b/arch/loongarch/include/asm/processor.h index 6625401d8448..07f1c3c194ee 100644 --- a/arch/loongarch/include/asm/processor.h +++ b/arch/loongarch/include/asm/processor.h @@ -88,6 +88,22 @@ struct loongarch_fpu { {0,} \ } +struct loongarch3264_watch_reg_state { + /* The width of addr, mask is 32 in a 32 bit kernel and 64 in a + 64 bit kernel. We use unsigned long as it has the same + property. */ + unsigned long addr[NUM_WATCH_REGS]; + /* Only the mask and IRW bits from watchhi. */ + unsigned long mask[NUM_WATCH_REGS]; + unsigned char irw[NUM_WATCH_REGS]; + unsigned char irwstat[NUM_WATCH_REGS]; + unsigned char irwmask[NUM_WATCH_REGS]; +}; + +union loongarch_watch_reg_state { + struct loongarch3264_watch_reg_state loongarch3264; +}; + #define ARCH_MIN_TASKALIGN 32 struct loongarch_vdso_info; @@ -121,6 +137,12 @@ struct thread_struct { /* Eflags register */ unsigned long eflags; + /* Used by ptrace single_step */ + unsigned long single_step; + + /* Saved watch register state, if available. */ + union loongarch_watch_reg_state watch; + /* Other stuff associated with the thread. */ unsigned long trap_nr; unsigned long error_code; @@ -159,6 +181,10 @@ struct thread_struct { .csr_euen = 0, \ .csr_ecfg = 0, \ .csr_badvaddr = 0, \ + /* \ + * Saved watch register stuff \ + */ \ + .watch = {{{0,},},}, \ /* \ * Other stuff associated with the process \ */ \ diff --git a/arch/loongarch/include/asm/ptrace.h b/arch/loongarch/include/asm/ptrace.h index a42a79ac2a3d..9256305a83f8 100644 --- a/arch/loongarch/include/asm/ptrace.h +++ b/arch/loongarch/include/asm/ptrace.h @@ -151,4 +151,6 @@ static inline void user_stack_pointer_set(struct pt_regs *regs, regs->regs[3] = val; } +#define arch_has_single_step() (1) + #endif /* _ASM_PTRACE_H */ diff --git a/arch/loongarch/include/asm/switch_to.h b/arch/loongarch/include/asm/switch_to.h index 0987ac30475b..5e5d86d05c13 100644 --- a/arch/loongarch/include/asm/switch_to.h +++ b/arch/loongarch/include/asm/switch_to.h @@ -6,6 +6,7 @@ #define _ASM_SWITCH_TO_H #include +#include #include struct task_struct; @@ -34,6 +35,7 @@ extern asmlinkage struct task_struct *__switch_to(struct task_struct *prev, #define switch_to(prev, next, last) \ do { \ lose_fpu_inatomic(1, prev); \ + __process_watch(prev, next); \ (last) = __switch_to(prev, next, task_thread_info(next), \ __builtin_return_address(0), __builtin_frame_address(0)); \ } while (0) diff --git a/arch/loongarch/include/asm/watch.h b/arch/loongarch/include/asm/watch.h new file mode 100644 index 000000000000..804e85abd486 --- /dev/null +++ b/arch/loongarch/include/asm/watch.h @@ -0,0 +1,40 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + * + * Author: Chong Qiao + */ +#ifndef _ASM_WATCH_H +#define _ASM_WATCH_H + +#include + +#include + +void loongarch_install_watch_registers(struct task_struct *t); +void loongarch_update_watch_registers(struct task_struct *t); +void loongarch_clear_prev_watch_registers(struct task_struct *prev); +void loongarch_install_next_watch_registers(struct task_struct *next); +void loongarch_read_watch_registers(struct pt_regs *regs); +void loongarch_clear_watch_registers(void); +void loongarch_probe_watch_registers(struct cpuinfo_loongarch *c); + +unsigned long watch_csrrd(unsigned int reg); +void watch_csrwr(unsigned long val, unsigned int reg); + +#ifdef CONFIG_HARDWARE_WATCHPOINTS +#define __process_watch(prev, next) do { \ + if (test_bit(TIF_LOAD_WATCH, &task_thread_info(prev)->flags)) \ + loongarch_clear_prev_watch_registers(prev); \ + if (test_bit(TIF_LOAD_WATCH, &task_thread_info(next)->flags)) \ + loongarch_install_next_watch_registers(next); \ +} while (0) + +#else +#define __process_watch(prev, next) do {} while (0) +#endif + +#endif /* _ASM_WATCH_H */ diff --git a/arch/loongarch/include/uapi/asm/ptrace.h b/arch/loongarch/include/uapi/asm/ptrace.h index a5870ed6f08f..e2aefbee62b5 100644 --- a/arch/loongarch/include/uapi/asm/ptrace.h +++ b/arch/loongarch/include/uapi/asm/ptrace.h @@ -52,7 +52,61 @@ struct user_lasx_state { uint64_t regs[32*4]; }; +/* Read and write watchpoint registers. */ +#define NUM_WATCH_REGS 16 + +enum pt_watch_style { + pt_watch_style_la32, + pt_watch_style_la64 +}; + +struct la32_watch_regs { + uint32_t addr; + uint32_t mask; + /* irw/irwsta/irwmask I R W bits. + * bit 0 -- 1 if W bit is usable. + * bit 1 -- 1 if R bit is usable. + * bit 2 -- 1 if I bit is usable. + */ + uint8_t irw; + uint8_t irwstat; + uint8_t irwmask; +} __attribute__((aligned(8))); + +struct la64_watch_regs { + uint64_t addr; + uint64_t mask; + /* irw/irwsta/irwmask I R W bits. + * bit 0 -- 1 if W bit is usable. + * bit 1 -- 1 if R bit is usable. + * bit 2 -- 1 if I bit is usable. + */ + uint8_t irw; + uint8_t irwstat; + uint8_t irwmask; +} __attribute__((aligned(8))); + +struct pt_watch_regs { + /* NUM_WATCH_REGS in user gdb's struct pt_watch_regs, set by gdb. */ + int16_t max_valid; + /* The number of valid watch register pairs. */ + int16_t num_valid; + enum pt_watch_style style; + union { + struct la32_watch_regs la32[NUM_WATCH_REGS]; + struct la64_watch_regs la64[NUM_WATCH_REGS]; + }; +}; + #define PTRACE_SYSEMU 0x1f #define PTRACE_SYSEMU_SINGLESTEP 0x20 +#define PTRACE_GET_WATCH_REGS 0xd0 +#define PTRACE_SET_WATCH_REGS 0xd1 + +/* Watch irw/irwmask/irwstat bit definitions */ +#define LA_WATCH_W (1 << 0) +#define LA_WATCH_R (1 << 1) +#define LA_WATCH_I (1 << 2) +#define LA_WATCH_IRW (LA_WATCH_W | LA_WATCH_R | LA_WATCH_I) #endif /* _UAPI_ASM_PTRACE_H */ diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index 42ae7c716eca..936dc2012c9f 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -32,5 +32,6 @@ obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o obj-$(CONFIG_UNWINDER_PROLOGUE) += unwind_prologue.o obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_regs.o +obj-$(CONFIG_HARDWARE_WATCHPOINTS) += watch.o CPPFLAGS_vmlinux.lds := $(KBUILD_CFLAGS) diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c index cba0eeb029c2..eb5b999efdac 100644 --- a/arch/loongarch/kernel/cpu-probe.c +++ b/arch/loongarch/kernel/cpu-probe.c @@ -19,6 +19,7 @@ #include #include #include +#include /* Hardware capabilities */ unsigned int elf_hwcap __read_mostly; @@ -186,6 +187,8 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c) default: pr_warn("Warning: unimplemented tlb type\n"); } + + loongarch_probe_watch_registers(c); } #define MAX_NAME_LEN 32 diff --git a/arch/loongarch/kernel/ptrace.c b/arch/loongarch/kernel/ptrace.c index 01300f5ca88e..47b18edff446 100644 --- a/arch/loongarch/kernel/ptrace.c +++ b/arch/loongarch/kernel/ptrace.c @@ -34,7 +34,9 @@ #include #include #include +#include #include +#include static void init_fp_ctx(struct task_struct *target) { @@ -497,10 +499,112 @@ static inline int write_user(struct task_struct *target, unsigned long addr, return 0; } +static int ptrace_get_watch_regs(struct task_struct *child, + struct pt_watch_regs __user *addr) +{ + enum pt_watch_style style; + int i; + unsigned int cnt; + + if (!cpu_has_watch || boot_cpu_data.watch_reg_use_cnt == 0) + return -EIO; + if (!access_ok(addr, sizeof(struct pt_watch_regs))) + return -EIO; + +#ifdef CONFIG_32BIT + style = pt_watch_style_la32; +#define WATCH_STYLE la32 +#else + style = pt_watch_style_la64; +#define WATCH_STYLE la64 +#endif + + preempt_disable(); + loongarch_update_watch_registers(child); + preempt_enable_no_resched(); + + /* single step use first inst breakpoint */ + if (child->thread.single_step) + child->thread.watch.loongarch3264.irwmask[boot_cpu_data.watch_dreg_count] = 0; + + __get_user(cnt, &addr->max_valid); + cnt = min(boot_cpu_data.watch_reg_use_cnt, cnt); + __put_user(cnt, &addr->num_valid); + __put_user(style, &addr->style); + for (i = 0; i < cnt; i++) { + __put_user(child->thread.watch.loongarch3264.addr[i], + &addr->WATCH_STYLE[i].addr); + __put_user(child->thread.watch.loongarch3264.mask[i], + &addr->WATCH_STYLE[i].mask); + __put_user(child->thread.watch.loongarch3264.irw[i], + &addr->WATCH_STYLE[i].irw); + __put_user(child->thread.watch.loongarch3264.irwstat[i], + &addr->WATCH_STYLE[i].irwstat); + __put_user(child->thread.watch.loongarch3264.irwmask[i], + &addr->WATCH_STYLE[i].irwmask); + } + + return 0; +} + +static int ptrace_set_watch_regs(struct task_struct *child, + struct pt_watch_regs __user *addr) +{ + int i; + unsigned int cnt; + int watch_active = 0; + unsigned long addrt[NUM_WATCH_REGS]; + unsigned long maskt[NUM_WATCH_REGS]; + unsigned char irwt[NUM_WATCH_REGS]; + + if (!cpu_has_watch || boot_cpu_data.watch_reg_use_cnt == 0) + return -EIO; + if (!access_ok(addr, sizeof(struct pt_watch_regs))) + return -EIO; + + __get_user(cnt, &addr->max_valid); + cnt = min(boot_cpu_data.watch_reg_use_cnt, cnt); + /* Check the values. */ + for (i = 0; i < cnt; i++) { + __get_user(addrt[i], &addr->WATCH_STYLE[i].addr); +#ifdef CONFIG_32BIT + if (addrt[i] & __UA_LIMIT) + return -EINVAL; +#else + if (test_tsk_thread_flag(child, TIF_32BIT_ADDR)) { + if (addrt[i] & 0xffffffff80000000UL) + return -EINVAL; + } else { + if (addrt[i] & __UA_LIMIT) + return -EINVAL; + } +#endif + __get_user(maskt[i], &addr->WATCH_STYLE[i].mask); + __get_user(irwt[i], &addr->WATCH_STYLE[i].irw); + } + /* Install them. */ + for (i = 0; i < boot_cpu_data.watch_reg_use_cnt; i++) { + if (irwt[i] & LA_WATCH_IRW) + watch_active = 1; + child->thread.watch.loongarch3264.addr[i] = addrt[i]; + child->thread.watch.loongarch3264.mask[i] = maskt[i]; + child->thread.watch.loongarch3264.irw[i] = irwt[i]; + } + + if (watch_active) + set_tsk_thread_flag(child, TIF_LOAD_WATCH); + else + clear_tsk_thread_flag(child, TIF_LOAD_WATCH); + + return 0; +} + + long arch_ptrace(struct task_struct *child, long request, unsigned long addr, unsigned long data) { int ret; + void __user *addrp = (void __user *) addr; unsigned long __user *datap = (void __user *) data; switch (request) { @@ -512,6 +616,14 @@ long arch_ptrace(struct task_struct *child, long request, ret = write_user(child, addr, data); break; + case PTRACE_GET_WATCH_REGS: + ret = ptrace_get_watch_regs(child, addrp); + break; + + case PTRACE_SET_WATCH_REGS: + ret = ptrace_set_watch_regs(child, addrp); + break; + default: ret = ptrace_request(child, request, addr, data); break; @@ -519,3 +631,26 @@ long arch_ptrace(struct task_struct *child, long request, return ret; } + +void user_enable_single_step(struct task_struct *task) +{ + int i = boot_cpu_data.watch_dreg_count; + struct thread_info *ti = task_thread_info(task); + + task->thread.single_step = task_pt_regs(task)->csr_era; + task->thread.watch.loongarch3264.addr[i] = task_pt_regs(task)->csr_era; + task->thread.watch.loongarch3264.mask[i] = -1ULL; + task->thread.watch.loongarch3264.irw[i] = LA_WATCH_I; + set_ti_thread_flag(ti, TIF_LOAD_WATCH); +} +EXPORT_SYMBOL(user_enable_single_step); + +void user_disable_single_step(struct task_struct *task) +{ + if (task->thread.single_step) { + task->thread.single_step = 0; + clear_tsk_thread_flag(task, TIF_LOAD_WATCH); + + } +} +EXPORT_SYMBOL(user_disable_single_step); diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index 4eff58f2d1df..26dae1774980 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -444,9 +445,44 @@ asmlinkage void noinstr do_bp(struct pt_regs *regs) goto out; } +/* Set CSR.FWPS.skip and CSR.LLBCTL.KLO */ +static inline void watch_reset_skip_and_llsc(void) +{ + unsigned long fwps; + + fwps = csr_readq(LOONGARCH_CSR_FWPS); + csr_writeq(0x10000 | fwps, LOONGARCH_CSR_FWPS); + csr_writel(0x4, LOONGARCH_CSR_LLBCTL); +} + asmlinkage void noinstr do_watch(struct pt_regs *regs) { - pr_warn("Hardware watch point handler not implemented!\n"); + irqentry_state_t state = irqentry_enter(regs); + + if (test_tsk_thread_flag(current, TIF_LOAD_WATCH)) { + if (current->thread.single_step) { + int llbit = (csr_readq(LOONGARCH_CSR_LLBCTL) & 0x1); + unsigned long ip = regs->csr_era; + + if (llbit) { + watch_reset_skip_and_llsc(); + } else if (ip == current->thread.single_step) { + csr_writeq(0x10000 | csr_readq(LOONGARCH_CSR_FWPS), LOONGARCH_CSR_FWPS); + } else { + loongarch_read_watch_registers(regs); + force_sig(SIGTRAP); + } + } else { + loongarch_read_watch_registers(regs); + force_sig(SIGTRAP); + } + } else { + if (notify_die(DIE_TRAP, "Break", regs, 0, + current->thread.trap_nr, SIGTRAP) != NOTIFY_STOP) + loongarch_clear_watch_registers(); + } + + irqentry_exit(regs, state); } asmlinkage void noinstr do_ri(struct pt_regs *regs) diff --git a/arch/loongarch/kernel/watch.c b/arch/loongarch/kernel/watch.c new file mode 100644 index 000000000000..8a9eeee9fe98 --- /dev/null +++ b/arch/loongarch/kernel/watch.c @@ -0,0 +1,573 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + * + * Author: Chong Qiao + */ + +#include + +#include +#include +#include +#include + +unsigned long watch_csrrd(unsigned int reg) +{ + switch (reg) { + case LOONGARCH_CSR_IB0ADDR + 8 * 0: + return csr_readq(LOONGARCH_CSR_IB0ADDR + 8 * 0); + case LOONGARCH_CSR_IB0ADDR + 8 * 1: + return csr_readq(LOONGARCH_CSR_IB0ADDR + 8 * 1); + case LOONGARCH_CSR_IB0ADDR + 8 * 2: + return csr_readq(LOONGARCH_CSR_IB0ADDR + 8 * 2); + case LOONGARCH_CSR_IB0ADDR + 8 * 3: + return csr_readq(LOONGARCH_CSR_IB0ADDR + 8 * 3); + case LOONGARCH_CSR_IB0ADDR + 8 * 4: + return csr_readq(LOONGARCH_CSR_IB0ADDR + 8 * 4); + case LOONGARCH_CSR_IB0ADDR + 8 * 5: + return csr_readq(LOONGARCH_CSR_IB0ADDR + 8 * 5); + case LOONGARCH_CSR_IB0ADDR + 8 * 6: + return csr_readq(LOONGARCH_CSR_IB0ADDR + 8 * 6); + case LOONGARCH_CSR_IB0ADDR + 8 * 7: + return csr_readq(LOONGARCH_CSR_IB0ADDR + 8 * 7); + + case LOONGARCH_CSR_IB0MASK + 8 * 0: + return csr_readq(LOONGARCH_CSR_IB0MASK + 8 * 0); + case LOONGARCH_CSR_IB0MASK + 8 * 1: + return csr_readq(LOONGARCH_CSR_IB0MASK + 8 * 1); + case LOONGARCH_CSR_IB0MASK + 8 * 2: + return csr_readq(LOONGARCH_CSR_IB0MASK + 8 * 2); + case LOONGARCH_CSR_IB0MASK + 8 * 3: + return csr_readq(LOONGARCH_CSR_IB0MASK + 8 * 3); + case LOONGARCH_CSR_IB0MASK + 8 * 4: + return csr_readq(LOONGARCH_CSR_IB0MASK + 8 * 4); + case LOONGARCH_CSR_IB0MASK + 8 * 5: + return csr_readq(LOONGARCH_CSR_IB0MASK + 8 * 5); + case LOONGARCH_CSR_IB0MASK + 8 * 6: + return csr_readq(LOONGARCH_CSR_IB0MASK + 8 * 6); + case LOONGARCH_CSR_IB0MASK + 8 * 7: + return csr_readq(LOONGARCH_CSR_IB0MASK + 8 * 7); + + case LOONGARCH_CSR_IB0ASID + 8 * 0: + return csr_readq(LOONGARCH_CSR_IB0ASID + 8 * 0); + case LOONGARCH_CSR_IB0ASID + 8 * 1: + return csr_readq(LOONGARCH_CSR_IB0ASID + 8 * 1); + case LOONGARCH_CSR_IB0ASID + 8 * 2: + return csr_readq(LOONGARCH_CSR_IB0ASID + 8 * 2); + case LOONGARCH_CSR_IB0ASID + 8 * 3: + return csr_readq(LOONGARCH_CSR_IB0ASID + 8 * 3); + case LOONGARCH_CSR_IB0ASID + 8 * 4: + return csr_readq(LOONGARCH_CSR_IB0ASID + 8 * 4); + case LOONGARCH_CSR_IB0ASID + 8 * 5: + return csr_readq(LOONGARCH_CSR_IB0ASID + 8 * 5); + case LOONGARCH_CSR_IB0ASID + 8 * 6: + return csr_readq(LOONGARCH_CSR_IB0ASID + 8 * 6); + case LOONGARCH_CSR_IB0ASID + 8 * 7: + return csr_readq(LOONGARCH_CSR_IB0ASID + 8 * 7); + + case LOONGARCH_CSR_IB0CTL + 8 * 0: + return csr_readq(LOONGARCH_CSR_IB0CTL + 8 * 0); + case LOONGARCH_CSR_IB0CTL + 8 * 1: + return csr_readq(LOONGARCH_CSR_IB0CTL + 8 * 1); + case LOONGARCH_CSR_IB0CTL + 8 * 2: + return csr_readq(LOONGARCH_CSR_IB0CTL + 8 * 2); + case LOONGARCH_CSR_IB0CTL + 8 * 3: + return csr_readq(LOONGARCH_CSR_IB0CTL + 8 * 3); + case LOONGARCH_CSR_IB0CTL + 8 * 4: + return csr_readq(LOONGARCH_CSR_IB0CTL + 8 * 4); + case LOONGARCH_CSR_IB0CTL + 8 * 5: + return csr_readq(LOONGARCH_CSR_IB0CTL + 8 * 5); + case LOONGARCH_CSR_IB0CTL + 8 * 6: + return csr_readq(LOONGARCH_CSR_IB0CTL + 8 * 6); + case LOONGARCH_CSR_IB0CTL + 8 * 7: + return csr_readq(LOONGARCH_CSR_IB0CTL + 8 * 7); + + case LOONGARCH_CSR_DB0ADDR + 8 * 0: + return csr_readq(LOONGARCH_CSR_DB0ADDR + 8 * 0); + case LOONGARCH_CSR_DB0ADDR + 8 * 1: + return csr_readq(LOONGARCH_CSR_DB0ADDR + 8 * 1); + case LOONGARCH_CSR_DB0ADDR + 8 * 2: + return csr_readq(LOONGARCH_CSR_DB0ADDR + 8 * 2); + case LOONGARCH_CSR_DB0ADDR + 8 * 3: + return csr_readq(LOONGARCH_CSR_DB0ADDR + 8 * 3); + case LOONGARCH_CSR_DB0ADDR + 8 * 4: + return csr_readq(LOONGARCH_CSR_DB0ADDR + 8 * 4); + case LOONGARCH_CSR_DB0ADDR + 8 * 5: + return csr_readq(LOONGARCH_CSR_DB0ADDR + 8 * 5); + case LOONGARCH_CSR_DB0ADDR + 8 * 6: + return csr_readq(LOONGARCH_CSR_DB0ADDR + 8 * 6); + case LOONGARCH_CSR_DB0ADDR + 8 * 7: + return csr_readq(LOONGARCH_CSR_DB0ADDR + 8 * 7); + + case LOONGARCH_CSR_DB0MASK + 8 * 0: + return csr_readq(LOONGARCH_CSR_DB0MASK + 8 * 0); + case LOONGARCH_CSR_DB0MASK + 8 * 1: + return csr_readq(LOONGARCH_CSR_DB0MASK + 8 * 1); + case LOONGARCH_CSR_DB0MASK + 8 * 2: + return csr_readq(LOONGARCH_CSR_DB0MASK + 8 * 2); + case LOONGARCH_CSR_DB0MASK + 8 * 3: + return csr_readq(LOONGARCH_CSR_DB0MASK + 8 * 3); + case LOONGARCH_CSR_DB0MASK + 8 * 4: + return csr_readq(LOONGARCH_CSR_DB0MASK + 8 * 4); + case LOONGARCH_CSR_DB0MASK + 8 * 5: + return csr_readq(LOONGARCH_CSR_DB0MASK + 8 * 5); + case LOONGARCH_CSR_DB0MASK + 8 * 6: + return csr_readq(LOONGARCH_CSR_DB0MASK + 8 * 6); + case LOONGARCH_CSR_DB0MASK + 8 * 7: + return csr_readq(LOONGARCH_CSR_DB0MASK + 8 * 7); + + case LOONGARCH_CSR_DB0ASID + 8 * 0: + return csr_readq(LOONGARCH_CSR_DB0ASID + 8 * 0); + case LOONGARCH_CSR_DB0ASID + 8 * 1: + return csr_readq(LOONGARCH_CSR_DB0ASID + 8 * 1); + case LOONGARCH_CSR_DB0ASID + 8 * 2: + return csr_readq(LOONGARCH_CSR_DB0ASID + 8 * 2); + case LOONGARCH_CSR_DB0ASID + 8 * 3: + return csr_readq(LOONGARCH_CSR_DB0ASID + 8 * 3); + case LOONGARCH_CSR_DB0ASID + 8 * 4: + return csr_readq(LOONGARCH_CSR_DB0ASID + 8 * 4); + case LOONGARCH_CSR_DB0ASID + 8 * 5: + return csr_readq(LOONGARCH_CSR_DB0ASID + 8 * 5); + case LOONGARCH_CSR_DB0ASID + 8 * 6: + return csr_readq(LOONGARCH_CSR_DB0ASID + 8 * 6); + case LOONGARCH_CSR_DB0ASID + 8 * 7: + return csr_readq(LOONGARCH_CSR_DB0ASID + 8 * 7); + + case LOONGARCH_CSR_DB0CTL + 8 * 0: + return csr_readq(LOONGARCH_CSR_DB0CTL + 8 * 0); + case LOONGARCH_CSR_DB0CTL + 8 * 1: + return csr_readq(LOONGARCH_CSR_DB0CTL + 8 * 1); + case LOONGARCH_CSR_DB0CTL + 8 * 2: + return csr_readq(LOONGARCH_CSR_DB0CTL + 8 * 2); + case LOONGARCH_CSR_DB0CTL + 8 * 3: + return csr_readq(LOONGARCH_CSR_DB0CTL + 8 * 3); + case LOONGARCH_CSR_DB0CTL + 8 * 4: + return csr_readq(LOONGARCH_CSR_DB0CTL + 8 * 4); + case LOONGARCH_CSR_DB0CTL + 8 * 5: + return csr_readq(LOONGARCH_CSR_DB0CTL + 8 * 5); + case LOONGARCH_CSR_DB0CTL + 8 * 6: + return csr_readq(LOONGARCH_CSR_DB0CTL + 8 * 6); + case LOONGARCH_CSR_DB0CTL + 8 * 7: + return csr_readq(LOONGARCH_CSR_DB0CTL + 8 * 7); + + case LOONGARCH_CSR_FWPS: + return csr_readq(LOONGARCH_CSR_FWPS); + case LOONGARCH_CSR_MWPS: + return csr_readq(LOONGARCH_CSR_MWPS); + case LOONGARCH_CSR_FWPC: + return csr_readq(LOONGARCH_CSR_FWPC); + case LOONGARCH_CSR_MWPC: + return csr_readq(LOONGARCH_CSR_MWPC); + default: + printk("read watch register number error %d\n", reg); + } + return 0; +} +EXPORT_SYMBOL(watch_csrrd); + +void watch_csrwr(unsigned long val, unsigned int reg) +{ + switch (reg) { + case LOONGARCH_CSR_IB0ADDR + 8 * 0: + csr_writeq(val, LOONGARCH_CSR_IB0ADDR + 8 * 0); + break; + case LOONGARCH_CSR_IB0ADDR + 8 * 1: + csr_writeq(val, LOONGARCH_CSR_IB0ADDR + 8 * 1); + break; + case LOONGARCH_CSR_IB0ADDR + 8 * 2: + csr_writeq(val, LOONGARCH_CSR_IB0ADDR + 8 * 2); + break; + case LOONGARCH_CSR_IB0ADDR + 8 * 3: + csr_writeq(val, LOONGARCH_CSR_IB0ADDR + 8 * 3); + break; + case LOONGARCH_CSR_IB0ADDR + 8 * 4: + csr_writeq(val, LOONGARCH_CSR_IB0ADDR + 8 * 4); + break; + case LOONGARCH_CSR_IB0ADDR + 8 * 5: + csr_writeq(val, LOONGARCH_CSR_IB0ADDR + 8 * 5); + break; + case LOONGARCH_CSR_IB0ADDR + 8 * 6: + csr_writeq(val, LOONGARCH_CSR_IB0ADDR + 8 * 6); + break; + case LOONGARCH_CSR_IB0ADDR + 8 * 7: + csr_writeq(val, LOONGARCH_CSR_IB0ADDR + 8 * 7); + break; + + case LOONGARCH_CSR_IB0MASK + 8 * 0: + csr_writeq(val, LOONGARCH_CSR_IB0MASK + 8 * 0); + break; + case LOONGARCH_CSR_IB0MASK + 8 * 1: + csr_writeq(val, LOONGARCH_CSR_IB0MASK + 8 * 1); + break; + case LOONGARCH_CSR_IB0MASK + 8 * 2: + csr_writeq(val, LOONGARCH_CSR_IB0MASK + 8 * 2); + break; + case LOONGARCH_CSR_IB0MASK + 8 * 3: + csr_writeq(val, LOONGARCH_CSR_IB0MASK + 8 * 3); + break; + case LOONGARCH_CSR_IB0MASK + 8 * 4: + csr_writeq(val, LOONGARCH_CSR_IB0MASK + 8 * 4); + break; + case LOONGARCH_CSR_IB0MASK + 8 * 5: + csr_writeq(val, LOONGARCH_CSR_IB0MASK + 8 * 5); + break; + case LOONGARCH_CSR_IB0MASK + 8 * 6: + csr_writeq(val, LOONGARCH_CSR_IB0MASK + 8 * 6); + break; + case LOONGARCH_CSR_IB0MASK + 8 * 7: + csr_writeq(val, LOONGARCH_CSR_IB0MASK + 8 * 7); + break; + + case LOONGARCH_CSR_IB0ASID + 8 * 0: + csr_writeq(val, LOONGARCH_CSR_IB0ASID + 8 * 0); + break; + case LOONGARCH_CSR_IB0ASID + 8 * 1: + csr_writeq(val, LOONGARCH_CSR_IB0ASID + 8 * 1); + break; + case LOONGARCH_CSR_IB0ASID + 8 * 2: + csr_writeq(val, LOONGARCH_CSR_IB0ASID + 8 * 2); + break; + case LOONGARCH_CSR_IB0ASID + 8 * 3: + csr_writeq(val, LOONGARCH_CSR_IB0ASID + 8 * 3); + break; + case LOONGARCH_CSR_IB0ASID + 8 * 4: + csr_writeq(val, LOONGARCH_CSR_IB0ASID + 8 * 4); + break; + case LOONGARCH_CSR_IB0ASID + 8 * 5: + csr_writeq(val, LOONGARCH_CSR_IB0ASID + 8 * 5); + break; + case LOONGARCH_CSR_IB0ASID + 8 * 6: + csr_writeq(val, LOONGARCH_CSR_IB0ASID + 8 * 6); + break; + case LOONGARCH_CSR_IB0ASID + 8 * 7: + csr_writeq(val, LOONGARCH_CSR_IB0ASID + 8 * 7); + break; + + case LOONGARCH_CSR_IB0CTL + 8 * 0: + csr_writeq(val, LOONGARCH_CSR_IB0CTL + 8 * 0); + break; + case LOONGARCH_CSR_IB0CTL + 8 * 1: + csr_writeq(val, LOONGARCH_CSR_IB0CTL + 8 * 1); + break; + case LOONGARCH_CSR_IB0CTL + 8 * 2: + csr_writeq(val, LOONGARCH_CSR_IB0CTL + 8 * 2); + break; + case LOONGARCH_CSR_IB0CTL + 8 * 3: + csr_writeq(val, LOONGARCH_CSR_IB0CTL + 8 * 3); + break; + case LOONGARCH_CSR_IB0CTL + 8 * 4: + csr_writeq(val, LOONGARCH_CSR_IB0CTL + 8 * 4); + break; + case LOONGARCH_CSR_IB0CTL + 8 * 5: + csr_writeq(val, LOONGARCH_CSR_IB0CTL + 8 * 5); + break; + case LOONGARCH_CSR_IB0CTL + 8 * 6: + csr_writeq(val, LOONGARCH_CSR_IB0CTL + 8 * 6); + break; + case LOONGARCH_CSR_IB0CTL + 8 * 7: + csr_writeq(val, LOONGARCH_CSR_IB0CTL + 8 * 7); + break; + + case LOONGARCH_CSR_DB0ADDR + 8 * 0: + csr_writeq(val, LOONGARCH_CSR_DB0ADDR + 8 * 0); + break; + case LOONGARCH_CSR_DB0ADDR + 8 * 1: + csr_writeq(val, LOONGARCH_CSR_DB0ADDR + 8 * 1); + break; + case LOONGARCH_CSR_DB0ADDR + 8 * 2: + csr_writeq(val, LOONGARCH_CSR_DB0ADDR + 8 * 2); + break; + case LOONGARCH_CSR_DB0ADDR + 8 * 3: + csr_writeq(val, LOONGARCH_CSR_DB0ADDR + 8 * 3); + break; + case LOONGARCH_CSR_DB0ADDR + 8 * 4: + csr_writeq(val, LOONGARCH_CSR_DB0ADDR + 8 * 4); + break; + case LOONGARCH_CSR_DB0ADDR + 8 * 5: + csr_writeq(val, LOONGARCH_CSR_DB0ADDR + 8 * 5); + break; + case LOONGARCH_CSR_DB0ADDR + 8 * 6: + csr_writeq(val, LOONGARCH_CSR_DB0ADDR + 8 * 6); + break; + case LOONGARCH_CSR_DB0ADDR + 8 * 7: + csr_writeq(val, LOONGARCH_CSR_DB0ADDR + 8 * 7); + break; + + case LOONGARCH_CSR_DB0MASK + 8 * 0: + csr_writeq(val, LOONGARCH_CSR_DB0MASK + 8 * 0); + break; + case LOONGARCH_CSR_DB0MASK + 8 * 1: + csr_writeq(val, LOONGARCH_CSR_DB0MASK + 8 * 1); + break; + case LOONGARCH_CSR_DB0MASK + 8 * 2: + csr_writeq(val, LOONGARCH_CSR_DB0MASK + 8 * 2); + break; + case LOONGARCH_CSR_DB0MASK + 8 * 3: + csr_writeq(val, LOONGARCH_CSR_DB0MASK + 8 * 3); + break; + case LOONGARCH_CSR_DB0MASK + 8 * 4: + csr_writeq(val, LOONGARCH_CSR_DB0MASK + 8 * 4); + break; + case LOONGARCH_CSR_DB0MASK + 8 * 5: + csr_writeq(val, LOONGARCH_CSR_DB0MASK + 8 * 5); + break; + case LOONGARCH_CSR_DB0MASK + 8 * 6: + csr_writeq(val, LOONGARCH_CSR_DB0MASK + 8 * 6); + break; + case LOONGARCH_CSR_DB0MASK + 8 * 7: + csr_writeq(val, LOONGARCH_CSR_DB0MASK + 8 * 7); + break; + + case LOONGARCH_CSR_DB0ASID + 8 * 0: + csr_writeq(val, LOONGARCH_CSR_DB0ASID + 8 * 0); + break; + case LOONGARCH_CSR_DB0ASID + 8 * 1: + csr_writeq(val, LOONGARCH_CSR_DB0ASID + 8 * 1); + break; + case LOONGARCH_CSR_DB0ASID + 8 * 2: + csr_writeq(val, LOONGARCH_CSR_DB0ASID + 8 * 2); + break; + case LOONGARCH_CSR_DB0ASID + 8 * 3: + csr_writeq(val, LOONGARCH_CSR_DB0ASID + 8 * 3); + break; + case LOONGARCH_CSR_DB0ASID + 8 * 4: + csr_writeq(val, LOONGARCH_CSR_DB0ASID + 8 * 4); + break; + case LOONGARCH_CSR_DB0ASID + 8 * 5: + csr_writeq(val, LOONGARCH_CSR_DB0ASID + 8 * 5); + break; + case LOONGARCH_CSR_DB0ASID + 8 * 6: + csr_writeq(val, LOONGARCH_CSR_DB0ASID + 8 * 6); + break; + case LOONGARCH_CSR_DB0ASID + 8 * 7: + csr_writeq(val, LOONGARCH_CSR_DB0ASID + 8 * 7); + break; + + case LOONGARCH_CSR_DB0CTL + 8 * 0: + csr_writeq(val, LOONGARCH_CSR_DB0CTL + 8 * 0); + break; + case LOONGARCH_CSR_DB0CTL + 8 * 1: + csr_writeq(val, LOONGARCH_CSR_DB0CTL + 8 * 1); + break; + case LOONGARCH_CSR_DB0CTL + 8 * 2: + csr_writeq(val, LOONGARCH_CSR_DB0CTL + 8 * 2); + break; + case LOONGARCH_CSR_DB0CTL + 8 * 3: + csr_writeq(val, LOONGARCH_CSR_DB0CTL + 8 * 3); + break; + case LOONGARCH_CSR_DB0CTL + 8 * 4: + csr_writeq(val, LOONGARCH_CSR_DB0CTL + 8 * 4); + break; + case LOONGARCH_CSR_DB0CTL + 8 * 5: + csr_writeq(val, LOONGARCH_CSR_DB0CTL + 8 * 5); + break; + case LOONGARCH_CSR_DB0CTL + 8 * 6: + csr_writeq(val, LOONGARCH_CSR_DB0CTL + 8 * 6); + break; + case LOONGARCH_CSR_DB0CTL + 8 * 7: + csr_writeq(val, LOONGARCH_CSR_DB0CTL + 8 * 7); + break; + + case LOONGARCH_CSR_FWPS: + csr_writeq(val, LOONGARCH_CSR_FWPS); + break; + case LOONGARCH_CSR_MWPS: + csr_writeq(val, LOONGARCH_CSR_MWPS); + break; + default: + printk("write watch register number error %d\n", reg); + } +} +EXPORT_SYMBOL(watch_csrwr); + +/* + * Install the watch registers for the current thread. A maximum of + * four registers are installed although the machine may have more. + */ +void loongarch_install_watch_registers(struct task_struct *t) +{ + int i, j, dbcn; + struct loongarch3264_watch_reg_state *watches = &t->thread.watch.loongarch3264; + + dbcn = boot_cpu_data.watch_dreg_count; + for (i = 0; i < current_cpu_data.watch_reg_use_cnt; i++) { + if (watches->irw[i] & (LA_WATCH_R | LA_WATCH_W)) { + unsigned int dbc = 0x1e; + + if ((watches->irw[i] & LA_WATCH_R)) + dbc |= 1<<8; + if ((watches->irw[i] & LA_WATCH_W)) + dbc |= 1<<9; + + watch_csrwr(watches->addr[i], LOONGARCH_CSR_DB0ADDR + 8*i); + watch_csrwr(watches->mask[i], LOONGARCH_CSR_DB0MASK + 8*i); + watch_csrwr(0, LOONGARCH_CSR_DB0ASID + 8*i); + watch_csrwr(dbc, LOONGARCH_CSR_DB0CTL + 8*i); + + } else if (watches->irw[i] & LA_WATCH_I) { + j = i - dbcn; + watch_csrwr(watches->addr[i], LOONGARCH_CSR_IB0ADDR + 8*j); + watch_csrwr(watches->mask[i], LOONGARCH_CSR_IB0MASK + 8*j); + watch_csrwr(0, LOONGARCH_CSR_IB0ASID + 8*j); + watch_csrwr(0x1e, LOONGARCH_CSR_IB0CTL + 8*j); + if ((task_pt_regs(t)->csr_era & ~watches->mask[i]) == + (watches->addr[i] & ~watches->mask[i])) + watch_csrwr(0x10000, LOONGARCH_CSR_FWPS); + } else if (watches->irwmask[i]) { + if (i < dbcn) { + watch_csrwr(0x0, LOONGARCH_CSR_DB0CTL + 8*i); + } else { + j = i - dbcn; + watch_csrwr(0x0, LOONGARCH_CSR_IB0CTL + 8*j); + } + } + } +} +EXPORT_SYMBOL(loongarch_install_watch_registers); + +#define PRMD_WE (1UL<<3) /* PRMD watch enable, restore to CRMD when eret */ + +void loongarch_clear_prev_watch_registers(struct task_struct *prev) +{ + struct pt_regs *regs = task_pt_regs(prev); + + loongarch_clear_watch_registers(); + prev->thread.csr_prmd &= ~PRMD_WE; + regs->csr_prmd &= ~PRMD_WE; +} + +void loongarch_install_next_watch_registers(struct task_struct *next) +{ + struct pt_regs *regs = task_pt_regs(next); + + loongarch_install_watch_registers(next); + next->thread.csr_prmd |= PRMD_WE; + regs->csr_prmd |= PRMD_WE; +} + +#define CTRL_WE 1 /* Breakpoint can only be written by hw debugger */ + +void loongarch_update_watch_registers(struct task_struct *t) +{ + struct loongarch3264_watch_reg_state *watches = + &t->thread.watch.loongarch3264; + int i, j, dbcn, ctl; + + dbcn = boot_cpu_data.watch_dreg_count; + for (i = 0; i < current_cpu_data.watch_reg_use_cnt; i++) { + if (i < dbcn) { + ctl = watch_csrrd(LOONGARCH_CSR_DB0CTL + 8*i); + if (ctl & CTRL_WE) + watches->irwmask[i] = 0; + else + watches->irwmask[i] = LA_WATCH_R | LA_WATCH_W; + } else { + j = i - dbcn; + ctl = watch_csrrd(LOONGARCH_CSR_IB0CTL + 8*j); + if (ctl & CTRL_WE) + watches->irwmask[i] = 0; + else + watches->irwmask[i] = LA_WATCH_I; + } + } +} +/* + * Read back the watchhi registers so the user space debugger has + * access to the I, R, and W bits. A maximum of four registers are + * read although the machine may have more. + */ + +void loongarch_read_watch_registers(struct pt_regs *regs) +{ + struct loongarch3264_watch_reg_state *watches = + ¤t->thread.watch.loongarch3264; + unsigned int i, j, dbcn, ibst, dbst; + + dbcn = boot_cpu_data.watch_dreg_count; + ibst = watch_csrrd(LOONGARCH_CSR_FWPS); + dbst = watch_csrrd(LOONGARCH_CSR_MWPS); + + for (i = 0; i < current_cpu_data.watch_reg_use_cnt; i++) { + if (i < dbcn) { + watches->addr[i] = watch_csrrd(LOONGARCH_CSR_DB0ADDR + 8*i); + watches->mask[i] = watch_csrrd(LOONGARCH_CSR_DB0MASK + 8*i); + if (dbst & (1u << i)) { + int dbc = watch_csrrd(LOONGARCH_CSR_DB0CTL + i*8); + + watches->irwstat[i] = 0; + if (dbc & (1<<8)) + watches->irwstat[i] |= LA_WATCH_R; + if (dbc & (1<<9)) + watches->irwstat[i] |= LA_WATCH_W; + } + } else { + j = i - dbcn; + watches->addr[i] = watch_csrrd(LOONGARCH_CSR_IB0ADDR + 8*j); + watches->mask[i] = watch_csrrd(LOONGARCH_CSR_IB0MASK + 8*j); + watches->irwstat[i] = 0; + if (ibst & (1u << j)) + watches->irwstat[i] |= LA_WATCH_I; + } + } + + if (ibst & 0xffff) + watch_csrwr(ibst, LOONGARCH_CSR_FWPS); + if (dbst & 0xffff) + watch_csrwr(dbst, LOONGARCH_CSR_MWPS); +} + +/* + * Disable all watch registers. Although only four registers are + * installed, all are cleared to eliminate the possibility of endless + * looping in the watch handler. + */ +void loongarch_clear_watch_registers(void) +{ + unsigned int i, j, dbcn, ibst, dbst; + + ibst = watch_csrrd(LOONGARCH_CSR_FWPS); + dbst = watch_csrrd(LOONGARCH_CSR_MWPS); + dbcn = boot_cpu_data.watch_dreg_count; + + /* + * bit 16: skip next event + * bit 0-15: breakpoint triggered, W1C + */ + + if (ibst & 0x1ffff) + watch_csrwr(ibst&~0x10000, LOONGARCH_CSR_FWPS); + if (dbst & 0x1ffff) + watch_csrwr(dbst&~0x10000, LOONGARCH_CSR_MWPS); + + for (i = 0; i < current_cpu_data.watch_reg_use_cnt; i++) { + if (i < dbcn) { + watch_csrwr(0, LOONGARCH_CSR_DB0ADDR + 8*i); + watch_csrwr(0, LOONGARCH_CSR_DB0MASK + 8*i); + watch_csrwr(0, LOONGARCH_CSR_DB0ASID + 8*i); + watch_csrwr(0, LOONGARCH_CSR_DB0CTL + 8*i); + } else { + j = i - dbcn; + watch_csrwr(0, LOONGARCH_CSR_IB0ADDR + 8*j); + watch_csrwr(0, LOONGARCH_CSR_IB0MASK + 8*j); + watch_csrwr(0, LOONGARCH_CSR_IB0ASID + 8*j); + watch_csrwr(0, LOONGARCH_CSR_IB0CTL + 8*j); + } + } +} +EXPORT_SYMBOL(loongarch_clear_watch_registers); + +void loongarch_probe_watch_registers(struct cpuinfo_loongarch *c) +{ + unsigned int ibcn, dbcn, total; + + ibcn = watch_csrrd(LOONGARCH_CSR_FWPC) & 0x3f; + dbcn = watch_csrrd(LOONGARCH_CSR_MWPC) & 0x3f; + c->watch_dreg_count = dbcn; + c->watch_ireg_count = ibcn; + total = ibcn + dbcn; + c->watch_reg_use_cnt = (total < NUM_WATCH_REGS) ? total : NUM_WATCH_REGS; +} -- Gitee From 1a84c6d0e3d07ee06b2168b58877a5208c24cb33 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 31 Dec 2020 15:13:33 +0800 Subject: [PATCH 34/59] LoongArch: Add power management support Add power management support for LoongArch: including CPUFreq, suspend and hibernation. Change-Id: Id835800c9bfd1d58fb5b1d9f3795178607a80e95 Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 16 ++ arch/loongarch/Makefile | 3 + arch/loongarch/include/asm/acpi.h | 10 ++ .../include/asm/mach-loongson64/boot_param.h | 1 + .../include/asm/mach-loongson64/loongson.h | 3 + arch/loongarch/include/asm/time.h | 1 + arch/loongarch/kernel/acpi.c | 6 + arch/loongarch/kernel/asm-offsets.c | 12 ++ arch/loongarch/kernel/reset.c | 2 + arch/loongarch/kernel/setup.c | 4 + arch/loongarch/kernel/time.c | 11 +- arch/loongarch/loongson64/Makefile | 2 +- arch/loongarch/loongson64/platform.c | 62 +++++++ arch/loongarch/loongson64/reset.c | 1 - arch/loongarch/power/Makefile | 4 + arch/loongarch/power/common.c | 48 ++++++ arch/loongarch/power/hibernate.c | 59 +++++++ arch/loongarch/power/hibernate_asm.S | 68 ++++++++ arch/loongarch/power/suspend.c | 88 ++++++++++ arch/loongarch/power/suspend_asm.S | 156 ++++++++++++++++++ 20 files changed, 552 insertions(+), 5 deletions(-) create mode 100644 arch/loongarch/loongson64/platform.c create mode 100644 arch/loongarch/power/Makefile create mode 100644 arch/loongarch/power/common.c create mode 100644 arch/loongarch/power/hibernate.c create mode 100644 arch/loongarch/power/hibernate_asm.S create mode 100644 arch/loongarch/power/suspend.c create mode 100644 arch/loongarch/power/suspend_asm.S diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 899933dfa9df..7502fa5d3e16 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -43,6 +43,7 @@ config LOONGARCH select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT select BUILDTIME_TABLE_SORT select COMMON_CLK + select CPU_PM select GENERIC_ATOMIC64 if !64BIT select GENERIC_CLOCKEVENTS select GENERIC_CMOS_UPDATE @@ -703,8 +704,23 @@ endmenu menu "Power management options" +config ARCH_SUSPEND_POSSIBLE + def_bool y + depends on SYS_SUPPORTS_HOTPLUG_CPU || !SMP + +config ARCH_HIBERNATION_POSSIBLE + def_bool y + depends on SYS_SUPPORTS_HOTPLUG_CPU || !SMP + +source "kernel/power/Kconfig" source "drivers/acpi/Kconfig" endmenu +menu "CPU Power Management" + +source "drivers/cpufreq/Kconfig" + +endmenu + source "drivers/firmware/Kconfig" diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index d43920ae3bd1..bcb9d808fe4f 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -109,6 +109,9 @@ libs-y += arch/loongarch/lib/ # See arch/loongarch/Kbuild for content of core part of the kernel core-y += arch/loongarch/ +# suspend and hibernation support +drivers-$(CONFIG_PM) += arch/loongarch/power/ + ifeq ($(KBUILD_EXTMOD),) prepare: vdso_prepare vdso_prepare: prepare0 diff --git a/arch/loongarch/include/asm/acpi.h b/arch/loongarch/include/asm/acpi.h index 978ac2419d70..37a3174284fe 100644 --- a/arch/loongarch/include/asm/acpi.h +++ b/arch/loongarch/include/asm/acpi.h @@ -39,4 +39,14 @@ extern struct list_head acpi_wakeup_device_list; #define ACPI_TABLE_UPGRADE_MAX_PHYS ARCH_LOW_ADDRESS_LIMIT +extern int loongarch_acpi_suspend(void); +extern int (*acpi_suspend_lowlevel)(void); +extern void loongarch_suspend_enter(void); +extern void loongarch_wakeup_start(void); + +static inline unsigned long acpi_get_wakeup_address(void) +{ + return (unsigned long)loongarch_wakeup_start; +} + #endif /* _ASM_LOONGARCH_ACPI_H */ diff --git a/arch/loongarch/include/asm/mach-loongson64/boot_param.h b/arch/loongarch/include/asm/mach-loongson64/boot_param.h index 1169c628cdf3..4c1c5fe0ec06 100644 --- a/arch/loongarch/include/asm/mach-loongson64/boot_param.h +++ b/arch/loongarch/include/asm/mach-loongson64/boot_param.h @@ -97,6 +97,7 @@ struct loongson_system_configuration { int cores_per_node; int cores_per_package; char *cpuname; + u64 suspend_addr; u64 vgabios_addr; }; diff --git a/arch/loongarch/include/asm/mach-loongson64/loongson.h b/arch/loongarch/include/asm/mach-loongson64/loongson.h index 5e6fc05a5c48..51b846ecf952 100644 --- a/arch/loongarch/include/asm/mach-loongson64/loongson.h +++ b/arch/loongarch/include/asm/mach-loongson64/loongson.h @@ -157,4 +157,7 @@ typedef enum { #define ls7a_writeq(val, addr) ls7a_write_type(val, addr, uint64_t) #define ls7a_write(val, addr) ls7a_write_type(val, addr, uint64_t) +void enable_gpe_wakeup(void); +void enable_pci_wakeup(void); + #endif /* __ASM_MACH_LOONGSON64_LOONGSON_H */ diff --git a/arch/loongarch/include/asm/time.h b/arch/loongarch/include/asm/time.h index 3a3c6f5bfeae..3f28caf48954 100644 --- a/arch/loongarch/include/asm/time.h +++ b/arch/loongarch/include/asm/time.h @@ -17,6 +17,7 @@ extern u64 cpu_clock_freq; extern u64 const_clock_freq; +extern void save_counter(void); extern void sync_counter(void); static inline unsigned int calc_const_freq(void) diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c index 18b9153cfa6f..1205c055c91a 100644 --- a/arch/loongarch/kernel/acpi.c +++ b/arch/loongarch/kernel/acpi.c @@ -538,6 +538,12 @@ static void __init acpi_process_madt(void) } } +#ifdef CONFIG_ACPI_SLEEP +int (*acpi_suspend_lowlevel)(void) = loongarch_acpi_suspend; +#else +int (*acpi_suspend_lowlevel)(void); +#endif + int __init acpi_boot_init(void) { /* diff --git a/arch/loongarch/kernel/asm-offsets.c b/arch/loongarch/kernel/asm-offsets.c index a752679b6e53..95f353c67089 100644 --- a/arch/loongarch/kernel/asm-offsets.c +++ b/arch/loongarch/kernel/asm-offsets.c @@ -263,3 +263,15 @@ void output_smpboot_defines(void) OFFSET(CPU_BOOT_TINFO, secondary_data, thread_info); BLANK(); } + +#ifdef CONFIG_HIBERNATION +void output_pbe_defines(void) +{ + COMMENT(" Linux struct pbe offsets. "); + OFFSET(PBE_ADDRESS, pbe, address); + OFFSET(PBE_ORIG_ADDRESS, pbe, orig_address); + OFFSET(PBE_NEXT, pbe, next); + DEFINE(PBE_SIZE, sizeof(struct pbe)); + BLANK(); +} +#endif diff --git a/arch/loongarch/kernel/reset.c b/arch/loongarch/kernel/reset.c index d22de0669b4b..fa6dbd369e20 100644 --- a/arch/loongarch/kernel/reset.c +++ b/arch/loongarch/kernel/reset.c @@ -14,6 +14,7 @@ #include #include #include +#include static void machine_hang(void) { @@ -49,6 +50,7 @@ void machine_power_off(void) preempt_disable(); smp_send_stop(); #endif + enable_pci_wakeup(); pm_power_off(); } diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index bbae18d8a39d..b585dd369f52 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -494,6 +494,10 @@ static void __init arch_mem_init(char **cmdline_p) dma_contiguous_reserve(PFN_PHYS(max_low_pfn)); + /* Reserve for hibernation. */ + memblock_reserve(__pa_symbol(&__nosave_begin), + __pa_symbol(&__nosave_end) - __pa_symbol(&__nosave_begin)); + memblock_dump_all(); early_memtest(PFN_PHYS(ARCH_PFN_OFFSET), PFN_PHYS(max_low_pfn)); diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c index cd86575dcedc..074b25621607 100644 --- a/arch/loongarch/kernel/time.c +++ b/arch/loongarch/kernel/time.c @@ -120,12 +120,17 @@ static unsigned long __init get_loops_per_jiffy(void) return lpj; } -static long init_timeval; +static long init_offset __nosavedata; + +void save_counter(void) +{ + init_offset = drdtime(); +} void sync_counter(void) { /* Ensure counter begin at 0 */ - csr_writeq(-init_timeval, LOONGARCH_CSR_CNTC); + csr_writeq(init_offset, LOONGARCH_CSR_CNTC); } int constant_clockevent_init(void) @@ -218,7 +223,7 @@ void __init time_init(void) else const_clock_freq = calc_const_freq(); - init_timeval = drdtime() - csr_readq(LOONGARCH_CSR_CNTC); + init_offset = -(drdtime() - csr_readq(LOONGARCH_CSR_CNTC)); constant_clockevent_init(); constant_clocksource_init(); diff --git a/arch/loongarch/loongson64/Makefile b/arch/loongarch/loongson64/Makefile index 3e54460d8d00..073f9e8da893 100644 --- a/arch/loongarch/loongson64/Makefile +++ b/arch/loongarch/loongson64/Makefile @@ -2,7 +2,7 @@ # All Loongson based systems # -obj-y += setup.o init.o env.o reset.o irq.o mem.o dma.o rtc.o +obj-y += setup.o init.o env.o reset.o irq.o mem.o dma.o rtc.o platform.o obj-$(CONFIG_SMP) += smp.o diff --git a/arch/loongarch/loongson64/platform.c b/arch/loongarch/loongson64/platform.c new file mode 100644 index 000000000000..d2655bd49dc9 --- /dev/null +++ b/arch/loongarch/loongson64/platform.c @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Author: Huacai Chen + * Copyright (C) 2020 Loongson Technology Co., Ltd. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static struct platform_device loongson3_cpufreq_device = { + .name = "loongson3_cpufreq", + .id = -1, +}; + +static int __init loongson_cpufreq_init(void) +{ + return platform_device_register(&loongson3_cpufreq_device); +} + +arch_initcall(loongson_cpufreq_init); + +static void enable_sci(void) +{ + u16 value; + value = readw(LS7A_PM1_CNT_REG); + value |= 1; + writew(value, LS7A_PM1_CNT_REG); +} + +static int __init loongson3_acpi_suspend_init(void) +{ +#ifdef CONFIG_ACPI + acpi_status status; + unsigned long long suspend_addr = 0; + + if (acpi_disabled) + return 0; + + enable_sci(); + status = acpi_evaluate_integer(NULL, "\\SADR", NULL, &suspend_addr); + if (ACPI_FAILURE(status) || !suspend_addr) { + pr_err("ACPI S3 is not support!\n"); + return -1; + } + loongson_sysconf.suspend_addr = suspend_addr; +#endif + return 0; +} + +device_initcall(loongson3_acpi_suspend_init); diff --git a/arch/loongarch/loongson64/reset.c b/arch/loongarch/loongson64/reset.c index 9b87260e1ab5..d4b4546ef3f4 100644 --- a/arch/loongarch/loongson64/reset.c +++ b/arch/loongarch/loongson64/reset.c @@ -16,7 +16,6 @@ #include #include #include -#include static void loongson_restart(void) { diff --git a/arch/loongarch/power/Makefile b/arch/loongarch/power/Makefile new file mode 100644 index 000000000000..08a200357c4e --- /dev/null +++ b/arch/loongarch/power/Makefile @@ -0,0 +1,4 @@ +obj-y += common.o + +obj-$(CONFIG_SUSPEND) += suspend.o suspend_asm.o +obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate_asm.o diff --git a/arch/loongarch/power/common.c b/arch/loongarch/power/common.c new file mode 100644 index 000000000000..5adc6e86bbec --- /dev/null +++ b/arch/loongarch/power/common.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Author: Huacai Chen + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#include +#include +#include +#include +#include + +#include +#include + +#include + +void enable_gpe_wakeup(void) +{ + struct list_head *node, *next; + u32 data = 0; + + list_for_each_safe(node, next, &acpi_wakeup_device_list) { + struct acpi_device *dev = + container_of(node, struct acpi_device, wakeup_list); + + if (!dev->wakeup.flags.valid + || ACPI_STATE_S3 > (u32) dev->wakeup.sleep_state + || !(device_may_wakeup(&dev->dev) + || dev->wakeup.prepare_count)) + continue; + + data |= (1 << dev->wakeup.gpe_number); + } + writel(data, LS7A_GPE0_ENA_REG); +} + +void enable_pci_wakeup(void) +{ + u16 value; + int pci_wake_enabled; + + pci_wake_enabled = !(readw(LS7A_PM1_ENA_REG) & ACPI_PCI_WAKE_STATUS); + if (pci_wake_enabled) { + value = readw(LS7A_PM1_ENA_REG); + value &= (~ACPI_PCI_WAKE_STATUS); + writew(value, LS7A_PM1_ENA_REG); + } +} diff --git a/arch/loongarch/power/hibernate.c b/arch/loongarch/power/hibernate.c new file mode 100644 index 000000000000..217724b15213 --- /dev/null +++ b/arch/loongarch/power/hibernate.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include + +#include + +static u64 saved_crmd; +static u64 saved_prmd; +static u64 saved_euen; +static u64 saved_ecfg; +struct pt_regs saved_regs; + +void save_processor_state(void) +{ + saved_crmd = csr_readl(LOONGARCH_CSR_CRMD); + saved_prmd = csr_readl(LOONGARCH_CSR_PRMD); + saved_euen = csr_readl(LOONGARCH_CSR_EUEN); + saved_ecfg = csr_readl(LOONGARCH_CSR_ECFG); + + if (is_fpu_owner()) + save_fp(current); +} + +void restore_processor_state(void) +{ + csr_writel(saved_crmd, LOONGARCH_CSR_CRMD); + csr_writel(saved_prmd, LOONGARCH_CSR_PRMD); + csr_writel(saved_euen, LOONGARCH_CSR_EUEN); + csr_writel(saved_ecfg, LOONGARCH_CSR_ECFG); + + if (is_fpu_owner()) + restore_fp(current); +} + +int pfn_is_nosave(unsigned long pfn) +{ + unsigned long nosave_begin_pfn = PFN_DOWN(__pa(&__nosave_begin)); + unsigned long nosave_end_pfn = PFN_UP(__pa(&__nosave_end)); + + return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn); +} + +extern int swsusp_asm_suspend(void); + +int swsusp_arch_suspend(void) +{ + enable_pci_wakeup(); + return swsusp_asm_suspend(); +} + +extern int swsusp_asm_resume(void); + +int swsusp_arch_resume(void) +{ + /* Avoid TLB mismatch during and after kernel resume */ + local_flush_tlb_all(); + return swsusp_asm_resume(); +} diff --git a/arch/loongarch/power/hibernate_asm.S b/arch/loongarch/power/hibernate_asm.S new file mode 100644 index 000000000000..955608d3592a --- /dev/null +++ b/arch/loongarch/power/hibernate_asm.S @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Hibernation support specific for loongarch - temporary page tables + * + * Licensed under the GPLv2 + * + * Copyright (C) 2009 Lemote Inc. + * Author: Hu Hongbing + * Wu Zhangjin + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#include +#include +#include +#include + +.text +SYM_FUNC_START(swsusp_asm_suspend) + la t0, saved_regs + PTR_S ra, t0, PT_R1 + PTR_S sp, t0, PT_R3 + PTR_S fp, t0, PT_R22 + PTR_S tp, t0, PT_R2 + PTR_S s0, t0, PT_R23 + PTR_S s1, t0, PT_R24 + PTR_S s2, t0, PT_R25 + PTR_S s3, t0, PT_R26 + PTR_S s4, t0, PT_R27 + PTR_S s5, t0, PT_R28 + PTR_S s6, t0, PT_R29 + PTR_S s7, t0, PT_R30 + PTR_S s8, t0, PT_R31 + b swsusp_save +SYM_FUNC_END(swsusp_asm_suspend) + +SYM_FUNC_START(swsusp_asm_resume) + la t0, restore_pblist + PTR_L t0, t0, 0 +0: + PTR_L t1, t0, PBE_ADDRESS /* source */ + PTR_L t2, t0, PBE_ORIG_ADDRESS /* destination */ + PTR_LI t3, _PAGE_SIZE + PTR_ADDU t3, t3, t1 +1: + REG_L t8, t1, 0 + REG_S t8, t2, 0 + PTR_ADDIU t1, t1, SZREG + PTR_ADDIU t2, t2, SZREG + bne t1, t3, 1b + PTR_L t0, t0, PBE_NEXT + bnez t0, 0b + la t0, saved_regs + PTR_L ra, t0, PT_R1 + PTR_L sp, t0, PT_R3 + PTR_L fp, t0, PT_R22 + PTR_L tp, t0, PT_R2 + PTR_L s0, t0, PT_R23 + PTR_L s1, t0, PT_R24 + PTR_L s2, t0, PT_R25 + PTR_L s3, t0, PT_R26 + PTR_L s4, t0, PT_R27 + PTR_L s5, t0, PT_R28 + PTR_L s6, t0, PT_R29 + PTR_L s7, t0, PT_R30 + PTR_L s8, t0, PT_R31 + PTR_LI v0, 0x0 + jirl zero, ra, 0 +SYM_FUNC_END(swsusp_asm_resume) diff --git a/arch/loongarch/power/suspend.c b/arch/loongarch/power/suspend.c new file mode 100644 index 000000000000..8f312c4cc21c --- /dev/null +++ b/arch/loongarch/power/suspend.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * loongson-specific suspend support + * + * Author: Huacai Chen + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#include +#include +#include + +#include +#include +#include +#include + +#include + +u32 loongarch_nr_nodes; +u64 loongarch_suspend_addr; +u32 loongarch_pcache_ways; +u32 loongarch_scache_ways; +u32 loongarch_pcache_sets; +u32 loongarch_scache_sets; +u32 loongarch_pcache_linesz; +u32 loongarch_scache_linesz; + +struct saved_registers { + u32 ecfg; + u32 euen; + u64 pgd; + u64 kpgd; + u32 pwctl0; + u32 pwctl1; +}; +static struct saved_registers saved_regs; + +static void arch_common_suspend(void) +{ + save_counter(); + saved_regs.pgd = csr_readq(LOONGARCH_CSR_PGDL); + saved_regs.kpgd = csr_readq(LOONGARCH_CSR_PGDH); + saved_regs.pwctl0 = csr_readl(LOONGARCH_CSR_PWCTL0); + saved_regs.pwctl1 = csr_readl(LOONGARCH_CSR_PWCTL1); + saved_regs.ecfg = csr_readl(LOONGARCH_CSR_ECFG); + saved_regs.euen = csr_readl(LOONGARCH_CSR_EUEN); + + loongarch_nr_nodes = loongson_sysconf.nr_nodes; + loongarch_suspend_addr = loongson_sysconf.suspend_addr; + loongarch_pcache_ways = cpu_data[0].dcache.ways; + loongarch_scache_ways = cpu_data[0].scache.ways; + loongarch_pcache_sets = cpu_data[0].dcache.sets; + loongarch_scache_sets = cpu_data[0].scache.sets; + loongarch_pcache_linesz = cpu_data[0].dcache.linesz; + loongarch_scache_linesz = cpu_data[0].scache.linesz; +} + +static void arch_common_resume(void) +{ + sync_counter(); + local_flush_tlb_all(); + csr_writeq(per_cpu_offset(0), PERCPU_BASE_KS); + csr_writeq(eentry, LOONGARCH_CSR_EENTRY); + csr_writeq(eentry, LOONGARCH_CSR_MERRENTRY); + csr_writeq(tlbrentry, LOONGARCH_CSR_TLBRENTRY); + + csr_writeq(saved_regs.pgd, LOONGARCH_CSR_PGDL); + csr_writeq(saved_regs.kpgd, LOONGARCH_CSR_PGDH); + csr_writel(saved_regs.pwctl0, LOONGARCH_CSR_PWCTL0); + csr_writel(saved_regs.pwctl1, LOONGARCH_CSR_PWCTL1); + csr_writel(saved_regs.ecfg, LOONGARCH_CSR_ECFG); + csr_writel(saved_regs.euen, LOONGARCH_CSR_EUEN); +} + +int loongarch_acpi_suspend(void) +{ + arch_common_suspend(); + + enable_gpe_wakeup(); + enable_pci_wakeup(); + + /* processor specific suspend */ + loongarch_suspend_enter(); + + arch_common_resume(); + + return 0; +} diff --git a/arch/loongarch/power/suspend_asm.S b/arch/loongarch/power/suspend_asm.S new file mode 100644 index 000000000000..03f77bdfc75e --- /dev/null +++ b/arch/loongarch/power/suspend_asm.S @@ -0,0 +1,156 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Sleep helper for Loongson-3 sleep mode. + * + * Copyright (C) 2020 Loongson Technology Co., Ltd. + * Author: Huacai Chen + */ + +#include +#include +#include +#include +#include + + .extern loongarch_nr_nodes + .extern loongarch_suspend_addr + .extern loongarch_pcache_ways + .extern loongarch_pcache_sets + .extern loongarch_pcache_linesz + .extern loongarch_scache_ways + .extern loongarch_scache_sets + .extern loongarch_scache_linesz + + .text + .align 5 + +/* preparatory stuff */ +.macro SETUP_SLEEP + addi.d sp, sp, -PT_SIZE + st.d $r1, sp, PT_R1 + st.d $r2, sp, PT_R2 + st.d $r3, sp, PT_R3 + st.d $r4, sp, PT_R4 + st.d $r5, sp, PT_R5 + st.d $r6, sp, PT_R6 + st.d $r7, sp, PT_R7 + st.d $r8, sp, PT_R8 + st.d $r9, sp, PT_R9 + st.d $r10, sp, PT_R10 + st.d $r11, sp, PT_R11 + st.d $r20, sp, PT_R20 + st.d $r21, sp, PT_R21 + st.d $r22, sp, PT_R22 + st.d $r23, sp, PT_R23 + st.d $r24, sp, PT_R24 + st.d $r25, sp, PT_R25 + st.d $r26, sp, PT_R26 + st.d $r27, sp, PT_R27 + st.d $r28, sp, PT_R28 + st.d $r29, sp, PT_R29 + st.d $r30, sp, PT_R30 + st.d $r31, sp, PT_R31 + + la t0, acpi_saved_sp + st.d sp, t0, 0 +.endm + +/* Sleep code for Loongson-3 */ +SYM_CODE_START(loongarch_suspend_enter) + SETUP_SLEEP + + /* a0:address a1:L1_sets a2:L1_ways a3:L1_linesize */ + li.d a0, CSR_DMW0_INIT + la t0, loongarch_pcache_sets + ld.w a1, t0, 0 + la t0, loongarch_pcache_ways + ld.w a2, t0, 0 + la t0, loongarch_pcache_linesz + ld.w a3, t0, 0 +flushL1: + move t0, a2 +1: cacop 8, a0, 0 + cacop 9, a0, 0 + addi.d a0, a0, 1 + addi.w t0, t0, -1 + bnez t0, 1b + sub.d a0, a0, a2 + add.d a0, a0, a3 + addi.w a1, a1, -1 + bnez a1, flushL1 + + /* a0:nr_nodes a1:address a2:L2_sets a3:L2_ways t8:L2_linesize */ + la t0, loongarch_nr_nodes + ld.w a0, t0, 0 + li.d a1, CSR_DMW0_INIT + la t0, loongarch_scache_ways + ld.w a3, t0, 0 + la t0, loongarch_scache_linesz + ld.w t8, t0, 0 +flushL2_all: + la t0, loongarch_scache_sets + ld.w a2, t0, 0 + li.d t2, 0x100000000000 +flushL2_node: + move t0, a3 +1: cacop 0xb, a1, 0 + addi.d a1, a1, 1 + addi.w t0, t0, -1 + bnez t0, 1b + sub.d a1, a1, a3 + add.d a1, a1, t8 + addi.w a2, a2, -1 + bnez a2, flushL2_node + add.d a1, a1, t2 + addi.w a0, a0, -1 + bnez a0, flushL2_all + + /* Pass RA and SP to BIOS */ + addi.d a1, sp, 0 + la a0, loongarch_wakeup_start + la t0, loongarch_suspend_addr + ld.d t0, t0, 0 /* Call BIOS's STR sleep routine */ + jr t0 + nop +SYM_CODE_END(loongarch_suspend_enter) + +.macro SETUP_WAKEUP + ld.d $r1, sp, PT_R1 + ld.d $r2, sp, PT_R2 + ld.d $r3, sp, PT_R3 + ld.d $r4, sp, PT_R4 + ld.d $r5, sp, PT_R5 + ld.d $r6, sp, PT_R6 + ld.d $r7, sp, PT_R7 + ld.d $r8, sp, PT_R8 + ld.d $r9, sp, PT_R9 + ld.d $r10, sp, PT_R10 + ld.d $r11, sp, PT_R11 + ld.d $r20, sp, PT_R20 + ld.d $r21, sp, PT_R21 + ld.d $r22, sp, PT_R22 + ld.d $r23, sp, PT_R23 + ld.d $r24, sp, PT_R24 + ld.d $r25, sp, PT_R25 + ld.d $r26, sp, PT_R26 + ld.d $r27, sp, PT_R27 + ld.d $r28, sp, PT_R28 + ld.d $r29, sp, PT_R29 + ld.d $r30, sp, PT_R30 + ld.d $r31, sp, PT_R31 +.endm + + /* This is where we return upon wakeup. + * Reload all of the registers and return. + */ +SYM_CODE_START(loongarch_wakeup_start) + la t0, acpi_saved_sp + ld.d sp, t0, 0 + SETUP_WAKEUP + addi.d sp, sp, PT_SIZE + jr ra +SYM_CODE_END(loongarch_wakeup_start) -- Gitee From 7ac4cb10343ea05078d043e08ac784ff5cd39877 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 31 Dec 2020 15:13:33 +0800 Subject: [PATCH 35/59] LoongArch: Add Loongson-3 default config file Change-Id: I79ed0f9512063aad0ac15efa68a43d691333d2ce Signed-off-by: Huacai Chen --- arch/loongarch/Makefile | 2 + arch/loongarch/configs/loongson3_defconfig | 782 +++++++++++++++++++++ 2 files changed, 784 insertions(+) create mode 100644 arch/loongarch/configs/loongson3_defconfig diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index bcb9d808fe4f..54cd8cc48321 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -7,6 +7,8 @@ archscripts: scripts_basic $(Q)$(MAKE) $(build)=arch/loongarch/tools elf-entry $(Q)$(MAKE) $(build)=arch/loongarch/boot/tools relocs +KBUILD_DEFCONFIG := loongson3_defconfig + # # Select the object file format to substitute into the linker script. # diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig new file mode 100644 index 000000000000..1c91dd4193a9 --- /dev/null +++ b/arch/loongarch/configs/loongson3_defconfig @@ -0,0 +1,782 @@ +# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_KERNEL_LZMA=y +CONFIG_SYSVIPC=y +CONFIG_LOONGARCH=y +CONFIG_MACH_LOONGSON64=y +CONFIG_CPU_LOONGSON64=y +CONFIG_64BIT=y +CONFIG_DMI=y +CONFIG_POSIX_MQUEUE=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_PREEMPT=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_BSD_PROCESS_ACCT_V3=y +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_LOG_BUF_SHIFT=16 +CONFIG_NUMA_BALANCING=y +CONFIG_MEMCG=y +CONFIG_BLK_CGROUP=y +CONFIG_CFS_BANDWIDTH=y +CONFIG_RT_GROUP_SCHED=y +CONFIG_CGROUP_PIDS=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_HUGETLB=y +CONFIG_CPUSETS=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_PERF=y +CONFIG_CGROUP_BPF=y +CONFIG_NAMESPACES=y +CONFIG_USER_NS=y +CONFIG_CHECKPOINT_RESTORE=y +CONFIG_SCHED_AUTOGROUP=y +CONFIG_SYSFS_DEPRECATED=y +CONFIG_RELAY=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_BPF_SYSCALL=y +CONFIG_EXPERT=y +CONFIG_USERFAULTFD=y +CONFIG_PERF_EVENTS=y +# CONFIG_COMPAT_BRK is not set +CONFIG_PAGE_SIZE_16KB=y +CONFIG_CPU_HAS_LSX=y +CONFIG_CPU_HAS_LASX=y +CONFIG_NUMA=y +CONFIG_EFI=y +CONFIG_SMP=y +CONFIG_NR_CPUS=64 +CONFIG_HZ_250=y +CONFIG_KEXEC=y +CONFIG_HIBERNATION=y +CONFIG_ACPI=y +CONFIG_ACPI_SPCR_TABLE=y +CONFIG_ACPI_HOTPLUG_CPU=y +CONFIG_ACPI_TAD=y +CONFIG_ACPI_DOCK=y +CONFIG_ACPI_IPMI=m +CONFIG_ACPI_PCI_SLOT=y +CONFIG_ACPI_HOTPLUG_MEMORY=y +CONFIG_CPU_FREQ=y +CONFIG_CPU_FREQ_GOV_POWERSAVE=y +CONFIG_CPU_FREQ_GOV_USERSPACE=y +CONFIG_CPU_FREQ_GOV_ONDEMAND=y +CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y +CONFIG_EFI_CAPSULE_LOADER=m +CONFIG_EFI_TEST=m +CONFIG_MODULES=y +CONFIG_MODULE_FORCE_LOAD=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODULE_FORCE_UNLOAD=y +CONFIG_MODVERSIONS=y +CONFIG_BLK_DEV_THROTTLING=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_IOSCHED_BFQ=y +CONFIG_BFQ_GROUP_IOSCHED=y +CONFIG_BINFMT_MISC=m +CONFIG_MEMORY_HOTPLUG=y +CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE=y +CONFIG_MEMORY_HOTREMOVE=y +CONFIG_KSM=y +CONFIG_TRANSPARENT_HUGEPAGE=y +CONFIG_FRONTSWAP=y +CONFIG_ZSWAP=y +CONFIG_ZSWAP_COMPRESSOR_DEFAULT_ZSTD=y +CONFIG_ZPOOL=y +CONFIG_ZBUD=y +CONFIG_Z3FOLD=y +CONFIG_ZSMALLOC=m +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_XFRM_USER=y +CONFIG_NET_KEY=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_VERBOSE=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_IP_PNP_RARP=y +CONFIG_NET_IPIP=m +CONFIG_IP_MROUTE=y +CONFIG_INET_ESP=m +CONFIG_INET_UDP_DIAG=y +CONFIG_TCP_CONG_ADVANCED=y +CONFIG_TCP_CONG_BBR=m +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_IPV6_ROUTE_INFO=y +CONFIG_IPV6_MROUTE=y +CONFIG_NETWORK_PHY_TIMESTAMPING=y +CONFIG_NETFILTER=y +CONFIG_BRIDGE_NETFILTER=m +CONFIG_NETFILTER_NETLINK_LOG=m +CONFIG_NF_CONNTRACK=m +CONFIG_NF_LOG_NETDEV=m +CONFIG_NF_CONNTRACK_AMANDA=m +CONFIG_NF_CONNTRACK_FTP=m +CONFIG_NF_CONNTRACK_NETBIOS_NS=m +CONFIG_NF_CONNTRACK_TFTP=m +CONFIG_NF_CT_NETLINK=m +CONFIG_NF_TABLES=m +CONFIG_NFT_COUNTER=m +CONFIG_NFT_CONNLIMIT=m +CONFIG_NFT_LOG=m +CONFIG_NFT_LIMIT=m +CONFIG_NFT_MASQ=m +CONFIG_NFT_REDIR=m +CONFIG_NFT_NAT=m +CONFIG_NFT_TUNNEL=m +CONFIG_NFT_OBJREF=m +CONFIG_NFT_QUEUE=m +CONFIG_NFT_QUOTA=m +CONFIG_NFT_REJECT=m +CONFIG_NFT_COMPAT=m +CONFIG_NFT_HASH=m +CONFIG_NFT_SOCKET=m +CONFIG_NFT_OSF=m +CONFIG_NFT_TPROXY=m +CONFIG_NETFILTER_XT_SET=m +CONFIG_NETFILTER_XT_TARGET_AUDIT=m +CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m +CONFIG_NETFILTER_XT_TARGET_CONNMARK=m +CONFIG_NETFILTER_XT_TARGET_CT=m +CONFIG_NETFILTER_XT_TARGET_DSCP=m +CONFIG_NETFILTER_XT_TARGET_HMARK=m +CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m +CONFIG_NETFILTER_XT_TARGET_LED=m +CONFIG_NETFILTER_XT_TARGET_LOG=m +CONFIG_NETFILTER_XT_TARGET_MARK=m +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m +CONFIG_NETFILTER_XT_TARGET_TRACE=m +CONFIG_NETFILTER_XT_TARGET_SECMARK=m +CONFIG_NETFILTER_XT_TARGET_TCPMSS=m +CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m +CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m +CONFIG_NETFILTER_XT_MATCH_BPF=m +CONFIG_NETFILTER_XT_MATCH_CGROUP=m +CONFIG_NETFILTER_XT_MATCH_CLUSTER=m +CONFIG_NETFILTER_XT_MATCH_COMMENT=m +CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m +CONFIG_NETFILTER_XT_MATCH_CONNLABEL=m +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m +CONFIG_NETFILTER_XT_MATCH_CONNMARK=m +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m +CONFIG_NETFILTER_XT_MATCH_CPU=m +CONFIG_NETFILTER_XT_MATCH_DCCP=m +CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m +CONFIG_NETFILTER_XT_MATCH_DSCP=m +CONFIG_NETFILTER_XT_MATCH_ESP=m +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m +CONFIG_NETFILTER_XT_MATCH_HELPER=m +CONFIG_NETFILTER_XT_MATCH_IPCOMP=m +CONFIG_NETFILTER_XT_MATCH_IPRANGE=m +CONFIG_NETFILTER_XT_MATCH_IPVS=m +CONFIG_NETFILTER_XT_MATCH_LENGTH=m +CONFIG_NETFILTER_XT_MATCH_LIMIT=m +CONFIG_NETFILTER_XT_MATCH_MAC=m +CONFIG_NETFILTER_XT_MATCH_MARK=m +CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m +CONFIG_NETFILTER_XT_MATCH_NFACCT=m +CONFIG_NETFILTER_XT_MATCH_OSF=m +CONFIG_NETFILTER_XT_MATCH_OWNER=m +CONFIG_NETFILTER_XT_MATCH_POLICY=m +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m +CONFIG_NETFILTER_XT_MATCH_QUOTA=m +CONFIG_NETFILTER_XT_MATCH_RATEEST=m +CONFIG_NETFILTER_XT_MATCH_REALM=m +CONFIG_NETFILTER_XT_MATCH_SOCKET=m +CONFIG_NETFILTER_XT_MATCH_STATE=m +CONFIG_NETFILTER_XT_MATCH_STATISTIC=m +CONFIG_NETFILTER_XT_MATCH_STRING=m +CONFIG_NETFILTER_XT_MATCH_TCPMSS=m +CONFIG_NETFILTER_XT_MATCH_TIME=m +CONFIG_NETFILTER_XT_MATCH_U32=m +CONFIG_IP_SET=m +CONFIG_IP_VS=m +CONFIG_IP_VS_IPV6=y +CONFIG_IP_VS_PROTO_TCP=y +CONFIG_IP_VS_PROTO_UDP=y +CONFIG_IP_VS_RR=m +CONFIG_IP_VS_NFCT=y +CONFIG_NF_TABLES_IPV4=y +CONFIG_NFT_DUP_IPV4=m +CONFIG_NFT_FIB_IPV4=m +CONFIG_NF_TABLES_ARP=y +CONFIG_NF_LOG_ARP=m +CONFIG_IP_NF_IPTABLES=m +CONFIG_IP_NF_MATCH_AH=m +CONFIG_IP_NF_MATCH_ECN=m +CONFIG_IP_NF_MATCH_RPFILTER=m +CONFIG_IP_NF_MATCH_TTL=m +CONFIG_IP_NF_FILTER=m +CONFIG_IP_NF_TARGET_REJECT=m +CONFIG_IP_NF_TARGET_SYNPROXY=m +CONFIG_IP_NF_NAT=m +CONFIG_IP_NF_TARGET_MASQUERADE=m +CONFIG_IP_NF_TARGET_NETMAP=m +CONFIG_IP_NF_TARGET_REDIRECT=m +CONFIG_IP_NF_MANGLE=m +CONFIG_IP_NF_TARGET_CLUSTERIP=m +CONFIG_IP_NF_TARGET_ECN=m +CONFIG_IP_NF_TARGET_TTL=m +CONFIG_IP_NF_RAW=m +CONFIG_IP_NF_SECURITY=m +CONFIG_IP_NF_ARPTABLES=m +CONFIG_IP_NF_ARPFILTER=m +CONFIG_IP_NF_ARP_MANGLE=m +CONFIG_NF_TABLES_IPV6=y +CONFIG_IP6_NF_IPTABLES=y +CONFIG_IP6_NF_MATCH_AH=m +CONFIG_IP6_NF_MATCH_EUI64=m +CONFIG_IP6_NF_MATCH_FRAG=m +CONFIG_IP6_NF_MATCH_OPTS=m +CONFIG_IP6_NF_MATCH_IPV6HEADER=m +CONFIG_IP6_NF_MATCH_MH=m +CONFIG_IP6_NF_MATCH_RPFILTER=m +CONFIG_IP6_NF_MATCH_RT=m +CONFIG_IP6_NF_MATCH_SRH=m +CONFIG_IP6_NF_FILTER=y +CONFIG_IP6_NF_TARGET_REJECT=m +CONFIG_IP6_NF_TARGET_SYNPROXY=m +CONFIG_IP6_NF_MANGLE=m +CONFIG_IP6_NF_RAW=m +CONFIG_IP6_NF_SECURITY=m +CONFIG_IP6_NF_NAT=m +CONFIG_IP6_NF_TARGET_MASQUERADE=m +CONFIG_IP6_NF_TARGET_NPT=m +CONFIG_NF_TABLES_BRIDGE=m +CONFIG_BRIDGE_NF_EBTABLES=m +CONFIG_BRIDGE_EBT_BROUTE=m +CONFIG_BRIDGE_EBT_T_FILTER=m +CONFIG_BRIDGE_EBT_T_NAT=m +CONFIG_BRIDGE_EBT_ARP=m +CONFIG_BRIDGE_EBT_IP=m +CONFIG_BRIDGE_EBT_IP6=m +CONFIG_BPFILTER=y +CONFIG_IP_SCTP=m +CONFIG_RDS=y +CONFIG_L2TP=m +CONFIG_BRIDGE=m +CONFIG_VLAN_8021Q=m +CONFIG_VLAN_8021Q_GVRP=y +CONFIG_VLAN_8021Q_MVRP=y +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_HTB=m +CONFIG_NET_SCH_PRIO=m +CONFIG_NET_SCH_SFQ=m +CONFIG_NET_SCH_TBF=m +CONFIG_NET_SCH_NETEM=m +CONFIG_NET_SCH_INGRESS=m +CONFIG_NET_CLS_BASIC=m +CONFIG_NET_CLS_FW=m +CONFIG_NET_CLS_U32=m +CONFIG_NET_CLS_CGROUP=m +CONFIG_NET_CLS_BPF=m +CONFIG_NET_CLS_ACT=y +CONFIG_NET_ACT_POLICE=m +CONFIG_NET_ACT_GACT=m +CONFIG_NET_ACT_MIRRED=m +CONFIG_NET_ACT_IPT=m +CONFIG_NET_ACT_NAT=m +CONFIG_NET_ACT_BPF=m +CONFIG_OPENVSWITCH=m +CONFIG_NETLINK_DIAG=y +CONFIG_CGROUP_NET_PRIO=y +CONFIG_BT=m +CONFIG_BT_HCIBTUSB=m +# CONFIG_BT_HCIBTUSB_BCM is not set +CONFIG_CFG80211=m +CONFIG_CFG80211_WEXT=y +CONFIG_MAC80211=m +CONFIG_RFKILL=m +CONFIG_RFKILL_INPUT=y +CONFIG_NET_9P=y +CONFIG_CEPH_LIB=m +CONFIG_PCIEPORTBUS=y +CONFIG_HOTPLUG_PCI_PCIE=y +CONFIG_PCIEAER=y +# CONFIG_PCIEASPM is not set +CONFIG_PCI_IOV=y +CONFIG_HOTPLUG_PCI=y +CONFIG_HOTPLUG_PCI_SHPC=y +CONFIG_PCCARD=m +CONFIG_YENTA=m +CONFIG_RAPIDIO=y +CONFIG_RAPIDIO_TSI721=y +CONFIG_RAPIDIO_ENABLE_RX_TX_PORTS=y +CONFIG_RAPIDIO_ENUM_BASIC=m +CONFIG_RAPIDIO_CHMAN=m +CONFIG_RAPIDIO_MPORT_CDEV=m +CONFIG_UEVENT_HELPER=y +CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y +CONFIG_MTD=m +CONFIG_MTD_BLOCK=m +CONFIG_MTD_CFI=m +CONFIG_MTD_JEDECPROBE=m +CONFIG_MTD_CFI_INTELEXT=m +CONFIG_MTD_CFI_AMDSTD=m +CONFIG_MTD_CFI_STAA=m +CONFIG_MTD_RAM=m +CONFIG_MTD_ROM=m +CONFIG_PARPORT=y +CONFIG_PARPORT_PC=y +CONFIG_PARPORT_SERIAL=y +CONFIG_PARPORT_PC_FIFO=y +CONFIG_ZRAM=m +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_CRYPTOLOOP=y +CONFIG_BLK_DEV_NBD=m +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=8192 +CONFIG_BLK_DEV_RBD=m +CONFIG_BLK_DEV_NVME=y +CONFIG_EEPROM_AT24=m +CONFIG_BLK_DEV_SD=y +CONFIG_BLK_DEV_SR=y +CONFIG_CHR_DEV_SG=y +CONFIG_CHR_DEV_SCH=m +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_LOGGING=y +CONFIG_SCSI_SPI_ATTRS=m +CONFIG_SCSI_FC_ATTRS=m +CONFIG_SCSI_SAS_ATA=y +CONFIG_ISCSI_TCP=m +CONFIG_SCSI_MVSAS=y +# CONFIG_SCSI_MVSAS_DEBUG is not set +CONFIG_SCSI_MVSAS_TASKLET=y +CONFIG_SCSI_MVUMI=y +CONFIG_MEGARAID_NEWGEN=y +CONFIG_MEGARAID_MM=y +CONFIG_MEGARAID_MAILBOX=y +CONFIG_MEGARAID_LEGACY=y +CONFIG_MEGARAID_SAS=y +CONFIG_SCSI_MPT2SAS=y +CONFIG_LIBFC=m +CONFIG_LIBFCOE=m +CONFIG_FCOE=m +CONFIG_SCSI_QLOGIC_1280=m +CONFIG_SCSI_QLA_FC=m +CONFIG_TCM_QLA2XXX=m +CONFIG_SCSI_QLA_ISCSI=m +CONFIG_SCSI_LPFC=m +CONFIG_ATA=y +CONFIG_SATA_AHCI=y +CONFIG_SATA_AHCI_PLATFORM=y +CONFIG_PATA_ATIIXP=y +CONFIG_PATA_PCMCIA=m +CONFIG_MD=y +CONFIG_BLK_DEV_MD=m +CONFIG_MD_LINEAR=m +CONFIG_MD_RAID0=m +CONFIG_MD_RAID1=m +CONFIG_MD_RAID10=m +CONFIG_MD_RAID456=m +CONFIG_MD_MULTIPATH=m +CONFIG_BCACHE=m +CONFIG_BLK_DEV_DM=y +CONFIG_DM_CRYPT=m +CONFIG_DM_SNAPSHOT=m +CONFIG_DM_THIN_PROVISIONING=m +CONFIG_DM_CACHE=m +CONFIG_DM_WRITECACHE=m +CONFIG_DM_MIRROR=m +CONFIG_DM_RAID=m +CONFIG_DM_ZERO=m +CONFIG_DM_MULTIPATH=m +CONFIG_DM_MULTIPATH_QL=m +CONFIG_DM_MULTIPATH_ST=m +CONFIG_TARGET_CORE=m +CONFIG_TCM_IBLOCK=m +CONFIG_TCM_FILEIO=m +CONFIG_TCM_PSCSI=m +CONFIG_TCM_USER2=m +CONFIG_LOOPBACK_TARGET=m +CONFIG_ISCSI_TARGET=m +CONFIG_NETDEVICES=y +CONFIG_BONDING=m +CONFIG_DUMMY=y +CONFIG_WIREGUARD=m +CONFIG_MACVLAN=m +CONFIG_MACVTAP=m +CONFIG_IPVLAN=m +CONFIG_VXLAN=y +CONFIG_RIONET=m +CONFIG_TUN=m +CONFIG_VETH=m +# CONFIG_NET_VENDOR_3COM is not set +# CONFIG_NET_VENDOR_ADAPTEC is not set +# CONFIG_NET_VENDOR_AGERE is not set +# CONFIG_NET_VENDOR_ALACRITECH is not set +# CONFIG_NET_VENDOR_ALTEON is not set +# CONFIG_NET_VENDOR_AMAZON is not set +# CONFIG_NET_VENDOR_AMD is not set +# CONFIG_NET_VENDOR_AQUANTIA is not set +# CONFIG_NET_VENDOR_ARC is not set +# CONFIG_NET_VENDOR_ATHEROS is not set +CONFIG_BNX2=y +# CONFIG_NET_VENDOR_BROCADE is not set +# CONFIG_NET_VENDOR_CAVIUM is not set +CONFIG_CHELSIO_T1=m +CONFIG_CHELSIO_T1_1G=y +CONFIG_CHELSIO_T3=m +CONFIG_CHELSIO_T4=m +# CONFIG_NET_VENDOR_CIRRUS is not set +# CONFIG_NET_VENDOR_CISCO is not set +# CONFIG_NET_VENDOR_DEC is not set +# CONFIG_NET_VENDOR_DLINK is not set +# CONFIG_NET_VENDOR_EMULEX is not set +# CONFIG_NET_VENDOR_EZCHIP is not set +# CONFIG_NET_VENDOR_I825XX is not set +CONFIG_E1000=y +CONFIG_E1000E=y +CONFIG_IGB=y +CONFIG_IXGB=y +CONFIG_IXGBE=y +# CONFIG_NET_VENDOR_MARVELL is not set +# CONFIG_NET_VENDOR_MELLANOX is not set +# CONFIG_NET_VENDOR_MICREL is not set +# CONFIG_NET_VENDOR_MYRI is not set +# CONFIG_NET_VENDOR_NATSEMI is not set +# CONFIG_NET_VENDOR_NETRONOME is not set +# CONFIG_NET_VENDOR_NVIDIA is not set +# CONFIG_NET_VENDOR_OKI is not set +# CONFIG_NET_VENDOR_QLOGIC is not set +# CONFIG_NET_VENDOR_QUALCOMM is not set +# CONFIG_NET_VENDOR_RDC is not set +CONFIG_8139CP=m +CONFIG_8139TOO=m +CONFIG_R8169=y +# CONFIG_NET_VENDOR_RENESAS is not set +# CONFIG_NET_VENDOR_ROCKER is not set +# CONFIG_NET_VENDOR_SAMSUNG is not set +# CONFIG_NET_VENDOR_SEEQ is not set +# CONFIG_NET_VENDOR_SOLARFLARE is not set +# CONFIG_NET_VENDOR_SILAN is not set +# CONFIG_NET_VENDOR_SIS is not set +# CONFIG_NET_VENDOR_SMSC is not set +CONFIG_STMMAC_ETH=y +# CONFIG_NET_VENDOR_SUN is not set +# CONFIG_NET_VENDOR_TEHUTI is not set +# CONFIG_NET_VENDOR_TI is not set +# CONFIG_NET_VENDOR_VIA is not set +# CONFIG_NET_VENDOR_WIZNET is not set +# CONFIG_NET_VENDOR_XILINX is not set +CONFIG_PPP=m +CONFIG_PPP_BSDCOMP=m +CONFIG_PPP_DEFLATE=m +CONFIG_PPP_FILTER=y +CONFIG_PPP_MPPE=m +CONFIG_PPP_MULTILINK=y +CONFIG_PPPOE=m +CONFIG_PPPOL2TP=m +CONFIG_PPP_ASYNC=m +CONFIG_PPP_SYNC_TTY=m +CONFIG_USB_RTL8150=m +CONFIG_USB_RTL8152=m +# CONFIG_USB_NET_AX8817X is not set +# CONFIG_USB_NET_AX88179_178A is not set +CONFIG_USB_NET_CDC_EEM=m +CONFIG_USB_NET_HUAWEI_CDC_NCM=m +CONFIG_USB_NET_CDC_MBIM=m +# CONFIG_USB_NET_NET1080 is not set +# CONFIG_USB_BELKIN is not set +# CONFIG_USB_ARMLINUX is not set +# CONFIG_USB_NET_ZAURUS is not set +CONFIG_ATH9K=m +CONFIG_ATH9K_HTC=m +CONFIG_IWLWIFI=m +CONFIG_IWLDVM=m +CONFIG_IWLMVM=m +CONFIG_IWLWIFI_BCAST_FILTERING=y +CONFIG_HOSTAP=m +CONFIG_MT7601U=m +CONFIG_RT2X00=m +CONFIG_RT2800USB=m +CONFIG_RTL8192CE=m +CONFIG_RTL8192SE=m +CONFIG_RTL8192DE=m +CONFIG_RTL8723AE=m +CONFIG_RTL8723BE=m +CONFIG_RTL8188EE=m +CONFIG_RTL8192EE=m +CONFIG_RTL8821AE=m +CONFIG_RTL8192CU=m +# CONFIG_RTLWIFI_DEBUG is not set +CONFIG_RTL8XXXU=m +CONFIG_ZD1211RW=m +CONFIG_USB_NET_RNDIS_WLAN=m +CONFIG_INPUT_POLLDEV=m +CONFIG_INPUT_MOUSEDEV=y +CONFIG_INPUT_MOUSEDEV_PSAUX=y +CONFIG_KEYBOARD_XTKBD=m +CONFIG_MOUSE_PS2_ELANTECH=y +CONFIG_MOUSE_PS2_SENTELIC=y +CONFIG_MOUSE_SERIAL=m +CONFIG_INPUT_MISC=y +CONFIG_INPUT_UINPUT=m +CONFIG_SERIO_SERPORT=m +CONFIG_SERIO_RAW=m +CONFIG_LEGACY_PTY_COUNT=16 +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_NR_UARTS=16 +CONFIG_SERIAL_8250_RUNTIME_UARTS=16 +CONFIG_SERIAL_8250_EXTENDED=y +CONFIG_SERIAL_8250_MANY_PORTS=y +CONFIG_SERIAL_8250_SHARE_IRQ=y +CONFIG_SERIAL_8250_RSA=y +CONFIG_SERIAL_OF_PLATFORM=y +CONFIG_SERIAL_NONSTANDARD=y +CONFIG_PRINTER=m +CONFIG_IPMI_HANDLER=m +CONFIG_IPMI_DEVICE_INTERFACE=m +CONFIG_IPMI_SI=m +CONFIG_HW_RANDOM=y +CONFIG_RAW_DRIVER=m +CONFIG_I2C_CHARDEV=y +CONFIG_I2C_PIIX4=y +CONFIG_I2C_GPIO=y +CONFIG_SPI=y +CONFIG_GPIO_SYSFS=y +CONFIG_GPIO_LOONGSON=y +CONFIG_SENSORS_LM75=m +CONFIG_SENSORS_LM93=m +CONFIG_SENSORS_W83795=m +CONFIG_SENSORS_W83627HF=m +CONFIG_RC_CORE=m +CONFIG_LIRC=y +CONFIG_RC_DECODERS=y +CONFIG_IR_NEC_DECODER=m +CONFIG_IR_RC5_DECODER=m +CONFIG_IR_RC6_DECODER=m +CONFIG_IR_JVC_DECODER=m +CONFIG_IR_SONY_DECODER=m +CONFIG_IR_SANYO_DECODER=m +CONFIG_IR_SHARP_DECODER=m +CONFIG_IR_MCE_KBD_DECODER=m +CONFIG_IR_XMP_DECODER=m +CONFIG_IR_IMON_DECODER=m +CONFIG_MEDIA_SUPPORT=m +CONFIG_MEDIA_USB_SUPPORT=y +CONFIG_USB_VIDEO_CLASS=m +CONFIG_MEDIA_PCI_SUPPORT=y +CONFIG_VIDEO_BT848=m +CONFIG_DVB_BT8XX=m +CONFIG_DRM=y +CONFIG_DRM_RADEON=m +CONFIG_DRM_RADEON_USERPTR=y +CONFIG_DRM_AMDGPU=m +CONFIG_DRM_AMDGPU_SI=y +CONFIG_DRM_AMDGPU_CIK=y +CONFIG_DRM_AMDGPU_USERPTR=y +CONFIG_DRM_AST=y +CONFIG_FB_EFI=y +CONFIG_FB_RADEON=y +CONFIG_LCD_PLATFORM=m +# CONFIG_VGA_CONSOLE is not set +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y +CONFIG_LOGO=y +CONFIG_SOUND=y +CONFIG_SND=y +CONFIG_SND_SEQUENCER=m +CONFIG_SND_SEQ_DUMMY=m +# CONFIG_SND_ISA is not set +CONFIG_SND_BT87X=m +CONFIG_SND_BT87X_OVERCLOCK=y +CONFIG_SND_HDA_INTEL=y +CONFIG_SND_HDA_HWDEP=y +CONFIG_SND_HDA_INPUT_BEEP=y +CONFIG_SND_HDA_PATCH_LOADER=y +CONFIG_SND_HDA_CODEC_REALTEK=y +CONFIG_SND_HDA_CODEC_SIGMATEL=y +CONFIG_SND_HDA_CODEC_HDMI=y +CONFIG_SND_HDA_CODEC_CONEXANT=y +CONFIG_SND_USB_AUDIO=m +CONFIG_HIDRAW=y +CONFIG_UHID=m +CONFIG_HID_A4TECH=m +CONFIG_HID_CHERRY=m +CONFIG_HID_LOGITECH=m +CONFIG_HID_LOGITECH_DJ=m +CONFIG_LOGITECH_FF=y +CONFIG_LOGIRUMBLEPAD2_FF=y +CONFIG_LOGIG940_FF=y +CONFIG_HID_MICROSOFT=m +CONFIG_HID_MULTITOUCH=m +CONFIG_HID_SUNPLUS=m +CONFIG_USB_HIDDEV=y +CONFIG_USB=y +CONFIG_USB_OTG=y +CONFIG_USB_MON=y +CONFIG_USB_XHCI_HCD=y +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_EHCI_ROOT_HUB_TT=y +CONFIG_USB_EHCI_HCD_PLATFORM=y +CONFIG_USB_OHCI_HCD=y +CONFIG_USB_OHCI_HCD_PLATFORM=y +CONFIG_USB_UHCI_HCD=m +CONFIG_USB_ACM=m +CONFIG_USB_PRINTER=m +CONFIG_USB_STORAGE=m +CONFIG_USB_STORAGE_REALTEK=m +CONFIG_USB_UAS=m +CONFIG_USB_DWC2=y +CONFIG_USB_DWC2_HOST=y +CONFIG_USB_SERIAL=m +CONFIG_USB_SERIAL_CH341=m +CONFIG_USB_SERIAL_CP210X=m +CONFIG_USB_SERIAL_FTDI_SIO=m +CONFIG_USB_SERIAL_PL2303=m +CONFIG_USB_SERIAL_OPTION=m +CONFIG_USB_GADGET=y +CONFIG_INFINIBAND=m +CONFIG_RTC_CLASS=y +CONFIG_RTC_DRV_EFI=y +CONFIG_UIO=m +CONFIG_UIO_PDRV_GENIRQ=m +CONFIG_UIO_DMEM_GENIRQ=m +CONFIG_UIO_PCI_GENERIC=m +# CONFIG_VIRTIO_MENU is not set +CONFIG_STAGING=y +CONFIG_COMEDI=m +CONFIG_COMEDI_PCI_DRIVERS=m +CONFIG_COMEDI_8255_PCI=m +CONFIG_COMEDI_ADL_PCI6208=m +CONFIG_COMEDI_ADL_PCI7X3X=m +CONFIG_COMEDI_ADL_PCI8164=m +CONFIG_COMEDI_ADL_PCI9111=m +CONFIG_COMEDI_ADL_PCI9118=m +CONFIG_COMEDI_ADV_PCI1710=m +CONFIG_COMEDI_ADV_PCI1720=m +CONFIG_COMEDI_ADV_PCI1723=m +CONFIG_COMEDI_ADV_PCI1724=m +CONFIG_COMEDI_ADV_PCI1760=m +CONFIG_COMEDI_ADV_PCI_DIO=m +CONFIG_COMEDI_NI_LABPC_PCI=m +CONFIG_COMEDI_NI_PCIDIO=m +CONFIG_COMEDI_NI_PCIMIO=m +CONFIG_R8188EU=m +# CONFIG_88EU_AP_MODE is not set +CONFIG_PM_DEVFREQ=y +CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND=y +CONFIG_DEVFREQ_GOV_PERFORMANCE=y +CONFIG_DEVFREQ_GOV_POWERSAVE=y +CONFIG_DEVFREQ_GOV_USERSPACE=y +CONFIG_PWM=y +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_EXT2_FS_POSIX_ACL=y +CONFIG_EXT2_FS_SECURITY=y +CONFIG_EXT3_FS=y +CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT3_FS_SECURITY=y +CONFIG_XFS_FS=y +CONFIG_XFS_QUOTA=y +CONFIG_XFS_POSIX_ACL=y +CONFIG_BTRFS_FS=y +CONFIG_FANOTIFY=y +CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y +CONFIG_QUOTA=y +# CONFIG_PRINT_QUOTA_WARNING is not set +CONFIG_QFMT_V1=m +CONFIG_QFMT_V2=m +CONFIG_AUTOFS4_FS=y +CONFIG_FUSE_FS=m +CONFIG_OVERLAY_FS=y +CONFIG_OVERLAY_FS_INDEX=y +CONFIG_OVERLAY_FS_XINO_AUTO=y +CONFIG_OVERLAY_FS_METACOPY=y +CONFIG_FSCACHE=y +CONFIG_ISO9660_FS=y +CONFIG_JOLIET=y +CONFIG_ZISOFS=y +CONFIG_UDF_FS=y +CONFIG_MSDOS_FS=m +CONFIG_VFAT_FS=m +CONFIG_FAT_DEFAULT_CODEPAGE=936 +CONFIG_FAT_DEFAULT_IOCHARSET="gb2312" +CONFIG_PROC_KCORE=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_HUGETLBFS=y +CONFIG_CONFIGFS_FS=y +CONFIG_HFS_FS=m +CONFIG_HFSPLUS_FS=m +CONFIG_CRAMFS=m +CONFIG_SQUASHFS=y +CONFIG_SQUASHFS_XATTR=y +CONFIG_SQUASHFS_LZ4=y +CONFIG_SQUASHFS_LZO=y +CONFIG_SQUASHFS_XZ=y +CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y +CONFIG_NFS_V4=y +CONFIG_NFS_V4_1=y +CONFIG_NFS_V4_2=y +CONFIG_ROOT_NFS=y +CONFIG_NFSD=y +CONFIG_NFSD_V3_ACL=y +CONFIG_NFSD_V4=y +CONFIG_NFSD_BLOCKLAYOUT=y +CONFIG_CIFS=m +# CONFIG_CIFS_DEBUG is not set +CONFIG_9P_FS=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_CODEPAGE_936=y +CONFIG_NLS_ASCII=y +CONFIG_NLS_UTF8=y +CONFIG_KEY_DH_OPERATIONS=y +CONFIG_SECURITY=y +CONFIG_SECURITY_SELINUX=y +CONFIG_SECURITY_SELINUX_BOOTPARAM=y +CONFIG_SECURITY_SELINUX_DISABLE=y +CONFIG_SECURITY_APPARMOR=y +CONFIG_SECURITY_YAMA=y +CONFIG_DEFAULT_SECURITY_DAC=y +CONFIG_CRYPTO_USER=m +# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set +CONFIG_CRYPTO_PCRYPT=m +CONFIG_CRYPTO_CRYPTD=m +CONFIG_CRYPTO_CHACHA20POLY1305=m +CONFIG_CRYPTO_HMAC=y +CONFIG_CRYPTO_VMAC=m +CONFIG_CRYPTO_TGR192=m +CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_ANUBIS=m +CONFIG_CRYPTO_BLOWFISH=m +CONFIG_CRYPTO_CAST5=m +CONFIG_CRYPTO_CAST6=m +CONFIG_CRYPTO_KHAZAD=m +CONFIG_CRYPTO_SALSA20=m +CONFIG_CRYPTO_SEED=m +CONFIG_CRYPTO_SERPENT=m +CONFIG_CRYPTO_TEA=m +CONFIG_CRYPTO_TWOFISH=m +CONFIG_CRYPTO_DEFLATE=m +CONFIG_CRYPTO_LZO=m +CONFIG_CRYPTO_842=m +CONFIG_CRYPTO_LZ4=m +CONFIG_CRYPTO_LZ4HC=m +CONFIG_CRYPTO_USER_API_HASH=m +CONFIG_CRYPTO_USER_API_SKCIPHER=m +CONFIG_CRYPTO_USER_API_RNG=m +CONFIG_CRYPTO_USER_API_AEAD=m +CONFIG_PRINTK_TIME=y +CONFIG_STRIP_ASM_SYMS=y +CONFIG_MAGIC_SYSRQ=y +# CONFIG_SCHED_DEBUG is not set +CONFIG_SCHEDSTATS=y +# CONFIG_DEBUG_PREEMPT is not set +# CONFIG_FTRACE is not set +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="e1000e.InterruptThrottleRate=4,4,4,4" -- Gitee From 7d2f69e8b91f23c46b7a7ef058875e4616f43598 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sun, 2 May 2021 19:16:12 +0800 Subject: [PATCH 36/59] MAINTAINERS: Add maintainer information for LoongArch Add the maintainer information for the LoongArch (LA or LArch for short) architecture. Change-Id: Ia9515201c479082005cb7c4ac095bb1c1e11ac43 Signed-off-by: Huacai Chen --- MAINTAINERS | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 23a23bd94c00..e428b036ef70 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10350,6 +10350,14 @@ S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/hid/hid.git F: drivers/hid/hid-lg-g15.c +LOONGARCH +M: Huacai Chen +S: Maintained +T: git git://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson.git +F: Documentation/loongson/ +F: arch/loongarch/ +F: drivers/platform/loongarch/ + LSILOGIC MPT FUSION DRIVERS (FC/SAS/SPI) M: Sathya Prakash M: Sreekanth Reddy -- Gitee From 8b3a0336ddf78ffc5e294f1d760c5cb159d7d71e Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 17 Nov 2021 15:16:39 +0800 Subject: [PATCH 37/59] tools/perf: Add basic support for LoongArch Change-Id: I1987545f22b1908ace2a337576272ecb3e19eb95 Signed-off-by: Huacai Chen --- scripts/Makefile | 9 +- .../loongarch/include/uapi/asm/perf_regs.h | 40 +++++++++ .../arch/loongarch/include/uapi/asm/unistd.h | 22 +++++ tools/perf/Makefile.config | 7 ++ tools/perf/arch/loongarch/Build | 1 + tools/perf/arch/loongarch/Makefile | 27 ++++++ .../arch/loongarch/annotate/instructions.c | 45 ++++++++++ .../loongarch/entry/syscalls/mksyscalltbl | 60 +++++++++++++ .../arch/loongarch/include/dwarf-regs-table.h | 27 ++++++ tools/perf/arch/loongarch/include/perf_regs.h | 88 +++++++++++++++++++ tools/perf/arch/loongarch/util/Build | 4 + tools/perf/arch/loongarch/util/dwarf-regs.c | 33 +++++++ tools/perf/arch/loongarch/util/perf_regs.c | 6 ++ .../arch/loongarch/util/unwind-libunwind.c | 82 +++++++++++++++++ tools/perf/util/annotate.c | 8 ++ tools/perf/util/dwarf-regs.c | 7 ++ tools/perf/util/env.c | 2 + tools/perf/util/genelf.h | 3 + tools/perf/util/syscalltbl.c | 4 + tools/scripts/Makefile.arch | 12 ++- 20 files changed, 485 insertions(+), 2 deletions(-) create mode 100644 tools/arch/loongarch/include/uapi/asm/perf_regs.h create mode 100644 tools/arch/loongarch/include/uapi/asm/unistd.h create mode 100644 tools/perf/arch/loongarch/Build create mode 100644 tools/perf/arch/loongarch/Makefile create mode 100644 tools/perf/arch/loongarch/annotate/instructions.c create mode 100755 tools/perf/arch/loongarch/entry/syscalls/mksyscalltbl create mode 100644 tools/perf/arch/loongarch/include/dwarf-regs-table.h create mode 100644 tools/perf/arch/loongarch/include/perf_regs.h create mode 100644 tools/perf/arch/loongarch/util/Build create mode 100644 tools/perf/arch/loongarch/util/dwarf-regs.c create mode 100644 tools/perf/arch/loongarch/util/perf_regs.c create mode 100644 tools/perf/arch/loongarch/util/unwind-libunwind.c diff --git a/scripts/Makefile b/scripts/Makefile index 9adb6d247818..5d89c2ee2748 100644 --- a/scripts/Makefile +++ b/scripts/Makefile @@ -24,10 +24,17 @@ HOSTCFLAGS_extract-cert.o = $(CRYPTO_CFLAGS) HOSTLDLIBS_extract-cert = $(CRYPTO_LIBS) ifdef CONFIG_UNWINDER_ORC +# Additional ARCH settings for x86 ifeq ($(ARCH),x86_64) ARCH := x86 endif -HOSTCFLAGS_sorttable.o += -I$(srctree)/tools/arch/x86/include + +# Additional ARCH settings for loongarch +ifeq ($(ARCH),loongarch64) +ARCH := loongarch +endif + +HOSTCFLAGS_sorttable.o += -I$(srctree)/tools/arch/$(ARCH)/include HOSTCFLAGS_sorttable.o += -DUNWINDER_ORC_ENABLED HOSTLDLIBS_sorttable = -lpthread endif diff --git a/tools/arch/loongarch/include/uapi/asm/perf_regs.h b/tools/arch/loongarch/include/uapi/asm/perf_regs.h new file mode 100644 index 000000000000..9943d418e01d --- /dev/null +++ b/tools/arch/loongarch/include/uapi/asm/perf_regs.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ASM_LOONGARCH_PERF_REGS_H +#define _ASM_LOONGARCH_PERF_REGS_H + +enum perf_event_loongarch_regs { + PERF_REG_LOONGARCH_PC, + PERF_REG_LOONGARCH_R1, + PERF_REG_LOONGARCH_R2, + PERF_REG_LOONGARCH_R3, + PERF_REG_LOONGARCH_R4, + PERF_REG_LOONGARCH_R5, + PERF_REG_LOONGARCH_R6, + PERF_REG_LOONGARCH_R7, + PERF_REG_LOONGARCH_R8, + PERF_REG_LOONGARCH_R9, + PERF_REG_LOONGARCH_R10, + PERF_REG_LOONGARCH_R11, + PERF_REG_LOONGARCH_R12, + PERF_REG_LOONGARCH_R13, + PERF_REG_LOONGARCH_R14, + PERF_REG_LOONGARCH_R15, + PERF_REG_LOONGARCH_R16, + PERF_REG_LOONGARCH_R17, + PERF_REG_LOONGARCH_R18, + PERF_REG_LOONGARCH_R19, + PERF_REG_LOONGARCH_R20, + PERF_REG_LOONGARCH_R21, + PERF_REG_LOONGARCH_R22, + PERF_REG_LOONGARCH_R23, + PERF_REG_LOONGARCH_R24, + PERF_REG_LOONGARCH_R25, + PERF_REG_LOONGARCH_R26, + PERF_REG_LOONGARCH_R27, + PERF_REG_LOONGARCH_R28, + PERF_REG_LOONGARCH_R29, + PERF_REG_LOONGARCH_R30, + PERF_REG_LOONGARCH_R31, + PERF_REG_LOONGARCH_MAX = PERF_REG_LOONGARCH_R31 + 1, +}; +#endif /* _ASM_LOONGARCH_PERF_REGS_H */ diff --git a/tools/arch/loongarch/include/uapi/asm/unistd.h b/tools/arch/loongarch/include/uapi/asm/unistd.h new file mode 100644 index 000000000000..d3666a55f7a6 --- /dev/null +++ b/tools/arch/loongarch/include/uapi/asm/unistd.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (C) 2020-2021 Loongson Technology Corporation Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#define __ARCH_WANT_NEW_STAT +#define __ARCH_WANT_SYS_CLONE +#define __ARCH_WANT_SYS_CLONE3 + +#include diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 99266f1da1b2..7cda0a8366d4 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -74,6 +74,13 @@ ifeq ($(SRCARCH),arm64) LIBUNWIND_LIBS = -lunwind -lunwind-aarch64 endif +ifeq ($(SRCARCH),loongarch) + NO_PERF_REGS := 0 + CFLAGS += -I$(OUTPUT)arch/loongarch/include/generated + CFLAGS += -I$(OUTPUT)../arch/loongarch/include/uapi + LIBUNWIND_LIBS = -lunwind -lunwind-loongarch +endif + ifeq ($(SRCARCH),riscv) NO_PERF_REGS := 0 endif diff --git a/tools/perf/arch/loongarch/Build b/tools/perf/arch/loongarch/Build new file mode 100644 index 000000000000..e4e5f33c84d8 --- /dev/null +++ b/tools/perf/arch/loongarch/Build @@ -0,0 +1 @@ +perf-y += util/ diff --git a/tools/perf/arch/loongarch/Makefile b/tools/perf/arch/loongarch/Makefile new file mode 100644 index 000000000000..1229157b09e1 --- /dev/null +++ b/tools/perf/arch/loongarch/Makefile @@ -0,0 +1,27 @@ +# SPDX-License-Identifier: GPL-2.0 +ifndef NO_DWARF +PERF_HAVE_DWARF_REGS := 1 +endif +PERF_HAVE_JITDUMP := 1 + +# +# Syscall table generation for perf +# + +out := $(OUTPUT)arch/loongarch/include/generated/asm +header := $(out)/syscalls_64.c +incpath := $(srctree)/tools +sysdef := $(srctree)/tools/arch/loongarch/include/uapi/asm/unistd.h +sysprf := $(srctree)/tools/perf/arch/loongarch/entry/syscalls/ +systbl := $(sysprf)/mksyscalltbl + +# Create output directory if not already present +_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') + +$(header): $(sysdef) $(systbl) + $(Q)$(SHELL) '$(systbl)' '$(CC)' '$(HOSTCC)' $(incpath) $(sysdef) > $@ + +clean:: + $(call QUIET_CLEAN, loongarch) $(RM) $(header) + +archheaders: $(header) diff --git a/tools/perf/arch/loongarch/annotate/instructions.c b/tools/perf/arch/loongarch/annotate/instructions.c new file mode 100644 index 000000000000..206e1fca38a4 --- /dev/null +++ b/tools/perf/arch/loongarch/annotate/instructions.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Perf annotate functions. + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ + +static +struct ins_ops *loongarch__associate_ins_ops(struct arch *arch, const char *name) +{ + struct ins_ops *ops = NULL; + + if (!strncmp(name, "beqz", 4) || + !strncmp(name, "bnez", 4) || + !strncmp(name, "beq", 3) || + !strncmp(name, "bne", 3) || + !strncmp(name, "blt", 3) || + !strncmp(name, "bge", 3) || + !strncmp(name, "bltu", 4) || + !strncmp(name, "bgeu", 4) || + !strncmp(name, "bl", 2)) + ops = &call_ops; + else if (!strncmp(name, "jirl", 4)) + ops = &ret_ops; + else if (name[0] == 'b') + ops = &jump_ops; + else + return NULL; + + arch__associate_ins_ops(arch, name, ops); + + return ops; +} + +static +int loongarch__annotate_init(struct arch *arch, char *cpuid __maybe_unused) +{ + if (!arch->initialized) { + arch->associate_instruction_ops = loongarch__associate_ins_ops; + arch->initialized = true; + arch->objdump.comment_char = '#'; + } + + return 0; +} diff --git a/tools/perf/arch/loongarch/entry/syscalls/mksyscalltbl b/tools/perf/arch/loongarch/entry/syscalls/mksyscalltbl new file mode 100755 index 000000000000..86dad5a018b7 --- /dev/null +++ b/tools/perf/arch/loongarch/entry/syscalls/mksyscalltbl @@ -0,0 +1,60 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Generate system call table for perf. Derived from +# powerpc script. +# +# Copyright (C) 2020 Loongson Technology Co., Ltd. +# Author(s): Ming Wang + +gcc=$1 +hostcc=$2 +incpath=$3 +input=$4 + +if ! test -r $input; then + echo "Could not read input file" >&2 + exit 1 +fi + +create_table_from_c() +{ + local sc nr last_sc + + create_table_exe=`mktemp ${TMPDIR:-/tmp}/create-table-XXXXXX` + + { + + cat <<-_EoHEADER + #include + #include "$input" + int main(int argc, char *argv[]) + { + _EoHEADER + + while read sc nr; do + printf "%s\n" " printf(\"\\t[%d] = \\\"$sc\\\",\\n\", $nr);" + last_sc=$nr + done + + printf "%s\n" " printf(\"#define SYSCALLTBL_LOONGARCH_MAX_ID %d\\n\", $last_sc);" + printf "}\n" + + } | $hostcc -I $incpath/include/uapi -o $create_table_exe -x c - + + $create_table_exe + + rm -f $create_table_exe +} + +create_table() +{ + echo "static const char *syscalltbl_loongarch[] = {" + create_table_from_c + echo "};" +} + +$gcc -E -dM -x c -I $incpath/include/uapi $input \ + |sed -ne 's/^#define __NR_//p' \ + |sort -t' ' -k2 -n \ + |create_table diff --git a/tools/perf/arch/loongarch/include/dwarf-regs-table.h b/tools/perf/arch/loongarch/include/dwarf-regs-table.h new file mode 100644 index 000000000000..676c54a226a5 --- /dev/null +++ b/tools/perf/arch/loongarch/include/dwarf-regs-table.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * dwarf-regs-table.h : Mapping of DWARF debug register numbers into + * register names. + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifdef DEFINE_DWARF_REGSTR_TABLE +static const char * const loongarch_regstr_tbl[] = { + "$0", "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", "$9", + "$10", "$11", "$12", "$13", "$14", "$15", "$16", "$17", "$18", "$19", + "$20", "$21", "$22", "$23", "$24", "$25", "$26", "$27", "$28", "%29", + "$30", "$31", +}; +#endif diff --git a/tools/perf/arch/loongarch/include/perf_regs.h b/tools/perf/arch/loongarch/include/perf_regs.h new file mode 100644 index 000000000000..82d531dcd90f --- /dev/null +++ b/tools/perf/arch/loongarch/include/perf_regs.h @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef ARCH_PERF_REGS_H +#define ARCH_PERF_REGS_H + +#include +#include +#include + +#define PERF_REGS_MAX PERF_REG_LOONGARCH_MAX +#define PERF_REG_IP PERF_REG_LOONGARCH_PC +#define PERF_REG_SP PERF_REG_LOONGARCH_R3 + +#define PERF_REGS_MASK ((1ULL << PERF_REG_LOONGARCH_MAX) - 1) + +static inline const char *__perf_reg_name(int id) +{ + switch (id) { + case PERF_REG_LOONGARCH_PC: + return "PC"; + case PERF_REG_LOONGARCH_R1: + return "$1"; + case PERF_REG_LOONGARCH_R2: + return "$2"; + case PERF_REG_LOONGARCH_R3: + return "$3"; + case PERF_REG_LOONGARCH_R4: + return "$4"; + case PERF_REG_LOONGARCH_R5: + return "$5"; + case PERF_REG_LOONGARCH_R6: + return "$6"; + case PERF_REG_LOONGARCH_R7: + return "$7"; + case PERF_REG_LOONGARCH_R8: + return "$8"; + case PERF_REG_LOONGARCH_R9: + return "$9"; + case PERF_REG_LOONGARCH_R10: + return "$10"; + case PERF_REG_LOONGARCH_R11: + return "$11"; + case PERF_REG_LOONGARCH_R12: + return "$12"; + case PERF_REG_LOONGARCH_R13: + return "$13"; + case PERF_REG_LOONGARCH_R14: + return "$14"; + case PERF_REG_LOONGARCH_R15: + return "$15"; + case PERF_REG_LOONGARCH_R16: + return "$16"; + case PERF_REG_LOONGARCH_R17: + return "$17"; + case PERF_REG_LOONGARCH_R18: + return "$18"; + case PERF_REG_LOONGARCH_R19: + return "$19"; + case PERF_REG_LOONGARCH_R20: + return "$20"; + case PERF_REG_LOONGARCH_R21: + return "$21"; + case PERF_REG_LOONGARCH_R22: + return "$22"; + case PERF_REG_LOONGARCH_R23: + return "$23"; + case PERF_REG_LOONGARCH_R24: + return "$24"; + case PERF_REG_LOONGARCH_R25: + return "$25"; + case PERF_REG_LOONGARCH_R26: + return "$26"; + case PERF_REG_LOONGARCH_R27: + return "$27"; + case PERF_REG_LOONGARCH_R28: + return "$28"; + case PERF_REG_LOONGARCH_R29: + return "$29"; + case PERF_REG_LOONGARCH_R30: + return "$30"; + case PERF_REG_LOONGARCH_R31: + return "$31"; + default: + break; + } + return NULL; +} + +#endif /* ARCH_PERF_REGS_H */ diff --git a/tools/perf/arch/loongarch/util/Build b/tools/perf/arch/loongarch/util/Build new file mode 100644 index 000000000000..fab48acf21c5 --- /dev/null +++ b/tools/perf/arch/loongarch/util/Build @@ -0,0 +1,4 @@ +perf-y += perf_regs.o + +perf-$(CONFIG_DWARF) += dwarf-regs.o +perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o diff --git a/tools/perf/arch/loongarch/util/dwarf-regs.c b/tools/perf/arch/loongarch/util/dwarf-regs.c new file mode 100644 index 000000000000..bbd343e3191f --- /dev/null +++ b/tools/perf/arch/loongarch/util/dwarf-regs.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * dwarf-regs.c : Mapping of DWARF debug register numbers into register names. + * + * Copyright (C) 2013 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include + +static const char *loongarch_gpr_names[32] = { + "$0", "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", "$9", + "$10", "$11", "$12", "$13", "$14", "$15", "$16", "$17", "$18", "$19", + "$20", "$21", "$22", "$23", "$24", "$25", "$26", "$27", "$28", "$29", + "$30", "$31" +}; + +const char *get_arch_regstr(unsigned int n) +{ + n %= 32; + return loongarch_gpr_names[n]; +} diff --git a/tools/perf/arch/loongarch/util/perf_regs.c b/tools/perf/arch/loongarch/util/perf_regs.c new file mode 100644 index 000000000000..2833e101a7c6 --- /dev/null +++ b/tools/perf/arch/loongarch/util/perf_regs.c @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "../../../util/perf_regs.h" + +const struct sample_reg sample_reg_masks[] = { + SMPL_REG_END +}; diff --git a/tools/perf/arch/loongarch/util/unwind-libunwind.c b/tools/perf/arch/loongarch/util/unwind-libunwind.c new file mode 100644 index 000000000000..abcd9e2c6624 --- /dev/null +++ b/tools/perf/arch/loongarch/util/unwind-libunwind.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include "perf_regs.h" +#include "../../util/unwind.h" +#include "util/debug.h" + +int libunwind__arch_reg_id(int regnum) +{ + switch (regnum) { + case UNW_LOONGARCH_R1: + return PERF_REG_LOONGARCH_R1; + case UNW_LOONGARCH_R2: + return PERF_REG_LOONGARCH_R2; + case UNW_LOONGARCH_R3: + return PERF_REG_LOONGARCH_R3; + case UNW_LOONGARCH_R4: + return PERF_REG_LOONGARCH_R4; + case UNW_LOONGARCH_R5: + return PERF_REG_LOONGARCH_R5; + case UNW_LOONGARCH_R6: + return PERF_REG_LOONGARCH_R6; + case UNW_LOONGARCH_R7: + return PERF_REG_LOONGARCH_R7; + case UNW_LOONGARCH_R8: + return PERF_REG_LOONGARCH_R8; + case UNW_LOONGARCH_R9: + return PERF_REG_LOONGARCH_R9; + case UNW_LOONGARCH_R10: + return PERF_REG_LOONGARCH_R10; + case UNW_LOONGARCH_R11: + return PERF_REG_LOONGARCH_R11; + case UNW_LOONGARCH_R12: + return PERF_REG_LOONGARCH_R12; + case UNW_LOONGARCH_R13: + return PERF_REG_LOONGARCH_R13; + case UNW_LOONGARCH_R14: + return PERF_REG_LOONGARCH_R14; + case UNW_LOONGARCH_R15: + return PERF_REG_LOONGARCH_R15; + case UNW_LOONGARCH_R16: + return PERF_REG_LOONGARCH_R16; + case UNW_LOONGARCH_R17: + return PERF_REG_LOONGARCH_R17; + case UNW_LOONGARCH_R18: + return PERF_REG_LOONGARCH_R18; + case UNW_LOONGARCH_R19: + return PERF_REG_LOONGARCH_R19; + case UNW_LOONGARCH_R20: + return PERF_REG_LOONGARCH_R20; + case UNW_LOONGARCH_R21: + return PERF_REG_LOONGARCH_R21; + case UNW_LOONGARCH_R22: + return PERF_REG_LOONGARCH_R22; + case UNW_LOONGARCH_R23: + return PERF_REG_LOONGARCH_R23; + case UNW_LOONGARCH_R24: + return PERF_REG_LOONGARCH_R24; + case UNW_LOONGARCH_R25: + return PERF_REG_LOONGARCH_R25; + case UNW_LOONGARCH_R26: + return PERF_REG_LOONGARCH_R26; + case UNW_LOONGARCH_R27: + return PERF_REG_LOONGARCH_R27; + case UNW_LOONGARCH_R28: + return PERF_REG_LOONGARCH_R28; + case UNW_LOONGARCH_R29: + return PERF_REG_LOONGARCH_R29; + case UNW_LOONGARCH_R30: + return PERF_REG_LOONGARCH_R30; + case UNW_LOONGARCH_R31: + return PERF_REG_LOONGARCH_R31; + case UNW_LOONGARCH_PC: + return PERF_REG_LOONGARCH_PC; + default: + pr_err("unwind: invalid reg id %d\n", regnum); + return -EINVAL; + } + + return -EINVAL; +} diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index e8f188c2de8b..babda09e2dac 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -156,6 +156,7 @@ static int arch__associate_ins_ops(struct arch* arch, const char *name, struct i #include "arch/powerpc/annotate/instructions.c" #include "arch/s390/annotate/instructions.c" #include "arch/sparc/annotate/instructions.c" +#include "arch/loongarch/annotate/instructions.c" static struct arch architectures[] = { { @@ -205,6 +206,13 @@ static struct arch architectures[] = { .comment_char = '#', }, }, + { + .name = "loongarch", + .init = loongarch__annotate_init, + .objdump = { + .comment_char = '#', + }, + }, }; static void ins__delete(struct ins_operands *ops) diff --git a/tools/perf/util/dwarf-regs.c b/tools/perf/util/dwarf-regs.c index 1b49ecee5aff..43a77aeeb310 100644 --- a/tools/perf/util/dwarf-regs.c +++ b/tools/perf/util/dwarf-regs.c @@ -14,6 +14,10 @@ #define EM_AARCH64 183 /* ARM 64 bit */ #endif +#ifndef EM_LOONGARCH +#define EM_LOONGARCH 258 /* LoongArch */ +#endif + /* Define const char * {arch}_register_tbl[] */ #define DEFINE_DWARF_REGSTR_TABLE #include "../arch/x86/include/dwarf-regs-table.h" @@ -24,6 +28,7 @@ #include "../arch/s390/include/dwarf-regs-table.h" #include "../arch/sparc/include/dwarf-regs-table.h" #include "../arch/xtensa/include/dwarf-regs-table.h" +#include "../arch/loongarch/include/dwarf-regs-table.h" #define __get_dwarf_regstr(tbl, n) (((n) < ARRAY_SIZE(tbl)) ? (tbl)[(n)] : NULL) @@ -53,6 +58,8 @@ const char *get_dwarf_regstr(unsigned int n, unsigned int machine) return __get_dwarf_regstr(sparc_regstr_tbl, n); case EM_XTENSA: return __get_dwarf_regstr(xtensa_regstr_tbl, n); + case EM_LOONGARCH: + return __get_dwarf_regstr(loongarch_regstr_tbl, n); default: pr_err("ELF MACHINE %x is not supported.\n", machine); } diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index f243d9acf943..abbab063616f 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -347,6 +347,8 @@ static const char *normalize_arch(char *arch) return "mips"; if (!strncmp(arch, "sh", 2) && isdigit(arch[2])) return "sh"; + if (!strncmp(arch, "loongarch", 9)) + return "loongarch"; return arch; } diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h index d4137559be05..fdeef73cf85f 100644 --- a/tools/perf/util/genelf.h +++ b/tools/perf/util/genelf.h @@ -38,6 +38,9 @@ int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_ent #elif defined(__s390x__) #define GEN_ELF_ARCH EM_S390 #define GEN_ELF_CLASS ELFCLASS64 +#elif defined(__loongarch__) +#define GEN_ELF_ARCH EM_LOONGARCH +#define GEN_ELF_CLASS ELFCLASS64 #else #error "unsupported architecture" #endif diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c index 03bd99d3be16..e441f12c7272 100644 --- a/tools/perf/util/syscalltbl.c +++ b/tools/perf/util/syscalltbl.c @@ -34,6 +34,10 @@ static const char **syscalltbl_native = syscalltbl_powerpc_32; #include const int syscalltbl_native_max_id = SYSCALLTBL_ARM64_MAX_ID; static const char **syscalltbl_native = syscalltbl_arm64; +#elif defined(__loongarch__) +#include +const int syscalltbl_native_max_id = SYSCALLTBL_LOONGARCH_MAX_ID; +static const char **syscalltbl_native = syscalltbl_loongarch; #endif struct syscall { diff --git a/tools/scripts/Makefile.arch b/tools/scripts/Makefile.arch index b10b7a27c33f..0b2af44c167f 100644 --- a/tools/scripts/Makefile.arch +++ b/tools/scripts/Makefile.arch @@ -4,7 +4,8 @@ HOSTARCH := $(shell uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/ \ -e /arm64/!s/arm.*/arm/ -e s/sa110/arm/ \ -e s/s390x/s390/ -e s/parisc64/parisc/ \ -e s/ppc.*/powerpc/ -e s/mips.*/mips/ \ - -e s/sh[234].*/sh/ -e s/aarch64.*/arm64/ ) + -e s/sh[234].*/sh/ -e s/aarch64.*/arm64/ \ + -e s/loongarch.*/loongarch/) ifndef ARCH ARCH := $(HOSTARCH) @@ -33,6 +34,15 @@ ifeq ($(ARCH),sh64) SRCARCH := sh endif +# Additional ARCH settings for loongarch +ifeq ($(ARCH),loongarch32) + SRCARCH := loongarch +endif + +ifeq ($(ARCH),loongarch64) + SRCARCH := loongarch +endif + LP64 := $(shell echo __LP64__ | ${CC} ${CFLAGS} -E -x c - | tail -n 1) ifeq ($(LP64), 1) IS_64_BIT := 1 -- Gitee From c11198bde5465264507d97be0ac2ba788aee6e01 Mon Sep 17 00:00:00 2001 From: Youling Tang Date: Mon, 1 Nov 2021 17:46:27 +0800 Subject: [PATCH 38/59] tools/objtool: Add basic support for LoongArch Change-Id: Iab28e11f9fb5a6ed1bc979dec0c1f9d2a00ddc0b Signed-off-by: Jinyang He Signed-off-by: Youling Tang --- tools/arch/loongarch/include/asm/orc_types.h | 79 ++ .../loongarch/include/uapi/asm/bitfield.h | 17 + tools/arch/loongarch/include/uapi/asm/inst.h | 470 +++++++++ tools/objtool/Makefile | 8 +- tools/objtool/arch.h | 19 +- tools/objtool/arch/loongarch/Build | 3 + tools/objtool/arch/loongarch/decode.c | 426 ++++++++ .../objtool/arch/loongarch/include/arch_elf.h | 6 + .../arch/loongarch/include/arch_special.h | 20 + .../objtool/arch/loongarch/include/cfi_regs.h | 42 + tools/objtool/arch/loongarch/orcapi.c | 261 +++++ tools/objtool/arch/loongarch/special.c | 364 +++++++ tools/objtool/arch/x86/Build | 1 + tools/objtool/arch/x86/decode.c | 629 +++++++++++- tools/objtool/arch/x86/orcapi.c | 195 ++++ tools/objtool/arch/x86/special.c | 122 ++- tools/objtool/check.c | 908 +++--------------- tools/objtool/check.h | 47 +- tools/objtool/elf.c | 28 +- tools/objtool/elf.h | 1 + tools/objtool/objtool.c | 1 + tools/objtool/objtool.h | 2 + tools/objtool/orc.h | 12 + tools/objtool/orc_dump.c | 70 +- tools/objtool/orc_gen.c | 123 +-- tools/objtool/special.c | 21 +- tools/objtool/special.h | 17 +- 27 files changed, 2913 insertions(+), 979 deletions(-) create mode 100644 tools/arch/loongarch/include/asm/orc_types.h create mode 100644 tools/arch/loongarch/include/uapi/asm/bitfield.h create mode 100644 tools/arch/loongarch/include/uapi/asm/inst.h create mode 100644 tools/objtool/arch/loongarch/Build create mode 100644 tools/objtool/arch/loongarch/decode.c create mode 100644 tools/objtool/arch/loongarch/include/arch_elf.h create mode 100644 tools/objtool/arch/loongarch/include/arch_special.h create mode 100644 tools/objtool/arch/loongarch/include/cfi_regs.h create mode 100644 tools/objtool/arch/loongarch/orcapi.c create mode 100644 tools/objtool/arch/loongarch/special.c create mode 100644 tools/objtool/arch/x86/orcapi.c create mode 100644 tools/objtool/orc.h diff --git a/tools/arch/loongarch/include/asm/orc_types.h b/tools/arch/loongarch/include/asm/orc_types.h new file mode 100644 index 000000000000..be3a095e69e3 --- /dev/null +++ b/tools/arch/loongarch/include/asm/orc_types.h @@ -0,0 +1,79 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2017 Josh Poimboeuf + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ + +#ifndef _ASM_ORC_TYPES_H +#define _ASM_ORC_TYPES_H + +#include +#include + +/* + * The ORC_REG_* registers are base registers which are used to find other + * registers on the stack. + * + * ORC_REG_PREV_SP, also known as DWARF Call Frame Address (CFA), is the + * address of the previous frame: the caller's SP before it called the current + * function. + * + * ORC_REG_UNDEFINED means the corresponding register's value didn't change in + * the current frame. + * + * The most commonly used base registers are SP and FP -- which the previous SP + * is usually based on -- and PREV_SP and UNDEFINED -- which the previous FP is + * usually based on. + */ +#define ORC_REG_UNDEFINED 0 +#define ORC_REG_PREV_SP 1 +#define ORC_REG_SP 2 +#define ORC_REG_FP 3 +#define ORC_REG_MAX 15 + +/* + * ORC_TYPE_CALL: Indicates that sp_reg+sp_offset resolves to PREV_SP (the + * caller's SP right before it made the call). Used for all callable + * functions, i.e. all C code and all callable asm functions. + * + * ORC_TYPE_REGS: Used in entry code to indicate that sp_reg+sp_offset points + * to a fully populated pt_regs from a syscall, interrupt, or exception. + * + * ORC_TYPE_REGS_IRET: Used in entry code to indicate that sp_reg+sp_offset + * points to the iret return frame. + * + * The UNWIND_HINT macros are used only for the unwind_hint struct. They + * aren't used in struct orc_entry due to size and complexity constraints. + * Objtool converts them to real types when it converts the hints to orc + * entries. + */ +#define ORC_TYPE_CALL 0 +#define ORC_TYPE_REGS 1 +#define ORC_TYPE_REGS_IRET 2 +#define UNWIND_HINT_TYPE_SAVE 3 +#define UNWIND_HINT_TYPE_RESTORE 4 + +#ifndef __ASSEMBLY__ +/* + * This struct is more or less a vastly simplified version of the DWARF Call + * Frame Information standard. It contains only the necessary parts of DWARF + * CFI, simplified for ease of access by the in-kernel unwinder. It tells the + * unwinder how to find the previous SP and FP (and sometimes entry regs) on + * the stack for a given code address. Each instance of the struct corresponds + * to one or more code locations. + */ +struct orc_entry { + signed short sp_offset; + signed short fp_offset; + signed short ra_offset; + unsigned int sp_reg:4; + unsigned int fp_reg:4; + unsigned int ra_reg:4; + unsigned int type:2; + unsigned int end:1; + unsigned int unused:1; +}; + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_ORC_TYPES_H */ diff --git a/tools/arch/loongarch/include/uapi/asm/bitfield.h b/tools/arch/loongarch/include/uapi/asm/bitfield.h new file mode 100644 index 000000000000..1bdadee88617 --- /dev/null +++ b/tools/arch/loongarch/include/uapi/asm/bitfield.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* +* Copyright (C) 2020 Loongson Technology Corporation Limited +* +* Author: Hanlu Li +*/ +#ifndef __UAPI_ASM_BITFIELD_H +#define __UAPI_ASM_BITFIELD_H + +/* + * * Damn ... bitfields depend from byteorder :-( + * */ +#define __BITFIELD_FIELD(field, more) \ + more \ + field; + +#endif /* __UAPI_ASM_BITFIELD_H */ diff --git a/tools/arch/loongarch/include/uapi/asm/inst.h b/tools/arch/loongarch/include/uapi/asm/inst.h new file mode 100644 index 000000000000..4682f9ab6dc6 --- /dev/null +++ b/tools/arch/loongarch/include/uapi/asm/inst.h @@ -0,0 +1,470 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Format of an instruction in memory. + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _UAPI_ASM_INST_H +#define _UAPI_ASM_INST_H + +#include + +enum reg0_op { + tlbclr_op = 0x19208, gtlbclr_op = 0x19208, + tlbflush_op = 0x19209, gtlbflush_op = 0x19209, + tlbsrch_op = 0x1920a, gtlbsrch_op = 0x1920a, + tlbrd_op = 0x1920b, gtlbrd_op = 0x1920b, + tlbwr_op = 0x1920c, gtlbwr_op = 0x1920c, + tlbfill_op = 0x1920d, gtlbfill_op = 0x1920d, + ertn_op = 0x1920e, +}; + +enum reg0i15_op { + break_op = 0x54, dbcl_op, syscall_op, hypcall_op, + idle_op = 0xc91, dbar_op = 0x70e4, ibar_op, +}; + +enum reg0i26_op { + b_op = 0x14, bl_op, +}; + +enum reg1i20_op { + lu12iw_op = 0xa, lu32id_op, pcaddi_op, pcalau12i_op, + pcaddu12i_op, pcaddu18i_op, +}; + +enum reg1i21_op { + beqz_op = 0x10, bnez_op, bceqz_op, bcnez_op = 0x12, jiscr0_op = 0x12, jiscr1_op = 0x12, +}; + +enum reg2_op { + gr2scr_op = 0x2, scr2gr_op, clow_op, + clzw_op, ctow_op, ctzw_op, clod_op, + clzd_op, ctod_op, ctzd_op, revb2h_op, + revb4h_op, revb2w_op, revbd_op, revh2w_op, + revhd_op, bitrev4b_op, bitrev8b_op, bitrevw_op, + bitrevd_op, extwh_op, extwb_op, rdtimelw_op, + rdtimehw_op, rdtimed_op, cpucfg_op, + iocsrrdb_op = 0x19200, iocsrrdh_op, iocsrrdw_op, iocsrrdd_op, + iocsrwrb_op, iocsrwrh_op, iocsrwrw_op, iocsrwrd_op, + movgr2fcsr_op = 0x4530, movfcsr2gr_op = 0x4532, + movgr2cf_op = 0x4536, movcf2gr_op = 0x4537, +}; + +enum reg2ui3_op { + rotrib_op = 0x261, rcrib_op = 0x281, +}; + +enum reg2ui4_op { + rotrih_op = 0x131, rcrih_op = 0x141, +}; + +enum reg2ui5_op { + slliw_op = 0x81, srliw_op = 0x89, sraiw_op = 0x91, rotriw_op = 0x99, + rcriw_op = 0xa1, +}; + +enum reg2ui6_op { + sllid_op = 0x41, srlid_op = 0x45, sraid_op = 0x49, rotrid_op = 0x4d, + rcrid_op = 0x51, +}; + +enum reg2lsbw_op { + bstrinsw_op = 0x3, bstrpickw_op = 0x3, +}; + +enum reg2lsbd_op { + bstrinsd_op = 0x2, bstrpickd_op = 0x3, +}; + +enum reg2i8_op { + lddir_op = 0x190, ldpte_op, +}; + +enum reg2i8idx1_op { + vstelmd_op = 0x622, +}; + +enum reg2i8idx2_op { + vstelmw_op = 0x312, xvstelmd_op = 0x331, +}; + +enum reg2i8idx3_op { + vstelmh_op = 0x18a, xvstelmw_op = 0x199, +}; + +enum reg2i8idx4_op { + vstelmb_op = 0xc6, xvstelmh_op = 0xcd, +}; + +enum reg2i8idx5_op { + xvstelmb_op = 0x67, +}; + +enum reg2i9_op { + vldrepld_op = 0x602, xvldrepld_op = 0x642, +}; + +enum reg2i10_op { + vldreplw_op = 0x302, xvldreplw_op = 0x322, +}; + +enum reg2i11_op { + vldreplh_op = 0x182, xvldreplh_op = 0x192, +}; + +enum reg2i12_op { + slti_op = 0x8, sltui_op, addiw_op, addid_op, + lu52id_op, andi_op, ori_op, xori_op, + cacop_op = 0x18, xvldreplb_op = 0xca, + ldb_op = 0xa0, ldh_op, ldw_op, ldd_op, stb_op, sth_op, + stw_op, std_op, ldbu_op, ldhu_op, ldwu_op, preld_op, + flds_op, fsts_op, fldd_op, fstd_op, vld_op, vst_op, xvld_op, + xvst_op, ldlw_op = 0xb8, ldrw_op, ldld_op, ldrd_op, stlw_op, + strw_op, stld_op, strd_op, vldreplb_op = 0xc2, +}; + +enum reg2i14_op { + llw_op = 0x20, scw_op, lld_op, scd_op, ldptrw_op, stptrw_op, + ldptrd_op, stptrd_op, +}; + +enum reg2i16_op { + addu16id_op = 0x4, jirl_op = 0x13, beq_op = 0x16, bne_op, blt_op, bge_op, bltu_op, bgeu_op, +}; + +enum reg2csr_op { + csrrd_op = 0x4, csrwr_op = 0x4, csrxchg_op = 0x4, + gcsrrd_op = 0x5, gcsrwr_op = 0x5, gcsrxchg_op = 0x5, +}; + +enum reg3_op { + asrtled_op = 0x2, asrtgtd_op, + addw_op = 0x20, addd_op, subw_op, subd_op, + slt_op, sltu_op, maskeqz_op, masknez_op, + nor_op, and_op, or_op, xor_op, orn_op, + andn_op, sllw_op, srlw_op, sraw_op, slld_op, + srld_op, srad_op, rotrb_op, rotrh_op, + rotrw_op, rotrd_op, mulw_op, mulhw_op, + mulhwu_op, muld_op, mulhd_op, mulhdu_op, + mulwdw_op, mulwdwu_op, divw_op, modw_op, + divwu_op, modwu_op, divd_op, modd_op, + divdu_op, moddu_op, crcwbw_op, + crcwhw_op, crcwww_op, crcwdw_op, crccwbw_op, + crccwhw_op, crccwww_op, crccwdw_op, addu12iw_op, + addu12id_op, + adcb_op = 0x60, adch_op, adcw_op, adcd_op, + sbcb_op, sbch_op, sbcw_op, sbcd_op, + rcrb_op, rcrh_op, rcrw_op, rcrd_op, + ldxb_op = 0x7000, ldxh_op = 0x7008, ldxw_op = 0x7010, ldxd_op = 0x7018, + stxb_op = 0x7020, stxh_op = 0x7028, stxw_op = 0x7030, stxd_op = 0x7038, + ldxbu_op = 0x7040, ldxhu_op = 0x7048, ldxwu_op = 0x7050, + preldx_op = 0x7058, fldxs_op = 0x7060, fldxd_op = 0x7068, + fstxs_op = 0x7070, fstxd_op = 0x7078, vldx_op = 0x7080, + vstx_op = 0x7088, xvldx_op = 0x7090, xvstx_op = 0x7098, + amswapw_op = 0x70c0, amswapd_op, amaddw_op, amaddd_op, amandw_op, + amandd_op, amorw_op, amord_op, amxorw_op, amxord_op, ammaxw_op, + ammaxd_op, amminw_op, ammind_op, ammaxwu_op, ammaxdu_op, + amminwu_op, ammindu_op, amswap_dbw_op, amswap_dbd_op, amadd_dbw_op, + amadd_dbd_op, amand_dbw_op, amand_dbd_op, amor_dbw_op, amor_dbd_op, + amxor_dbw_op, amxor_dbd_op, ammax_dbw_op, ammax_dbd_op, ammin_dbw_op, + ammin_dbd_op, ammax_dbwu_op, ammax_dbdu_op, ammin_dbwu_op, + ammin_dbdu_op, fldgts_op = 0x70e8, fldgtd_op, + fldles_op, fldled_op, fstgts_op, fstgtd_op, fstles_op, fstled_op, + ldgtb_op, ldgth_op, ldgtw_op, ldgtd_op, ldleb_op, ldleh_op, ldlew_op, + ldled_op, stgtb_op, stgth_op, stgtw_op, stgtd_op, stleb_op, stleh_op, + stlew_op, stled_op, +}; + +enum reg3sa2_op { + alslw_op = 0x2, alslwu_op, bytepickw_op, alsld_op = 0x16, + +}; + +enum reg3sa3_op { + bytepickd_op = 0x3, +}; + +struct reg2_format { + __BITFIELD_FIELD(unsigned int opcode : 22, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;))) +}; + +struct reg2ui3_format { + __BITFIELD_FIELD(unsigned int opcode : 19, + __BITFIELD_FIELD(unsigned int simmediate : 3, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;)))) +}; + +struct reg2ui4_format { + __BITFIELD_FIELD(unsigned int opcode : 18, + __BITFIELD_FIELD(unsigned int simmediate : 4, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;)))) +}; + +struct reg2ui5_format { + __BITFIELD_FIELD(unsigned int opcode : 17, + __BITFIELD_FIELD(unsigned int simmediate : 5, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;)))) +}; + +struct reg2ui6_format { + __BITFIELD_FIELD(unsigned int opcode : 16, + __BITFIELD_FIELD(unsigned int simmediate : 6, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;)))) +}; + +struct reg2lsbw_format { + __BITFIELD_FIELD(unsigned int opcode : 11, + __BITFIELD_FIELD(unsigned int msbw : 5, + __BITFIELD_FIELD(unsigned int op : 1, + __BITFIELD_FIELD(unsigned int lsbw : 5, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;)))))) +}; + +struct reg2lsbd_format { + __BITFIELD_FIELD(unsigned int opcode : 10, + __BITFIELD_FIELD(unsigned int msbd : 6, + __BITFIELD_FIELD(unsigned int lsbd : 6, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;))))) +}; + +struct reg3_format { + __BITFIELD_FIELD(unsigned int opcode : 17, + __BITFIELD_FIELD(unsigned int rk : 5, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;)))) +}; + +struct reg3sa2_format { + __BITFIELD_FIELD(unsigned int opcode : 15, + __BITFIELD_FIELD(unsigned int simmediate : 2, + __BITFIELD_FIELD(unsigned int rk : 5, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;))))) +}; + +struct reg3sa3_format { + __BITFIELD_FIELD(unsigned int opcode : 14, + __BITFIELD_FIELD(unsigned int simmediate : 3, + __BITFIELD_FIELD(unsigned int rk : 5, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;))))) +}; + +struct reg3sa4_format { + __BITFIELD_FIELD(unsigned int opcode : 13, + __BITFIELD_FIELD(unsigned int simmediate : 4, + __BITFIELD_FIELD(unsigned int rk : 5, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;))))) +}; + +struct reg4_format { + __BITFIELD_FIELD(unsigned int opcode : 12, + __BITFIELD_FIELD(unsigned int fa : 5, + __BITFIELD_FIELD(unsigned int fk : 5, + __BITFIELD_FIELD(unsigned int fj : 5, + __BITFIELD_FIELD(unsigned int fd : 5, + ;))))) +}; + +struct reg2i8_format { + __BITFIELD_FIELD(unsigned int opcode : 14, + __BITFIELD_FIELD(unsigned int simmediate : 8, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;)))) +}; + +struct reg2i8idx1_format { + __BITFIELD_FIELD(unsigned int opcode : 13, + __BITFIELD_FIELD(unsigned int idx : 1, + __BITFIELD_FIELD(unsigned int simmediate : 8, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;))))) +}; + +struct reg2i8idx2_format { + __BITFIELD_FIELD(unsigned int opcode : 12, + __BITFIELD_FIELD(unsigned int idx : 2, + __BITFIELD_FIELD(unsigned int simmediate : 8, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;))))) +}; + +struct reg2i8idx3_format { + __BITFIELD_FIELD(unsigned int opcode : 11, + __BITFIELD_FIELD(unsigned int idx : 3, + __BITFIELD_FIELD(unsigned int simmediate : 8, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;))))) +}; + +struct reg2i8idx4_format { + __BITFIELD_FIELD(unsigned int opcode : 10, + __BITFIELD_FIELD(unsigned int idx : 4, + __BITFIELD_FIELD(unsigned int simmediate : 8, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;))))) +}; + +struct reg2i8idx5_format { + __BITFIELD_FIELD(unsigned int opcode : 9, + __BITFIELD_FIELD(unsigned int idx : 5, + __BITFIELD_FIELD(unsigned int simmediate : 8, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;))))) +}; + +struct reg2i9_format { + __BITFIELD_FIELD(unsigned int opcode : 13, + __BITFIELD_FIELD(unsigned int simmediate : 9, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;)))) +}; + +struct reg2i10_format { + __BITFIELD_FIELD(unsigned int opcode : 12, + __BITFIELD_FIELD(unsigned int simmediate : 10, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;)))) +}; + +struct reg2i11_format { + __BITFIELD_FIELD(unsigned int opcode : 11, + __BITFIELD_FIELD(unsigned int simmediate : 11, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;)))) +}; + +struct reg2i12_format { + __BITFIELD_FIELD(unsigned int opcode : 10, + __BITFIELD_FIELD(signed int simmediate : 12, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;)))) +}; + +struct reg2u12_format { + __BITFIELD_FIELD(unsigned int opcode : 10, + __BITFIELD_FIELD(unsigned int simmediate : 12, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;)))) +}; + +struct reg2i14_format { + __BITFIELD_FIELD(unsigned int opcode : 8, + __BITFIELD_FIELD(unsigned int simmediate : 14, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;)))) +}; + +struct reg2i16_format { + __BITFIELD_FIELD(unsigned int opcode : 6, + __BITFIELD_FIELD(unsigned int simmediate : 16, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;)))) +}; + +struct reg2csr_format { + __BITFIELD_FIELD(unsigned int opcode : 8, + __BITFIELD_FIELD(unsigned int csr : 14, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;)))) +}; + +struct reg1i21_format { + __BITFIELD_FIELD(unsigned int opcode : 6, + __BITFIELD_FIELD(unsigned int simmediate_l : 16, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int simmediate_h : 5, + ;)))) +}; + +struct reg1i20_format { + __BITFIELD_FIELD(unsigned int opcode : 7, + __BITFIELD_FIELD(unsigned int simmediate : 20, + __BITFIELD_FIELD(unsigned int rd : 5, + ;))) +}; + +struct reg0i15_format { + __BITFIELD_FIELD(unsigned int opcode : 17, + __BITFIELD_FIELD(unsigned int simmediate : 15, + ;)) +}; + +struct reg0i26_format { + __BITFIELD_FIELD(unsigned int opcode : 6, + __BITFIELD_FIELD(unsigned int simmediate_l : 16, + __BITFIELD_FIELD(unsigned int simmediate_h : 10, + ;))) +}; + +union loongarch_instruction { + unsigned int word; + unsigned short halfword[2]; + unsigned char byte[4]; + struct reg2_format reg2_format; + struct reg2ui3_format reg2ui3_format; + struct reg2ui4_format reg2ui4_format; + struct reg2ui5_format reg2ui5_format; + struct reg2ui6_format reg2ui6_format; + struct reg2lsbw_format reg2lsbw_format; + struct reg2lsbd_format reg2lsbd_format; + struct reg3_format reg3_format; + struct reg3sa2_format reg3sa2_format; + struct reg3sa3_format reg3sa3_format; + struct reg3sa4_format reg3sa4_format; + struct reg4_format reg4_format; + struct reg2i8_format reg2i8_format; + struct reg2i8idx1_format reg2i8idx1_format; + struct reg2i8idx2_format reg2i8idx2_format; + struct reg2i8idx3_format reg2i8idx3_format; + struct reg2i8idx4_format reg2i8idx4_format; + struct reg2i8idx5_format reg2i8idx5_format; + struct reg2i9_format reg2i9_format; + struct reg2i10_format reg2i10_format; + struct reg2i11_format reg2i11_format; + struct reg2i12_format reg2i12_format; + struct reg2i14_format reg2i14_format; + struct reg2i16_format reg2i16_format; + struct reg2csr_format reg2csr_format; + struct reg1i21_format reg1i21_format; + struct reg1i20_format reg1i20_format; + struct reg0i15_format reg0i15_format; + struct reg0i26_format reg0i26_format; +}; + +#endif /* _UAPI_ASM_INST_H */ diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile index 5cdb19036d7f..d76161c8165b 100644 --- a/tools/objtool/Makefile +++ b/tools/objtool/Makefile @@ -25,7 +25,7 @@ LIBELF_LIBS := $(shell pkg-config libelf --libs 2>/dev/null || echo -lelf) all: $(OBJTOOL) INCLUDES := -I$(srctree)/tools/include \ - -I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi \ + -I$(srctree)/tools/arch/$(SRCARCH)/include/uapi \ -I$(srctree)/tools/arch/$(SRCARCH)/include \ -I$(srctree)/tools/objtool/arch/$(SRCARCH)/include WARNINGS := $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -Wno-packed -Wno-nested-externs @@ -46,6 +46,12 @@ ifeq ($(SRCARCH),x86) SUBCMD_ORC := y endif +ifeq ($(SRCARCH),loongarch) + CFLAGS += -D__loongarch__ + SUBCMD_CHECK := y + SUBCMD_ORC := y +endif + ifeq ($(SUBCMD_ORC),y) CFLAGS += -DINSN_USE_ORC endif diff --git a/tools/objtool/arch.h b/tools/objtool/arch.h index 4a84c3081b8e..f990434c0629 100644 --- a/tools/objtool/arch.h +++ b/tools/objtool/arch.h @@ -8,13 +8,27 @@ #include #include -#include "objtool.h" + +#include "builtin.h" #include "cfi.h" +#include "objtool.h" #ifdef INSN_USE_ORC #include #endif +#ifndef R_LARCH_NONE +#define R_LARCH_NONE 0 +#endif + +#ifndef R_LARCH_ADD32 +#define R_LARCH_ADD32 50 +#endif + +#ifndef R_LARCH_SUB32 +#define R_LARCH_SUB32 55 +#endif + enum insn_type { INSN_JUMP_CONDITIONAL, INSN_JUMP_UNCONDITIONAL, @@ -90,4 +104,7 @@ const char *arch_nop_insn(int len); int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg); +void arch_try_find_call(struct list_head *p_orbit_list, struct objtool_file *file, + struct symbol *func, struct instruction *insn); + #endif /* _ARCH_H */ diff --git a/tools/objtool/arch/loongarch/Build b/tools/objtool/arch/loongarch/Build new file mode 100644 index 000000000000..d89a998db54b --- /dev/null +++ b/tools/objtool/arch/loongarch/Build @@ -0,0 +1,3 @@ +objtool-y += decode.o +objtool-y += special.o +objtool-$(SUBCMD_ORC) += orcapi.o diff --git a/tools/objtool/arch/loongarch/decode.c b/tools/objtool/arch/loongarch/decode.c new file mode 100644 index 000000000000..29ca3e63f5d5 --- /dev/null +++ b/tools/objtool/arch/loongarch/decode.c @@ -0,0 +1,426 @@ +/* + * Copyright (C) 2015 Josh Poimboeuf + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ + +#include +#include + +#define unlikely(cond) (cond) +#include +#include + +#include "../../check.h" +#include "../../elf.h" +#include "../../arch.h" +#include "../../warn.h" + +#ifndef LOONGARCH_INSN_SIZE +#define LOONGARCH_INSN_SIZE sizeof(union loongarch_instruction) +#endif + +#ifndef EM_LOONGARCH +#define EM_LOONGARCH 258 +#endif + +#define to_cfi_reg(reg) (reg) + +#define signex(x, symbol_idx) \ +({ \ + unsigned long ___u64; \ + ___u64 = ((x) & (1UL << symbol_idx)) ? \ + ~((1UL << (symbol_idx + 1)) - 1) | (x) : \ + ((1UL << (symbol_idx + 1)) - 1) & (x); \ + ___u64; \ +}) + +const unsigned int code_nop = 0x03400000; + +static int is_loongarch(const struct elf *elf) +{ + if (elf->ehdr.e_machine == EM_LOONGARCH) + return 1; + + WARN("unexpected ELF machine type %x\n", elf->ehdr.e_machine); + return 0; +} + +bool arch_callee_saved_reg(unsigned char reg) +{ + switch (reg) { + case CFI_S0 ... CFI_S8: + case CFI_FP: + case CFI_RA: + return true; + default: + return false; + } +} + +unsigned long arch_dest_reloc_offset(int addend) +{ + return addend; +} + +unsigned long arch_jump_destination(struct instruction *insn) +{ + return insn->offset + insn->immediate * 4; +} + +#define ADD_OP(op) \ + if (!(op = calloc(1, sizeof(*op)))) \ + return -1; \ + else for (list_add_tail(&op->list, ops_list); op; op = NULL) + +int arch_decode_instruction(const struct elf *elf, const struct section *sec, + unsigned long offset, unsigned int maxlen, + unsigned int *len, enum insn_type *type, + unsigned long *immediate, + struct list_head *ops_list) +{ + union loongarch_instruction code; + struct stack_op *op = NULL; + + if (!is_loongarch(elf)) + return -1; + + if (maxlen < LOONGARCH_INSN_SIZE) + return 0; + + *len = LOONGARCH_INSN_SIZE; + *type = INSN_OTHER; + *immediate = 0; + + code = *(union loongarch_instruction *)(sec->data->d_buf + offset); + + /* For some where we .fill 0 and we cannot execute it. */ + if (code.word == 0) + *type = INSN_NOP; + + switch (code.reg2i12_format.opcode) { + case addid_op: + if ((code.reg2i12_format.rj == CFI_SP) || (code.reg2i12_format.rd == CFI_SP)) { + /* addi.d reg1,reg2,imm */ + *immediate = signex(code.reg2i12_format.simmediate, 11); + ADD_OP(op) { + op->src.type = OP_SRC_ADD; + op->src.reg = to_cfi_reg(code.reg2i12_format.rj); + op->src.offset = *immediate; + op->dest.type = OP_DEST_REG; + op->dest.reg = to_cfi_reg(code.reg2i12_format.rd); + } + } + break; + case std_op: + if ((code.reg2i12_format.rj == CFI_SP)) { + /* st.d reg,sp,imm */ + *immediate = signex(code.reg2i12_format.simmediate, 11); + ADD_OP(op) { + op->src.type = OP_SRC_REG; + op->src.reg = to_cfi_reg(code.reg2i12_format.rd); + op->dest.type = OP_DEST_REG_INDIRECT; + op->dest.reg = CFI_SP; + op->dest.offset = *immediate; + } + } + break; + case ldd_op: + if ((code.reg2i12_format.rj == CFI_SP)) { + /* ld.d reg,sp,imm */ + *immediate = signex(code.reg2i12_format.simmediate, 11); + ADD_OP(op) { + op->src.type = OP_SRC_REG_INDIRECT; + op->src.reg = CFI_SP; + op->src.offset = *immediate; + op->dest.type = OP_DEST_REG; + op->dest.reg = to_cfi_reg(code.reg2i12_format.rd); + } + } + break; + case andi_op: + if (code.reg2i12_format.simmediate == 0 && + code.reg2i12_format.rj == 0 && + code.reg2i12_format.rd == 0) + /* nop */ + *type = INSN_NOP; + break; + default: + switch (code.reg2i16_format.opcode) { + case jirl_op: + if (code.reg2i16_format.simmediate) { + WARN("unexpected insn type 0x%lx\n", offset); + return -1; + } + if (code.reg2i16_format.rj == CFI_RA && + code.reg2i16_format.rd == 0) + /* jr ra */ + *type = INSN_RETURN; + else if (code.reg2i16_format.rd == CFI_RA) + /* jalr reg */ + *type = INSN_CALL_DYNAMIC; + else if (code.reg2i16_format.rd == 0) + /* jr reg */ + *type = INSN_JUMP_DYNAMIC; + break; + case beq_op: + case bne_op: + case blt_op: + case bge_op: + case bltu_op: + case bgeu_op: + *immediate = signex(code.reg2i16_format.simmediate, 15); + *type = INSN_JUMP_CONDITIONAL; + break; + case beqz_op: + case bnez_op: + *immediate = signex(code.reg1i21_format.simmediate_h << 16 | + code.reg1i21_format.simmediate_l, 20); + *type = INSN_JUMP_CONDITIONAL; + break; + case bl_op: + *type = INSN_CALL; + break; + case b_op: + *type = INSN_JUMP_UNCONDITIONAL; + break; + default: + if (code.reg2i14_format.opcode == stptrd_op && + code.reg2i14_format.rj == CFI_SP) { + /* stptr.d reg,sp,imm */ + *immediate = signex(code.reg2i14_format.simmediate, 13); + ADD_OP(op) { + op->src.type = OP_SRC_REG; + op->src.reg = to_cfi_reg(code.reg2i14_format.rd); + op->dest.type = OP_DEST_REG_INDIRECT; + op->dest.reg = CFI_SP; + op->dest.offset = *immediate; + } + } else if (code.reg2i14_format.opcode == ldptrd_op && + code.reg2i14_format.rj == CFI_SP) { + /* ldptr.d reg,sp,imm */ + *immediate = signex(code.reg2i14_format.simmediate, 13); + ADD_OP(op) { + op->src.type = OP_SRC_REG_INDIRECT; + op->src.reg = CFI_SP; + op->src.offset = *immediate; + op->dest.type = OP_DEST_REG; + op->dest.reg = to_cfi_reg(code.reg2i14_format.rd); + } + } else if (code.reg0i15_format.opcode == break_op) { + /* break */ + *type = INSN_BUG; + } else if (code.reg2_format.opcode == ertn_op) { + /* ertn */ + *type = INSN_RETURN; + } + break; + } + break; + } + + return 0; +} + +void arch_initial_func_cfi_state(struct cfi_init_state *state) +{ + int i; + + for (i = 0; i < CFI_NUM_REGS; i++) { + state->regs[i].base = CFI_UNDEFINED; + state->regs[i].offset = 0; + } + + /* initial CFA (call frame address) */ + state->cfa.base = CFI_SP; + state->cfa.offset = 0; +} + +int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg) +{ + struct cfi_reg *cfa = &insn->cfi.cfa; + + switch (sp_reg) { + case ORC_REG_UNDEFINED: + cfa->base = CFI_UNDEFINED; + break; + case ORC_REG_SP: + cfa->base = CFI_SP; + break; + case ORC_REG_FP: + cfa->base = CFI_FP; + break; + default: + return -1; + } + + return 0; +} + +bool arch_has_valid_stack_frame(struct insn_state *state) +{ + return true; +} + +static int update_cfi_state_regs(struct instruction *insn, + struct cfi_state *cfi, + struct stack_op *op) +{ + struct cfi_reg *cfa = &cfi->cfa; + + if (cfa->base != CFI_SP && cfa->base != CFI_SP_INDIRECT) + return 0; + + /* addi.d sp, sp, imm */ + if (op->dest.type == OP_DEST_REG && op->src.type == OP_SRC_ADD && + op->dest.reg == CFI_SP && op->src.reg == CFI_SP) + cfa->offset -= op->src.offset; + + return 0; +} + +static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi, + struct stack_op *op) +{ + struct cfi_reg *cfa = &cfi->cfa; + struct cfi_reg *regs = cfi->regs; + + if (cfa->base == CFI_UNDEFINED) { + if (insn->func) { + WARN_FUNC("undefined stack state", insn->sec, insn->offset); + return -1; + } + return 0; + } + + if (cfi->type == ORC_TYPE_REGS) + return update_cfi_state_regs(insn, cfi, op); + + switch (op->dest.type) { + case OP_DEST_REG: + switch (op->src.type) { + case OP_SRC_ADD: + if (op->dest.reg == CFI_SP && op->src.reg == CFI_SP) { + /* addi.d sp, sp, imm */ + cfi->stack_size -= op->src.offset; + if (cfa->base == CFI_SP) + cfa->offset -= op->src.offset; + } else if (op->dest.reg == CFI_FP && op->src.reg == CFI_SP) { + /* addi.d fp, sp, imm */ + if (cfa->base == CFI_SP && cfa->offset == op->src.offset) { + cfa->base = CFI_FP; + cfa->offset = 0; + } + } else if (op->dest.reg == CFI_SP && op->src.reg == CFI_FP) { + /* addi.d sp, fp, imm */ + if (cfa->base == CFI_FP && cfa->offset == 0) { + cfa->base = CFI_SP; + cfa->offset = -op->src.offset; + } + } + break; + case OP_SRC_REG_INDIRECT: + /* ld.d _reg, sp, imm */ + if (op->src.reg == CFI_SP && + op->src.offset == (regs[op->dest.reg].offset + cfi->stack_size)) { + restore_reg(cfi, op->dest.reg); + /* Gcc may not restore sp, we adjust it directly. */ + if (cfa->base == CFI_FP && cfa->offset == 0) { + cfa->base = CFI_SP; + cfa->offset = cfi->stack_size; + } + } + break; + default: + break; + } + break; + case OP_DEST_REG_INDIRECT: + if (op->src.type == OP_SRC_REG) { + /* st.d _reg, sp, imm */ + save_reg(cfi, op->src.reg, CFI_CFA, op->dest.offset - cfi->stack_size); + } + break; + default: + WARN_FUNC("unknown stack-related instruction", insn->sec, insn->offset); + return -1; + } + + return 0; +} + +int arch_handle_insn_ops(struct instruction *insn, struct insn_state *state) +{ + struct stack_op *op; + + list_for_each_entry(op, &insn->stack_ops, list) { + int res; + + res = update_cfi_state(insn, &state->cfi, op); + if (res) + return res; + } + + return 0; +} + +#ifndef R_LARCH_MARK_LA +#define R_LARCH_MARK_LA 20 +#endif + +void arch_try_find_call(struct list_head *p_orbit_list, struct objtool_file *file, + struct symbol *func, struct instruction *insn) +{ + int count = 0, reg; + struct instruction *orbit; + struct reloc *reloc; + union loongarch_instruction code; + + if (list_empty(p_orbit_list)) { + WARN_FUNC("BUG: why do I have no insn track?", insn->sec, insn->offset); + return; + } + + if (func_last_orbit(p_orbit_list) != insn) { + WARN_FUNC("BUG: insn is not expected.", insn->sec, insn->offset); + return; + } + + code = *(union loongarch_instruction *)(insn->sec->data->d_buf + insn->offset); + if (code.reg2i16_format.opcode != jirl_op || code.reg2i16_format.rd != CFI_RA) { + WARN_FUNC("BUG: first insn track is not expected.", insn->sec, insn->offset); + return; + } + + reg = code.reg2i16_format.rj; + list_for_each_entry(orbit, p_orbit_list, orbit_node) { + count++; + /* jirl, la.abs (== lu12i.w, ori, lu32i.d, lu52i.d) */ + if (count == 5) + break; + } + + if (count != 5) + return; + + code = *(union loongarch_instruction *)(orbit->sec->data->d_buf + orbit->offset); + if (code.reg1i20_format.opcode != lu12iw_op || code.reg1i20_format.rd != reg) + return; + + reloc = find_reloc_by_dest(file->elf, orbit->sec, orbit->offset); + if (!reloc) + return; + + if (reloc->type != R_LARCH_MARK_LA) + return; + + insn->type = INSN_CALL; + insn->call_dest = reloc->sym; +} + +const char *arch_nop_insn(int len) +{ + if (len != LOONGARCH_INSN_SIZE) + WARN("invalid NOP size: %d\n", len); + + return (const char *)&code_nop; +} diff --git a/tools/objtool/arch/loongarch/include/arch_elf.h b/tools/objtool/arch/loongarch/include/arch_elf.h new file mode 100644 index 000000000000..e99d937cd831 --- /dev/null +++ b/tools/objtool/arch/loongarch/include/arch_elf.h @@ -0,0 +1,6 @@ +#ifndef _OBJTOOL_ARCH_ELF +#define _OBJTOOL_ARCH_ELF + +#define R_NONE R_LARCH_NONE + +#endif /* _OBJTOOL_ARCH_ELF */ diff --git a/tools/objtool/arch/loongarch/include/arch_special.h b/tools/objtool/arch/loongarch/include/arch_special.h new file mode 100644 index 000000000000..96213d32e243 --- /dev/null +++ b/tools/objtool/arch/loongarch/include/arch_special.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _LOONGARCH_ARCH_SPECIAL_H +#define _LOONGARCH_ARCH_SPECIAL_H + +#define EX_ENTRY_SIZE 16 +#define EX_ORIG_OFFSET 0 +#define EX_NEW_OFFSET 8 + +#define JUMP_ENTRY_SIZE 16 +#define JUMP_ORIG_OFFSET 0 +#define JUMP_NEW_OFFSET 4 + +#define ALT_ENTRY_SIZE 12 +#define ALT_ORIG_OFFSET 0 +#define ALT_NEW_OFFSET 4 +#define ALT_FEATURE_OFFSET 8 +#define ALT_ORIG_LEN_OFFSET 10 +#define ALT_NEW_LEN_OFFSET 11 + +#endif /* _LOONGARCH_ARCH_SPECIAL_H */ diff --git a/tools/objtool/arch/loongarch/include/cfi_regs.h b/tools/objtool/arch/loongarch/include/cfi_regs.h new file mode 100644 index 000000000000..09bc76271b3b --- /dev/null +++ b/tools/objtool/arch/loongarch/include/cfi_regs.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef _OBJTOOL_CFI_REGS_H +#define _OBJTOOL_CFI_REGS_H + +#define CFI_ZERO 0 +#define CFI_RA 1 +#define CFI_TP 2 +#define CFI_SP 3 +#define CFI_A0 4 +#define CFI_A1 5 +#define CFI_A2 6 +#define CFI_A3 7 +#define CFI_A4 8 +#define CFI_A5 9 +#define CFI_A6 10 +#define CFI_A7 11 +#define CFI_T0 12 +#define CFI_T1 13 +#define CFI_T2 14 +#define CFI_T3 15 +#define CFI_T4 16 +#define CFI_T5 17 +#define CFI_T6 18 +#define CFI_T7 19 +#define CFI_T8 20 +/* #define CFI_X 21 */ +#define CFI_FP 22 +#define CFI_S0 23 +#define CFI_S1 24 +#define CFI_S2 25 +#define CFI_S3 26 +#define CFI_S4 27 +#define CFI_S5 28 +#define CFI_S6 29 +#define CFI_S7 30 +#define CFI_S8 31 +#define CFI_NUM_REGS 32 + +#define CFI_BP CFI_FP + +#endif /* _OBJTOOL_CFI_REGS_H */ diff --git a/tools/objtool/arch/loongarch/orcapi.c b/tools/objtool/arch/loongarch/orcapi.c new file mode 100644 index 000000000000..6acda62cd74e --- /dev/null +++ b/tools/objtool/arch/loongarch/orcapi.c @@ -0,0 +1,261 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#include +#include + +#include "../../orc.h" +#include "../../warn.h" + +#ifndef R_LARCH_SOP_PUSH_PCREL +#define R_LARCH_SOP_PUSH_PCREL 22 +#endif + +#ifndef R_LARCH_SOP_PUSH_DUP +#define R_LARCH_SOP_PUSH_DUP 24 +#endif + +#ifndef R_LARCH_SOP_PUSH_ABSOLUTE +#define R_LARCH_SOP_PUSH_ABSOLUTE 23 +#endif + +#ifndef R_LARCH_SOP_SR +#define R_LARCH_SOP_SR 34 +#endif + +#ifndef R_LARCH_SOP_SL +#define R_LARCH_SOP_SL 33 +#endif + +#ifndef R_LARCH_SOP_SUB +#define R_LARCH_SOP_SUB 32 +#endif + +#ifndef R_LARCH_SOP_POP_32_U +#define R_LARCH_SOP_POP_32_U 46 +#endif + +static struct orc_entry empty = { + .sp_reg = ORC_REG_UNDEFINED, + .type = ORC_TYPE_CALL, +}; + +int create_orc(struct objtool_file *file) +{ + struct instruction *insn; + + for_each_insn(file, insn) { + struct orc_entry *orc = &insn->orc; + struct cfi_reg *cfa = &insn->cfi.cfa; + struct cfi_reg *fp = &insn->cfi.regs[CFI_FP]; + struct cfi_reg *ra = &insn->cfi.regs[CFI_RA]; + + orc->end = insn->cfi.end; + + if (cfa->base == CFI_UNDEFINED) { + orc->sp_reg = ORC_REG_UNDEFINED; + continue; + } + + switch (cfa->base) { + case CFI_SP: + orc->sp_reg = ORC_REG_SP; + break; + case CFI_FP: + orc->sp_reg = ORC_REG_FP; + break; + default: + WARN_FUNC("unknown CFA base reg %d", + insn->sec, insn->offset, cfa->base); + return -1; + } + + switch (fp->base) { + case CFI_UNDEFINED: + orc->fp_reg = ORC_REG_UNDEFINED; + orc->fp_offset = 0; + break; + case CFI_CFA: + orc->fp_reg = ORC_REG_PREV_SP; + orc->fp_offset = fp->offset; + break; + default: + WARN_FUNC("unknown FP base reg %d", + insn->sec, insn->offset, fp->base); + } + + switch (ra->base) { + case CFI_UNDEFINED: + orc->ra_reg = ORC_REG_UNDEFINED; + orc->ra_offset = 0; + break; + case CFI_CFA: + orc->ra_reg = ORC_REG_PREV_SP; + orc->ra_offset = ra->offset; + break; + default: + WARN_FUNC("unknown RA base reg %d", + insn->sec, insn->offset, ra->base); + } + + orc->sp_offset = cfa->offset; + orc->type = insn->cfi.type; + } + + return 0; +} + +int arch_create_orc_entry(struct elf *elf, struct section *u_sec, struct section *ip_relocsec, + unsigned int idx, struct section *insn_sec, + unsigned long insn_off, struct orc_entry *o) +{ + struct orc_entry *orc; + struct reloc *reloc, *p, *c; + + /* populate ORC data */ + orc = (struct orc_entry *)u_sec->data->d_buf + idx; + memcpy(orc, o, sizeof(*orc)); + + /* populate reloc for ip + * Since loongarch64 has no PC32 at present, fake as follows, + * R_LARCH_SOP_PUSH_PCREL + * R_LARCH_SOP_PUSH_DUP + * R_LARCH_SOP_PUSH_ABSOLUTE + * R_LARCH_SOP_SR + * R_LARCH_SOP_PUSH_ABSOLUTE + * R_LARCH_SOP_SL + * R_LARCH_SOP_SUB + * R_LARCH_SOP_POP_32_U + */ + reloc = malloc(sizeof(*reloc) * 8); + if (!reloc) { + perror("malloc"); + return -1; + } + memset(reloc, 0, sizeof(*reloc) * 8); + + insn_to_reloc_sym_addend(insn_sec, insn_off, reloc); + if (!reloc->sym) { + WARN("missing symbol for insn at offset 0x%lx", + insn_off); + return -1; + } + + p = reloc; + c = reloc; + + p->type = R_LARCH_SOP_PUSH_PCREL; + p->offset = idx * sizeof(int); + p->sec = ip_relocsec; + + c++; + c->type = R_LARCH_SOP_PUSH_DUP; + c->offset = idx * sizeof(int); + c->sec = ip_relocsec; + p->next = c; + p = c; + + c++; + c->type = R_LARCH_SOP_PUSH_ABSOLUTE; + c->addend = 0x20; + c->offset = idx * sizeof(int); + c->sec = ip_relocsec; + p->next = c; + p = c; + + c++; + c->type = R_LARCH_SOP_SR; + c->offset = idx * sizeof(int); + c->sec = ip_relocsec; + p->next = c; + p = c; + + c++; + c->type = R_LARCH_SOP_PUSH_ABSOLUTE; + c->addend = 0x20; + c->offset = idx * sizeof(int); + c->sec = ip_relocsec; + p->next = c; + p = c; + + c++; + c->type = R_LARCH_SOP_SL; + c->offset = idx * sizeof(int); + c->sec = ip_relocsec; + p->next = c; + p = c; + + c++; + c->type = R_LARCH_SOP_SUB; + c->offset = idx * sizeof(int); + c->sec = ip_relocsec; + p->next = c; + p = c; + + c++; + c->type = R_LARCH_SOP_POP_32_U; + c->offset = idx * sizeof(int); + c->sec = ip_relocsec; + p->next = c; + + elf_add_reloc(elf, reloc); + + return 0; +} + +int arch_create_orc_entry_empty(struct elf *elf, struct section *u_sec, + struct section *ip_relasec, unsigned int idx, + struct section *insn_sec, unsigned long insn_off) +{ + return arch_create_orc_entry(elf, u_sec, ip_relasec, idx, + insn_sec, insn_off, &empty); +} + +static const char *reg_name(unsigned int reg) +{ + switch (reg) { + case ORC_REG_SP: + return "sp"; + case ORC_REG_FP: + return "fp"; + case ORC_REG_PREV_SP: + return "prevsp"; + default: + return "?"; + } +} + +static const char *orc_type_name(unsigned int type) +{ + switch (type) { + case ORC_TYPE_CALL: + return "call"; + case ORC_TYPE_REGS: + return "regs"; + default: + return "?"; + } +} + +static void print_reg(unsigned int reg, int offset) +{ + if (reg == ORC_REG_UNDEFINED) + printf(" (und) "); + else + printf("%s + %3d", reg_name(reg), offset); +} + +void arch_print_reg(struct orc_entry orc) +{ + printf(" sp:"); + + print_reg(orc.sp_reg, orc.sp_offset); + + printf(" fp:"); + + print_reg(orc.fp_reg, orc.fp_offset); + + printf(" ra:"); + print_reg(orc.ra_reg, orc.ra_offset); + + printf(" type:%s end:%d\n", + orc_type_name(orc.type), orc.end); +} diff --git a/tools/objtool/arch/loongarch/special.c b/tools/objtool/arch/loongarch/special.c new file mode 100644 index 000000000000..22f2aeccaf97 --- /dev/null +++ b/tools/objtool/arch/loongarch/special.c @@ -0,0 +1,364 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#include +#include +#include + +#include "../../special.h" +#include "../../builtin.h" +#include "../../check.h" +#include "../../warn.h" + +void arch_handle_alternative(unsigned short feature, struct special_alt *alt) +{ +} + +void arch_mark_func_jump_tables(struct objtool_file *file, + struct symbol *func) +{ + struct instruction *insn; + struct reloc *reloc; + union loongarch_instruction code; + + func_for_each_insn(file, func, insn) { + code = *(union loongarch_instruction *)(insn->sec->data->d_buf + insn->offset); + + if (code.reg1i20_format.opcode != pcaddu12i_op) + continue; + + reloc = find_reloc_by_dest(file->elf, insn->sec, insn->offset); + + if (!reloc || (strncmp(reloc->sym->name, ".L", 2) && + strncmp(reloc->sym->name, "jumptable", 9))) + continue; + + reloc = find_reloc_by_dest(file->elf, reloc->sym->sec, reloc->sym->offset); + + if (!reloc || strncmp(reloc->sym->name, ".L", 2)) + continue; + + reloc->jump_table_start = true; + } +} + +static int dynamic_add_jump_table(struct objtool_file *file, + struct instruction *insn, struct reloc *table) +{ + struct reloc *reloc = table; + struct instruction *dest_insn; + struct alternative *alt; + struct symbol *pfunc = insn->func->pfunc; + + list_for_each_entry_from(reloc, &table->sec->reloc_list, list) { + + /* Check for the end of the table: */ + if (reloc != table && reloc->jump_table_start) + break; + + /* Detect function pointers from contiguous objects: */ + if (reloc->sym->sec == pfunc->sec && + reloc->sym->offset == pfunc->offset) + break; + + dest_insn = find_insn(file, reloc->sym->sec, reloc->sym->offset); + if (!dest_insn) + break; + + /* Make sure the destination is in the same function: */ + if (!dest_insn->func || dest_insn->func->pfunc != pfunc) + break; + + alt = malloc(sizeof(*alt)); + if (!alt) { + WARN("malloc failed"); + return -1; + } + + alt->insn = dest_insn; + list_add_tail(&alt->list, &insn->alts); + } + + return 0; +} + +/* + * Switch jump could be thought of as having two stagestwo stages: + * Stage1: la (la.pcrel or la.abs) + * 1) lui12i.w ori lu32i.d lu52i.d + * 2) pcaddu12i addi.d + * Stage2: ld (addr + idx) + * 1) alsl.d ldptr.d + * 2) (alsl.d) ldx.d + * 3) add.d ldptr.d + * ... + */ + +int arch_dynamic_add_jump_table_alts(struct list_head *p_orbit_list, struct objtool_file *file, + struct symbol *func, struct instruction *insn) +{ + int dest_reg; + int stage = 0, instack = 0; + int i = 0; + int lu52id = 0, ldptrd = 0; + struct instruction *orbit, *n; + struct reloc *reloc, *rodata_reloc; + union loongarch_instruction code; + + if (list_empty(p_orbit_list)) { + WARN_FUNC("BUG: why do I have no insn track?", insn->sec, insn->offset); + return 1; + } + + if (func_last_orbit(p_orbit_list) != insn) { + WARN_FUNC("BUG: insn is not expected.", insn->sec, insn->offset); + return 1; + } + + n = list_next_entry(insn, orbit_node); + code = *(union loongarch_instruction *)(n->sec->data->d_buf + n->offset); + if (code.reg2i12_format.opcode == addid_op && + code.reg2i12_format.rj == CFI_SP && + code.reg2i12_format.rd == CFI_SP) { + insn->type = INSN_RETURN; + return 0; + } + + code = *(union loongarch_instruction *)(insn->sec->data->d_buf + insn->offset); + if (code.reg2i16_format.opcode != jirl_op) { + WARN_FUNC("BUG: first insn track is not expected.", insn->sec, insn->offset); + return 1; + } + + dest_reg = code.reg2i16_format.rj; + + list_for_each_entry(orbit, p_orbit_list, orbit_node) { + code = *(union loongarch_instruction *)(orbit->sec->data->d_buf + orbit->offset); + switch (i) { + case 1: + if (code.reg2i12_format.opcode != lu52id_op || + code.reg2i12_format.rj != dest_reg || + code.reg2i12_format.rd != dest_reg) + i = 5; + break; + case 2: + if (code.reg1i20_format.opcode != lu32id_op || + code.reg1i20_format.rd != dest_reg) + i = 5; + break; + case 3: + if (code.reg2i12_format.opcode != ori_op || + code.reg2i12_format.rj != dest_reg || + code.reg2i12_format.rd != dest_reg) + i = 5; + break; + case 4: + if (code.reg1i20_format.opcode != lu12iw_op || + code.reg1i20_format.rd != dest_reg) { + i = 5; + break; + } + reloc = find_reloc_by_dest(file->elf, orbit->sec, orbit->offset); + if (!reloc) { + WARN_FUNC("BUG: lu12i.w has no reloc.", orbit->sec, orbit->offset); + return 1; + } + n = find_insn(file, reloc->sym->sec, reloc->sym->offset); + if (!n) { + /* Global symbol */ + insn->type = INSN_RETURN; + break; + } + insn->jump_dest = n; + return 0; + default: + /* i == 0, skip jirl insn. */ + break; + } + if (i == 5) + break; + + i++; + } + + list_for_each_entry(orbit, p_orbit_list, orbit_node) { + i++; + code = *(union loongarch_instruction *)(orbit->sec->data->d_buf + orbit->offset); + if (instack) { + if ((code.reg2i12_format.opcode == std_op || code.reg2i12_format.opcode == stptrd_op) && + code.reg2i12_format.simmediate == instack && code.reg2i12_format.rj == 3) { + dest_reg = code.reg2i12_format.rd; + instack = 0; + } + continue; + } + if ((code.reg2i12_format.opcode == ldd_op || code.reg2i12_format.opcode == ldptrd_op) && + code.reg2i12_format.rd == dest_reg && code.reg2i12_format.rj == 3) { + instack = code.reg2i12_format.simmediate; + continue; + } + + switch (stage) { + case 0: + /* alsl.d */ + if (code.reg3sa2_format.opcode == alsld_op && + code.reg3sa2_format.rd == dest_reg) { + dest_reg = code.reg3sa2_format.rk; + stage = 1; + } + /* ldptr.d */ + if (code.reg2i14_format.opcode == ldptrd_op && + code.reg2i14_format.rd == dest_reg) { + dest_reg = code.reg2i14_format.rj; + ldptrd = i; + } + /* ldx.d */ + if (code.reg3_format.opcode == ldxd_op && + code.reg3_format.rd == dest_reg) { + dest_reg = code.reg3_format.rj; + stage = 1; + } + /* add.d */ + if (ldptrd && + code.reg3_format.opcode == addd_op && + code.reg3_format.rd == dest_reg) { + dest_reg = code.reg3_format.rj; + stage = 1; + } + /* ~ lu52i.d */ + if (code.reg2i12_format.opcode == lu52id_op && + code.reg2i12_format.rj == dest_reg && + code.reg2i12_format.rd == dest_reg) { + insn->type = INSN_RETURN; + return 0; + } + /* ~ addi.d */ + if (code.reg2i12_format.opcode == addid_op && + code.reg2i12_format.rd == dest_reg) { + insn->type = INSN_RETURN; + return 0; + } + break; + case 1: + /* pcaddu12i */ + if (!lu52id && + code.reg1i20_format.opcode == pcaddu12i_op && + code.reg1i20_format.rd == dest_reg) { + reloc = find_reloc_by_dest(file->elf, orbit->sec, orbit->offset); + if (!reloc) { + WARN_FUNC("BUG: pcaddu12i has no reloc.", orbit->sec, orbit->offset); + return 1; + } + + if (!strncmp(reloc->sym->name, ".L", 2) || + !strncmp(reloc->sym->name, "jumptable", 9)) { + rodata_reloc = find_reloc_by_dest(file->elf, + reloc->sym->sec, reloc->sym->offset); + + if (!rodata_reloc) { + WARN_FUNC("BUG: rodata has no reloc.", orbit->sec, orbit->offset); + return 1; + } + + insn->jump_table = rodata_reloc; + + return dynamic_add_jump_table(file, insn, insn->jump_table); + } else if (reloc->sym->bind == STB_LOCAL && reloc->sym->type == STT_OBJECT) { + insn->type = INSN_RETURN; + return 0; + } else if (reloc->sym->bind == STB_GLOBAL) { + insn->type = INSN_RETURN; + return 0; + } + + WARN("BUG here"); + return 1; + } + if (lu52id && + code.reg1i20_format.opcode == lu12iw_op && + code.reg1i20_format.rd == dest_reg) { + reloc = find_reloc_by_dest(file->elf, orbit->sec, orbit->offset); + if (!reloc) { + WARN_FUNC("BUG: lu12i.w has no reloc.", orbit->sec, orbit->offset); + return 1; + } + + if (!strncmp(reloc->sym->name, ".L", 2) || + !strncmp(reloc->sym->name, "jumptable", 9)) { + rodata_reloc = find_reloc_by_dest(file->elf, + reloc->sym->sec, reloc->sym->offset); + + if (!rodata_reloc) { + WARN_FUNC("BUG: rodata has no reloc.", orbit->sec, orbit->offset); + return 1; + } + + insn->jump_table = rodata_reloc; + + return dynamic_add_jump_table(file, insn, insn->jump_table); + } else if (reloc->sym->bind == STB_LOCAL && reloc->sym->type == STT_OBJECT) { + insn->type = INSN_RETURN; + return 0; + } else if (reloc->sym->bind == STB_GLOBAL) { + insn->type = INSN_RETURN; + return 0; + } + + WARN("BUG here"); + return 1; + } + /* addi.d */ + if (code.reg2i12_format.opcode == addid_op && + code.reg2i12_format.rd == dest_reg) { + dest_reg = code.reg2i12_format.rj; + } + /* lu52i.d */ + if (code.reg2i12_format.opcode == lu52id_op && + code.reg2i12_format.rj == dest_reg && + code.reg2i12_format.rd == dest_reg) { + lu52id = i; + } + break; + default: + WARN_FUNC("BUG: why am I here?", orbit->sec, orbit->offset); + return 1; + } + } + + insn->type = INSN_RETURN; + return 0; +} + +bool arch_support_alt_relocation(struct special_alt *special_alt, + struct instruction *insn, struct reloc *reloc) +{ + return insn->offset == special_alt->new_off && + (insn->type == INSN_CALL || is_jump(insn)); +} + +/* + * Unfortunately these have to be hard coded because the noreturn + * attribute isn't provided in ELF data. + */ +bool arch_is_noreturn(struct symbol *func) +{ + int i; + static const char * const arch_noreturns[] = { + "__stack_chk_fail", + "panic", + "do_exit", + "do_task_dead", + "__module_put_and_exit", + "complete_and_exit", + "__reiserfs_panic", + "lbug_with_loc", + "fortify_panic", + "usercopy_abort", + "kunit_try_catch_throw", + "die", + }; + + for (i = 0; i < ARRAY_SIZE(arch_noreturns); i++) + if (!strcmp(func->name, arch_noreturns[i])) + return true; + + return false; +} diff --git a/tools/objtool/arch/x86/Build b/tools/objtool/arch/x86/Build index 9f7869b5c5e0..0d1751512e00 100644 --- a/tools/objtool/arch/x86/Build +++ b/tools/objtool/arch/x86/Build @@ -1,5 +1,6 @@ objtool-y += special.o objtool-y += decode.o +objtool-$(SUBCMD_ORC) += orcapi.o inat_tables_script = ../arch/x86/tools/gen-insn-attr-x86.awk inat_tables_maps = ../arch/x86/lib/x86-opcode-map.txt diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index cde9c36e40ae..e566c31bdbd5 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -8,6 +8,9 @@ #define unlikely(cond) (cond) #include +#include +#include + #include "../../../arch/x86/lib/inat.c" #include "../../../arch/x86/lib/insn.c" @@ -15,7 +18,6 @@ #include "../../elf.h" #include "../../arch.h" #include "../../warn.h" -#include static unsigned char op_to_cfi_reg[][2] = { {CFI_AX, CFI_R8}, @@ -41,6 +43,436 @@ static int is_x86_64(const struct elf *elf) } } +static int update_cfi_state_regs(struct instruction *insn, + struct cfi_state *cfi, + struct stack_op *op) +{ + struct cfi_reg *cfa = &cfi->cfa; + + if (cfa->base != CFI_SP && cfa->base != CFI_SP_INDIRECT) + return 0; + + /* push */ + if (op->dest.type == OP_DEST_PUSH || op->dest.type == OP_DEST_PUSHF) + cfa->offset += 8; + + /* pop */ + if (op->src.type == OP_SRC_POP || op->src.type == OP_SRC_POPF) + cfa->offset -= 8; + + /* add immediate to sp */ + if (op->dest.type == OP_DEST_REG && op->src.type == OP_SRC_ADD && + op->dest.reg == CFI_SP && op->src.reg == CFI_SP) + cfa->offset -= op->src.offset; + + return 0; +} + +/* + * A note about DRAP stack alignment: + * + * GCC has the concept of a DRAP register, which is used to help keep track of + * the stack pointer when aligning the stack. r10 or r13 is used as the DRAP + * register. The typical DRAP pattern is: + * + * 4c 8d 54 24 08 lea 0x8(%rsp),%r10 + * 48 83 e4 c0 and $0xffffffffffffffc0,%rsp + * 41 ff 72 f8 pushq -0x8(%r10) + * 55 push %rbp + * 48 89 e5 mov %rsp,%rbp + * (more pushes) + * 41 52 push %r10 + * ... + * 41 5a pop %r10 + * (more pops) + * 5d pop %rbp + * 49 8d 62 f8 lea -0x8(%r10),%rsp + * c3 retq + * + * There are some variations in the epilogues, like: + * + * 5b pop %rbx + * 41 5a pop %r10 + * 41 5c pop %r12 + * 41 5d pop %r13 + * 41 5e pop %r14 + * c9 leaveq + * 49 8d 62 f8 lea -0x8(%r10),%rsp + * c3 retq + * + * and: + * + * 4c 8b 55 e8 mov -0x18(%rbp),%r10 + * 48 8b 5d e0 mov -0x20(%rbp),%rbx + * 4c 8b 65 f0 mov -0x10(%rbp),%r12 + * 4c 8b 6d f8 mov -0x8(%rbp),%r13 + * c9 leaveq + * 49 8d 62 f8 lea -0x8(%r10),%rsp + * c3 retq + * + * Sometimes r13 is used as the DRAP register, in which case it's saved and + * restored beforehand: + * + * 41 55 push %r13 + * 4c 8d 6c 24 10 lea 0x10(%rsp),%r13 + * 48 83 e4 f0 and $0xfffffffffffffff0,%rsp + * ... + * 49 8d 65 f0 lea -0x10(%r13),%rsp + * 41 5d pop %r13 + * c3 retq + */ +static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi, + struct stack_op *op) +{ + struct cfi_reg *cfa = &cfi->cfa; + struct cfi_reg *regs = cfi->regs; + + /* stack operations don't make sense with an undefined CFA */ + if (cfa->base == CFI_UNDEFINED) { + if (insn->func) { + WARN_FUNC("undefined stack state", insn->sec, insn->offset); + return -1; + } + return 0; + } + + if (cfi->type == UNWIND_HINT_TYPE_REGS || + cfi->type == UNWIND_HINT_TYPE_REGS_PARTIAL) + return update_cfi_state_regs(insn, cfi, op); + + switch (op->dest.type) { + + case OP_DEST_REG: + switch (op->src.type) { + + case OP_SRC_REG: + if (op->src.reg == CFI_SP && op->dest.reg == CFI_BP && + cfa->base == CFI_SP && + regs[CFI_BP].base == CFI_CFA && + regs[CFI_BP].offset == -cfa->offset) { + + /* mov %rsp, %rbp */ + cfa->base = op->dest.reg; + cfi->bp_scratch = false; + } + + else if (op->src.reg == CFI_SP && + op->dest.reg == CFI_BP && cfi->drap) { + + /* drap: mov %rsp, %rbp */ + regs[CFI_BP].base = CFI_BP; + regs[CFI_BP].offset = -cfi->stack_size; + cfi->bp_scratch = false; + } + + else if (op->src.reg == CFI_SP && cfa->base == CFI_SP) { + + /* + * mov %rsp, %reg + * + * This is needed for the rare case where GCC + * does: + * + * mov %rsp, %rax + * ... + * mov %rax, %rsp + */ + cfi->vals[op->dest.reg].base = CFI_CFA; + cfi->vals[op->dest.reg].offset = -cfi->stack_size; + } + + else if (op->src.reg == CFI_BP && op->dest.reg == CFI_SP && + cfa->base == CFI_BP) { + + /* + * mov %rbp, %rsp + * + * Restore the original stack pointer (Clang). + */ + cfi->stack_size = -cfi->regs[CFI_BP].offset; + } + + else if (op->dest.reg == cfa->base) { + + /* mov %reg, %rsp */ + if (cfa->base == CFI_SP && + cfi->vals[op->src.reg].base == CFI_CFA) { + + /* + * This is needed for the rare case + * where GCC does something dumb like: + * + * lea 0x8(%rsp), %rcx + * ... + * mov %rcx, %rsp + */ + cfa->offset = -cfi->vals[op->src.reg].offset; + cfi->stack_size = cfa->offset; + + } else { + cfa->base = CFI_UNDEFINED; + cfa->offset = 0; + } + } + + break; + + case OP_SRC_ADD: + if (op->dest.reg == CFI_SP && op->src.reg == CFI_SP) { + + /* add imm, %rsp */ + cfi->stack_size -= op->src.offset; + if (cfa->base == CFI_SP) + cfa->offset -= op->src.offset; + break; + } + + if (op->dest.reg == CFI_SP && op->src.reg == CFI_BP) { + + /* lea disp(%rbp), %rsp */ + cfi->stack_size = -(op->src.offset + regs[CFI_BP].offset); + break; + } + + if (op->src.reg == CFI_SP && cfa->base == CFI_SP) { + + /* drap: lea disp(%rsp), %drap */ + cfi->drap_reg = op->dest.reg; + + /* + * lea disp(%rsp), %reg + * + * This is needed for the rare case where GCC + * does something dumb like: + * + * lea 0x8(%rsp), %rcx + * ... + * mov %rcx, %rsp + */ + cfi->vals[op->dest.reg].base = CFI_CFA; + cfi->vals[op->dest.reg].offset = \ + -cfi->stack_size + op->src.offset; + + break; + } + + if (cfi->drap && op->dest.reg == CFI_SP && + op->src.reg == cfi->drap_reg) { + + /* drap: lea disp(%drap), %rsp */ + cfa->base = CFI_SP; + cfa->offset = cfi->stack_size = -op->src.offset; + cfi->drap_reg = CFI_UNDEFINED; + cfi->drap = false; + break; + } + + if (op->dest.reg == cfi->cfa.base) { + WARN_FUNC("unsupported stack register modification", + insn->sec, insn->offset); + return -1; + } + + break; + + case OP_SRC_AND: + if (op->dest.reg != CFI_SP || + (cfi->drap_reg != CFI_UNDEFINED && cfa->base != CFI_SP) || + (cfi->drap_reg == CFI_UNDEFINED && cfa->base != CFI_BP)) { + WARN_FUNC("unsupported stack pointer realignment", + insn->sec, insn->offset); + return -1; + } + + if (cfi->drap_reg != CFI_UNDEFINED) { + /* drap: and imm, %rsp */ + cfa->base = cfi->drap_reg; + cfa->offset = cfi->stack_size = 0; + cfi->drap = true; + } + + /* + * Older versions of GCC (4.8ish) realign the stack + * without DRAP, with a frame pointer. + */ + + break; + + case OP_SRC_POP: + case OP_SRC_POPF: + if (!cfi->drap && op->dest.reg == cfa->base) { + + /* pop %rbp */ + cfa->base = CFI_SP; + } + + if (cfi->drap && cfa->base == CFI_BP_INDIRECT && + op->dest.reg == cfi->drap_reg && + cfi->drap_offset == -cfi->stack_size) { + + /* drap: pop %drap */ + cfa->base = cfi->drap_reg; + cfa->offset = 0; + cfi->drap_offset = -1; + + } else if (regs[op->dest.reg].offset == -cfi->stack_size) { + + /* pop %reg */ + restore_reg(cfi, op->dest.reg); + } + + cfi->stack_size -= 8; + if (cfa->base == CFI_SP) + cfa->offset -= 8; + + break; + + case OP_SRC_REG_INDIRECT: + if (cfi->drap && op->src.reg == CFI_BP && + op->src.offset == cfi->drap_offset) { + + /* drap: mov disp(%rbp), %drap */ + cfa->base = cfi->drap_reg; + cfa->offset = 0; + cfi->drap_offset = -1; + } + + if (cfi->drap && op->src.reg == CFI_BP && + op->src.offset == regs[op->dest.reg].offset) { + + /* drap: mov disp(%rbp), %reg */ + restore_reg(cfi, op->dest.reg); + + } else if (op->src.reg == cfa->base && + op->src.offset == regs[op->dest.reg].offset + cfa->offset) { + + /* mov disp(%rbp), %reg */ + /* mov disp(%rsp), %reg */ + restore_reg(cfi, op->dest.reg); + } + + break; + + default: + WARN_FUNC("unknown stack-related instruction", + insn->sec, insn->offset); + return -1; + } + + break; + + case OP_DEST_PUSH: + case OP_DEST_PUSHF: + cfi->stack_size += 8; + if (cfa->base == CFI_SP) + cfa->offset += 8; + + if (op->src.type != OP_SRC_REG) + break; + + if (cfi->drap) { + if (op->src.reg == cfa->base && op->src.reg == cfi->drap_reg) { + + /* drap: push %drap */ + cfa->base = CFI_BP_INDIRECT; + cfa->offset = -cfi->stack_size; + + /* save drap so we know when to restore it */ + cfi->drap_offset = -cfi->stack_size; + + } else if (op->src.reg == CFI_BP && cfa->base == cfi->drap_reg) { + + /* drap: push %rbp */ + cfi->stack_size = 0; + + } else { + + /* drap: push %reg */ + save_reg(cfi, op->src.reg, CFI_BP, -cfi->stack_size); + } + + } else { + + /* push %reg */ + save_reg(cfi, op->src.reg, CFI_CFA, -cfi->stack_size); + } + + /* detect when asm code uses rbp as a scratch register */ + if (!no_fp && insn->func && op->src.reg == CFI_BP && + cfa->base != CFI_BP) + cfi->bp_scratch = true; + break; + + case OP_DEST_REG_INDIRECT: + + if (cfi->drap) { + if (op->src.reg == cfa->base && op->src.reg == cfi->drap_reg) { + + /* drap: mov %drap, disp(%rbp) */ + cfa->base = CFI_BP_INDIRECT; + cfa->offset = op->dest.offset; + + /* save drap offset so we know when to restore it */ + cfi->drap_offset = op->dest.offset; + } else { + + /* drap: mov reg, disp(%rbp) */ + save_reg(cfi, op->src.reg, CFI_BP, op->dest.offset); + } + + } else if (op->dest.reg == cfa->base) { + + /* mov reg, disp(%rbp) */ + /* mov reg, disp(%rsp) */ + save_reg(cfi, op->src.reg, CFI_CFA, + op->dest.offset - cfi->cfa.offset); + } + + break; + + case OP_DEST_LEAVE: + if ((!cfi->drap && cfa->base != CFI_BP) || + (cfi->drap && cfa->base != cfi->drap_reg)) { + WARN_FUNC("leave instruction with modified stack frame", + insn->sec, insn->offset); + return -1; + } + + /* leave (mov %rbp, %rsp; pop %rbp) */ + + cfi->stack_size = -cfi->regs[CFI_BP].offset - 8; + restore_reg(cfi, CFI_BP); + + if (!cfi->drap) { + cfa->base = CFI_SP; + cfa->offset -= 8; + } + + break; + + case OP_DEST_MEM: + if (op->src.type != OP_SRC_POP && op->src.type != OP_SRC_POPF) { + WARN_FUNC("unknown stack-related memory operation", + insn->sec, insn->offset); + return -1; + } + + /* pop mem */ + cfi->stack_size -= 8; + if (cfa->base == CFI_SP) + cfa->offset -= 8; + + break; + + default: + WARN_FUNC("unknown stack-related instruction", + insn->sec, insn->offset); + return -1; + } + + return 0; +} + bool arch_callee_saved_reg(unsigned char reg) { switch (reg) { @@ -567,6 +999,196 @@ void arch_initial_func_cfi_state(struct cfi_init_state *state) state->regs[16].offset = -8; } +bool arch_has_valid_stack_frame(struct insn_state *state) +{ + struct cfi_state *cfi = &state->cfi; + + if (cfi->cfa.base == CFI_BP && cfi->regs[CFI_BP].base == CFI_CFA && + cfi->regs[CFI_BP].offset == -16) + return true; + + if (cfi->drap && cfi->regs[CFI_BP].base == CFI_BP) + return true; + + return false; +} + +int arch_handle_insn_ops(struct instruction *insn, struct insn_state *state) +{ + struct stack_op *op; + + list_for_each_entry(op, &insn->stack_ops, list) { + struct cfi_state old_cfi = state->cfi; + int res; + + res = update_cfi_state(insn, &state->cfi, op); + if (res) + return res; + + if (insn->alt_group && memcmp(&state->cfi, &old_cfi, sizeof(struct cfi_state))) { + WARN_FUNC("alternative modifies stack", insn->sec, insn->offset); + return -1; + } + + if (op->dest.type == OP_DEST_PUSHF) { + if (!state->uaccess_stack) { + state->uaccess_stack = 1; + } else if (state->uaccess_stack >> 31) { + WARN_FUNC("PUSHF stack exhausted", + insn->sec, insn->offset); + return 1; + } + state->uaccess_stack <<= 1; + state->uaccess_stack |= state->uaccess; + } + + if (op->src.type == OP_SRC_POPF) { + if (state->uaccess_stack) { + state->uaccess = state->uaccess_stack & 1; + state->uaccess_stack >>= 1; + if (state->uaccess_stack == 1) + state->uaccess_stack = 0; + } + } + } + + return 0; +} + +int arch_create_static_call_sections(struct objtool_file *file) +{ + struct section *sec, *reloc_sec; + struct reloc *reloc; + struct static_call_site *site; + struct instruction *insn; + struct symbol *key_sym; + char *key_name, *tmp; + int idx; + + sec = find_section_by_name(file->elf, ".static_call_sites"); + if (sec) { + INIT_LIST_HEAD(&file->static_call_list); + WARN("file already has .static_call_sites section, skipping"); + return 0; + } + + if (list_empty(&file->static_call_list)) + return 0; + + idx = 0; + list_for_each_entry(insn, &file->static_call_list, static_call_node) + idx++; + + sec = elf_create_section(file->elf, ".static_call_sites", SHF_WRITE, + sizeof(struct static_call_site), idx); + if (!sec) + return -1; + + reloc_sec = elf_create_reloc_section(file->elf, sec, SHT_RELA); + if (!reloc_sec) + return -1; + + idx = 0; + list_for_each_entry(insn, &file->static_call_list, static_call_node) { + + site = (struct static_call_site *)sec->data->d_buf + idx; + memset(site, 0, sizeof(struct static_call_site)); + + /* populate reloc for 'addr' */ + reloc = malloc(sizeof(*reloc)); + + if (!reloc) { + perror("malloc"); + return -1; + } + memset(reloc, 0, sizeof(*reloc)); + + insn_to_reloc_sym_addend(insn->sec, insn->offset, reloc); + if (!reloc->sym) { + WARN_FUNC("static call tramp: missing containing symbol", + insn->sec, insn->offset); + return -1; + } + + reloc->type = R_X86_64_PC32; + reloc->offset = idx * sizeof(struct static_call_site); + reloc->sec = reloc_sec; + elf_add_reloc(file->elf, reloc); + + /* find key symbol */ + key_name = strdup(insn->call_dest->name); + if (!key_name) { + perror("strdup"); + return -1; + } + if (strncmp(key_name, STATIC_CALL_TRAMP_PREFIX_STR, + STATIC_CALL_TRAMP_PREFIX_LEN)) { + WARN("static_call: trampoline name malformed: %s", key_name); + return -1; + } + tmp = key_name + STATIC_CALL_TRAMP_PREFIX_LEN - STATIC_CALL_KEY_PREFIX_LEN; + memcpy(tmp, STATIC_CALL_KEY_PREFIX_STR, STATIC_CALL_KEY_PREFIX_LEN); + + key_sym = find_symbol_by_name(file->elf, tmp); + if (!key_sym) { + if (!module) { + WARN("static_call: can't find static_call_key symbol: %s", tmp); + return -1; + } + + /* + * For modules(), the key might not be exported, which + * means the module can make static calls but isn't + * allowed to change them. + * + * In that case we temporarily set the key to be the + * trampoline address. This is fixed up in + * static_call_add_module(). + */ + key_sym = insn->call_dest; + } + free(key_name); + + /* populate reloc for 'key' */ + reloc = malloc(sizeof(*reloc)); + if (!reloc) { + perror("malloc"); + return -1; + } + memset(reloc, 0, sizeof(*reloc)); + reloc->sym = key_sym; + reloc->addend = is_sibling_call(insn) ? STATIC_CALL_SITE_TAIL : 0; + reloc->type = R_X86_64_PC32; + reloc->offset = idx * sizeof(struct static_call_site) + 4; + reloc->sec = reloc_sec; + elf_add_reloc(file->elf, reloc); + + idx++; + } + + if (elf_rebuild_reloc_section(file->elf, reloc_sec)) + return -1; + + return 0; +} + +int arch_read_static_call_tramps(struct objtool_file *file) +{ + struct section *sec; + struct symbol *func; + + for_each_sec(file, sec) { + list_for_each_entry(func, &sec->symbol_list, list) { + if (func->bind == STB_GLOBAL && + !strncmp(func->name, STATIC_CALL_TRAMP_PREFIX_STR, + strlen(STATIC_CALL_TRAMP_PREFIX_STR))) + func->static_call_tramp = true; + } + } + + return 0; +} + const char *arch_nop_insn(int len) { static const char nops[5][5] = { @@ -620,3 +1242,8 @@ int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg) return 0; } + +void arch_try_find_call(struct list_head *p_orbit_list, struct objtool_file *file, + struct symbol *func, struct instruction *insn) +{ +} diff --git a/tools/objtool/arch/x86/orcapi.c b/tools/objtool/arch/x86/orcapi.c new file mode 100644 index 000000000000..018ddb51215d --- /dev/null +++ b/tools/objtool/arch/x86/orcapi.c @@ -0,0 +1,195 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#include +#include + +#include "../../orc.h" +#include "../../warn.h" + +static struct orc_entry empty = { + .sp_reg = ORC_REG_UNDEFINED, + .bp_reg = ORC_REG_UNDEFINED, + .type = UNWIND_HINT_TYPE_CALL, +}; + +int create_orc(struct objtool_file *file) +{ + struct instruction *insn; + + for_each_insn(file, insn) { + struct orc_entry *orc = &insn->orc; + struct cfi_reg *cfa = &insn->cfi.cfa; + struct cfi_reg *bp = &insn->cfi.regs[CFI_BP]; + + if (!insn->sec->text) + continue; + + orc->end = insn->cfi.end; + + if (cfa->base == CFI_UNDEFINED) { + orc->sp_reg = ORC_REG_UNDEFINED; + continue; + } + + switch (cfa->base) { + case CFI_SP: + orc->sp_reg = ORC_REG_SP; + break; + case CFI_SP_INDIRECT: + orc->sp_reg = ORC_REG_SP_INDIRECT; + break; + case CFI_BP: + orc->sp_reg = ORC_REG_BP; + break; + case CFI_BP_INDIRECT: + orc->sp_reg = ORC_REG_BP_INDIRECT; + break; + case CFI_R10: + orc->sp_reg = ORC_REG_R10; + break; + case CFI_R13: + orc->sp_reg = ORC_REG_R13; + break; + case CFI_DI: + orc->sp_reg = ORC_REG_DI; + break; + case CFI_DX: + orc->sp_reg = ORC_REG_DX; + break; + default: + WARN_FUNC("unknown CFA base reg %d", + insn->sec, insn->offset, cfa->base); + return -1; + } + + switch (bp->base) { + case CFI_UNDEFINED: + orc->bp_reg = ORC_REG_UNDEFINED; + break; + case CFI_CFA: + orc->bp_reg = ORC_REG_PREV_SP; + break; + case CFI_BP: + orc->bp_reg = ORC_REG_BP; + break; + default: + WARN_FUNC("unknown BP base reg %d", + insn->sec, insn->offset, bp->base); + return -1; + } + + orc->sp_offset = cfa->offset; + orc->bp_offset = bp->offset; + orc->type = insn->cfi.type; + } + + return 0; +} + +int arch_create_orc_entry(struct elf *elf, struct section *u_sec, struct section *ip_relocsec, + unsigned int idx, struct section *insn_sec, + unsigned long insn_off, struct orc_entry *o) +{ + struct orc_entry *orc; + struct reloc *reloc; + + /* populate ORC data */ + orc = (struct orc_entry *)u_sec->data->d_buf + idx; + memcpy(orc, o, sizeof(*orc)); + + /* populate reloc for ip */ + reloc = malloc(sizeof(*reloc)); + if (!reloc) { + perror("malloc"); + return -1; + } + memset(reloc, 0, sizeof(*reloc)); + + insn_to_reloc_sym_addend(insn_sec, insn_off, reloc); + if (!reloc->sym) { + WARN("missing symbol for insn at offset 0x%lx", + insn_off); + return -1; + } + + reloc->type = R_X86_64_PC32; + reloc->offset = idx * sizeof(int); + reloc->sec = ip_relocsec; + + elf_add_reloc(elf, reloc); + + return 0; +} + +int arch_create_orc_entry_empty(struct elf *elf, struct section *u_sec, + struct section *ip_relasec, unsigned int idx, + struct section *insn_sec, unsigned long insn_off) +{ + return arch_create_orc_entry(elf, u_sec, ip_relasec, idx, + insn_sec, insn_off, &empty); +} + +static const char *reg_name(unsigned int reg) +{ + switch (reg) { + case ORC_REG_PREV_SP: + return "prevsp"; + case ORC_REG_DX: + return "dx"; + case ORC_REG_DI: + return "di"; + case ORC_REG_BP: + return "bp"; + case ORC_REG_SP: + return "sp"; + case ORC_REG_R10: + return "r10"; + case ORC_REG_R13: + return "r13"; + case ORC_REG_BP_INDIRECT: + return "bp(ind)"; + case ORC_REG_SP_INDIRECT: + return "sp(ind)"; + default: + return "?"; + } +} + +static const char *orc_type_name(unsigned int type) +{ + switch (type) { + case UNWIND_HINT_TYPE_CALL: + return "call"; + case UNWIND_HINT_TYPE_REGS: + return "regs"; + case UNWIND_HINT_TYPE_REGS_PARTIAL: + return "regs (partial)"; + default: + return "?"; + } +} + +static void print_reg(unsigned int reg, int offset) +{ + if (reg == ORC_REG_BP_INDIRECT) + printf("(bp%+d)", offset); + else if (reg == ORC_REG_SP_INDIRECT) + printf("(sp%+d)", offset); + else if (reg == ORC_REG_UNDEFINED) + printf("(und)"); + else + printf("%s%+d", reg_name(reg), offset); +} + +void arch_print_reg(struct orc_entry orc) +{ + printf(" sp:"); + + print_reg(orc.sp_reg, orc.sp_offset); + + printf(" bp:"); + + print_reg(orc.bp_reg, orc.bp_offset); + + printf(" type:%s end:%d\n", + orc_type_name(orc.type), orc.end); +} diff --git a/tools/objtool/arch/x86/special.c b/tools/objtool/arch/x86/special.c index 151b13d0a267..ef2f3ecf469c 100644 --- a/tools/objtool/arch/x86/special.c +++ b/tools/objtool/arch/x86/special.c @@ -91,7 +91,7 @@ bool arch_support_alt_relocation(struct special_alt *special_alt, * * NOTE: RETPOLINE made it harder still to decode dynamic jumps. */ -struct reloc *arch_find_switch_table(struct objtool_file *file, +static struct reloc *find_switch_table(struct objtool_file *file, struct instruction *insn) { struct reloc *text_reloc, *rodata_reloc; @@ -143,3 +143,123 @@ struct reloc *arch_find_switch_table(struct objtool_file *file, return rodata_reloc; } + +int arch_dynamic_add_jump_table_alts(struct list_head *p_orbit_list, struct objtool_file *file, + struct symbol *func, struct instruction *insn) +{ + return 0; +} + +/* + * find_jump_table() - Given a dynamic jump, find the switch jump table + * associated with it. + */ +static struct reloc *find_jump_table(struct objtool_file *file, + struct symbol *func, + struct instruction *insn) +{ + struct reloc *table_reloc; + struct instruction *dest_insn, *orig_insn = insn; + + /* + * Backward search using the @first_jump_src links, these help avoid + * much of the 'in between' code. Which avoids us getting confused by + * it. + */ + for (; + insn && insn->func && insn->func->pfunc == func; + insn = insn->first_jump_src ?: prev_insn_same_sym(file, insn)) { + + if (insn != orig_insn && insn->type == INSN_JUMP_DYNAMIC) + break; + + /* allow small jumps within the range */ + if (insn->type == INSN_JUMP_UNCONDITIONAL && + insn->jump_dest && + (insn->jump_dest->offset <= insn->offset || + insn->jump_dest->offset > orig_insn->offset)) + break; + + table_reloc = find_switch_table(file, insn); + if (!table_reloc) + continue; + dest_insn = find_insn(file, table_reloc->sym->sec, table_reloc->addend); + if (!dest_insn || !dest_insn->func || dest_insn->func->pfunc != func) + continue; + + return table_reloc; + } + + return NULL; +} + +/* + * First pass: Mark the head of each jump table so that in the next pass, + * we know when a given jump table ends and the next one starts. + */ +void arch_mark_func_jump_tables(struct objtool_file *file, + struct symbol *func) +{ + struct instruction *insn, *last = NULL; + struct reloc *reloc; + + func_for_each_insn(file, func, insn) { + if (!last) + last = insn; + + /* + * Store back-pointers for unconditional forward jumps such + * that find_jump_table() can back-track using those and + * avoid some potentially confusing code. + */ + if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest && + insn->offset > last->offset && + insn->jump_dest->offset > insn->offset && + !insn->jump_dest->first_jump_src) { + + insn->jump_dest->first_jump_src = insn; + last = insn->jump_dest; + } + + if (insn->type != INSN_JUMP_DYNAMIC) + continue; + + reloc = find_jump_table(file, func, insn); + if (reloc) { + reloc->jump_table_start = true; + insn->jump_table = reloc; + } + } +} + +/* + * Unfortunately these have to be hard coded because the noreturn + * attribute isn't provided in ELF data. + */ +bool arch_is_noreturn(struct symbol *func) +{ + int i; + static const char * const arch_noreturns[] = { + "__stack_chk_fail", + "panic", + "do_exit", + "do_task_dead", + "__module_put_and_exit", + "complete_and_exit", + "__reiserfs_panic", + "lbug_with_loc", + "fortify_panic", + "usercopy_abort", + "machine_real_restart", + "rewind_stack_do_exit", + "kunit_try_catch_throw", + "xen_start_kernel", + "cpu_bringup_and_idle", + }; + + for (i = 0; i < ARRAY_SIZE(arch_noreturns); i++) + if (!strcmp(func->name, arch_noreturns[i])) + return true; + + return false; +} diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 8932f41c387f..bfc0fc81529c 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -13,6 +13,7 @@ #include "special.h" #include "warn.h" #include "arch_elf.h" +#include "orc.h" #include #include @@ -21,13 +22,8 @@ #define FAKE_JUMP_OFFSET -1 -struct alternative { - struct list_head list; - struct instruction *insn; - bool skip_orig; -}; - struct cfi_init_state initial_func_cfi; +struct list_head orbit_list; struct instruction *find_insn(struct objtool_file *file, struct section *sec, unsigned long offset) @@ -53,8 +49,8 @@ static struct instruction *next_insn_same_sec(struct objtool_file *file, return next; } -static struct instruction *next_insn_same_func(struct objtool_file *file, - struct instruction *insn) +struct instruction *next_insn_same_func(struct objtool_file *file, + struct instruction *insn) { struct instruction *next = list_next_entry(insn, list); struct symbol *func = insn->func; @@ -73,8 +69,8 @@ static struct instruction *next_insn_same_func(struct objtool_file *file, return find_insn(file, func->cfunc->sec, func->cfunc->offset); } -static struct instruction *prev_insn_same_sym(struct objtool_file *file, - struct instruction *insn) +struct instruction *prev_insn_same_sym(struct objtool_file *file, + struct instruction *insn) { struct instruction *prev = list_prev_entry(insn, list); @@ -84,44 +80,6 @@ static struct instruction *prev_insn_same_sym(struct objtool_file *file, return NULL; } -#define func_for_each_insn(file, func, insn) \ - for (insn = find_insn(file, func->sec, func->offset); \ - insn; \ - insn = next_insn_same_func(file, insn)) - -#define sym_for_each_insn(file, sym, insn) \ - for (insn = find_insn(file, sym->sec, sym->offset); \ - insn && &insn->list != &file->insn_list && \ - insn->sec == sym->sec && \ - insn->offset < sym->offset + sym->len; \ - insn = list_next_entry(insn, list)) - -#define sym_for_each_insn_continue_reverse(file, sym, insn) \ - for (insn = list_prev_entry(insn, list); \ - &insn->list != &file->insn_list && \ - insn->sec == sym->sec && insn->offset >= sym->offset; \ - insn = list_prev_entry(insn, list)) - -#define sec_for_each_insn_from(file, insn) \ - for (; insn; insn = next_insn_same_sec(file, insn)) - -#define sec_for_each_insn_continue(file, insn) \ - for (insn = next_insn_same_sec(file, insn); insn; \ - insn = next_insn_same_sec(file, insn)) - -static bool is_sibling_call(struct instruction *insn) -{ - /* An indirect jump is either a sibling call or a jump to a table. */ - if (insn->type == INSN_JUMP_DYNAMIC) - return list_empty(&insn->alts); - - if (!is_static_jump(insn)) - return false; - - /* add_jump_destinations() sets insn->call_dest for sibling calls. */ - return !!insn->call_dest; -} - /* * This checks to see if the given function is a "noreturn" function. * @@ -134,42 +92,17 @@ static bool is_sibling_call(struct instruction *insn) static bool __dead_end_function(struct objtool_file *file, struct symbol *func, int recursion) { - int i; struct instruction *insn; bool empty = true; - /* - * Unfortunately these have to be hard coded because the noreturn - * attribute isn't provided in ELF data. - */ - static const char * const global_noreturns[] = { - "__stack_chk_fail", - "panic", - "do_exit", - "do_task_dead", - "__module_put_and_exit", - "complete_and_exit", - "__reiserfs_panic", - "lbug_with_loc", - "fortify_panic", - "usercopy_abort", - "machine_real_restart", - "rewind_stack_do_exit", - "kunit_try_catch_throw", - "xen_start_kernel", - "cpu_bringup_and_idle", - }; - if (!func) return false; if (func->bind == STB_WEAK) return false; - if (func->bind == STB_GLOBAL) - for (i = 0; i < ARRAY_SIZE(global_noreturns); i++) - if (!strcmp(func->name, global_noreturns[i])) - return true; + if (func->bind == STB_GLOBAL && arch_is_noreturn(func)) + return true; if (!func->len) return false; @@ -367,14 +300,27 @@ static int add_dead_ends(struct objtool_file *file) goto reachable; list_for_each_entry(reloc, &sec->reloc_list, list) { +#ifdef __loongarch__ + if (!reloc->next || reloc->type != R_LARCH_ADD32 || + reloc->next->type != R_LARCH_SUB32) { +#else if (reloc->sym->type != STT_SECTION) { +#endif WARN("unexpected relocation symbol type in %s", sec->name); return -1; } +#ifdef __loongarch__ + insn = find_insn(file, reloc->sym->sec, reloc->sym->offset); +#else insn = find_insn(file, reloc->sym->sec, reloc->addend); +#endif if (insn) insn = list_prev_entry(insn, list); +#ifdef __loongarch__ + else if (reloc->sym->offset == reloc->sym->sec->len) { +#else else if (reloc->addend == reloc->sym->sec->len) { +#endif insn = find_last_insn(file, reloc->sym->sec); if (!insn) { WARN("can't find unreachable insn at %s+0x%x", @@ -428,120 +374,49 @@ static int add_dead_ends(struct objtool_file *file) return 0; } -static int create_static_call_sections(struct objtool_file *file) +/* + * Mark not sibling call instructions. + */ +static int add_not_sibling_call(struct objtool_file *file) { - struct section *sec, *reloc_sec; + struct section *sec; struct reloc *reloc; - struct static_call_site *site; struct instruction *insn; - struct symbol *key_sym; - char *key_name, *tmp; - int idx; - - sec = find_section_by_name(file->elf, ".static_call_sites"); - if (sec) { - INIT_LIST_HEAD(&file->static_call_list); - WARN("file already has .static_call_sites section, skipping"); - return 0; - } - - if (list_empty(&file->static_call_list)) - return 0; - idx = 0; - list_for_each_entry(insn, &file->static_call_list, static_call_node) - idx++; - - sec = elf_create_section(file->elf, ".static_call_sites", SHF_WRITE, - sizeof(struct static_call_site), idx); + sec = find_section_by_name(file->elf, ".rela.discard.not_sibling_call"); if (!sec) - return -1; - - reloc_sec = elf_create_reloc_section(file->elf, sec, SHT_RELA); - if (!reloc_sec) - return -1; - - idx = 0; - list_for_each_entry(insn, &file->static_call_list, static_call_node) { - - site = (struct static_call_site *)sec->data->d_buf + idx; - memset(site, 0, sizeof(struct static_call_site)); - - /* populate reloc for 'addr' */ - reloc = malloc(sizeof(*reloc)); - - if (!reloc) { - perror("malloc"); - return -1; - } - memset(reloc, 0, sizeof(*reloc)); - - insn_to_reloc_sym_addend(insn->sec, insn->offset, reloc); - if (!reloc->sym) { - WARN_FUNC("static call tramp: missing containing symbol", - insn->sec, insn->offset); - return -1; - } - - reloc->type = R_X86_64_PC32; - reloc->offset = idx * sizeof(struct static_call_site); - reloc->sec = reloc_sec; - elf_add_reloc(file->elf, reloc); + return 0; - /* find key symbol */ - key_name = strdup(insn->call_dest->name); - if (!key_name) { - perror("strdup"); - return -1; - } - if (strncmp(key_name, STATIC_CALL_TRAMP_PREFIX_STR, - STATIC_CALL_TRAMP_PREFIX_LEN)) { - WARN("static_call: trampoline name malformed: %s", key_name); + list_for_each_entry(reloc, &sec->reloc_list, list) { +#ifdef __loongarch__ + if (!reloc->next || reloc->type != R_LARCH_ADD32 || + reloc->next->type != R_LARCH_SUB32) { +#else + if (reloc->sym->type != STT_SECTION) { +#endif + WARN("unexpected relocation symbol type in %s", sec->name); return -1; } - tmp = key_name + STATIC_CALL_TRAMP_PREFIX_LEN - STATIC_CALL_KEY_PREFIX_LEN; - memcpy(tmp, STATIC_CALL_KEY_PREFIX_STR, STATIC_CALL_KEY_PREFIX_LEN); - key_sym = find_symbol_by_name(file->elf, tmp); - if (!key_sym) { - if (!module) { - WARN("static_call: can't find static_call_key symbol: %s", tmp); - return -1; - } - - /* - * For modules(), the key might not be exported, which - * means the module can make static calls but isn't - * allowed to change them. - * - * In that case we temporarily set the key to be the - * trampoline address. This is fixed up in - * static_call_add_module(). - */ - key_sym = insn->call_dest; - } - free(key_name); +#ifdef __loongarch__ + insn = find_insn(file, reloc->sym->sec, reloc->sym->offset); +#else + insn = find_insn(file, reloc->sym->sec, reloc->addend); +#endif - /* populate reloc for 'key' */ - reloc = malloc(sizeof(*reloc)); - if (!reloc) { - perror("malloc"); + if (!insn) { + WARN("unexpected relocation symbol offset at %s: %lx", + reloc->sym->sec->name, reloc->sym->offset); return -1; } - memset(reloc, 0, sizeof(*reloc)); - reloc->sym = key_sym; - reloc->addend = is_sibling_call(insn) ? STATIC_CALL_SITE_TAIL : 0; - reloc->type = R_X86_64_PC32; - reloc->offset = idx * sizeof(struct static_call_site) + 4; - reloc->sec = reloc_sec; - elf_add_reloc(file->elf, reloc); - - idx++; + insn->not_sibling_call = true; } - if (elf_rebuild_reloc_section(file->elf, reloc_sec)) - return -1; + return 0; +} +int __weak arch_create_static_call_sections(struct objtool_file *file) +{ return 0; } @@ -800,6 +675,16 @@ static int add_jump_destinations(struct objtool_file *file) } else if (reloc->sym->type == STT_SECTION) { dest_sec = reloc->sym->sec; dest_off = arch_dest_reloc_offset(reloc->addend); +#ifdef __loongarch__ + } else if (!strncmp(reloc->sym->name, ".L", 2)) { + /* '.L' in LA compiler means target name prefix */ + dest_sec = reloc->sym->sec; + dest_off = reloc->sym->offset + reloc->addend; + } else if (reloc->sym->type == STT_NOTYPE && reloc->sym->bind == STB_LOCAL) { + /* In asm file, jump dest maybe a label without '.L' prefix */ + dest_sec = reloc->sym->sec; + dest_off = reloc->sym->offset + reloc->addend; +#endif } else if (reloc->sym->sec->idx) { dest_sec = reloc->sym->sec; dest_off = reloc->sym->sym.st_value + @@ -1227,7 +1112,7 @@ static int add_special_section_alts(struct objtool_file *file) return ret; } -static int add_jump_table(struct objtool_file *file, struct instruction *insn, +int add_jump_table(struct objtool_file *file, struct instruction *insn, struct reloc *table) { struct reloc *reloc = table; @@ -1283,88 +1168,6 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn, return 0; } -/* - * find_jump_table() - Given a dynamic jump, find the switch jump table - * associated with it. - */ -static struct reloc *find_jump_table(struct objtool_file *file, - struct symbol *func, - struct instruction *insn) -{ - struct reloc *table_reloc; - struct instruction *dest_insn, *orig_insn = insn; - - /* - * Backward search using the @first_jump_src links, these help avoid - * much of the 'in between' code. Which avoids us getting confused by - * it. - */ - for (; - insn && insn->func && insn->func->pfunc == func; - insn = insn->first_jump_src ?: prev_insn_same_sym(file, insn)) { - - if (insn != orig_insn && insn->type == INSN_JUMP_DYNAMIC) - break; - - /* allow small jumps within the range */ - if (insn->type == INSN_JUMP_UNCONDITIONAL && - insn->jump_dest && - (insn->jump_dest->offset <= insn->offset || - insn->jump_dest->offset > orig_insn->offset)) - break; - - table_reloc = arch_find_switch_table(file, insn); - if (!table_reloc) - continue; - dest_insn = find_insn(file, table_reloc->sym->sec, table_reloc->addend); - if (!dest_insn || !dest_insn->func || dest_insn->func->pfunc != func) - continue; - - return table_reloc; - } - - return NULL; -} - -/* - * First pass: Mark the head of each jump table so that in the next pass, - * we know when a given jump table ends and the next one starts. - */ -static void mark_func_jump_tables(struct objtool_file *file, - struct symbol *func) -{ - struct instruction *insn, *last = NULL; - struct reloc *reloc; - - func_for_each_insn(file, func, insn) { - if (!last) - last = insn; - - /* - * Store back-pointers for unconditional forward jumps such - * that find_jump_table() can back-track using those and - * avoid some potentially confusing code. - */ - if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest && - insn->offset > last->offset && - insn->jump_dest->offset > insn->offset && - !insn->jump_dest->first_jump_src) { - - insn->jump_dest->first_jump_src = insn; - last = insn->jump_dest; - } - - if (insn->type != INSN_JUMP_DYNAMIC) - continue; - - reloc = find_jump_table(file, func, insn); - if (reloc) { - reloc->jump_table_start = true; - insn->jump_table = reloc; - } - } -} - static int add_func_jump_tables(struct objtool_file *file, struct symbol *func) { @@ -1402,7 +1205,7 @@ static int add_jump_table_alts(struct objtool_file *file) if (func->type != STT_FUNC) continue; - mark_func_jump_tables(file, func); + arch_mark_func_jump_tables(file, func); ret = add_func_jump_tables(file, func); if (ret) return ret; @@ -1447,7 +1250,11 @@ static int read_unwind_hints(struct objtool_file *file) return -1; } +#ifdef __loongarch__ + insn = find_insn(file, reloc->sym->sec, reloc->sym->offset); +#else insn = find_insn(file, reloc->sym->sec, reloc->addend); +#endif if (!insn) { WARN("can't find insn for unwind_hints[%d]", i); return -1; @@ -1609,20 +1416,8 @@ static int read_intra_function_calls(struct objtool_file *file) return 0; } -static int read_static_call_tramps(struct objtool_file *file) +int __weak arch_read_static_call_tramps(struct objtool_file *file) { - struct section *sec; - struct symbol *func; - - for_each_sec(file, sec) { - list_for_each_entry(func, &sec->symbol_list, list) { - if (func->bind == STB_GLOBAL && - !strncmp(func->name, STATIC_CALL_TRAMP_PREFIX_STR, - strlen(STATIC_CALL_TRAMP_PREFIX_STR))) - func->static_call_tramp = true; - } - } - return 0; } @@ -1666,6 +1461,10 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; + ret = add_not_sibling_call(file); + if (ret) + return ret; + add_ignores(file); add_uaccess_safe(file); @@ -1676,7 +1475,7 @@ static int decode_sections(struct objtool_file *file) /* * Must be before add_{jump_call}_destination. */ - ret = read_static_call_tramps(file); + ret = arch_read_static_call_tramps(file); if (ret) return ret; @@ -1761,507 +1560,6 @@ static bool has_modified_stack_frame(struct instruction *insn, struct insn_state return false; } -static bool has_valid_stack_frame(struct insn_state *state) -{ - struct cfi_state *cfi = &state->cfi; - - if (cfi->cfa.base == CFI_BP && cfi->regs[CFI_BP].base == CFI_CFA && - cfi->regs[CFI_BP].offset == -16) - return true; - - if (cfi->drap && cfi->regs[CFI_BP].base == CFI_BP) - return true; - - return false; -} - -static int update_cfi_state_regs(struct instruction *insn, - struct cfi_state *cfi, - struct stack_op *op) -{ - struct cfi_reg *cfa = &cfi->cfa; - - if (cfa->base != CFI_SP && cfa->base != CFI_SP_INDIRECT) - return 0; - - /* push */ - if (op->dest.type == OP_DEST_PUSH || op->dest.type == OP_DEST_PUSHF) - cfa->offset += 8; - - /* pop */ - if (op->src.type == OP_SRC_POP || op->src.type == OP_SRC_POPF) - cfa->offset -= 8; - - /* add immediate to sp */ - if (op->dest.type == OP_DEST_REG && op->src.type == OP_SRC_ADD && - op->dest.reg == CFI_SP && op->src.reg == CFI_SP) - cfa->offset -= op->src.offset; - - return 0; -} - -static void save_reg(struct cfi_state *cfi, unsigned char reg, int base, int offset) -{ - if (arch_callee_saved_reg(reg) && - cfi->regs[reg].base == CFI_UNDEFINED) { - cfi->regs[reg].base = base; - cfi->regs[reg].offset = offset; - } -} - -static void restore_reg(struct cfi_state *cfi, unsigned char reg) -{ - cfi->regs[reg].base = initial_func_cfi.regs[reg].base; - cfi->regs[reg].offset = initial_func_cfi.regs[reg].offset; -} - -/* - * A note about DRAP stack alignment: - * - * GCC has the concept of a DRAP register, which is used to help keep track of - * the stack pointer when aligning the stack. r10 or r13 is used as the DRAP - * register. The typical DRAP pattern is: - * - * 4c 8d 54 24 08 lea 0x8(%rsp),%r10 - * 48 83 e4 c0 and $0xffffffffffffffc0,%rsp - * 41 ff 72 f8 pushq -0x8(%r10) - * 55 push %rbp - * 48 89 e5 mov %rsp,%rbp - * (more pushes) - * 41 52 push %r10 - * ... - * 41 5a pop %r10 - * (more pops) - * 5d pop %rbp - * 49 8d 62 f8 lea -0x8(%r10),%rsp - * c3 retq - * - * There are some variations in the epilogues, like: - * - * 5b pop %rbx - * 41 5a pop %r10 - * 41 5c pop %r12 - * 41 5d pop %r13 - * 41 5e pop %r14 - * c9 leaveq - * 49 8d 62 f8 lea -0x8(%r10),%rsp - * c3 retq - * - * and: - * - * 4c 8b 55 e8 mov -0x18(%rbp),%r10 - * 48 8b 5d e0 mov -0x20(%rbp),%rbx - * 4c 8b 65 f0 mov -0x10(%rbp),%r12 - * 4c 8b 6d f8 mov -0x8(%rbp),%r13 - * c9 leaveq - * 49 8d 62 f8 lea -0x8(%r10),%rsp - * c3 retq - * - * Sometimes r13 is used as the DRAP register, in which case it's saved and - * restored beforehand: - * - * 41 55 push %r13 - * 4c 8d 6c 24 10 lea 0x10(%rsp),%r13 - * 48 83 e4 f0 and $0xfffffffffffffff0,%rsp - * ... - * 49 8d 65 f0 lea -0x10(%r13),%rsp - * 41 5d pop %r13 - * c3 retq - */ -static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi, - struct stack_op *op) -{ - struct cfi_reg *cfa = &cfi->cfa; - struct cfi_reg *regs = cfi->regs; - - /* stack operations don't make sense with an undefined CFA */ - if (cfa->base == CFI_UNDEFINED) { - if (insn->func) { - WARN_FUNC("undefined stack state", insn->sec, insn->offset); - return -1; - } - return 0; - } - - if (cfi->type == UNWIND_HINT_TYPE_REGS || - cfi->type == UNWIND_HINT_TYPE_REGS_PARTIAL) - return update_cfi_state_regs(insn, cfi, op); - - switch (op->dest.type) { - - case OP_DEST_REG: - switch (op->src.type) { - - case OP_SRC_REG: - if (op->src.reg == CFI_SP && op->dest.reg == CFI_BP && - cfa->base == CFI_SP && - regs[CFI_BP].base == CFI_CFA && - regs[CFI_BP].offset == -cfa->offset) { - - /* mov %rsp, %rbp */ - cfa->base = op->dest.reg; - cfi->bp_scratch = false; - } - - else if (op->src.reg == CFI_SP && - op->dest.reg == CFI_BP && cfi->drap) { - - /* drap: mov %rsp, %rbp */ - regs[CFI_BP].base = CFI_BP; - regs[CFI_BP].offset = -cfi->stack_size; - cfi->bp_scratch = false; - } - - else if (op->src.reg == CFI_SP && cfa->base == CFI_SP) { - - /* - * mov %rsp, %reg - * - * This is needed for the rare case where GCC - * does: - * - * mov %rsp, %rax - * ... - * mov %rax, %rsp - */ - cfi->vals[op->dest.reg].base = CFI_CFA; - cfi->vals[op->dest.reg].offset = -cfi->stack_size; - } - - else if (op->src.reg == CFI_BP && op->dest.reg == CFI_SP && - cfa->base == CFI_BP) { - - /* - * mov %rbp, %rsp - * - * Restore the original stack pointer (Clang). - */ - cfi->stack_size = -cfi->regs[CFI_BP].offset; - } - - else if (op->dest.reg == cfa->base) { - - /* mov %reg, %rsp */ - if (cfa->base == CFI_SP && - cfi->vals[op->src.reg].base == CFI_CFA) { - - /* - * This is needed for the rare case - * where GCC does something dumb like: - * - * lea 0x8(%rsp), %rcx - * ... - * mov %rcx, %rsp - */ - cfa->offset = -cfi->vals[op->src.reg].offset; - cfi->stack_size = cfa->offset; - - } else { - cfa->base = CFI_UNDEFINED; - cfa->offset = 0; - } - } - - break; - - case OP_SRC_ADD: - if (op->dest.reg == CFI_SP && op->src.reg == CFI_SP) { - - /* add imm, %rsp */ - cfi->stack_size -= op->src.offset; - if (cfa->base == CFI_SP) - cfa->offset -= op->src.offset; - break; - } - - if (op->dest.reg == CFI_SP && op->src.reg == CFI_BP) { - - /* lea disp(%rbp), %rsp */ - cfi->stack_size = -(op->src.offset + regs[CFI_BP].offset); - break; - } - - if (op->src.reg == CFI_SP && cfa->base == CFI_SP) { - - /* drap: lea disp(%rsp), %drap */ - cfi->drap_reg = op->dest.reg; - - /* - * lea disp(%rsp), %reg - * - * This is needed for the rare case where GCC - * does something dumb like: - * - * lea 0x8(%rsp), %rcx - * ... - * mov %rcx, %rsp - */ - cfi->vals[op->dest.reg].base = CFI_CFA; - cfi->vals[op->dest.reg].offset = \ - -cfi->stack_size + op->src.offset; - - break; - } - - if (cfi->drap && op->dest.reg == CFI_SP && - op->src.reg == cfi->drap_reg) { - - /* drap: lea disp(%drap), %rsp */ - cfa->base = CFI_SP; - cfa->offset = cfi->stack_size = -op->src.offset; - cfi->drap_reg = CFI_UNDEFINED; - cfi->drap = false; - break; - } - - if (op->dest.reg == cfi->cfa.base) { - WARN_FUNC("unsupported stack register modification", - insn->sec, insn->offset); - return -1; - } - - break; - - case OP_SRC_AND: - if (op->dest.reg != CFI_SP || - (cfi->drap_reg != CFI_UNDEFINED && cfa->base != CFI_SP) || - (cfi->drap_reg == CFI_UNDEFINED && cfa->base != CFI_BP)) { - WARN_FUNC("unsupported stack pointer realignment", - insn->sec, insn->offset); - return -1; - } - - if (cfi->drap_reg != CFI_UNDEFINED) { - /* drap: and imm, %rsp */ - cfa->base = cfi->drap_reg; - cfa->offset = cfi->stack_size = 0; - cfi->drap = true; - } - - /* - * Older versions of GCC (4.8ish) realign the stack - * without DRAP, with a frame pointer. - */ - - break; - - case OP_SRC_POP: - case OP_SRC_POPF: - if (!cfi->drap && op->dest.reg == cfa->base) { - - /* pop %rbp */ - cfa->base = CFI_SP; - } - - if (cfi->drap && cfa->base == CFI_BP_INDIRECT && - op->dest.reg == cfi->drap_reg && - cfi->drap_offset == -cfi->stack_size) { - - /* drap: pop %drap */ - cfa->base = cfi->drap_reg; - cfa->offset = 0; - cfi->drap_offset = -1; - - } else if (regs[op->dest.reg].offset == -cfi->stack_size) { - - /* pop %reg */ - restore_reg(cfi, op->dest.reg); - } - - cfi->stack_size -= 8; - if (cfa->base == CFI_SP) - cfa->offset -= 8; - - break; - - case OP_SRC_REG_INDIRECT: - if (cfi->drap && op->src.reg == CFI_BP && - op->src.offset == cfi->drap_offset) { - - /* drap: mov disp(%rbp), %drap */ - cfa->base = cfi->drap_reg; - cfa->offset = 0; - cfi->drap_offset = -1; - } - - if (cfi->drap && op->src.reg == CFI_BP && - op->src.offset == regs[op->dest.reg].offset) { - - /* drap: mov disp(%rbp), %reg */ - restore_reg(cfi, op->dest.reg); - - } else if (op->src.reg == cfa->base && - op->src.offset == regs[op->dest.reg].offset + cfa->offset) { - - /* mov disp(%rbp), %reg */ - /* mov disp(%rsp), %reg */ - restore_reg(cfi, op->dest.reg); - } - - break; - - default: - WARN_FUNC("unknown stack-related instruction", - insn->sec, insn->offset); - return -1; - } - - break; - - case OP_DEST_PUSH: - case OP_DEST_PUSHF: - cfi->stack_size += 8; - if (cfa->base == CFI_SP) - cfa->offset += 8; - - if (op->src.type != OP_SRC_REG) - break; - - if (cfi->drap) { - if (op->src.reg == cfa->base && op->src.reg == cfi->drap_reg) { - - /* drap: push %drap */ - cfa->base = CFI_BP_INDIRECT; - cfa->offset = -cfi->stack_size; - - /* save drap so we know when to restore it */ - cfi->drap_offset = -cfi->stack_size; - - } else if (op->src.reg == CFI_BP && cfa->base == cfi->drap_reg) { - - /* drap: push %rbp */ - cfi->stack_size = 0; - - } else { - - /* drap: push %reg */ - save_reg(cfi, op->src.reg, CFI_BP, -cfi->stack_size); - } - - } else { - - /* push %reg */ - save_reg(cfi, op->src.reg, CFI_CFA, -cfi->stack_size); - } - - /* detect when asm code uses rbp as a scratch register */ - if (!no_fp && insn->func && op->src.reg == CFI_BP && - cfa->base != CFI_BP) - cfi->bp_scratch = true; - break; - - case OP_DEST_REG_INDIRECT: - - if (cfi->drap) { - if (op->src.reg == cfa->base && op->src.reg == cfi->drap_reg) { - - /* drap: mov %drap, disp(%rbp) */ - cfa->base = CFI_BP_INDIRECT; - cfa->offset = op->dest.offset; - - /* save drap offset so we know when to restore it */ - cfi->drap_offset = op->dest.offset; - } else { - - /* drap: mov reg, disp(%rbp) */ - save_reg(cfi, op->src.reg, CFI_BP, op->dest.offset); - } - - } else if (op->dest.reg == cfa->base) { - - /* mov reg, disp(%rbp) */ - /* mov reg, disp(%rsp) */ - save_reg(cfi, op->src.reg, CFI_CFA, - op->dest.offset - cfi->cfa.offset); - } - - break; - - case OP_DEST_LEAVE: - if ((!cfi->drap && cfa->base != CFI_BP) || - (cfi->drap && cfa->base != cfi->drap_reg)) { - WARN_FUNC("leave instruction with modified stack frame", - insn->sec, insn->offset); - return -1; - } - - /* leave (mov %rbp, %rsp; pop %rbp) */ - - cfi->stack_size = -cfi->regs[CFI_BP].offset - 8; - restore_reg(cfi, CFI_BP); - - if (!cfi->drap) { - cfa->base = CFI_SP; - cfa->offset -= 8; - } - - break; - - case OP_DEST_MEM: - if (op->src.type != OP_SRC_POP && op->src.type != OP_SRC_POPF) { - WARN_FUNC("unknown stack-related memory operation", - insn->sec, insn->offset); - return -1; - } - - /* pop mem */ - cfi->stack_size -= 8; - if (cfa->base == CFI_SP) - cfa->offset -= 8; - - break; - - default: - WARN_FUNC("unknown stack-related instruction", - insn->sec, insn->offset); - return -1; - } - - return 0; -} - -static int handle_insn_ops(struct instruction *insn, struct insn_state *state) -{ - struct stack_op *op; - - list_for_each_entry(op, &insn->stack_ops, list) { - struct cfi_state old_cfi = state->cfi; - int res; - - res = update_cfi_state(insn, &state->cfi, op); - if (res) - return res; - - if (insn->alt_group && memcmp(&state->cfi, &old_cfi, sizeof(struct cfi_state))) { - WARN_FUNC("alternative modifies stack", insn->sec, insn->offset); - return -1; - } - - if (op->dest.type == OP_DEST_PUSHF) { - if (!state->uaccess_stack) { - state->uaccess_stack = 1; - } else if (state->uaccess_stack >> 31) { - WARN_FUNC("PUSHF stack exhausted", - insn->sec, insn->offset); - return 1; - } - state->uaccess_stack <<= 1; - state->uaccess_stack |= state->uaccess; - } - - if (op->src.type == OP_SRC_POPF) { - if (state->uaccess_stack) { - state->uaccess = state->uaccess_stack & 1; - state->uaccess_stack >>= 1; - if (state->uaccess_stack == 1) - state->uaccess_stack = 0; - } - } - } - - return 0; -} - static bool insn_cfi_match(struct instruction *insn, struct cfi_state *cfi2) { struct cfi_state *cfi1 = &insn->cfi; @@ -2375,7 +1673,7 @@ static int validate_call(struct instruction *insn, struct insn_state *state) static int validate_sibling_call(struct instruction *insn, struct insn_state *state) { - if (has_modified_stack_frame(insn, state)) { + if (!insn->not_sibling_call && has_modified_stack_frame(insn, state)) { WARN_FUNC("sibling call from callable instruction with modified stack frame", insn->sec, insn->offset); return 1; @@ -2500,6 +1798,12 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, insn->visited |= visited; + list_add(&insn->orbit_node, &orbit_list); + + if (insn->type == INSN_JUMP_DYNAMIC && + arch_dynamic_add_jump_table_alts(&orbit_list, file, func, insn)) + return 1; + if (!insn->ignore_alts && !list_empty(&insn->alts)) { bool skip_orig = false; @@ -2513,6 +1817,10 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, BT_FUNC("(alt)", insn); return ret; } + while (func_last_orbit(&orbit_list) && + func_last_orbit(&orbit_list)->offset != insn->offset) { + list_del(&func_last_orbit(&orbit_list)->orbit_node); + } } if (insn->alt_group) @@ -2522,7 +1830,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, return 0; } - if (handle_insn_ops(insn, &state)) + if (arch_handle_insn_ops(insn, &state)) return 1; switch (insn->type) { @@ -2532,12 +1840,15 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, case INSN_CALL: case INSN_CALL_DYNAMIC: + if (insn->type == INSN_CALL_DYNAMIC) + arch_try_find_call(&orbit_list, file, func, insn); + ret = validate_call(insn, &state); if (ret) return ret; if (!no_fp && func && !is_fentry_call(insn) && - !has_valid_stack_frame(&state)) { + !arch_has_valid_stack_frame(&state)) { WARN_FUNC("call without frame pointer save/setup", sec, insn->offset); return 1; @@ -2565,6 +1876,11 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, } } + while (func_last_orbit(&orbit_list) && + func_last_orbit(&orbit_list)->offset != insn->offset) { + list_del(&func_last_orbit(&orbit_list)->orbit_node); + } + if (insn->type == INSN_JUMP_UNCONDITIONAL) return 0; @@ -2677,6 +1993,9 @@ static int validate_unwind_hints(struct objtool_file *file, struct section *sec) if (ret && backtrace) BT_FUNC("<=== (hint)", insn); warnings += ret; + while (!list_empty(&orbit_list)) { + list_del(&func_last_orbit(&orbit_list)->orbit_node); + } } insn = list_next_entry(insn, list); @@ -2824,6 +2143,10 @@ static int validate_symbol(struct objtool_file *file, struct section *sec, ret = validate_branch(file, insn->func, insn, *state); if (ret && backtrace) BT_FUNC("<=== (sym)", insn); + + while (!list_empty(&orbit_list)) + list_del(&func_last_orbit(&orbit_list)->orbit_node); + return ret; } @@ -2902,10 +2225,41 @@ static int validate_reachable_instructions(struct objtool_file *file) return 0; } +bool is_sibling_call(struct instruction *insn) +{ + /* An indirect jump is either a sibling call or a jump to a table. */ + if (insn->type == INSN_JUMP_DYNAMIC) + return list_empty(&insn->alts); + + if (!is_static_jump(insn)) + return false; + + /* add_jump_destinations() sets insn->call_dest for sibling calls. */ + return !!insn->call_dest; +} + + +void save_reg(struct cfi_state *cfi, unsigned char reg, int base, int offset) +{ + if (arch_callee_saved_reg(reg) && + cfi->regs[reg].base == CFI_UNDEFINED) { + cfi->regs[reg].base = base; + cfi->regs[reg].offset = offset; + } +} + +void restore_reg(struct cfi_state *cfi, unsigned char reg) +{ + cfi->regs[reg].base = initial_func_cfi.regs[reg].base; + cfi->regs[reg].offset = initial_func_cfi.regs[reg].offset; +} + int check(struct objtool_file *file) { int ret, warnings = 0; + INIT_LIST_HEAD(&orbit_list); + arch_initial_func_cfi_state(&initial_func_cfi); ret = decode_sections(file); @@ -2949,7 +2303,7 @@ int check(struct objtool_file *file) warnings += ret; } - ret = create_static_call_sections(file); + ret = arch_create_static_call_sections(file); if (ret < 0) goto out; warnings += ret; diff --git a/tools/objtool/check.h b/tools/objtool/check.h index 2804848e628e..07fafc53b7b8 100644 --- a/tools/objtool/check.h +++ b/tools/objtool/check.h @@ -22,6 +22,7 @@ struct insn_state { struct instruction { struct list_head list; struct hlist_node hash; + struct list_head orbit_node; struct list_head static_call_node; struct section *sec; unsigned long offset; @@ -30,7 +31,7 @@ struct instruction { unsigned long immediate; bool dead_end, ignore, ignore_alts; bool hint; - bool retpoline_safe; + bool retpoline_safe, not_sibling_call; s8 instr; u8 visited; u8 ret_offset; @@ -65,8 +66,22 @@ static inline bool is_jump(struct instruction *insn) return is_static_jump(insn) || is_dynamic_jump(insn); } +bool is_sibling_call(struct instruction *insn); +void save_reg(struct cfi_state *cfi, unsigned char reg, int base, int offset); +void restore_reg(struct cfi_state *cfi, unsigned char reg); + struct instruction *find_insn(struct objtool_file *file, struct section *sec, unsigned long offset); +struct instruction *next_insn_same_func(struct objtool_file *file, + struct instruction *insn); +struct instruction *prev_insn_same_sym(struct objtool_file *file, + struct instruction *insn); +int add_jump_table(struct objtool_file *file, struct instruction *insn, + struct reloc *table); +bool arch_has_valid_stack_frame(struct insn_state *state); +int arch_handle_insn_ops(struct instruction *insn, struct insn_state *state); +int arch_create_static_call_sections(struct objtool_file *file); +int arch_read_static_call_tramps(struct objtool_file *file); #define for_each_insn(file, insn) \ list_for_each_entry(insn, &file->insn_list, list) @@ -77,4 +92,34 @@ struct instruction *find_insn(struct objtool_file *file, insn->sec == sec; \ insn = list_next_entry(insn, list)) + +#define func_last_orbit(p) \ + (list_first_entry_or_null(p, struct instruction, orbit_node)) + +#define func_for_each_insn(file, func, insn) \ + for (insn = find_insn(file, func->sec, func->offset); \ + insn; \ + insn = next_insn_same_func(file, insn)) + +#define sym_for_each_insn(file, sym, insn) \ + for (insn = find_insn(file, sym->sec, sym->offset); \ + insn && &insn->list != &file->insn_list && \ + insn->sec == sym->sec && \ + insn->offset < sym->offset + sym->len; \ + insn = list_next_entry(insn, list)) + +#define sym_for_each_insn_continue_reverse(file, sym, insn) \ + for (insn = list_prev_entry(insn, list); \ + &insn->list != &file->insn_list && \ + insn->sec == sym->sec && insn->offset >= sym->offset; \ + insn = list_prev_entry(insn, list)) + +#define sec_for_each_insn_from(file, insn) \ + for (; insn; insn = next_insn_same_sec(file, insn)) + +#define sec_for_each_insn_continue(file, insn) \ + for (insn = next_insn_same_sec(file, insn); insn; \ + insn = next_insn_same_sec(file, insn)) + + #endif /* _CHECK_H */ diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index d8421e1d06be..8b37a9f51e0e 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -561,7 +561,7 @@ static int read_rela_reloc(struct section *sec, int i, struct reloc *reloc, unsi static int read_relocs(struct elf *elf) { struct section *sec; - struct reloc *reloc; + struct reloc *reloc, *last_reloc; int i; unsigned int symndx; unsigned long nr_reloc, max_reloc = 0, tot_reloc = 0; @@ -579,6 +579,7 @@ static int read_relocs(struct elf *elf) } sec->base->reloc = sec; + last_reloc = NULL; nr_reloc = 0; for (i = 0; i < sec->sh.sh_size / sec->sh.sh_entsize; i++) { @@ -609,6 +610,14 @@ static int read_relocs(struct elf *elf) return -1; } + if (last_reloc && reloc->offset == last_reloc->offset) { + last_reloc->next = reloc; + last_reloc = reloc; + continue; + } + + last_reloc = reloc; + elf_add_reloc(elf, reloc); nr_reloc++; } @@ -892,7 +901,7 @@ static int elf_rebuild_rel_reloc_section(struct section *sec, int nr) static int elf_rebuild_rela_reloc_section(struct section *sec, int nr) { - struct reloc *reloc; + struct reloc *reloc, *p; int idx = 0, size; GElf_Rela *relocs; @@ -911,10 +920,12 @@ static int elf_rebuild_rela_reloc_section(struct section *sec, int nr) idx = 0; list_for_each_entry(reloc, &sec->reloc_list, list) { - relocs[idx].r_offset = reloc->offset; - relocs[idx].r_addend = reloc->addend; - relocs[idx].r_info = GELF_R_INFO(reloc->sym->idx, reloc->type); - idx++; + for (p = reloc; p; p = p->next) { + relocs[idx].r_offset = p->offset; + relocs[idx].r_addend = p->addend; + relocs[idx].r_info = GELF_R_INFO(p->sym ? p->sym->idx : 0, p->type); + idx++; + } } return 0; @@ -922,7 +933,7 @@ static int elf_rebuild_rela_reloc_section(struct section *sec, int nr) int elf_rebuild_reloc_section(struct elf *elf, struct section *sec) { - struct reloc *reloc; + struct reloc *reloc, *p; int nr; sec->changed = true; @@ -930,7 +941,8 @@ int elf_rebuild_reloc_section(struct elf *elf, struct section *sec) nr = 0; list_for_each_entry(reloc, &sec->reloc_list, list) - nr++; + for (p = reloc; p; p = p->next) + nr++; switch (sec->sh.sh_type) { case SHT_REL: return elf_rebuild_rel_reloc_section(sec, nr); diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h index e6890cc70a25..800ee9572d42 100644 --- a/tools/objtool/elf.h +++ b/tools/objtool/elf.h @@ -62,6 +62,7 @@ struct symbol { struct reloc { struct list_head list; struct hlist_node hash; + struct reloc *next; union { GElf_Rela rela; GElf_Rel rel; diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c index 9df0cd86d310..fdd7468148c9 100644 --- a/tools/objtool/objtool.c +++ b/tools/objtool/objtool.c @@ -24,6 +24,7 @@ #include "builtin.h" #include "objtool.h" #include "warn.h" +#include "check.h" struct cmd_struct { const char *name; diff --git a/tools/objtool/objtool.h b/tools/objtool/objtool.h index 4125d4578b23..c0eecf3e61de 100644 --- a/tools/objtool/objtool.h +++ b/tools/objtool/objtool.h @@ -9,8 +9,10 @@ #include #include #include +#include #include "elf.h" +#include "cfi.h" #define __weak __attribute__((weak)) diff --git a/tools/objtool/orc.h b/tools/objtool/orc.h new file mode 100644 index 000000000000..9194371a42f7 --- /dev/null +++ b/tools/objtool/orc.h @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "elf.h" +#include "check.h" + +int arch_create_orc_entry(struct elf *elf, struct section *u_sec, struct section *ip_relasec, + unsigned int idx, struct section *insn_sec, + unsigned long insn_off, struct orc_entry *o); +int arch_create_orc_entry_empty(struct elf *elf, struct section *u_sec, + struct section *ip_relasec, unsigned int idx, + struct section *insn_sec, unsigned long insn_off); +void arch_print_reg(struct orc_entry orc); diff --git a/tools/objtool/orc_dump.c b/tools/objtool/orc_dump.c index 5e6a95368d35..b593ea08f116 100644 --- a/tools/objtool/orc_dump.c +++ b/tools/objtool/orc_dump.c @@ -8,58 +8,7 @@ #include #include "objtool.h" #include "warn.h" - -static const char *reg_name(unsigned int reg) -{ - switch (reg) { - case ORC_REG_PREV_SP: - return "prevsp"; - case ORC_REG_DX: - return "dx"; - case ORC_REG_DI: - return "di"; - case ORC_REG_BP: - return "bp"; - case ORC_REG_SP: - return "sp"; - case ORC_REG_R10: - return "r10"; - case ORC_REG_R13: - return "r13"; - case ORC_REG_BP_INDIRECT: - return "bp(ind)"; - case ORC_REG_SP_INDIRECT: - return "sp(ind)"; - default: - return "?"; - } -} - -static const char *orc_type_name(unsigned int type) -{ - switch (type) { - case UNWIND_HINT_TYPE_CALL: - return "call"; - case UNWIND_HINT_TYPE_REGS: - return "regs"; - case UNWIND_HINT_TYPE_REGS_PARTIAL: - return "regs (partial)"; - default: - return "?"; - } -} - -static void print_reg(unsigned int reg, int offset) -{ - if (reg == ORC_REG_BP_INDIRECT) - printf("(bp%+d)", offset); - else if (reg == ORC_REG_SP_INDIRECT) - printf("(sp%+d)", offset); - else if (reg == ORC_REG_UNDEFINED) - printf("(und)"); - else - printf("%s%+d", reg_name(reg), offset); -} +#include "orc.h" int orc_dump(const char *_objname) { @@ -153,7 +102,12 @@ int orc_dump(const char *_objname) nr_entries = orc_size / sizeof(*orc); for (i = 0; i < nr_entries; i++) { if (rela_orc_ip) { +#ifdef __loongarch__ +#define COUNT_ORC_PER_IP 8 + if (!gelf_getrela(rela_orc_ip, i * COUNT_ORC_PER_IP, &rela)) { +#else if (!gelf_getrela(rela_orc_ip, i, &rela)) { +#endif WARN_ELF("gelf_getrela"); return -1; } @@ -194,17 +148,7 @@ int orc_dump(const char *_objname) printf("%llx:", (unsigned long long)(orc_ip_addr + (i * sizeof(int)) + orc_ip[i])); } - - printf(" sp:"); - - print_reg(orc[i].sp_reg, orc[i].sp_offset); - - printf(" bp:"); - - print_reg(orc[i].bp_reg, orc[i].bp_offset); - - printf(" type:%s end:%d\n", - orc_type_name(orc[i].type), orc[i].end); + arch_print_reg(orc[i]); } elf_end(elf); diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c index 9ce68b385a1b..181ae3c2a623 100644 --- a/tools/objtool/orc_gen.c +++ b/tools/objtool/orc_gen.c @@ -11,115 +11,7 @@ #include "check.h" #include "warn.h" - -int create_orc(struct objtool_file *file) -{ - struct instruction *insn; - - for_each_insn(file, insn) { - struct orc_entry *orc = &insn->orc; - struct cfi_reg *cfa = &insn->cfi.cfa; - struct cfi_reg *bp = &insn->cfi.regs[CFI_BP]; - - if (!insn->sec->text) - continue; - - orc->end = insn->cfi.end; - - if (cfa->base == CFI_UNDEFINED) { - orc->sp_reg = ORC_REG_UNDEFINED; - continue; - } - - switch (cfa->base) { - case CFI_SP: - orc->sp_reg = ORC_REG_SP; - break; - case CFI_SP_INDIRECT: - orc->sp_reg = ORC_REG_SP_INDIRECT; - break; - case CFI_BP: - orc->sp_reg = ORC_REG_BP; - break; - case CFI_BP_INDIRECT: - orc->sp_reg = ORC_REG_BP_INDIRECT; - break; - case CFI_R10: - orc->sp_reg = ORC_REG_R10; - break; - case CFI_R13: - orc->sp_reg = ORC_REG_R13; - break; - case CFI_DI: - orc->sp_reg = ORC_REG_DI; - break; - case CFI_DX: - orc->sp_reg = ORC_REG_DX; - break; - default: - WARN_FUNC("unknown CFA base reg %d", - insn->sec, insn->offset, cfa->base); - return -1; - } - - switch(bp->base) { - case CFI_UNDEFINED: - orc->bp_reg = ORC_REG_UNDEFINED; - break; - case CFI_CFA: - orc->bp_reg = ORC_REG_PREV_SP; - break; - case CFI_BP: - orc->bp_reg = ORC_REG_BP; - break; - default: - WARN_FUNC("unknown BP base reg %d", - insn->sec, insn->offset, bp->base); - return -1; - } - - orc->sp_offset = cfa->offset; - orc->bp_offset = bp->offset; - orc->type = insn->cfi.type; - } - - return 0; -} - -static int create_orc_entry(struct elf *elf, struct section *u_sec, struct section *ip_relocsec, - unsigned int idx, struct section *insn_sec, - unsigned long insn_off, struct orc_entry *o) -{ - struct orc_entry *orc; - struct reloc *reloc; - - /* populate ORC data */ - orc = (struct orc_entry *)u_sec->data->d_buf + idx; - memcpy(orc, o, sizeof(*orc)); - - /* populate reloc for ip */ - reloc = malloc(sizeof(*reloc)); - if (!reloc) { - perror("malloc"); - return -1; - } - memset(reloc, 0, sizeof(*reloc)); - - insn_to_reloc_sym_addend(insn_sec, insn_off, reloc); - if (!reloc->sym) { - WARN("missing symbol for insn at offset 0x%lx", - insn_off); - return -1; - } - - reloc->type = R_X86_64_PC32; - reloc->offset = idx * sizeof(int); - reloc->sec = ip_relocsec; - - elf_add_reloc(elf, reloc); - - return 0; -} +#include "orc.h" int create_orc_sections(struct objtool_file *file) { @@ -127,12 +19,6 @@ int create_orc_sections(struct objtool_file *file) struct section *sec, *u_sec, *ip_relocsec; unsigned int idx; - struct orc_entry empty = { - .sp_reg = ORC_REG_UNDEFINED, - .bp_reg = ORC_REG_UNDEFINED, - .type = UNWIND_HINT_TYPE_CALL, - }; - sec = find_section_by_name(file->elf, ".orc_unwind"); if (sec) { WARN("file already has .orc_unwind section, skipping"); @@ -187,7 +73,7 @@ int create_orc_sections(struct objtool_file *file) if (!prev_insn || memcmp(&insn->orc, &prev_insn->orc, sizeof(struct orc_entry))) { - if (create_orc_entry(file->elf, u_sec, ip_relocsec, idx, + if (arch_create_orc_entry(file->elf, u_sec, ip_relocsec, idx, insn->sec, insn->offset, &insn->orc)) return -1; @@ -199,10 +85,9 @@ int create_orc_sections(struct objtool_file *file) /* section terminator */ if (prev_insn) { - if (create_orc_entry(file->elf, u_sec, ip_relocsec, idx, + if (arch_create_orc_entry_empty(file->elf, u_sec, ip_relocsec, idx, prev_insn->sec, - prev_insn->offset + prev_insn->len, - &empty)) + prev_insn->offset + prev_insn->len)) return -1; idx++; diff --git a/tools/objtool/special.c b/tools/objtool/special.c index 1a2420febd08..5372122c639b 100644 --- a/tools/objtool/special.c +++ b/tools/objtool/special.c @@ -87,15 +87,21 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry, WARN_FUNC("can't find orig reloc", sec, offset + entry->orig); return -1; } - if (orig_reloc->sym->type != STT_SECTION) { + if (orig_reloc->sym->type == STT_SECTION) { + alt->orig_sec = orig_reloc->sym->sec; + alt->orig_off = orig_reloc->addend; +#ifdef __loongarch__ + } else if (!strncmp(orig_reloc->sym->name, ".L", 2) || + !strncmp(orig_reloc->sym->name, ".ex", 3)) { + alt->orig_sec = orig_reloc->sym->sec; + alt->orig_off = orig_reloc->sym->offset; +#endif + } else { WARN_FUNC("don't know how to handle non-section reloc symbol %s", sec, offset + entry->orig, orig_reloc->sym->name); return -1; } - alt->orig_sec = orig_reloc->sym->sec; - alt->orig_off = orig_reloc->addend; - if (!entry->group || alt->new_len) { new_reloc = find_reloc_by_dest(elf, sec, offset + entry->new); if (!new_reloc) { @@ -105,7 +111,12 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry, } alt->new_sec = new_reloc->sym->sec; - alt->new_off = (unsigned int)new_reloc->addend; +#ifdef __loongarch__ + if (!strncmp(orig_reloc->sym->name, ".L", 2)) + alt->new_off = new_reloc->sym->offset; + else +#endif + alt->new_off = (unsigned int)new_reloc->addend; /* _ASM_EXTABLE_EX hack */ if (alt->new_off >= 0x7ffffff0) diff --git a/tools/objtool/special.h b/tools/objtool/special.h index abddf38ef334..223da33f9743 100644 --- a/tools/objtool/special.h +++ b/tools/objtool/special.h @@ -12,6 +12,8 @@ #define C_JUMP_TABLE_SECTION ".rodata..c_jump_table" +struct instruction; + struct special_alt { struct list_head list; @@ -29,6 +31,12 @@ struct special_alt { unsigned int orig_len, new_len; /* group only */ }; +struct alternative { + struct list_head list; + struct instruction *insn; + bool skip_orig; +}; + int special_get_alts(struct elf *elf, struct list_head *alts); void arch_handle_alternative(unsigned short feature, struct special_alt *alt); @@ -36,6 +44,11 @@ void arch_handle_alternative(unsigned short feature, struct special_alt *alt); bool arch_support_alt_relocation(struct special_alt *special_alt, struct instruction *insn, struct reloc *reloc); -struct reloc *arch_find_switch_table(struct objtool_file *file, - struct instruction *insn); +void arch_mark_func_jump_tables(struct objtool_file *file, + struct symbol *func); + +int arch_dynamic_add_jump_table_alts(struct list_head *p_orbit_list, struct objtool_file *file, + struct symbol *func, struct instruction *insn); + +bool arch_is_noreturn(struct symbol *func); #endif /* _SPECIAL_H */ -- Gitee From 86b3531d090e3ad2ad1aa9d60468a30c8713770c Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 23 Oct 2021 16:48:08 +0800 Subject: [PATCH 39/59] LoongArch: Use TLB for ioremap() Change-Id: Ifd58d9eabef7d13b9cee35e61dbc095b52cff38e Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 7 +++ arch/loongarch/include/asm/fixmap.h | 16 +++++ arch/loongarch/include/asm/io.h | 27 ++++----- arch/loongarch/kernel/setup.c | 2 +- arch/loongarch/mm/init.c | 64 ++++++++++++++++++++ arch/loongarch/mm/ioremap.c | 93 ++++++++++++++++++++++++++++- arch/loongarch/pci/acpi.c | 76 +++++++++++++++++++++-- 7 files changed, 262 insertions(+), 23 deletions(-) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 7502fa5d3e16..a5f2b13463d3 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -95,6 +95,7 @@ config LOONGARCH select HAVE_SYSCALL_TRACEPOINTS select HAVE_TIF_NOHZ select HAVE_VIRT_CPU_ACCOUNTING_GEN if 64BIT || !SMP + select IOREMAP_WITH_TLB select IRQ_FORCED_THREADING select MODULES_USE_ELF_RELA if MODULES && 64BIT select MODULES_USE_ELF_REL if MODULES @@ -241,6 +242,9 @@ config 64BIT endchoice +config FIX_EARLYCON_MEM + def_bool y + config PAGE_SIZE_4KB bool @@ -384,6 +388,9 @@ config CPU_SUPPORTS_LSX config CPU_SUPPORTS_LASX bool +config IOREMAP_WITH_TLB + bool + config ARCH_SELECT_MEMORY_MODEL def_bool y diff --git a/arch/loongarch/include/asm/fixmap.h b/arch/loongarch/include/asm/fixmap.h index cd810ee0d251..7bcc0f515ab3 100644 --- a/arch/loongarch/include/asm/fixmap.h +++ b/arch/loongarch/include/asm/fixmap.h @@ -13,7 +13,23 @@ #define _ASM_FIXMAP_H #include +#include #define NR_FIX_BTMAPS 64 +enum fixed_addresses { + FIX_HOLE, + FIX_EARLYCON_MEM_BASE, + __end_of_fixed_addresses +}; + +#define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) +#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) +#define FIXMAP_PAGE_IO PAGE_KERNEL_SUC + +extern void __set_fixmap(enum fixed_addresses idx, + phys_addr_t phys, pgprot_t flags); + +#include + #endif diff --git a/arch/loongarch/include/asm/io.h b/arch/loongarch/include/asm/io.h index a8a3b935c35b..fd98eb38b35a 100644 --- a/arch/loongarch/include/asm/io.h +++ b/arch/loongarch/include/asm/io.h @@ -46,14 +46,8 @@ extern void __init early_iounmap(void __iomem *addr, unsigned long size); #define early_memremap early_ioremap #define early_memunmap early_iounmap -static inline void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size, - unsigned long prot_val) -{ - if (prot_val == _CACHE_CC) - return (void __iomem *)(unsigned long)(CAC_BASE + offset); - else - return (void __iomem *)(unsigned long)(UNCAC_BASE + offset); -} +#define ioremap_prot ioremap_prot +extern void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size, unsigned long prot); /* * ioremap - map bus memory into CPU space @@ -66,8 +60,8 @@ static inline void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size, * address is not guaranteed to be usable directly as a virtual * address. */ -#define ioremap(offset, size) \ - ioremap_prot((offset), (size), _CACHE_SUC) +#define ioremap ioremap +extern void __iomem *ioremap(phys_addr_t phys_addr, size_t size); /* * ioremap_wc - map bus memory into CPU space @@ -87,8 +81,8 @@ static inline void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size, * CPU CCA doesn't support WUC, the method shall fall-back to the * _CACHE_SUC option (see cpu_probe() method). */ -#define ioremap_wc(offset, size) \ - ioremap_prot((offset), (size), _CACHE_WUC) +#define ioremap_wc ioremap_wc +extern void __iomem *ioremap_wc(phys_addr_t phys_addr, size_t size); /* * ioremap_cache - map bus memory into CPU space @@ -105,12 +99,11 @@ static inline void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size, * the CPU. Also enables full write-combining. Useful for some * memory-like regions on I/O busses. */ -#define ioremap_cache(offset, size) \ - ioremap_prot((offset), (size), _CACHE_CC) +#define ioremap_cache ioremap_cache +extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size); -static inline void iounmap(const volatile void __iomem *addr) -{ -} +#define iounmap iounmap +extern void iounmap(const volatile void __iomem *addr); #define mmiowb() asm volatile ("dbar 0" ::: "memory") diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index b585dd369f52..8c591db24885 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -605,11 +605,11 @@ void __init setup_arch(char **cmdline_p) cpu_probe(); early_init(); + pagetable_init(); bootcmdline_init(cmdline_p); init_initrd(); platform_init(); - pagetable_init(); arch_mem_init(cmdline_p); diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c index 43116dd4af63..b83fb1b1c2d9 100644 --- a/arch/loongarch/mm/init.c +++ b/arch/loongarch/mm/init.c @@ -368,6 +368,70 @@ void vmemmap_free(unsigned long start, unsigned long end, } #endif +static pte_t *fixmap_pte(unsigned long addr) +{ + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + + pgd = pgd_offset_k(addr); + p4d = p4d_offset(pgd, addr); + + if (pgd_none(*pgd)) { + pud_t *new; + + new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); + pgd_populate(&init_mm, pgd, new); +#ifndef __PAGETABLE_PUD_FOLDED + pud_init((unsigned long)new, (unsigned long)invalid_pmd_table); +#endif + } + + pud = pud_offset(p4d, addr); + if (pud_none(*pud)) { + pmd_t *new; + + new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); + pud_populate(&init_mm, pud, new); +#ifndef __PAGETABLE_PMD_FOLDED + pmd_init((unsigned long)new, (unsigned long)invalid_pte_table); +#endif + } + + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) { + pte_t *new; + + new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); + pmd_populate_kernel(&init_mm, pmd, new); + } + + return pte_offset_kernel(pmd, addr); +} + +void __init __set_fixmap(enum fixed_addresses idx, + phys_addr_t phys, pgprot_t flags) +{ + unsigned long addr = __fix_to_virt(idx); + pte_t *ptep; + + BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses); + + ptep = fixmap_pte(addr); + if (!pte_none(*ptep)) { + pte_ERROR(*ptep); + return; + } + + if (pgprot_val(flags)) + set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, flags)); + else { + pte_clear(&init_mm, addr, ptep); + flush_tlb_kernel_range(addr, addr + PAGE_SIZE); + } +} + /* * Align swapper_pg_dir in to 64K, allows its address to be loaded * with a single LUI instruction in the TLB handlers. If we used diff --git a/arch/loongarch/mm/ioremap.c b/arch/loongarch/mm/ioremap.c index a2cb118238ac..8f582d522ca5 100644 --- a/arch/loongarch/mm/ioremap.c +++ b/arch/loongarch/mm/ioremap.c @@ -7,7 +7,9 @@ * published by the Free Software Foundation. */ -#include +#include +#include +#include void __init __iomem *early_ioremap(u64 phys_addr, unsigned long size) { @@ -29,3 +31,92 @@ void *early_memremap_prot(resource_size_t phys_addr, unsigned long size, { return early_memremap(phys_addr, size); } + +#ifdef CONFIG_IOREMAP_WITH_TLB +static void __iomem *__ioremap_caller(phys_addr_t phys_addr, size_t size, + pgprot_t prot, void *caller) +{ + unsigned long last_addr; + unsigned long offset = phys_addr & ~PAGE_MASK; + int err; + unsigned long addr; + struct vm_struct *area; + + /* + * Page align the mapping address and size, taking account of any + * offset. + */ + phys_addr &= PAGE_MASK; + size = PAGE_ALIGN(size + offset); + + /* + * Don't allow wraparound, zero size or outside PHYS_MASK. + */ + last_addr = phys_addr + size - 1; + if (!size || last_addr < phys_addr) + return NULL; + + area = get_vm_area_caller(size, VM_IOREMAP, caller); + if (!area) + return NULL; + addr = (unsigned long)area->addr; + area->phys_addr = phys_addr; + + err = ioremap_page_range(addr, addr + size, phys_addr, prot); + if (err) { + vunmap((void *)addr); + return NULL; + } + + return (void __iomem *)(offset + addr); +} +#else +static void __iomem *__ioremap_caller(phys_addr_t phys_addr, size_t size, + pgprot_t prot, void *caller) +{ + if (pgprot_val(prot) & _CACHE_CC) + return (void __iomem *)(unsigned long)(CAC_BASE + phys_addr); + else + return (void __iomem *)(unsigned long)(UNCAC_BASE + phys_addr); +} +#endif + +void __iomem *ioremap(phys_addr_t phys_addr, size_t size) +{ + return __ioremap_caller(phys_addr, size, PAGE_KERNEL_SUC, __builtin_return_address(0)); +} +EXPORT_SYMBOL(ioremap); + +void __iomem *ioremap_wc(phys_addr_t phys_addr, size_t size) +{ + return __ioremap_caller(phys_addr, size, PAGE_KERNEL_WUC, __builtin_return_address(0)); +} +EXPORT_SYMBOL(ioremap_wc); + +void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size) +{ + return __ioremap_caller(phys_addr, size, PAGE_KERNEL, __builtin_return_address(0)); +} +EXPORT_SYMBOL(ioremap_cache); + +void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size, unsigned long prot) +{ + return __ioremap_caller(phys_addr, size, __pgprot(prot), __builtin_return_address(0)); +} +EXPORT_SYMBOL(ioremap_prot); + +void iounmap(const volatile void __iomem *io_addr) +{ +#ifdef CONFIG_IOREMAP_WITH_TLB + unsigned long addr = (unsigned long)io_addr & PAGE_MASK; + + /* + * We could get an address outside vmalloc range in case + * of ioremap_cache() reusing a RAM mapping. + */ + if (is_vmalloc_addr((void *)addr)) + vunmap((void *)addr); +#endif +} + +EXPORT_SYMBOL(iounmap); diff --git a/arch/loongarch/pci/acpi.c b/arch/loongarch/pci/acpi.c index c390c069e2b7..8f7cc5b20c86 100644 --- a/arch/loongarch/pci/acpi.c +++ b/arch/loongarch/pci/acpi.c @@ -228,6 +228,69 @@ static int acpi_prepare_root_resources(struct acpi_pci_root_info *ci) return 0; } +/* + * Create a PCI config space window + * - reserve mem region + * - alloc struct pci_config_window with space for all mappings + * - ioremap the config space + */ +struct pci_config_window *arch_pci_ecam_create(struct device *dev, + struct resource *cfgres, struct resource *busr, struct pci_ecam_ops *ops) +{ + int i, err; + unsigned int bus_range, bsz; + struct resource *conflict; + struct pci_config_window *cfg; + + if (busr->start > busr->end) + return ERR_PTR(-EINVAL); + + cfg = kzalloc(sizeof(*cfg), GFP_KERNEL); + if (!cfg) + return ERR_PTR(-ENOMEM); + + cfg->parent = dev; + cfg->ops = ops; + cfg->busr.start = busr->start; + cfg->busr.end = busr->end; + cfg->busr.flags = IORESOURCE_BUS; + bus_range = resource_size(cfgres) >> ops->bus_shift; + + bsz = 1 << ops->bus_shift; + + cfg->res.start = cfgres->start; + cfg->res.end = cfgres->end; + cfg->res.flags = IORESOURCE_MEM | IORESOURCE_BUSY; + cfg->res.name = "PCI ECAM"; + + conflict = request_resource_conflict(&iomem_resource, &cfg->res); + if (conflict) { + err = -EBUSY; + dev_err(dev, "can't claim ECAM area %pR: address conflict with %s %pR\n", + &cfg->res, conflict->name, conflict); + goto err_exit; + } + + cfg->win = pci_remap_cfgspace(cfgres->start, bus_range * bsz); + if (!cfg->win) + goto err_exit_iomap; + + if (ops->init) { + err = ops->init(cfg); + if (err) + goto err_exit; + } + dev_info(dev, "ECAM at %pR for %pR\n", &cfg->res, &cfg->busr); + return cfg; + +err_exit_iomap: + err = -ENOMEM; + dev_err(dev, "ECAM ioremap failed\n"); +err_exit: + pci_ecam_free(cfg); + return ERR_PTR(err); +} + /* * Lookup the bus range for the domain in MCFG, and set up config space * mapping. @@ -252,11 +315,16 @@ pci_acpi_setup_ecam_mapping(struct acpi_pci_root *root) bus_shift = ecam_ops->bus_shift ? : 20; - cfgres.start = root->mcfg_addr + (bus_res->start << bus_shift); - cfgres.end = cfgres.start + (resource_size(bus_res) << bus_shift) - 1; - cfgres.flags = IORESOURCE_MEM; + if (bus_shift == 20) + cfg = pci_ecam_create(dev, &cfgres, bus_res, ecam_ops); + else { + cfgres.start = root->mcfg_addr + (bus_res->start << bus_shift); + cfgres.end = cfgres.start + (resource_size(bus_res) << bus_shift) - 1; + cfgres.end |= BIT(28) + (((PCI_CFG_SPACE_EXP_SIZE - 1) & 0xf00) << 16); + cfgres.flags = IORESOURCE_MEM; + cfg = arch_pci_ecam_create(dev, &cfgres, bus_res, ecam_ops); + } - cfg = pci_ecam_create(dev, &cfgres, bus_res, ecam_ops); if (IS_ERR(cfg)) { dev_err(dev, "%04x:%pR error %ld mapping ECAM\n", seg, bus_res, PTR_ERR(cfg)); return NULL; -- Gitee From ddc6470bd08e9e6c6db4223ceffc496c65f80908 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 3 Jun 2021 18:25:38 +0800 Subject: [PATCH 40/59] LoongArch: Add alternative runtime patching support Change-Id: I8960b2e38f08bce031e095138a9c6b2c3239d3b2 Signed-off-by: Jun Yi Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/alternative-asm.h | 82 ++++++ arch/loongarch/include/asm/alternative.h | 175 +++++++++++ arch/loongarch/include/asm/bugs.h | 15 + arch/loongarch/include/asm/inst.h | 8 + arch/loongarch/kernel/Makefile | 3 +- arch/loongarch/kernel/alternative.c | 295 +++++++++++++++++++ arch/loongarch/kernel/module.c | 16 + arch/loongarch/kernel/setup.c | 7 + arch/loongarch/kernel/vmlinux.lds.S | 12 + arch/loongarch/lib/clear_user.S | 68 ++++- arch/loongarch/lib/copy_user.S | 89 +++++- arch/loongarch/lib/memcpy.S | 71 ++++- arch/loongarch/lib/memmove.S | 98 +++++- arch/loongarch/lib/memset.S | 71 ++++- 14 files changed, 981 insertions(+), 29 deletions(-) create mode 100644 arch/loongarch/include/asm/alternative-asm.h create mode 100644 arch/loongarch/include/asm/alternative.h create mode 100644 arch/loongarch/include/asm/bugs.h create mode 100644 arch/loongarch/kernel/alternative.c diff --git a/arch/loongarch/include/asm/alternative-asm.h b/arch/loongarch/include/asm/alternative-asm.h new file mode 100644 index 000000000000..6a77f6b7cc22 --- /dev/null +++ b/arch/loongarch/include/asm/alternative-asm.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_ALTERNATIVE_ASM_H +#define _ASM_ALTERNATIVE_ASM_H + +#ifdef __ASSEMBLY__ + +#include + +/* + * Issue one struct alt_instr descriptor entry (need to put it into + * the section .altinstructions, see below). This entry contains + * enough information for the alternatives patching code to patch an + * instruction. See apply_alternatives(). + */ +.macro altinstruction_entry orig alt feature orig_len alt_len + .long \orig - . + .long \alt - . + .2byte \feature + .byte \orig_len + .byte \alt_len +.endm + +/* + * Define an alternative between two instructions. If @feature is + * present, early code in apply_alternatives() replaces @oldinstr with + * @newinstr. ".skip" directive takes care of proper instruction padding + * in case @newinstr is longer than @oldinstr. + */ +.macro ALTERNATIVE oldinstr, newinstr, feature +140 : + \oldinstr +141 : + .fill - (((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)) / 4, 4, 0x03400000 +142 : + + .pushsection .altinstructions, "a" + altinstruction_entry 140b, 143f, \feature, 142b-140b, 144f-143f + .popsection + + .subsection 1 +143 : + \newinstr +144 : + .previous +.endm + +#define old_len (141b-140b) +#define new_len1 (144f-143f) +#define new_len2 (145f-144f) + +#define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b))))) + +/* + * Same as ALTERNATIVE macro above but for two alternatives. If CPU + * has @feature1, it replaces @oldinstr with @newinstr1. If CPU has + * @feature2, it replaces @oldinstr with @feature2. + */ +.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2 +140 : + \oldinstr +141 : + .fill - ((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \ + (alt_max_short(new_len1, new_len2) - (old_len)) / 4, 4, 0x03400000 +142 : + + .pushsection .altinstructions, "a" + altinstruction_entry 140b, 143f, \feature1, 142b-140b, 144f-143f, 142b-141b + altinstruction_entry 140b, 144f, \feature2, 142b-140b, 145f-144f, 142b-141b + .popsection + + .subsection 1 +143 : + \newinstr1 +144 : + \newinstr2 +145 : + .previous +.endm + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_ALTERNATIVE_ASM_H */ diff --git a/arch/loongarch/include/asm/alternative.h b/arch/loongarch/include/asm/alternative.h new file mode 100644 index 000000000000..54ba7d7ded62 --- /dev/null +++ b/arch/loongarch/include/asm/alternative.h @@ -0,0 +1,175 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_ALTERNATIVE_H +#define _ASM_ALTERNATIVE_H + +#ifndef __ASSEMBLY__ + +#include +#include +#include +#include + +struct alt_instr { + s32 instr_offset; /* original instruction */ + s32 repl_offset; /* offset to replacement instruction */ + u16 feature; /* feature bit set for replacement */ + u8 instrlen; /* length of original instruction */ + u8 replacementlen; /* length of new instruction */ +} __packed; + +/* + * Debug flag that can be tested to see whether alternative + * instructions were patched in already: + */ +extern int alternatives_patched; + +extern void alternative_instructions(void); +extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); + +#define b_replacement(num) "664"#num +#define e_replacement(num) "665"#num + +#define alt_end_marker "663" +#define alt_slen "662b-661b" +#define alt_total_slen alt_end_marker"b-661b" +#define alt_rlen(num) e_replacement(num)"f-"b_replacement(num)"f" + +#define __OLDINSTR(oldinstr, num) \ + "661:\n\t" oldinstr "\n662:\n" \ + ".fill -(((" alt_rlen(num) ")-(" alt_slen ")) > 0) * " \ + "((" alt_rlen(num) ")-(" alt_slen ")) / 4, 4, 0x03400000\n" + +#define OLDINSTR(oldinstr, num) \ + __OLDINSTR(oldinstr, num) \ + alt_end_marker ":\n" + +#define alt_max_short(a, b) "((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") < (" b ")))))" + +/* + * Pad the second replacement alternative with additional NOPs if it is + * additionally longer than the first replacement alternative. + */ +#define OLDINSTR_2(oldinstr, num1, num2) \ + "661:\n\t" oldinstr "\n662:\n" \ + ".fill -((" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")) > 0) * " \ + "(" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")) / 4, " \ + "4, 0x03400000\n" \ + alt_end_marker ":\n" + +#define ALTINSTR_ENTRY(feature, num) \ + " .long 661b - .\n" /* label */ \ + " .long " b_replacement(num)"f - .\n" /* new instruction */ \ + " .2byte " __stringify(feature) "\n" /* feature bit */ \ + " .byte " alt_total_slen "\n" /* source len */ \ + " .byte " alt_rlen(num) "\n" /* replacement len */ + +#define ALTINSTR_REPLACEMENT(newinstr, feature, num) /* replacement */ \ + b_replacement(num)":\n\t" newinstr "\n" e_replacement(num) ":\n\t" + +/* alternative assembly primitive: */ +#define ALTERNATIVE(oldinstr, newinstr, feature) \ + OLDINSTR(oldinstr, 1) \ + ".pushsection .altinstructions,\"a\"\n" \ + ALTINSTR_ENTRY(feature, 1) \ + ".popsection\n" \ + ".subsection 1\n" \ + ALTINSTR_REPLACEMENT(newinstr, feature, 1) \ + ".previous\n" + +#define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\ + OLDINSTR_2(oldinstr, 1, 2) \ + ".pushsection .altinstructions,\"a\"\n" \ + ALTINSTR_ENTRY(feature1, 1) \ + ALTINSTR_ENTRY(feature2, 2) \ + ".popsection\n" \ + ".subsection 1\n" \ + ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \ + ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \ + ".previous\n" + +/* + * Alternative instructions for different CPU types or capabilities. + * + * This allows to use optimized instructions even on generic binary + * kernels. + * + * length of oldinstr must be longer or equal the length of newinstr + * It can be padded with nops as needed. + * + * For non barrier like inlines please define new variants + * without volatile and memory clobber. + */ +#define alternative(oldinstr, newinstr, feature) \ + (asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory")) + +#define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \ + (asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) ::: "memory")) + +/* + * Alternative inline assembly with input. + * + * Pecularities: + * No memory clobber here. + * Argument numbers start with 1. + * Best is to use constraints that are fixed size (like (%1) ... "r") + * If you use variable sized constraints like "m" or "g" in the + * replacement make sure to pad to the worst case length. + * Leaving an unused argument 0 to keep API compatibility. + */ +#define alternative_input(oldinstr, newinstr, feature, input...) \ + (asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \ + : : "i" (0), ## input)) + +/* + * This is similar to alternative_input. But it has two features and + * respective instructions. + * + * If CPU has feature2, newinstr2 is used. + * Otherwise, if CPU has feature1, newinstr1 is used. + * Otherwise, oldinstr is used. + */ +#define alternative_input_2(oldinstr, newinstr1, feature1, newinstr2, \ + feature2, input...) \ + (asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, \ + newinstr2, feature2) \ + : : "i" (0), ## input)) + +/* Like alternative_input, but with a single output argument */ +#define alternative_io(oldinstr, newinstr, feature, output, input...) \ + (asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \ + : output : "i" (0), ## input)) + +/* Like alternative_io, but for replacing a direct call with another one. */ +#define alternative_call(oldfunc, newfunc, feature, output, input...) \ + (asm volatile (ALTERNATIVE("call %P[old]", "call %P[new]", feature) \ + : output : [old] "i" (oldfunc), [new] "i" (newfunc), ## input)) + +/* + * Like alternative_call, but there are two features and respective functions. + * If CPU has feature2, function2 is used. + * Otherwise, if CPU has feature1, function1 is used. + * Otherwise, old function is used. + */ +#define alternative_call_2(oldfunc, newfunc1, feature1, newfunc2, feature2, \ + output, input...) \ + (asm volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\ + "call %P[new2]", feature2) \ + : output, ASM_CALL_CONSTRAINT \ + : [old] "i" (oldfunc), [new1] "i" (newfunc1), \ + [new2] "i" (newfunc2), ## input)) + +/* + * use this macro(s) if you need more than one output parameter + * in alternative_io + */ +#define ASM_OUTPUT2(a...) a + +/* + * use this macro if you need clobbers but no inputs in + * alternative_{input,io,call}() + */ +#define ASM_NO_INPUT_CLOBBER(clbr...) "i" (0) : clbr + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_ALTERNATIVE_H */ diff --git a/arch/loongarch/include/asm/bugs.h b/arch/loongarch/include/asm/bugs.h new file mode 100644 index 000000000000..651fffe1f743 --- /dev/null +++ b/arch/loongarch/include/asm/bugs.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This is included by init/main.c to check for architecture-dependent bugs. + * + * Copyright (C) 2020-2021 Loongson Technology Corporation Limited + */ +#ifndef _ASM_BUGS_H +#define _ASM_BUGS_H + +#include +#include + +extern void check_bugs(void); + +#endif /* _ASM_BUGS_H */ diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h index d72072584989..463a1edd86e2 100644 --- a/arch/loongarch/include/asm/inst.h +++ b/arch/loongarch/include/asm/inst.h @@ -72,6 +72,10 @@ #define I_FMA_FFMT_SFT 0 #define LOONGARCHInst_FMA_FFMT(x) (LOONGARCHInst(x) & 0x00000003) +#define Inst_UncondBranchSIMM(x) \ + ((int)((((LOONGARCHInst(x) & 0x3ff) | ((LOONGARCHInst(x) & 0x200) ? 0xfffffc00 : 0)) << 16) \ + | ((LOONGARCHInst(x) & 0x3fffc00) >> 10))) + typedef unsigned int loongarch_instruction; /* Recode table from 16-bit register notation to 32-bit GPR. Do NOT export!!! */ @@ -252,6 +256,10 @@ do { \ #define StoreW(addr, value, res) _StoreW(addr, value, res) #define StoreDW(addr, value, res) _StoreDW(addr, value, res) +#define LOONGARCH_INSN_SIZE sizeof(union loongarch_instruction) + +#define INSN_NOP 0x03400000 + static inline bool is_branch_insn(union loongarch_instruction insn) { return insn.reg1i21_format.opcode >= beqz_op && diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index 936dc2012c9f..7b60dc859813 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -8,7 +8,8 @@ extra-y := head.o vmlinux.lds obj-y += cpu-probe.o elf.o entry.o genex.o idle.o irq.o \ process.o ptrace.o reset.o setup.o signal.o io.o \ syscall.o time.o topology.o traps.o unaligned.o \ - cmdline.o switch.o cacheinfo.o cmpxchg.o vdso.o + cmdline.o switch.o cacheinfo.o cmpxchg.o vdso.o \ + alternative.o obj-$(CONFIG_ACPI) += acpi.o obj-$(CONFIG_EFI) += efi.o diff --git a/arch/loongarch/kernel/alternative.c b/arch/loongarch/kernel/alternative.c new file mode 100644 index 000000000000..7c6593a1d1e7 --- /dev/null +++ b/arch/loongarch/kernel/alternative.c @@ -0,0 +1,295 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +int __read_mostly alternatives_patched; + +EXPORT_SYMBOL_GPL(alternatives_patched); + +#define MAX_PATCH_SIZE (((u8)(-1)) / LOONGARCH_INSN_SIZE) + +static int __initdata_or_module debug_alternative; + +static int __init debug_alt(char *str) +{ + debug_alternative = 1; + return 1; +} +__setup("debug-alternative", debug_alt); + +#define DPRINTK(fmt, args...) \ +do { \ + if (debug_alternative) \ + printk(KERN_DEBUG "%s: " fmt "\n", __func__, ##args); \ +} while (0) + +#define DUMP_WORDS(buf, count, fmt, args...) \ +do { \ + if (unlikely(debug_alternative)) { \ + int _j; \ + union loongarch_instruction *_buf = buf; \ + \ + if (!(count)) \ + break; \ + \ + printk(KERN_DEBUG fmt, ##args); \ + for (_j = 0; _j < count - 1; _j++) \ + printk(KERN_CONT "<%08x> ", _buf[_j].word); \ + printk(KERN_CONT "<%08x>\n", _buf[_j].word); \ + } \ +} while (0) + +#define __SIGNEX(X, SIDX) ((X) >= (1 << SIDX) ? ~((1 << SIDX) - 1) | (X) : (X)) +#define SIGNEX26(X) __SIGNEX(((unsigned long)(X)), 25) +#define SIGNEX21(X) __SIGNEX(((unsigned long)(X)), 20) +#define SIGNEX20(X) __SIGNEX(((unsigned long)(X)), 19) +#define SIGNEX16(X) __SIGNEX(((unsigned long)(X)), 15) + +static inline unsigned long bs_dest_26(unsigned long now, unsigned int h, unsigned int l) +{ + return now + (SIGNEX26(h << 16 | l) << 2); +} + +static inline unsigned long bs_dest_21(unsigned long now, unsigned int h, + unsigned int l) +{ + return now + (SIGNEX21(h << 16 | l) << 2); +} + +static inline unsigned long bs_dest_16(unsigned long now, unsigned int si) +{ + return now + (SIGNEX16(si) << 2); +} + +/* Use this to add nops to a buffer, then text_poke the whole buffer. */ +static void __init_or_module add_nops(union loongarch_instruction *insn, + int count) +{ + while (count--) { + insn->word = INSN_NOP; + insn++; + } +} + +extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; +void *text_poke_early(union loongarch_instruction *insn, + union loongarch_instruction *buf, unsigned int nr); + +/* Is the jump addr in local .altinstructions */ +static inline bool in_alt_jump(unsigned long jump, void *start, void *end) +{ + return jump >= (unsigned long)start && jump < (unsigned long)end; +} + +static void __init_or_module recompute_jump(union loongarch_instruction *buf, + union loongarch_instruction *dest, union loongarch_instruction *src, + void *start, void *end) +{ + unsigned int si, si_l, si_h; + unsigned long cur_pc, jump_addr, pc; + long offset; + + cur_pc = (unsigned long)src; + pc = (unsigned long)dest; + + si_l = src->reg0i26_format.simmediate_l; + si_h = src->reg0i26_format.simmediate_h; + switch (src->reg0i26_format.opcode) { + case b_op: + case bl_op: + jump_addr = bs_dest_26(cur_pc, si_h, si_l); + if (in_alt_jump(jump_addr, start, end)) + return; + offset = jump_addr - pc; + BUG_ON(offset < -SZ_128M || offset >= SZ_128M); + offset >>= 2; + buf->reg0i26_format.simmediate_h = offset >> 16; + buf->reg0i26_format.simmediate_l = offset; + return; + } + + si_l = src->reg1i21_format.simmediate_l; + si_h = src->reg1i21_format.simmediate_h; + switch (src->reg1i21_format.opcode) { + case beqz_op: + case bnez_op: + case bceqz_op: + jump_addr = bs_dest_21(cur_pc, si_h, si_l); + if (in_alt_jump(jump_addr, start, end)) + return; + offset = jump_addr - pc; + BUG_ON(offset < -SZ_4M || offset >= SZ_4M); + offset >>= 2; + buf->reg1i21_format.simmediate_h = offset >> 16; + buf->reg1i21_format.simmediate_l = offset; + return; + } + + si = src->reg2i16_format.simmediate; + switch (src->reg2i16_format.opcode) { + case beq_op: + case bne_op: + case blt_op: + case bge_op: + case bltu_op: + case bgeu_op: + jump_addr = bs_dest_16(cur_pc, si); + if (in_alt_jump(jump_addr, start, end)) + return; + offset = jump_addr - pc; + BUG_ON(offset < -SZ_128K || offset >= SZ_128K); + offset >>= 2; + buf->reg2i16_format.simmediate = offset; + return; + } +} + +static int __init_or_module copy_alt_insns(union loongarch_instruction *buf, + union loongarch_instruction *dest, union loongarch_instruction *src, int nr) +{ + int i; + + for (i = 0; i < nr; i++) { + buf[i].word = src[i].word; + + if (is_branch_insn(src[i]) && + src[i].reg2i16_format.opcode != jirl_op) { + recompute_jump(&buf[i], &dest[i], &src[i], src, src + nr); + } else if (is_pc_insn(src[i])) { + pr_err("Not support pcrel instruction at present!"); + return -EINVAL; + } + } + + return 0; +} + +/* + * Replace instructions with better alternatives for this CPU type. This runs + * before SMP is initialized to avoid SMP problems with self modifying code. + * This implies that asymmetric systems where APs have less capabilities than + * the boot processor are not handled. Tough. Make sure you disable such + * features by hand. + * + * Marked "noinline" to cause control flow change and thus insn cache + * to refetch changed I$ lines. + */ +noinline void __init_or_module apply_alternatives(struct alt_instr *start, + struct alt_instr *end) +{ + struct alt_instr *a; + union loongarch_instruction *instr, *replacement; + union loongarch_instruction insnbuf[MAX_PATCH_SIZE]; + unsigned int nr_instr, nr_repl, nr_insnbuf; + + DPRINTK("alt table %px, -> %px", start, end); + /* + * The scan order should be from start to end. A later scanned + * alternative code can overwrite previously scanned alternative code. + * Some kernel functions (e.g. memcpy, memset, etc) use this order to + * patch code. + * + * So be careful if you want to change the scan order to any other + * order. + */ + for (a = start; a < end; a++) { + nr_insnbuf = 0; + + instr = (void *)&a->instr_offset + a->instr_offset; + replacement = (void *)&a->repl_offset + a->repl_offset; + + BUG_ON(a->instrlen > sizeof(insnbuf)); + BUG_ON(a->instrlen & 0x3); + BUG_ON(a->replacementlen & 0x3); + + nr_instr = a->instrlen / LOONGARCH_INSN_SIZE; + nr_repl = a->replacementlen / LOONGARCH_INSN_SIZE; + + if (!cpu_has(a->feature)) { + DPRINTK("feat not exist: %d, old: (%px len: %d), repl: (%px, len: %d)", + a->feature, instr, a->instrlen, + replacement, a->replacementlen); + + continue; + } + + DPRINTK("feat: %d, old: (%px len: %d), repl: (%px, len: %d)", + a->feature, instr, a->instrlen, + replacement, a->replacementlen); + + DUMP_WORDS(instr, nr_instr, "%px: old_insn: ", instr); + DUMP_WORDS(replacement, nr_repl, "%px: rpl_insn: ", replacement); + + copy_alt_insns(insnbuf, instr, replacement, nr_repl); + nr_insnbuf = nr_repl; + + if (nr_instr > nr_repl) { + add_nops(insnbuf + nr_repl, nr_instr - nr_repl); + nr_insnbuf += nr_instr - nr_repl; + } + DUMP_WORDS(insnbuf, nr_insnbuf, "%px: final_insn: ", instr); + + text_poke_early(instr, insnbuf, nr_insnbuf); + } +} + +void __init alternative_instructions(void) +{ + /* + * Don't stop machine check exceptions while patching. + * MCEs only happen when something got corrupted and in this + * case we must do something about the corruption. + * Ignoring it is worse than a unlikely patching race. + * Also machine checks tend to be broadcast and if one CPU + * goes into machine check the others follow quickly, so we don't + * expect a machine check to cause undue problems during to code + * patching. + */ + + apply_alternatives(__alt_instructions, __alt_instructions_end); + + alternatives_patched = 1; +} + +/** + * text_poke_early - Update instructions on a live kernel at boot time + * @addr: address to modify + * @opcode: source of the copy + * @len: length to copy + * + * When you use this code to patch more than one byte of an instruction + * you need to make sure that other CPUs cannot execute this code in parallel. + * Also no thread must be currently preempted in the middle of these + * instructions. And on the local CPU you need to be protected again NMI or MCE + * handlers seeing an inconsistent instruction while you patch. + */ +void *__init_or_module text_poke_early(union loongarch_instruction *insn, + union loongarch_instruction *buf, unsigned int nr) +{ + unsigned long flags; + int i; + + local_irq_save(flags); + wmb(); + + for (i = 0; i < nr; i++) + insn[i].word = buf[i].word; + + wmb(); + local_irq_restore(flags); + flush_icache_range((unsigned long)insn, (unsigned long)(insn + nr)); + + return insn; +} diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c index f7bd627ec629..5e702fab4d0f 100644 --- a/arch/loongarch/kernel/module.c +++ b/arch/loongarch/kernel/module.c @@ -17,6 +17,7 @@ #include #include #include +#include static int rela_stack_push(s64 stack_value, s64 *rela_stack, size_t *rela_stack_top) { @@ -657,3 +658,18 @@ void *module_alloc(unsigned long size) return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE, __builtin_return_address(0)); } + +int module_finalize(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + struct module *mod) +{ + const Elf_Shdr *s, *se; + const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; + + for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) { + if (!strcmp(".altinstructions", secstrs + s->sh_name)) + apply_alternatives((void *)s->sh_addr, (void *)s->sh_addr + s->sh_size); + } + + return 0; +} diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index 8c591db24885..1d0c1627e488 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -19,7 +19,9 @@ #include #include +#include #include +#include #include #include #include @@ -67,6 +69,11 @@ static struct resource bss_resource = { .name = "Kernel bss", }; unsigned long __kaslr_offset __ro_after_init; EXPORT_SYMBOL(__kaslr_offset); +void __init check_bugs(void) +{ + alternative_instructions(); +} + static void *detect_magic __initdata = detect_memory_region; void __init detect_memory_region(phys_addr_t start, phys_addr_t sz_min, phys_addr_t sz_max) diff --git a/arch/loongarch/kernel/vmlinux.lds.S b/arch/loongarch/kernel/vmlinux.lds.S index 8148cb7d7431..d5adef1f7742 100644 --- a/arch/loongarch/kernel/vmlinux.lds.S +++ b/arch/loongarch/kernel/vmlinux.lds.S @@ -39,6 +39,18 @@ SECTIONS } :text = 0 _etext = .; /* End of text section */ + /* + * struct alt_inst entries. From the header (alternative.h): + * "Alternative instructions for different CPU types or capabilities" + * Think locking instructions on spinlocks. + */ + . = ALIGN(4); + .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { + __alt_instructions = .; + *(.altinstructions) + __alt_instructions_end = .; + } + EXCEPTION_TABLE(16) _sdata = .; /* Start of data section */ diff --git a/arch/loongarch/lib/clear_user.S b/arch/loongarch/lib/clear_user.S index 97b9998360cc..7d676463f17e 100644 --- a/arch/loongarch/lib/clear_user.S +++ b/arch/loongarch/lib/clear_user.S @@ -3,8 +3,10 @@ * Copyright (C) 2020-2021 Loongson Technology Corporation Limited */ +#include #include #include +#include #include #include @@ -20,13 +22,23 @@ .previous .endm +SYM_FUNC_START(__clear_user) + /* + * Some CPUs support hardware unaligned access + */ + ALTERNATIVE "b __clear_user_generic", \ + "b __clear_user_fast", CPU_FEATURE_UAL +SYM_FUNC_END(__clear_user) + +EXPORT_SYMBOL(__clear_user) + /* - * unsigned long __clear_user(void *addr, size_t size) + * unsigned long __clear_user_generic(void *addr, size_t size) * * a0: addr * a1: size */ -SYM_FUNC_START(__clear_user) +SYM_FUNC_START(__clear_user_generic) beqz a1, 2f 1: st.b zero, a0, 0 @@ -38,6 +50,54 @@ SYM_FUNC_START(__clear_user) jr ra fixup_ex 1, 3, 0, 1 -SYM_FUNC_END(__clear_user) +SYM_FUNC_END(__clear_user_generic) -EXPORT_SYMBOL(__clear_user) +/* + * unsigned long __clear_user_fast(void *addr, unsigned long size) + * + * a0: addr + * a1: size + */ +SYM_FUNC_START(__clear_user_fast) + beqz a1, 10f + + ori a2, zero, 64 + blt a1, a2, 9f + + /* set 64 bytes at a time */ +1: st.d zero, a0, 0 +2: st.d zero, a0, 8 +3: st.d zero, a0, 16 +4: st.d zero, a0, 24 +5: st.d zero, a0, 32 +6: st.d zero, a0, 40 +7: st.d zero, a0, 48 +8: st.d zero, a0, 56 + + addi.d a0, a0, 64 + addi.d a1, a1, -64 + bge a1, a2, 1b + + beqz a1, 10f + + /* set the remaining bytes */ +9: st.b zero, a0, 0 + addi.d a0, a0, 1 + addi.d a1, a1, -1 + bgt a1, zero, 9b + + /* return */ +10: move v0, a1 + jr ra + + /* fixup and ex_table */ + fixup_ex 1, 11, 0, 1 + fixup_ex 2, 12, -8, 1 + fixup_ex 3, 13, -16, 1 + fixup_ex 4, 14, -24, 1 + fixup_ex 5, 15, -32, 1 + fixup_ex 6, 16, -40, 1 + fixup_ex 7, 17, -48, 1 + fixup_ex 8, 18, -56, 1 + fixup_ex 9, 11, 0, 0 +SYM_FUNC_END(__clear_user_fast) diff --git a/arch/loongarch/lib/copy_user.S b/arch/loongarch/lib/copy_user.S index b814efd9cbd2..5d667150f649 100644 --- a/arch/loongarch/lib/copy_user.S +++ b/arch/loongarch/lib/copy_user.S @@ -3,8 +3,10 @@ * Copyright (C) 2020-2021 Loongson Technology Corporation Limited */ +#include #include #include +#include #include #include @@ -20,14 +22,24 @@ .previous .endm +SYM_FUNC_START(__copy_user) + /* + * Some CPUs support hardware unaligned access + */ + ALTERNATIVE "b __copy_user_generic", \ + "b __copy_user_fast", CPU_FEATURE_UAL +SYM_FUNC_END(__copy_user) + +EXPORT_SYMBOL(__copy_user) + /* - * unsigned long __copy_user(void *to, const void *from, size_t n) + * unsigned long __copy_user_generic(void *to, const void *from, size_t n) * * a0: to * a1: from * a2: n */ -SYM_FUNC_START(__copy_user) +SYM_FUNC_START(__copy_user_generic) beqz a2, 3f 1: ld.b t0, a1, 0 @@ -42,6 +54,75 @@ SYM_FUNC_START(__copy_user) fixup_ex 1, 4, 0, 1 fixup_ex 2, 4, 0, 0 -SYM_FUNC_END(__copy_user) +SYM_FUNC_END(__copy_user_generic) -EXPORT_SYMBOL(__copy_user) +/* + * unsigned long __copy_user_fast(void *to, const void *from, unsigned long n) + * + * a0: to + * a1: from + * a2: n + */ +SYM_FUNC_START(__copy_user_fast) + beqz a2, 19f + + ori a3, zero, 64 + blt a2, a3, 17f + + /* copy 64 bytes at a time */ +1: ld.d t0, a1, 0 +2: ld.d t1, a1, 8 +3: ld.d t2, a1, 16 +4: ld.d t3, a1, 24 +5: ld.d t4, a1, 32 +6: ld.d t5, a1, 40 +7: ld.d t6, a1, 48 +8: ld.d t7, a1, 56 +9: st.d t0, a0, 0 +10: st.d t1, a0, 8 +11: st.d t2, a0, 16 +12: st.d t3, a0, 24 +13: st.d t4, a0, 32 +14: st.d t5, a0, 40 +15: st.d t6, a0, 48 +16: st.d t7, a0, 56 + + addi.d a0, a0, 64 + addi.d a1, a1, 64 + addi.d a2, a2, -64 + bge a2, a3, 1b + + beqz a2, 19f + + /* copy the remaining bytes */ +17: ld.b t0, a1, 0 +18: st.b t0, a0, 0 + addi.d a0, a0, 1 + addi.d a1, a1, 1 + addi.d a2, a2, -1 + bgt a2, zero, 17b + + /* return */ +19: move v0, a2 + jr ra + + /* fixup and ex_table */ + fixup_ex 1, 20, 0, 1 + fixup_ex 2, 21, -8, 1 + fixup_ex 3, 22, -16, 1 + fixup_ex 4, 23, -24, 1 + fixup_ex 5, 24, -32, 1 + fixup_ex 6, 25, -40, 1 + fixup_ex 7, 26, -48, 1 + fixup_ex 8, 27, -56, 1 + fixup_ex 9, 20, 0, 0 + fixup_ex 10, 21, -8, 0 + fixup_ex 11, 22, -16, 0 + fixup_ex 12, 23, -24, 0 + fixup_ex 13, 24, -32, 0 + fixup_ex 14, 25, -40, 0 + fixup_ex 15, 26, -48, 0 + fixup_ex 16, 27, -56, 0 + fixup_ex 17, 20, 0, 0 + fixup_ex 18, 20, 0, 0 +SYM_FUNC_END(__copy_user_fast) diff --git a/arch/loongarch/lib/memcpy.S b/arch/loongarch/lib/memcpy.S index e94899d99624..586ab4fc7b07 100644 --- a/arch/loongarch/lib/memcpy.S +++ b/arch/loongarch/lib/memcpy.S @@ -3,18 +3,31 @@ * Copyright (C) 2020-2021 Loongson Technology Corporation Limited */ +#include +#include #include +#include #include #include +SYM_FUNC_START(memcpy) + /* + * Some CPUs support hardware unaligned access + */ + ALTERNATIVE "b __memcpy_generic", \ + "b __memcpy_fast", CPU_FEATURE_UAL +SYM_FUNC_END(memcpy) + +EXPORT_SYMBOL(memcpy) + /* - * void *memcpy(void *dst, const void *src, size_t n) + * void *__memcpy_generic(void *dst, const void *src, size_t n) * * a0: dst * a1: src * a2: n */ -SYM_FUNC_START(memcpy) +SYM_FUNC_START(__memcpy_generic) move a3, a0 beqz a2, 2f @@ -27,6 +40,56 @@ SYM_FUNC_START(memcpy) 2: move v0, a3 jr ra -SYM_FUNC_END(memcpy) +SYM_FUNC_END(__memcpy_generic) -EXPORT_SYMBOL(memcpy) +/* + * void *__memcpy_fast(void *dst, const void *src, size_t n) + * + * a0: dst + * a1: src + * a2: n + */ +SYM_FUNC_START(__memcpy_fast) + move a3, a0 + beqz a2, 3f + + ori a4, zero, 64 + blt a2, a4, 2f + + /* copy 64 bytes at a time */ +1: ld.d t0, a1, 0 + ld.d t1, a1, 8 + ld.d t2, a1, 16 + ld.d t3, a1, 24 + ld.d t4, a1, 32 + ld.d t5, a1, 40 + ld.d t6, a1, 48 + ld.d t7, a1, 56 + st.d t0, a0, 0 + st.d t1, a0, 8 + st.d t2, a0, 16 + st.d t3, a0, 24 + st.d t4, a0, 32 + st.d t5, a0, 40 + st.d t6, a0, 48 + st.d t7, a0, 56 + + addi.d a0, a0, 64 + addi.d a1, a1, 64 + addi.d a2, a2, -64 + bge a2, a4, 1b + + beqz a2, 3f + + /* copy the remaining bytes */ +2: ld.b t0, a1, 0 + st.b t0, a0, 0 + addi.d a0, a0, 1 + addi.d a1, a1, 1 + addi.d a2, a2, -1 + bgt a2, zero, 2b + + /* return */ +3: move v0, a3 + jr ra +SYM_FUNC_END(__memcpy_fast) diff --git a/arch/loongarch/lib/memmove.S b/arch/loongarch/lib/memmove.S index f55d8dcd09dd..5ae764586db3 100644 --- a/arch/loongarch/lib/memmove.S +++ b/arch/loongarch/lib/memmove.S @@ -3,18 +3,51 @@ * Copyright (C) 2020-2021 Loongson Technology Corporation Limited */ +#include +#include #include +#include #include #include +SYM_FUNC_START(memmove) + blt a0, a1, 1f /* dst < src, memcpy */ + blt a1, a0, 3f /* src < dst, rmemcpy */ + jr ra /* dst == src, return */ + + /* if (src - dst) < 64, copy 1 byte at a time */ +1: ori a3, zero, 64 + sub.d t0, a1, a0 + blt t0, a3, 2f + b memcpy +2: b __memcpy_generic + + /* if (dst - src) < 64, copy 1 byte at a time */ +3: ori a3, zero, 64 + sub.d t0, a0, a1 + blt t0, a3, 4f + b rmemcpy +4: b __rmemcpy_generic +SYM_FUNC_END(memmove) + +EXPORT_SYMBOL(memmove) + +SYM_FUNC_START(rmemcpy) + /* + * Some CPUs support hardware unaligned access + */ + ALTERNATIVE "b __rmemcpy_generic", \ + "b __rmemcpy_fast", CPU_FEATURE_UAL +SYM_FUNC_END(rmemcpy) + /* - * void *rmemcpy(void *dst, const void *src, size_t n) + * void *__rmemcpy_generic(void *dst, const void *src, size_t n) * * a0: dst * a1: src * a2: n */ -SYM_FUNC_START(rmemcpy) +SYM_FUNC_START(__rmemcpy_generic) move a3, a0 beqz a2, 2f @@ -30,16 +63,59 @@ SYM_FUNC_START(rmemcpy) 2: move v0, a3 jr ra -SYM_FUNC_END(rmemcpy) +SYM_FUNC_END(__rmemcpy_generic) -SYM_FUNC_START(memmove) - blt a0, a1, 1f /* dst < src, memcpy */ - blt a1, a0, 2f /* src < dst, rmemcpy */ - jr ra /* dst == src, return */ +/* + * void *__rmemcpy_fast(void *dst, const void *src, size_t n) + * + * a0: dst + * a1: src + * a2: n + */ +SYM_FUNC_START(__rmemcpy_fast) + move a3, a0 + beqz a2, 3f -1: b memcpy + add.d a0, a0, a2 + add.d a1, a1, a2 -2: b rmemcpy -SYM_FUNC_END(memmove) + ori a4, zero, 64 + blt a2, a4, 2f -EXPORT_SYMBOL(memmove) + /* copy 64 bytes at a time */ +1: ld.d t0, a1, -8 + ld.d t1, a1, -16 + ld.d t2, a1, -24 + ld.d t3, a1, -32 + ld.d t4, a1, -40 + ld.d t5, a1, -48 + ld.d t6, a1, -56 + ld.d t7, a1, -64 + st.d t0, a0, -8 + st.d t1, a0, -16 + st.d t2, a0, -24 + st.d t3, a0, -32 + st.d t4, a0, -40 + st.d t5, a0, -48 + st.d t6, a0, -56 + st.d t7, a0, -64 + + addi.d a0, a0, -64 + addi.d a1, a1, -64 + addi.d a2, a2, -64 + bge a2, a4, 1b + + beqz a2, 3f + + /* copy the remaining bytes */ +2: ld.b t0, a1, -1 + st.b t0, a0, -1 + addi.d a0, a0, -1 + addi.d a1, a1, -1 + addi.d a2, a2, -1 + bgt a2, zero, 2b + + /* return */ +3: move v0, a3 + jr ra +SYM_FUNC_END(__rmemcpy_fast) diff --git a/arch/loongarch/lib/memset.S b/arch/loongarch/lib/memset.S index 009d8ec7fc60..9fcf0d002ab9 100644 --- a/arch/loongarch/lib/memset.S +++ b/arch/loongarch/lib/memset.S @@ -1,20 +1,39 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright (C) 2020-2021 Loongson Technology Co., Ltd. + * Copyright (C) 2020-2021 Loongson Technology Corporation Limited */ +#include +#include #include +#include #include #include +.macro fill_to_64 r0 + bstrins.d \r0, \r0, 15, 8 + bstrins.d \r0, \r0, 31, 16 + bstrins.d \r0, \r0, 63, 32 +.endm + +SYM_FUNC_START(memset) + /* + * Some CPUs support hardware unaligned access + */ + ALTERNATIVE "b __memset_generic", \ + "b __memset_fast", CPU_FEATURE_UAL +SYM_FUNC_END(memset) + +EXPORT_SYMBOL(memset) + /* - * void *memset(void *s, int c, size_t n) + * void *__memset_generic(void *s, int c, size_t n) * * a0: s * a1: c * a2: n */ -SYM_FUNC_START(memset) +SYM_FUNC_START(__memset_generic) move a3, a0 beqz a2, 2f @@ -25,6 +44,48 @@ SYM_FUNC_START(memset) 2: move v0, a3 jr ra -SYM_FUNC_END(memset) +SYM_FUNC_END(__memset_generic) -EXPORT_SYMBOL(memset) +/* + * void *__memset_fast(void *s, int c, size_t n) + * + * a0: s + * a1: c + * a2: n + */ +SYM_FUNC_START(__memset_fast) + move a3, a0 + beqz a2, 3f + + ori a4, zero, 64 + blt a2, a4, 2f + + /* fill a1 to 64 bits */ + fill_to_64 a1 + + /* set 64 bytes at a time */ +1: st.d a1, a0, 0 + st.d a1, a0, 8 + st.d a1, a0, 16 + st.d a1, a0, 24 + st.d a1, a0, 32 + st.d a1, a0, 40 + st.d a1, a0, 48 + st.d a1, a0, 56 + + addi.d a0, a0, 64 + addi.d a2, a2, -64 + bge a2, a4, 1b + + beqz a2, 3f + + /* set the remaining bytes */ +2: st.b a1, a0, 0 + addi.d a0, a0, 1 + addi.d a2, a2, -1 + bgt a2, zero, 2b + + /* return */ +3: move v0, a3 + jr ra +SYM_FUNC_END(__memset_fast) -- Gitee From 2c929ef5db8ac3bfe8b610546fd6c327d40691e0 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 13 Oct 2021 08:29:43 +0800 Subject: [PATCH 41/59] LoongArch: BPF: Add eBPF JIT support Change-Id: I54314fc77e2c3174e9b0c984d0b4223bd29e17e5 Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/Kbuild | 1 + arch/loongarch/Kconfig | 1 + arch/loongarch/configs/loongson3_defconfig | 1 + arch/loongarch/include/asm/perf_event.h | 4 +- .../include/uapi/asm/bpf_perf_event.h | 9 + arch/loongarch/net/Makefile | 7 + arch/loongarch/net/ebpf_jit.c | 978 ++++++++++++++++++ arch/loongarch/net/ebpf_jit.h | 842 +++++++++++++++ .../loongarch/include/uapi/asm/bitsperlong.h | 9 + tools/include/uapi/asm/bitsperlong.h | 2 + tools/testing/selftests/seccomp/seccomp_bpf.c | 6 + 11 files changed, 1859 insertions(+), 1 deletion(-) create mode 100644 arch/loongarch/include/uapi/asm/bpf_perf_event.h create mode 100644 arch/loongarch/net/Makefile create mode 100644 arch/loongarch/net/ebpf_jit.c create mode 100644 arch/loongarch/net/ebpf_jit.h create mode 100644 tools/arch/loongarch/include/uapi/asm/bitsperlong.h diff --git a/arch/loongarch/Kbuild b/arch/loongarch/Kbuild index e997ba117bd9..4fd77e6cccd0 100644 --- a/arch/loongarch/Kbuild +++ b/arch/loongarch/Kbuild @@ -18,4 +18,5 @@ obj- := $(platform-) obj-y += kernel/ obj-y += mm/ +obj-y += net/ obj-y += vdso/ diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index a5f2b13463d3..fbb20a6e8eae 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -76,6 +76,7 @@ config LOONGARCH select HAVE_DEBUG_KMEMLEAK select HAVE_DEBUG_STACKOVERFLOW select HAVE_DMA_CONTIGUOUS + select HAVE_EBPF_JIT if 64BIT select HAVE_EXIT_THREAD select HAVE_FAST_GUP select HAVE_FUTEX_CMPXCHG if FUTEX diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig index 1c91dd4193a9..e925e30f74d1 100644 --- a/arch/loongarch/configs/loongson3_defconfig +++ b/arch/loongarch/configs/loongson3_defconfig @@ -290,6 +290,7 @@ CONFIG_NET_ACT_BPF=m CONFIG_OPENVSWITCH=m CONFIG_NETLINK_DIAG=y CONFIG_CGROUP_NET_PRIO=y +CONFIG_BPF_JIT=y CONFIG_BT=m CONFIG_BT_HCIBTUSB=m # CONFIG_BT_HCIBTUSB_BCM is not set diff --git a/arch/loongarch/include/asm/perf_event.h b/arch/loongarch/include/asm/perf_event.h index 0b57817eb6dc..a9caaf15738d 100644 --- a/arch/loongarch/include/asm/perf_event.h +++ b/arch/loongarch/include/asm/perf_event.h @@ -12,5 +12,7 @@ #ifndef __LOONGARCH_PERF_EVENT_H__ #define __LOONGARCH_PERF_EVENT_H__ -/* Leave it empty here. The file is required by linux/perf_event.h */ + +#define perf_arch_bpf_user_pt_regs(regs) (struct user_pt_regs *)regs + #endif /* __LOONGARCH_PERF_EVENT_H__ */ diff --git a/arch/loongarch/include/uapi/asm/bpf_perf_event.h b/arch/loongarch/include/uapi/asm/bpf_perf_event.h new file mode 100644 index 000000000000..eb6e2fd2a1f0 --- /dev/null +++ b/arch/loongarch/include/uapi/asm/bpf_perf_event.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__ +#define _UAPI__ASM_BPF_PERF_EVENT_H__ + +#include + +typedef struct user_pt_regs bpf_user_pt_regs_t; + +#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */ diff --git a/arch/loongarch/net/Makefile b/arch/loongarch/net/Makefile new file mode 100644 index 000000000000..42fa082caa93 --- /dev/null +++ b/arch/loongarch/net/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Makefile for arch/loongarch/net +# +# Copyright (C) 2021 Loongson Technology Corporation Limited +# +obj-$(CONFIG_BPF_JIT) += ebpf_jit.o diff --git a/arch/loongarch/net/ebpf_jit.c b/arch/loongarch/net/ebpf_jit.c new file mode 100644 index 000000000000..f228c8a238a2 --- /dev/null +++ b/arch/loongarch/net/ebpf_jit.c @@ -0,0 +1,978 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * BPF JIT compiler for LoongArch + * + * Copyright (C) 2021 Loongson Technology Corporation Limited + */ +#include "ebpf_jit.h" + +#define TMP_REG_1 (MAX_BPF_JIT_REG + 0) +#define TMP_REG_2 (MAX_BPF_JIT_REG + 1) +#define TMP_REG_3 (MAX_BPF_JIT_REG + 2) +#define REG_TCC (MAX_BPF_JIT_REG + 3) +#define TCC_SAVED (MAX_BPF_JIT_REG + 4) + +#define SAVE_RA BIT(0) +#define SAVE_TCC BIT(1) + +static const int regmap[] = { + /* return value from in-kernel function, and exit value for eBPF program */ + [BPF_REG_0] = LOONGARCH_GPR_A5, + /* arguments from eBPF program to in-kernel function */ + [BPF_REG_1] = LOONGARCH_GPR_A0, + [BPF_REG_2] = LOONGARCH_GPR_A1, + [BPF_REG_3] = LOONGARCH_GPR_A2, + [BPF_REG_4] = LOONGARCH_GPR_A3, + [BPF_REG_5] = LOONGARCH_GPR_A4, + /* callee saved registers that in-kernel function will preserve */ + [BPF_REG_6] = LOONGARCH_GPR_S0, + [BPF_REG_7] = LOONGARCH_GPR_S1, + [BPF_REG_8] = LOONGARCH_GPR_S2, + [BPF_REG_9] = LOONGARCH_GPR_S3, + /* read-only frame pointer to access stack */ + [BPF_REG_FP] = LOONGARCH_GPR_S4, + /* temporary register for blinding constants */ + [BPF_REG_AX] = LOONGARCH_GPR_T0, + /* temporary register for internal BPF JIT */ + [TMP_REG_1] = LOONGARCH_GPR_T1, + [TMP_REG_2] = LOONGARCH_GPR_T2, + [TMP_REG_3] = LOONGARCH_GPR_T3, + /* tail call */ + [REG_TCC] = LOONGARCH_GPR_A6, + /* store A6 in S5 if program do calls */ + [TCC_SAVED] = LOONGARCH_GPR_S5, +}; + +static void mark_call(struct jit_ctx *ctx) +{ + ctx->flags |= SAVE_RA; +} + +static void mark_tail_call(struct jit_ctx *ctx) +{ + ctx->flags |= SAVE_TCC; +} + +static bool seen_call(struct jit_ctx *ctx) +{ + return (ctx->flags & SAVE_RA); +} + +static bool seen_tail_call(struct jit_ctx *ctx) +{ + return (ctx->flags & SAVE_TCC); +} + +static u8 tail_call_reg(struct jit_ctx *ctx) +{ + if (seen_call(ctx)) + return regmap[TCC_SAVED]; + + return regmap[REG_TCC]; +} + +/* + * eBPF prog stack layout: + * + * high + * original $sp ------------> +-------------------------+ <--LOONGARCH_GPR_FP + * | $ra | + * +-------------------------+ + * | $fp | + * +-------------------------+ + * | $s0 | + * +-------------------------+ + * | $s1 | + * +-------------------------+ + * | $s2 | + * +-------------------------+ + * | $s3 | + * +-------------------------+ + * | $s4 | + * +-------------------------+ + * | $s5 | + * +-------------------------+ <--BPF_REG_FP + * | prog->aux->stack_depth | + * | (optional) | + * current $sp -------------> +-------------------------+ + * low + */ +static void build_prologue(struct jit_ctx *ctx) +{ + int stack_adjust = 0, store_offset, bpf_stack_adjust; + + bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16); + + stack_adjust += sizeof(long); /* LOONGARCH_GPR_RA */ + stack_adjust += sizeof(long); /* LOONGARCH_GPR_FP */ + stack_adjust += sizeof(long); /* LOONGARCH_GPR_S0 */ + stack_adjust += sizeof(long); /* LOONGARCH_GPR_S1 */ + stack_adjust += sizeof(long); /* LOONGARCH_GPR_S2 */ + stack_adjust += sizeof(long); /* LOONGARCH_GPR_S3 */ + stack_adjust += sizeof(long); /* LOONGARCH_GPR_S4 */ + stack_adjust += sizeof(long); /* LOONGARCH_GPR_S5 */ + + stack_adjust = round_up(stack_adjust, 16); + stack_adjust += bpf_stack_adjust; + + /* + * First instruction initializes the tail call count (TCC). + * On tail call we skip this instruction, and the TCC is + * passed in REG_TCC from the caller. + */ + emit_insn(ctx, addid, regmap[REG_TCC], LOONGARCH_GPR_ZERO, MAX_TAIL_CALL_CNT); + + emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -stack_adjust); + + store_offset = stack_adjust - sizeof(long); + emit_insn(ctx, std, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, store_offset); + + store_offset -= sizeof(long); + emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, store_offset); + + store_offset -= sizeof(long); + emit_insn(ctx, std, LOONGARCH_GPR_S0, LOONGARCH_GPR_SP, store_offset); + + store_offset -= sizeof(long); + emit_insn(ctx, std, LOONGARCH_GPR_S1, LOONGARCH_GPR_SP, store_offset); + + store_offset -= sizeof(long); + emit_insn(ctx, std, LOONGARCH_GPR_S2, LOONGARCH_GPR_SP, store_offset); + + store_offset -= sizeof(long); + emit_insn(ctx, std, LOONGARCH_GPR_S3, LOONGARCH_GPR_SP, store_offset); + + store_offset -= sizeof(long); + emit_insn(ctx, std, LOONGARCH_GPR_S4, LOONGARCH_GPR_SP, store_offset); + + store_offset -= sizeof(long); + emit_insn(ctx, std, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, store_offset); + + emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_adjust); + + if (bpf_stack_adjust) + emit_insn(ctx, addid, regmap[BPF_REG_FP], LOONGARCH_GPR_SP, bpf_stack_adjust); + + /* + * Program contains calls and tail calls, so REG_TCC need + * to be saved across calls. + */ + if (seen_tail_call(ctx) && seen_call(ctx)) + move_reg(ctx, regmap[TCC_SAVED], regmap[REG_TCC]); + + ctx->stack_size = stack_adjust; +} + +static void __build_epilogue(struct jit_ctx *ctx, bool is_tail_call) +{ + int stack_adjust = ctx->stack_size; + int load_offset; + + load_offset = stack_adjust - sizeof(long); + emit_insn(ctx, ldd, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, load_offset); + + load_offset -= sizeof(long); + emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, load_offset); + + load_offset -= sizeof(long); + emit_insn(ctx, ldd, LOONGARCH_GPR_S0, LOONGARCH_GPR_SP, load_offset); + + load_offset -= sizeof(long); + emit_insn(ctx, ldd, LOONGARCH_GPR_S1, LOONGARCH_GPR_SP, load_offset); + + load_offset -= sizeof(long); + emit_insn(ctx, ldd, LOONGARCH_GPR_S2, LOONGARCH_GPR_SP, load_offset); + + load_offset -= sizeof(long); + emit_insn(ctx, ldd, LOONGARCH_GPR_S3, LOONGARCH_GPR_SP, load_offset); + + load_offset -= sizeof(long); + emit_insn(ctx, ldd, LOONGARCH_GPR_S4, LOONGARCH_GPR_SP, load_offset); + + load_offset -= sizeof(long); + emit_insn(ctx, ldd, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, load_offset); + + emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, stack_adjust); + + if (!is_tail_call) { + /* Set return value */ + move_reg(ctx, LOONGARCH_GPR_V0, regmap[BPF_REG_0]); + /* Return to the caller */ + emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_RA, 0); + } else { + /* + * Call the next bpf prog and skip the first instruction + * of TCC initialization. + */ + emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, regmap[TMP_REG_3], 1); + } +} + +void build_epilogue(struct jit_ctx *ctx) +{ + __build_epilogue(ctx, false); +} + +/* initialized on the first pass of build_body() */ +static int out_offset = -1; +static int emit_bpf_tail_call(struct jit_ctx *ctx) +{ + int off; + u8 tcc = tail_call_reg(ctx); + u8 a1 = LOONGARCH_GPR_A1; + u8 a2 = LOONGARCH_GPR_A2; + u8 tmp1 = regmap[TMP_REG_1]; + u8 tmp2 = regmap[TMP_REG_2]; + u8 tmp3 = regmap[TMP_REG_3]; + const int idx0 = ctx->idx; + +#define cur_offset (ctx->idx - idx0) +#define jmp_offset (out_offset - (cur_offset)) + + /* + * a0: &ctx + * a1: &array + * a2: index + * + * if (index >= array->map.max_entries) + * goto out; + */ + off = offsetof(struct bpf_array, map.max_entries); + emit_insn(ctx, ldwu, tmp1, a1, off); + /* bgeu $a2, $t1, jmp_offset */ + emit_tailcall_jump(ctx, BPF_JGE, a2, tmp1, jmp_offset); + + /* + * if (TCC-- < 0) + * goto out; + */ + emit_insn(ctx, addid, tmp1, tcc, -1); + emit_tailcall_jump(ctx, BPF_JSLT, tcc, LOONGARCH_GPR_ZERO, jmp_offset); + + /* + * prog = array->ptrs[index]; + * if (!prog) + * goto out; + */ + emit_insn(ctx, sllid, tmp2, a2, 3); + emit_insn(ctx, addd, tmp2, tmp2, a1); + off = offsetof(struct bpf_array, ptrs); + emit_insn(ctx, ldd, tmp2, tmp2, off); + /* beq $t2, $zero, jmp_offset */ + emit_tailcall_jump(ctx, BPF_JEQ, tmp2, LOONGARCH_GPR_ZERO, jmp_offset); + + /* goto *(prog->bpf_func + 4); */ + off = offsetof(struct bpf_prog, bpf_func); + emit_insn(ctx, ldd, tmp3, tmp2, off); + move_reg(ctx, tcc, tmp1); + __build_epilogue(ctx, true); + + /* out: */ + if (out_offset == -1) + out_offset = cur_offset; + if (cur_offset != out_offset) { + pr_err_once("tail_call out_offset = %d, expected %d!\n", + cur_offset, out_offset); + return -1; + } + + return 0; +#undef cur_offset +#undef jmp_offset +} + +static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool extra_pass) +{ + bool is32 = (BPF_CLASS(insn->code) == BPF_ALU); + const u8 code = insn->code; + const u8 cond = BPF_OP(code); + const u8 dst = regmap[insn->dst_reg]; + const u8 src = regmap[insn->src_reg]; + const u8 tmp = regmap[TMP_REG_1]; + const u8 tmp2 = regmap[TMP_REG_2]; + const s16 off = insn->off; + const s32 imm = insn->imm; + int i = insn - ctx->prog->insnsi; + int jmp_offset; + bool func_addr_fixed; + u64 func_addr; + u64 imm64; + int ret; + + switch (code) { + /* dst = src */ + case BPF_ALU | BPF_MOV | BPF_X: + case BPF_ALU64 | BPF_MOV | BPF_X: + move_reg(ctx, dst, src); + emit_zext_32(ctx, dst, is32); + break; + /* dst = imm */ + case BPF_ALU | BPF_MOV | BPF_K: + case BPF_ALU64 | BPF_MOV | BPF_K: + move_imm32(ctx, dst, imm, is32); + break; + + /* dst = dst + src */ + case BPF_ALU | BPF_ADD | BPF_X: + case BPF_ALU64 | BPF_ADD | BPF_X: + emit_insn(ctx, addd, dst, dst, src); + emit_zext_32(ctx, dst, is32); + break; + /* dst = dst + imm */ + case BPF_ALU | BPF_ADD | BPF_K: + case BPF_ALU64 | BPF_ADD | BPF_K: + if (is_signed_imm12(imm)) { + emit_insn(ctx, addid, dst, dst, imm); + } else { + move_imm32(ctx, tmp, imm, is32); + emit_insn(ctx, addd, dst, dst, tmp); + } + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst - src */ + case BPF_ALU | BPF_SUB | BPF_X: + case BPF_ALU64 | BPF_SUB | BPF_X: + emit_insn(ctx, subd, dst, dst, src); + emit_zext_32(ctx, dst, is32); + break; + /* dst = dst - imm */ + case BPF_ALU | BPF_SUB | BPF_K: + case BPF_ALU64 | BPF_SUB | BPF_K: + if (is_signed_imm12(-imm)) { + emit_insn(ctx, addid, dst, dst, -imm); + } else { + move_imm32(ctx, tmp, imm, is32); + emit_insn(ctx, subd, dst, dst, tmp); + } + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst * src */ + case BPF_ALU | BPF_MUL | BPF_X: + case BPF_ALU64 | BPF_MUL | BPF_X: + emit_insn(ctx, muld, dst, dst, src); + emit_zext_32(ctx, dst, is32); + break; + /* dst = dst * imm */ + case BPF_ALU | BPF_MUL | BPF_K: + case BPF_ALU64 | BPF_MUL | BPF_K: + move_imm32(ctx, tmp, imm, is32); + emit_insn(ctx, muld, dst, dst, tmp); + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst / src */ + case BPF_ALU | BPF_DIV | BPF_X: + case BPF_ALU64 | BPF_DIV | BPF_X: + emit_insn(ctx, divdu, dst, dst, src); + emit_zext_32(ctx, dst, is32); + break; + /* dst = dst / imm */ + case BPF_ALU | BPF_DIV | BPF_K: + case BPF_ALU64 | BPF_DIV | BPF_K: + move_imm32(ctx, tmp, imm, is32); + emit_insn(ctx, divdu, dst, dst, tmp); + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst % src */ + case BPF_ALU | BPF_MOD | BPF_X: + case BPF_ALU64 | BPF_MOD | BPF_X: + emit_insn(ctx, moddu, dst, dst, src); + emit_zext_32(ctx, dst, is32); + break; + /* dst = dst % imm */ + case BPF_ALU | BPF_MOD | BPF_K: + case BPF_ALU64 | BPF_MOD | BPF_K: + move_imm32(ctx, tmp, imm, is32); + emit_insn(ctx, moddu, dst, dst, tmp); + emit_zext_32(ctx, dst, is32); + break; + + /* dst = -dst */ + case BPF_ALU | BPF_NEG: + case BPF_ALU64 | BPF_NEG: + move_imm32(ctx, tmp, imm, is32); + emit_insn(ctx, subd, dst, LOONGARCH_GPR_ZERO, dst); + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst & src */ + case BPF_ALU | BPF_AND | BPF_X: + case BPF_ALU64 | BPF_AND | BPF_X: + emit_insn(ctx, and, dst, dst, src); + emit_zext_32(ctx, dst, is32); + break; + /* dst = dst & imm */ + case BPF_ALU | BPF_AND | BPF_K: + case BPF_ALU64 | BPF_AND | BPF_K: + if (is_unsigned_imm12(imm)) { + emit_insn(ctx, andi, dst, dst, imm); + } else { + move_imm32(ctx, tmp, imm, is32); + emit_insn(ctx, and, dst, dst, tmp); + } + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst | src */ + case BPF_ALU | BPF_OR | BPF_X: + case BPF_ALU64 | BPF_OR | BPF_X: + emit_insn(ctx, or, dst, dst, src); + emit_zext_32(ctx, dst, is32); + break; + /* dst = dst | imm */ + case BPF_ALU | BPF_OR | BPF_K: + case BPF_ALU64 | BPF_OR | BPF_K: + if (is_unsigned_imm12(imm)) { + emit_insn(ctx, ori, dst, dst, imm); + } else { + move_imm32(ctx, tmp, imm, is32); + emit_insn(ctx, or, dst, dst, tmp); + } + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst ^ src */ + case BPF_ALU | BPF_XOR | BPF_X: + case BPF_ALU64 | BPF_XOR | BPF_X: + emit_insn(ctx, xor, dst, dst, src); + emit_zext_32(ctx, dst, is32); + break; + /* dst = dst ^ imm */ + case BPF_ALU | BPF_XOR | BPF_K: + case BPF_ALU64 | BPF_XOR | BPF_K: + if (is_unsigned_imm12(imm)) { + emit_insn(ctx, xori, dst, dst, imm); + } else { + move_imm32(ctx, tmp, imm, is32); + emit_insn(ctx, xor, dst, dst, tmp); + } + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst << src (logical) */ + case BPF_ALU | BPF_LSH | BPF_X: + emit_insn(ctx, sllw, dst, dst, src); + emit_zext_32(ctx, dst, is32); + break; + case BPF_ALU64 | BPF_LSH | BPF_X: + emit_insn(ctx, slld, dst, dst, src); + break; + /* dst = dst << imm (logical) */ + case BPF_ALU | BPF_LSH | BPF_K: + emit_insn(ctx, slliw, dst, dst, imm); + emit_zext_32(ctx, dst, is32); + break; + case BPF_ALU64 | BPF_LSH | BPF_K: + emit_insn(ctx, sllid, dst, dst, imm); + break; + + /* dst = dst >> src (logical) */ + case BPF_ALU | BPF_RSH | BPF_X: + emit_insn(ctx, srlw, dst, dst, src); + emit_zext_32(ctx, dst, is32); + break; + case BPF_ALU64 | BPF_RSH | BPF_X: + emit_insn(ctx, srld, dst, dst, src); + break; + /* dst = dst >> imm (logical) */ + case BPF_ALU | BPF_RSH | BPF_K: + emit_insn(ctx, srliw, dst, dst, imm); + emit_zext_32(ctx, dst, is32); + break; + case BPF_ALU64 | BPF_RSH | BPF_K: + emit_insn(ctx, srlid, dst, dst, imm); + break; + + /* dst = dst >> src (arithmetic) */ + case BPF_ALU | BPF_ARSH | BPF_X: + emit_insn(ctx, sraw, dst, dst, src); + emit_zext_32(ctx, dst, is32); + break; + case BPF_ALU64 | BPF_ARSH | BPF_X: + emit_insn(ctx, srad, dst, dst, src); + break; + /* dst = dst >> imm (arithmetic) */ + case BPF_ALU | BPF_ARSH | BPF_K: + emit_insn(ctx, sraiw, dst, dst, imm); + emit_zext_32(ctx, dst, is32); + break; + case BPF_ALU64 | BPF_ARSH | BPF_K: + emit_insn(ctx, sraid, dst, dst, imm); + break; + + /* dst = BSWAP##imm(dst) */ + case BPF_ALU | BPF_END | BPF_FROM_LE: + switch (imm) { + case 16: + /* zero-extend 16 bits into 64 bits */ + emit_insn(ctx, sllid, dst, dst, 48); + emit_insn(ctx, srlid, dst, dst, 48); + break; + case 32: + /* zero-extend 32 bits into 64 bits */ + emit_zext_32(ctx, dst, is32); + break; + case 64: + /* do nothing */ + break; + } + break; + case BPF_ALU | BPF_END | BPF_FROM_BE: + switch (imm) { + case 16: + emit_insn(ctx, revb2h, dst, dst); + /* zero-extend 16 bits into 64 bits */ + emit_insn(ctx, sllid, dst, dst, 48); + emit_insn(ctx, srlid, dst, dst, 48); + break; + case 32: + emit_insn(ctx, revb2w, dst, dst); + /* zero-extend 32 bits into 64 bits */ + emit_zext_32(ctx, dst, is32); + break; + case 64: + emit_insn(ctx, revbd, dst, dst); + break; + } + break; + + /* PC += off if dst cond src */ + case BPF_JMP | BPF_JEQ | BPF_X: + case BPF_JMP | BPF_JNE | BPF_X: + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP | BPF_JLT | BPF_X: + case BPF_JMP | BPF_JLE | BPF_X: + case BPF_JMP | BPF_JSGT | BPF_X: + case BPF_JMP | BPF_JSGE | BPF_X: + case BPF_JMP | BPF_JSLT | BPF_X: + case BPF_JMP | BPF_JSLE | BPF_X: + jmp_offset = bpf2la_offset(i, off, ctx); + emit_cond_jump(ctx, cond, dst, src, jmp_offset); + break; + + /* PC += off if dst cond imm */ + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JNE | BPF_K: + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP | BPF_JLT | BPF_K: + case BPF_JMP | BPF_JLE | BPF_K: + case BPF_JMP | BPF_JSGT | BPF_K: + case BPF_JMP | BPF_JSGE | BPF_K: + case BPF_JMP | BPF_JSLT | BPF_K: + case BPF_JMP | BPF_JSLE | BPF_K: + jmp_offset = bpf2la_offset(i, off, ctx); + move_imm32(ctx, tmp, imm, is32); + emit_cond_jump(ctx, cond, dst, tmp, jmp_offset); + break; + + /* PC += off if dst & src */ + case BPF_JMP | BPF_JSET | BPF_X: + jmp_offset = bpf2la_offset(i, off, ctx); + emit_insn(ctx, and, tmp, dst, src); + emit_cond_jump(ctx, cond, tmp, LOONGARCH_GPR_ZERO, jmp_offset); + break; + /* PC += off if dst & imm */ + case BPF_JMP | BPF_JSET | BPF_K: + jmp_offset = bpf2la_offset(i, off, ctx); + move_imm32(ctx, tmp, imm, is32); + emit_insn(ctx, and, tmp, dst, tmp); + emit_cond_jump(ctx, cond, tmp, LOONGARCH_GPR_ZERO, jmp_offset); + break; + + /* PC += off */ + case BPF_JMP | BPF_JA: + jmp_offset = bpf2la_offset(i, off, ctx); + emit_uncond_jump(ctx, jmp_offset, false); + break; + + /* function call */ + case BPF_JMP | BPF_CALL: + mark_call(ctx); + ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass, + &func_addr, &func_addr_fixed); + if (ret < 0) + return ret; + + move_imm64(ctx, tmp, func_addr, is32); + emit_insn(ctx, jirl, LOONGARCH_GPR_RA, tmp, 0); + move_reg(ctx, regmap[BPF_REG_0], LOONGARCH_GPR_V0); + break; + + /* tail call */ + case BPF_JMP | BPF_TAIL_CALL: + mark_tail_call(ctx); + if (emit_bpf_tail_call(ctx)) + return -EINVAL; + break; + + /* function return */ + case BPF_JMP | BPF_EXIT: + emit_sext_32(ctx, regmap[BPF_REG_0]); + /* + * Optimization: when last instruction is EXIT, + * simply fallthrough to epilogue. + */ + if (i == ctx->prog->len - 1) + break; + + jmp_offset = epilogue_offset(ctx); + emit_uncond_jump(ctx, jmp_offset, true); + break; + + /* dst = imm64 */ + case BPF_LD | BPF_IMM | BPF_DW: + imm64 = (u64)(insn + 1)->imm << 32 | (u32)insn->imm; + move_imm64(ctx, dst, imm64, is32); + return 1; + + /* dst = *(size *)(src + off) */ + case BPF_LDX | BPF_MEM | BPF_B: + case BPF_LDX | BPF_MEM | BPF_H: + case BPF_LDX | BPF_MEM | BPF_W: + case BPF_LDX | BPF_MEM | BPF_DW: + if (is_signed_imm12(off)) { + switch (BPF_SIZE(code)) { + case BPF_B: + emit_insn(ctx, ldbu, dst, src, off); + break; + case BPF_H: + emit_insn(ctx, ldhu, dst, src, off); + break; + case BPF_W: + emit_insn(ctx, ldwu, dst, src, off); + break; + case BPF_DW: + emit_insn(ctx, ldd, dst, src, off); + break; + } + } else { + move_imm32(ctx, tmp, off, is32); + switch (BPF_SIZE(code)) { + case BPF_B: + emit_insn(ctx, ldxbu, dst, src, tmp); + break; + case BPF_H: + emit_insn(ctx, ldxhu, dst, src, tmp); + break; + case BPF_W: + emit_insn(ctx, ldxwu, dst, src, tmp); + break; + case BPF_DW: + emit_insn(ctx, ldxd, dst, src, tmp); + break; + } + } + break; + + /* *(size *)(dst + off) = imm */ + case BPF_ST | BPF_MEM | BPF_B: + case BPF_ST | BPF_MEM | BPF_H: + case BPF_ST | BPF_MEM | BPF_W: + case BPF_ST | BPF_MEM | BPF_DW: + move_imm32(ctx, tmp, imm, is32); + if (is_signed_imm12(off)) { + switch (BPF_SIZE(code)) { + case BPF_B: + emit_insn(ctx, stb, tmp, dst, off); + break; + case BPF_H: + emit_insn(ctx, sth, tmp, dst, off); + break; + case BPF_W: + emit_insn(ctx, stw, tmp, dst, off); + break; + case BPF_DW: + emit_insn(ctx, std, tmp, dst, off); + break; + } + } else { + move_imm32(ctx, tmp2, off, is32); + switch (BPF_SIZE(code)) { + case BPF_B: + emit_insn(ctx, stxb, tmp, dst, tmp2); + break; + case BPF_H: + emit_insn(ctx, stxh, tmp, dst, tmp2); + break; + case BPF_W: + emit_insn(ctx, stxw, tmp, dst, tmp2); + break; + case BPF_DW: + emit_insn(ctx, stxd, tmp, dst, tmp2); + break; + } + } + break; + + /* *(size *)(dst + off) = src */ + case BPF_STX | BPF_MEM | BPF_B: + case BPF_STX | BPF_MEM | BPF_H: + case BPF_STX | BPF_MEM | BPF_W: + case BPF_STX | BPF_MEM | BPF_DW: + if (is_signed_imm12(off)) { + switch (BPF_SIZE(code)) { + case BPF_B: + emit_insn(ctx, stb, src, dst, off); + break; + case BPF_H: + emit_insn(ctx, sth, src, dst, off); + break; + case BPF_W: + emit_insn(ctx, stw, src, dst, off); + break; + case BPF_DW: + emit_insn(ctx, std, src, dst, off); + break; + } + } else { + move_imm32(ctx, tmp, off, is32); + switch (BPF_SIZE(code)) { + case BPF_B: + emit_insn(ctx, stxb, src, dst, tmp); + break; + case BPF_H: + emit_insn(ctx, stxh, src, dst, tmp); + break; + case BPF_W: + emit_insn(ctx, stxw, src, dst, tmp); + break; + case BPF_DW: + emit_insn(ctx, stxd, src, dst, tmp); + break; + } + } + break; + + /* atomic_add: lock *(size *)(dst + off) += src */ + case BPF_STX | BPF_XADD | BPF_W: + case BPF_STX | BPF_XADD | BPF_DW: + if (insn->imm != BPF_ADD) { + pr_err_once("unknown atomic op code %02x\n", insn->imm); + return -EINVAL; + } + + move_imm32(ctx, tmp, off, is32); + emit_insn(ctx, addd, tmp, dst, tmp); + switch (BPF_SIZE(insn->code)) { + case BPF_W: + emit_insn(ctx, amaddw, tmp2, src, tmp); + break; + case BPF_DW: + emit_insn(ctx, amaddd, tmp2, src, tmp); + break; + } + break; + + default: + pr_err("bpf_jit: unknown opcode %02x\n", code); + return -EINVAL; + } + + return 0; +} + +static int build_body(struct jit_ctx *ctx, bool extra_pass) +{ + const struct bpf_prog *prog = ctx->prog; + int i; + + for (i = 0; i < prog->len; i++) { + const struct bpf_insn *insn = &prog->insnsi[i]; + int ret; + + if (!ctx->image) + ctx->offset[i] = ctx->idx; + + ret = build_insn(insn, ctx, extra_pass); + if (ret > 0) { + i++; + if (!ctx->image) + ctx->offset[i] = ctx->idx; + continue; + } + if (ret) + return ret; + } + + if (!ctx->image) + ctx->offset[i] = ctx->idx; + + return 0; +} + +static inline void bpf_flush_icache(void *start, void *end) +{ + flush_icache_range((unsigned long)start, (unsigned long)end); +} + +/* Fill space with illegal instructions */ +static void jit_fill_hole(void *area, unsigned int size) +{ + u32 *ptr; + + /* We are guaranteed to have aligned memory */ + for (ptr = area; size >= sizeof(u32); size -= sizeof(u32)) + *ptr++ = INSN_BREAK; +} + +static int validate_code(struct jit_ctx *ctx) +{ + int i; + union loongarch_instruction insn; + + for (i = 0; i < ctx->idx; i++) { + insn = ctx->image[i]; + /* Check INSN_BREAK */ + if (insn.word == INSN_BREAK) + return -1; + } + + return 0; +} + +struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) +{ + struct bpf_prog *tmp, *orig_prog = prog; + struct bpf_binary_header *header; + struct jit_data *jit_data; + struct jit_ctx ctx; + bool tmp_blinded = false; + bool extra_pass = false; + int image_size; + u8 *image_ptr; + + /* + * If BPF JIT was not enabled then we must fall back to + * the interpreter. + */ + if (!prog->jit_requested) + return orig_prog; + + tmp = bpf_jit_blind_constants(prog); + /* + * If blinding was requested and we failed during blinding, + * we must fall back to the interpreter. Otherwise, we save + * the new JITed code. + */ + if (IS_ERR(tmp)) + return orig_prog; + if (tmp != prog) { + tmp_blinded = true; + prog = tmp; + } + + jit_data = prog->aux->jit_data; + if (!jit_data) { + jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL); + if (!jit_data) { + prog = orig_prog; + goto out; + } + prog->aux->jit_data = jit_data; + } + if (jit_data->ctx.offset) { + ctx = jit_data->ctx; + image_ptr = jit_data->image; + header = jit_data->header; + extra_pass = true; + image_size = sizeof(u32) * ctx.idx; + goto skip_init_ctx; + } + + memset(&ctx, 0, sizeof(ctx)); + ctx.prog = prog; + + ctx.offset = kcalloc(prog->len + 1, sizeof(*ctx.offset), GFP_KERNEL); + if (!ctx.offset) { + prog = orig_prog; + goto out_off; + } + + /* 1. Initial fake pass to compute ctx->idx and set ctx->flags */ + if (build_body(&ctx, extra_pass)) { + prog = orig_prog; + goto out_off; + } + build_prologue(&ctx); + ctx.epilogue_offset = ctx.idx; + build_epilogue(&ctx); + + /* Now we know the actual image size. + * As each LoongArch instruction is of length 32bit, + * we are translating number of JITed intructions into + * the size required to store these JITed code. + */ + image_size = sizeof(u32) * ctx.idx; + /* Now we know the size of the structure to make */ + header = bpf_jit_binary_alloc(image_size, &image_ptr, + sizeof(u32), jit_fill_hole); + if (!header) { + prog = orig_prog; + goto out_off; + } + + /* 2. Now, the actual pass to generate final JIT code */ + ctx.image = (union loongarch_instruction *)image_ptr; +skip_init_ctx: + ctx.idx = 0; + + build_prologue(&ctx); + if (build_body(&ctx, extra_pass)) { + bpf_jit_binary_free(header); + prog = orig_prog; + goto out_off; + } + build_epilogue(&ctx); + + /* 3. Extra pass to validate JITed code */ + if (validate_code(&ctx)) { + bpf_jit_binary_free(header); + prog = orig_prog; + goto out_off; + } + + /* And we're done */ + if (bpf_jit_enable > 1) + bpf_jit_dump(prog->len, image_size, 2, ctx.image); + + /* Update the icache */ + bpf_flush_icache(header, ctx.image + ctx.idx); + + if (!prog->is_func || extra_pass) { + if (extra_pass && ctx.idx != jit_data->ctx.idx) { + pr_err_once("multi-func JIT bug %d != %d\n", + ctx.idx, jit_data->ctx.idx); + bpf_jit_binary_free(header); + prog->bpf_func = NULL; + prog->jited = 0; + goto out_off; + } + bpf_jit_binary_lock_ro(header); + } else { + jit_data->ctx = ctx; + jit_data->image = image_ptr; + jit_data->header = header; + } + prog->bpf_func = (void *)ctx.image; + prog->jited = 1; + prog->jited_len = image_size; + + if (!prog->is_func || extra_pass) { +out_off: + kfree(ctx.offset); + kfree(jit_data); + prog->aux->jit_data = NULL; + } +out: + if (tmp_blinded) + bpf_jit_prog_release_other(prog, prog == orig_prog ? + tmp : orig_prog); + + out_offset = -1; + return prog; +} diff --git a/arch/loongarch/net/ebpf_jit.h b/arch/loongarch/net/ebpf_jit.h new file mode 100644 index 000000000000..54e5266f83a2 --- /dev/null +++ b/arch/loongarch/net/ebpf_jit.h @@ -0,0 +1,842 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * BPF JIT compiler for LoongArch + * + * Copyright (C) 2021 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include + +struct jit_ctx { + const struct bpf_prog *prog; + unsigned int idx; + unsigned int flags; + unsigned int epilogue_offset; + u32 *offset; + union loongarch_instruction *image; + u32 stack_size; +}; + +struct jit_data { + struct bpf_binary_header *header; + u8 *image; + struct jit_ctx ctx; +}; + +#define emit_insn(ctx, func, ...) \ +do { \ + if (ctx->image != NULL) { \ + union loongarch_instruction *insn = &ctx->image[ctx->idx]; \ + emit_##func(insn, ##__VA_ARGS__); \ + } \ + ctx->idx++; \ +} while (0) + +static inline bool is_unsigned_imm(unsigned long val, unsigned int bit) +{ + return val < (1UL << bit); +} + +static inline bool is_signed_imm(long val, unsigned int bit) +{ + return -(1L << (bit - 1)) <= val && val < (1L << (bit - 1)); +} + +#define is_signed_imm12(val) is_signed_imm(val, 12) +#define is_signed_imm16(val) is_signed_imm(val, 16) +#define is_signed_imm26(val) is_signed_imm(val, 26) +#define is_signed_imm32(val) is_signed_imm(val, 32) +#define is_signed_imm52(val) is_signed_imm(val, 52) +#define is_unsigned_imm12(val) is_unsigned_imm(val, 12) +#define is_unsigned_imm32(val) is_unsigned_imm(val, 32) + +static inline int bpf2la_offset(int bpf_insn, int off, const struct jit_ctx *ctx) +{ + /* BPF JMP offset is relative to the next instruction */ + bpf_insn++; + /* + * Whereas la64 branch instructions encode the offset + * from the branch itself, so we must subtract 1 from the + * instruction offset. + */ + return (ctx->offset[bpf_insn + off] - (ctx->offset[bpf_insn] - 1)); +} + +static inline int epilogue_offset(const struct jit_ctx *ctx) +{ + int to = ctx->epilogue_offset; + int from = ctx->idx; + + return (to - from); +} + +static inline void emit_ldbu(union loongarch_instruction *insn, + enum loongarch_gpr rd, enum loongarch_gpr rj, int imm) +{ + insn->reg2i12_format.opcode = ldbu_op; + insn->reg2i12_format.simmediate = imm; + insn->reg2i12_format.rd = rd; + insn->reg2i12_format.rj = rj; +} + +static inline void emit_ldhu(union loongarch_instruction *insn, + enum loongarch_gpr rd, enum loongarch_gpr rj, int imm) +{ + insn->reg2i12_format.opcode = ldhu_op; + insn->reg2i12_format.simmediate = imm; + insn->reg2i12_format.rd = rd; + insn->reg2i12_format.rj = rj; +} + +static inline void emit_ldwu(union loongarch_instruction *insn, + enum loongarch_gpr rd, enum loongarch_gpr rj, int imm) +{ + insn->reg2i12_format.opcode = ldwu_op; + insn->reg2i12_format.simmediate = imm; + insn->reg2i12_format.rd = rd; + insn->reg2i12_format.rj = rj; +} + +static inline void emit_ldd(union loongarch_instruction *insn, + enum loongarch_gpr rd, enum loongarch_gpr rj, int imm) +{ + insn->reg2i12_format.opcode = ldd_op; + insn->reg2i12_format.simmediate = imm; + insn->reg2i12_format.rd = rd; + insn->reg2i12_format.rj = rj; +} + +static inline void emit_stb(union loongarch_instruction *insn, + enum loongarch_gpr rd, enum loongarch_gpr rj, int imm) +{ + insn->reg2i12_format.opcode = stb_op; + insn->reg2i12_format.simmediate = imm; + insn->reg2i12_format.rd = rd; + insn->reg2i12_format.rj = rj; +} + +static inline void emit_sth(union loongarch_instruction *insn, + enum loongarch_gpr rd, enum loongarch_gpr rj, int imm) +{ + insn->reg2i12_format.opcode = sth_op; + insn->reg2i12_format.simmediate = imm; + insn->reg2i12_format.rd = rd; + insn->reg2i12_format.rj = rj; +} + +static inline void emit_stw(union loongarch_instruction *insn, + enum loongarch_gpr rd, enum loongarch_gpr rj, int imm) +{ + insn->reg2i12_format.opcode = stw_op; + insn->reg2i12_format.simmediate = imm; + insn->reg2i12_format.rd = rd; + insn->reg2i12_format.rj = rj; +} + +static inline void emit_std(union loongarch_instruction *insn, + enum loongarch_gpr rd, enum loongarch_gpr rj, int imm) +{ + insn->reg2i12_format.opcode = std_op; + insn->reg2i12_format.simmediate = imm; + insn->reg2i12_format.rd = rd; + insn->reg2i12_format.rj = rj; +} + +static inline void emit_ldxbu(union loongarch_instruction *insn, enum loongarch_gpr rd, + enum loongarch_gpr rj, enum loongarch_gpr rk) +{ + insn->reg3_format.opcode = ldxbu_op; + insn->reg3_format.rd = rd; + insn->reg3_format.rj = rj; + insn->reg3_format.rk = rk; +} + +static inline void emit_ldxhu(union loongarch_instruction *insn, enum loongarch_gpr rd, + enum loongarch_gpr rj, enum loongarch_gpr rk) +{ + insn->reg3_format.opcode = ldxhu_op; + insn->reg3_format.rd = rd; + insn->reg3_format.rj = rj; + insn->reg3_format.rk = rk; +} + +static inline void emit_ldxwu(union loongarch_instruction *insn, enum loongarch_gpr rd, + enum loongarch_gpr rj, enum loongarch_gpr rk) +{ + insn->reg3_format.opcode = ldxwu_op; + insn->reg3_format.rd = rd; + insn->reg3_format.rj = rj; + insn->reg3_format.rk = rk; +} + +static inline void emit_ldxd(union loongarch_instruction *insn, enum loongarch_gpr rd, + enum loongarch_gpr rj, enum loongarch_gpr rk) +{ + insn->reg3_format.opcode = ldxd_op; + insn->reg3_format.rd = rd; + insn->reg3_format.rj = rj; + insn->reg3_format.rk = rk; +} + +static inline void emit_stxb(union loongarch_instruction *insn, enum loongarch_gpr rd, + enum loongarch_gpr rj, enum loongarch_gpr rk) +{ + insn->reg3_format.opcode = stxb_op; + insn->reg3_format.rd = rd; + insn->reg3_format.rj = rj; + insn->reg3_format.rk = rk; +} + +static inline void emit_stxh(union loongarch_instruction *insn, enum loongarch_gpr rd, + enum loongarch_gpr rj, enum loongarch_gpr rk) +{ + insn->reg3_format.opcode = stxh_op; + insn->reg3_format.rd = rd; + insn->reg3_format.rj = rj; + insn->reg3_format.rk = rk; +} + +static inline void emit_stxw(union loongarch_instruction *insn, enum loongarch_gpr rd, + enum loongarch_gpr rj, enum loongarch_gpr rk) +{ + insn->reg3_format.opcode = stxw_op; + insn->reg3_format.rd = rd; + insn->reg3_format.rj = rj; + insn->reg3_format.rk = rk; +} + +static inline void emit_stxd(union loongarch_instruction *insn, enum loongarch_gpr rd, + enum loongarch_gpr rj, enum loongarch_gpr rk) +{ + insn->reg3_format.opcode = stxd_op; + insn->reg3_format.rd = rd; + insn->reg3_format.rj = rj; + insn->reg3_format.rk = rk; +} + +static inline void emit_amaddw(union loongarch_instruction *insn, enum loongarch_gpr rd, + enum loongarch_gpr rk, enum loongarch_gpr rj) +{ + insn->reg3_format.opcode = amaddw_op; + insn->reg3_format.rd = rd; + insn->reg3_format.rk = rk; + insn->reg3_format.rj = rj; +} + +static inline void emit_amaddd(union loongarch_instruction *insn, enum loongarch_gpr rd, + enum loongarch_gpr rk, enum loongarch_gpr rj) +{ + insn->reg3_format.opcode = amaddd_op; + insn->reg3_format.rd = rd; + insn->reg3_format.rk = rk; + insn->reg3_format.rj = rj; +} + +static inline void emit_addd(union loongarch_instruction *insn, enum loongarch_gpr rd, + enum loongarch_gpr rj, enum loongarch_gpr rk) +{ + insn->reg3_format.opcode = addd_op; + insn->reg3_format.rd = rd; + insn->reg3_format.rj = rj; + insn->reg3_format.rk = rk; +} + +static inline void emit_addiw(union loongarch_instruction *insn, + enum loongarch_gpr rd, enum loongarch_gpr rj, int imm) +{ + insn->reg2i12_format.opcode = addiw_op; + insn->reg2i12_format.simmediate = imm; + insn->reg2i12_format.rd = rd; + insn->reg2i12_format.rj = rj; +} + +static inline void emit_addid(union loongarch_instruction *insn, + enum loongarch_gpr rd, enum loongarch_gpr rj, int imm) +{ + insn->reg2i12_format.opcode = addid_op; + insn->reg2i12_format.simmediate = imm; + insn->reg2i12_format.rd = rd; + insn->reg2i12_format.rj = rj; +} + +static inline void emit_subd(union loongarch_instruction *insn, enum loongarch_gpr rd, + enum loongarch_gpr rj, enum loongarch_gpr rk) +{ + insn->reg3_format.opcode = subd_op; + insn->reg3_format.rd = rd; + insn->reg3_format.rj = rj; + insn->reg3_format.rk = rk; +} + +static inline void emit_muld(union loongarch_instruction *insn, enum loongarch_gpr rd, + enum loongarch_gpr rj, enum loongarch_gpr rk) +{ + insn->reg3_format.opcode = muld_op; + insn->reg3_format.rd = rd; + insn->reg3_format.rj = rj; + insn->reg3_format.rk = rk; +} + +static inline void emit_divdu(union loongarch_instruction *insn, enum loongarch_gpr rd, + enum loongarch_gpr rj, enum loongarch_gpr rk) +{ + insn->reg3_format.opcode = divdu_op; + insn->reg3_format.rd = rd; + insn->reg3_format.rj = rj; + insn->reg3_format.rk = rk; +} + +static inline void emit_moddu(union loongarch_instruction *insn, enum loongarch_gpr rd, + enum loongarch_gpr rj, enum loongarch_gpr rk) +{ + insn->reg3_format.opcode = moddu_op; + insn->reg3_format.rd = rd; + insn->reg3_format.rj = rj; + insn->reg3_format.rk = rk; +} + +static inline void emit_and(union loongarch_instruction *insn, enum loongarch_gpr rd, + enum loongarch_gpr rj, enum loongarch_gpr rk) +{ + insn->reg3_format.opcode = and_op; + insn->reg3_format.rd = rd; + insn->reg3_format.rj = rj; + insn->reg3_format.rk = rk; +} + +static inline void emit_andi(union loongarch_instruction *insn, + enum loongarch_gpr rd, enum loongarch_gpr rj, u32 imm) +{ + insn->reg2i12_format.opcode = andi_op; + insn->reg2i12_format.simmediate = imm; + insn->reg2i12_format.rd = rd; + insn->reg2i12_format.rj = rj; +} + +static inline void emit_or(union loongarch_instruction *insn, enum loongarch_gpr rd, + enum loongarch_gpr rj, enum loongarch_gpr rk) +{ + insn->reg3_format.opcode = or_op; + insn->reg3_format.rd = rd; + insn->reg3_format.rj = rj; + insn->reg3_format.rk = rk; +} + +static inline void emit_ori(union loongarch_instruction *insn, + enum loongarch_gpr rd, enum loongarch_gpr rj, u32 imm) +{ + insn->reg2i12_format.opcode = ori_op; + insn->reg2i12_format.simmediate = imm; + insn->reg2i12_format.rd = rd; + insn->reg2i12_format.rj = rj; +} + +static inline void emit_xor(union loongarch_instruction *insn, enum loongarch_gpr rd, + enum loongarch_gpr rj, enum loongarch_gpr rk) +{ + insn->reg3_format.opcode = xor_op; + insn->reg3_format.rd = rd; + insn->reg3_format.rj = rj; + insn->reg3_format.rk = rk; +} + +static inline void emit_xori(union loongarch_instruction *insn, + enum loongarch_gpr rd, enum loongarch_gpr rj, u32 imm) +{ + insn->reg2i12_format.opcode = xori_op; + insn->reg2i12_format.simmediate = imm; + insn->reg2i12_format.rd = rd; + insn->reg2i12_format.rj = rj; +} + +static inline void emit_lu12iw(union loongarch_instruction *insn, + enum loongarch_gpr rd, int imm) +{ + insn->reg1i20_format.opcode = lu12iw_op; + insn->reg1i20_format.simmediate = imm; + insn->reg1i20_format.rd = rd; +} + +static inline void emit_lu32id(union loongarch_instruction *insn, + enum loongarch_gpr rd, int imm) +{ + insn->reg1i20_format.opcode = lu32id_op; + insn->reg1i20_format.simmediate = imm; + insn->reg1i20_format.rd = rd; +} + +static inline void emit_lu52id(union loongarch_instruction *insn, + enum loongarch_gpr rd, enum loongarch_gpr rj, int imm) +{ + insn->reg2i12_format.opcode = lu52id_op; + insn->reg2i12_format.simmediate = imm; + insn->reg2i12_format.rd = rd; + insn->reg2i12_format.rj = rj; +} + +static inline void emit_sllw(union loongarch_instruction *insn, enum loongarch_gpr rd, + enum loongarch_gpr rj, enum loongarch_gpr rk) +{ + insn->reg3_format.opcode = sllw_op; + insn->reg3_format.rd = rd; + insn->reg3_format.rj = rj; + insn->reg3_format.rk = rk; +} + +static inline void emit_slliw(union loongarch_instruction *insn, + enum loongarch_gpr rd, enum loongarch_gpr rj, u32 imm) +{ + insn->reg2ui5_format.opcode = slliw_op; + insn->reg2ui5_format.simmediate = imm; + insn->reg2ui5_format.rd = rd; + insn->reg2ui5_format.rj = rj; +} + +static inline void emit_slld(union loongarch_instruction *insn, enum loongarch_gpr rd, + enum loongarch_gpr rj, enum loongarch_gpr rk) +{ + insn->reg3_format.opcode = slld_op; + insn->reg3_format.rd = rd; + insn->reg3_format.rj = rj; + insn->reg3_format.rk = rk; +} + +static inline void emit_sllid(union loongarch_instruction *insn, + enum loongarch_gpr rd, enum loongarch_gpr rj, u32 imm) +{ + insn->reg2ui6_format.opcode = sllid_op; + insn->reg2ui6_format.simmediate = imm; + insn->reg2ui6_format.rd = rd; + insn->reg2ui6_format.rj = rj; +} + +static inline void emit_srlw(union loongarch_instruction *insn, enum loongarch_gpr rd, + enum loongarch_gpr rj, enum loongarch_gpr rk) +{ + insn->reg3_format.opcode = srlw_op; + insn->reg3_format.rd = rd; + insn->reg3_format.rj = rj; + insn->reg3_format.rk = rk; +} + +static inline void emit_srliw(union loongarch_instruction *insn, + enum loongarch_gpr rd, enum loongarch_gpr rj, u32 imm) +{ + insn->reg2ui5_format.opcode = srliw_op; + insn->reg2ui5_format.simmediate = imm; + insn->reg2ui5_format.rd = rd; + insn->reg2ui5_format.rj = rj; +} + +static inline void emit_srld(union loongarch_instruction *insn, enum loongarch_gpr rd, + enum loongarch_gpr rj, enum loongarch_gpr rk) +{ + insn->reg3_format.opcode = srld_op; + insn->reg3_format.rd = rd; + insn->reg3_format.rj = rj; + insn->reg3_format.rk = rk; +} + +static inline void emit_srlid(union loongarch_instruction *insn, + enum loongarch_gpr rd, enum loongarch_gpr rj, u32 imm) +{ + insn->reg2ui6_format.opcode = srlid_op; + insn->reg2ui6_format.simmediate = imm; + insn->reg2ui6_format.rd = rd; + insn->reg2ui6_format.rj = rj; +} + +static inline void emit_sraw(union loongarch_instruction *insn, enum loongarch_gpr rd, + enum loongarch_gpr rj, enum loongarch_gpr rk) +{ + insn->reg3_format.opcode = sraw_op; + insn->reg3_format.rd = rd; + insn->reg3_format.rj = rj; + insn->reg3_format.rk = rk; +} + +static inline void emit_sraiw(union loongarch_instruction *insn, + enum loongarch_gpr rd, enum loongarch_gpr rj, u32 imm) +{ + insn->reg2ui5_format.opcode = sraid_op; + insn->reg2ui5_format.simmediate = imm; + insn->reg2ui5_format.rd = rd; + insn->reg2ui5_format.rj = rj; +} + +static inline void emit_srad(union loongarch_instruction *insn, enum loongarch_gpr rd, + enum loongarch_gpr rj, enum loongarch_gpr rk) +{ + insn->reg3_format.opcode = srad_op; + insn->reg3_format.rd = rd; + insn->reg3_format.rj = rj; + insn->reg3_format.rk = rk; +} + +static inline void emit_sraid(union loongarch_instruction *insn, + enum loongarch_gpr rd, enum loongarch_gpr rj, u32 imm) +{ + insn->reg2ui6_format.opcode = sraid_op; + insn->reg2ui6_format.simmediate = imm; + insn->reg2ui6_format.rd = rd; + insn->reg2ui6_format.rj = rj; +} + +static inline void emit_beq(union loongarch_instruction *insn, + enum loongarch_gpr rj, enum loongarch_gpr rd, int offset) +{ + insn->reg2i16_format.opcode = beq_op; + insn->reg2i16_format.simmediate = offset; + insn->reg2i16_format.rj = rj; + insn->reg2i16_format.rd = rd; +} + +static inline void emit_bne(union loongarch_instruction *insn, + enum loongarch_gpr rj, enum loongarch_gpr rd, int offset) +{ + insn->reg2i16_format.opcode = bne_op; + insn->reg2i16_format.simmediate = offset; + insn->reg2i16_format.rj = rj; + insn->reg2i16_format.rd = rd; +} + +static inline void emit_blt(union loongarch_instruction *insn, + enum loongarch_gpr rj, enum loongarch_gpr rd, int offset) +{ + insn->reg2i16_format.opcode = blt_op; + insn->reg2i16_format.simmediate = offset; + insn->reg2i16_format.rj = rj; + insn->reg2i16_format.rd = rd; +} + +static inline void emit_bge(union loongarch_instruction *insn, + enum loongarch_gpr rj, enum loongarch_gpr rd, int offset) +{ + insn->reg2i16_format.opcode = bge_op; + insn->reg2i16_format.simmediate = offset; + insn->reg2i16_format.rj = rj; + insn->reg2i16_format.rd = rd; +} + +static inline void emit_bltu(union loongarch_instruction *insn, + enum loongarch_gpr rj, enum loongarch_gpr rd, int offset) +{ + insn->reg2i16_format.opcode = bltu_op; + insn->reg2i16_format.simmediate = offset; + insn->reg2i16_format.rj = rj; + insn->reg2i16_format.rd = rd; +} + +static inline void emit_bgeu(union loongarch_instruction *insn, + enum loongarch_gpr rj, enum loongarch_gpr rd, int offset) +{ + insn->reg2i16_format.opcode = bgeu_op; + insn->reg2i16_format.simmediate = offset; + insn->reg2i16_format.rj = rj; + insn->reg2i16_format.rd = rd; +} + +static inline void emit_b(union loongarch_instruction *insn, int offset) +{ + unsigned int simmediate_l, simmediate_h; + + simmediate_l = offset & 0xffff; + offset >>= 16; + simmediate_h = offset & 0x3ff; + + insn->reg0i26_format.opcode = b_op; + insn->reg0i26_format.simmediate_l = simmediate_l; + insn->reg0i26_format.simmediate_h = simmediate_h; +} + +static inline void emit_jirl(union loongarch_instruction *insn, + enum loongarch_gpr rd, enum loongarch_gpr rj, int offset) +{ + insn->reg2i16_format.opcode = jirl_op; + insn->reg2i16_format.simmediate = offset; + insn->reg2i16_format.rd = rd; + insn->reg2i16_format.rj = rj; +} + +static inline void emit_pcaddu18i(union loongarch_instruction *insn, + enum loongarch_gpr rd, int imm) +{ + insn->reg1i20_format.opcode = pcaddu18i_op; + insn->reg1i20_format.simmediate = imm; + insn->reg1i20_format.rd = rd; +} + +static inline void emit_revb2h(union loongarch_instruction *insn, + enum loongarch_gpr rd, enum loongarch_gpr rj) +{ + insn->reg2_format.opcode = revb2h_op; + insn->reg2_format.rd = rd; + insn->reg2_format.rj = rj; +} + +static inline void emit_revb2w(union loongarch_instruction *insn, + enum loongarch_gpr rd, enum loongarch_gpr rj) +{ + insn->reg2_format.opcode = revb2w_op; + insn->reg2_format.rd = rd; + insn->reg2_format.rj = rj; +} + +static inline void emit_revbd(union loongarch_instruction *insn, + enum loongarch_gpr rd, enum loongarch_gpr rj) +{ + insn->reg2_format.opcode = revbd_op; + insn->reg2_format.rd = rd; + insn->reg2_format.rj = rj; +} + +/* Zero-extend 32 bits into 64 bits */ +static inline void emit_zext_32(struct jit_ctx *ctx, enum loongarch_gpr reg, bool is32) +{ + if (!is32) + return; + + /* Clear the upper 32 bits */ + emit_insn(ctx, lu32id, reg, 0); +} + +/* Signed-extend 32 bits into 64 bits */ +static inline void emit_sext_32(struct jit_ctx *ctx, enum loongarch_gpr reg) +{ + emit_insn(ctx, addiw, reg, reg, 0); +} + +static inline void move_imm32(struct jit_ctx *ctx, enum loongarch_gpr rd, + int imm32, bool is32) +{ + int si20; + u32 ui12; + + /* or rd, $zero, $zero */ + if (imm32 == 0) { + emit_insn(ctx, or, rd, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_ZERO); + return; + } + + /* addiw rd, $zero, imm_11_0(signed) */ + if (is_signed_imm12(imm32)) { + emit_insn(ctx, addiw, rd, LOONGARCH_GPR_ZERO, imm32); + goto zext; + } + + /* ori rd, $zero, imm_11_0(unsigned) */ + if (is_unsigned_imm12(imm32)) { + emit_insn(ctx, ori, rd, LOONGARCH_GPR_ZERO, imm32); + goto zext; + } + + /* lu12iw rd, imm_31_12(signed) */ + si20 = (imm32 >> 12) & 0xfffff; + emit_insn(ctx, lu12iw, rd, si20); + + /* ori rd, rd, imm_11_0(unsigned) */ + ui12 = imm32 & 0xfff; + if (ui12 != 0) + emit_insn(ctx, ori, rd, rd, ui12); + +zext: + emit_zext_32(ctx, rd, is32); +} + +static inline void move_imm64(struct jit_ctx *ctx, enum loongarch_gpr rd, + long imm64, bool is32) +{ + int imm32, si20, si12; + long imm52; + + si12 = (imm64 >> 52) & 0xfff; + imm52 = imm64 & 0xfffffffffffff; + /* lu52id rd, $zero, imm_63_52(signed) */ + if (si12 != 0 && imm52 == 0) { + emit_insn(ctx, lu52id, rd, LOONGARCH_GPR_ZERO, si12); + return; + } + + imm32 = imm64 & 0xffffffff; + move_imm32(ctx, rd, imm32, is32); + + if (!is_signed_imm32(imm64)) { + if (imm52 != 0) { + /* lu32id rd, imm_51_32(signed) */ + si20 = (imm64 >> 32) & 0xfffff; + emit_insn(ctx, lu32id, rd, si20); + } + + /* lu52id rd, rd, imm_63_52(signed) */ + if (!is_signed_imm52(imm64)) + emit_insn(ctx, lu52id, rd, rd, si12); + } +} + +static inline void move_reg(struct jit_ctx *ctx, enum loongarch_gpr rd, + enum loongarch_gpr rj) +{ + emit_insn(ctx, or, rd, rj, LOONGARCH_GPR_ZERO); +} + +static inline int invert_jump_cond(u8 cond) +{ + switch (cond) { + case BPF_JEQ: + return BPF_JNE; + case BPF_JNE: + case BPF_JSET: + return BPF_JEQ; + case BPF_JGT: + return BPF_JLE; + case BPF_JGE: + return BPF_JLT; + case BPF_JLT: + return BPF_JGE; + case BPF_JLE: + return BPF_JGT; + case BPF_JSGT: + return BPF_JSLE; + case BPF_JSGE: + return BPF_JSLT; + case BPF_JSLT: + return BPF_JSGE; + case BPF_JSLE: + return BPF_JSGT; + } + return -1; +} + +static inline void cond_jump_offs16(struct jit_ctx *ctx, u8 cond, enum loongarch_gpr rj, + enum loongarch_gpr rd, int jmp_offset) +{ + switch (cond) { + case BPF_JEQ: + /* PC += jmp_offset if rj == rd */ + emit_insn(ctx, beq, rj, rd, jmp_offset); + return; + case BPF_JNE: + case BPF_JSET: + /* PC += jmp_offset if rj != rd */ + emit_insn(ctx, bne, rj, rd, jmp_offset); + return; + case BPF_JGT: + /* PC += jmp_offset if rj > rd (unsigned) */ + emit_insn(ctx, bltu, rd, rj, jmp_offset); + return; + case BPF_JLT: + /* PC += jmp_offset if rj < rd (unsigned) */ + emit_insn(ctx, bltu, rj, rd, jmp_offset); + return; + case BPF_JGE: + /* PC += jmp_offset if rj >= rd (unsigned) */ + emit_insn(ctx, bgeu, rj, rd, jmp_offset); + return; + case BPF_JLE: + /* PC += jmp_offset if rj <= rd (unsigned) */ + emit_insn(ctx, bgeu, rd, rj, jmp_offset); + return; + case BPF_JSGT: + /* PC += jmp_offset if rj > rd (signed) */ + emit_insn(ctx, blt, rd, rj, jmp_offset); + return; + case BPF_JSLT: + /* PC += jmp_offset if rj < rd (signed) */ + emit_insn(ctx, blt, rj, rd, jmp_offset); + return; + case BPF_JSGE: + /* PC += jmp_offset if rj >= rd (signed) */ + emit_insn(ctx, bge, rj, rd, jmp_offset); + return; + case BPF_JSLE: + /* PC += jmp_offset if rj <= rd (signed) */ + emit_insn(ctx, bge, rd, rj, jmp_offset); + return; + } +} + +static inline void cond_jump_offs26(struct jit_ctx *ctx, u8 cond, enum loongarch_gpr rj, + enum loongarch_gpr rd, int jmp_offset) +{ + cond = invert_jump_cond(cond); + cond_jump_offs16(ctx, cond, rj, rd, 2); + emit_insn(ctx, b, jmp_offset); +} + +static inline void cond_jump_offs32(struct jit_ctx *ctx, u8 cond, enum loongarch_gpr rj, + enum loongarch_gpr rd, int jmp_offset) +{ + s64 upper, lower; + + upper = (jmp_offset + (1 << 15)) >> 16; + lower = jmp_offset & 0xffff; + + cond = invert_jump_cond(cond); + cond_jump_offs16(ctx, cond, rj, rd, 3); + + /* + * jmp_addr = jmp_offset << 2 + * tmp2 = PC + jmp_addr[31, 18] + 18'b0 + */ + emit_insn(ctx, pcaddu18i, LOONGARCH_GPR_T2, upper << 2); + + /* jump to (tmp2 + jmp_addr[17, 2] + 2'b0) */ + emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_T2, lower + 1); +} + +static inline void uncond_jump_offs26(struct jit_ctx *ctx, int jmp_offset) +{ + emit_insn(ctx, b, jmp_offset); +} + +static inline void uncond_jump_offs32(struct jit_ctx *ctx, int jmp_offset, bool is_exit) +{ + s64 upper, lower; + + upper = (jmp_offset + (1 << 15)) >> 16; + lower = jmp_offset & 0xffff; + + if (is_exit) + lower -= 1; + + /* + * jmp_addr = jmp_offset << 2; + * tmp1 = PC + jmp_addr[31, 18] + 18'b0 + */ + emit_insn(ctx, pcaddu18i, LOONGARCH_GPR_T1, upper << 2); + + /* jump to (tmp1 + jmp_addr[17, 2] + 2'b0) */ + emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_T1, lower + 1); +} + +static inline void emit_cond_jump(struct jit_ctx *ctx, u8 cond, enum loongarch_gpr rj, + enum loongarch_gpr rd, int jmp_offset) +{ + if (is_signed_imm16(jmp_offset)) + cond_jump_offs16(ctx, cond, rj, rd, jmp_offset); + else if (is_signed_imm26(jmp_offset)) + cond_jump_offs26(ctx, cond, rj, rd, jmp_offset); + else + cond_jump_offs32(ctx, cond, rj, rd, jmp_offset); +} + +static inline void emit_uncond_jump(struct jit_ctx *ctx, int jmp_offset, bool is_exit) +{ + if (is_signed_imm26(jmp_offset)) + uncond_jump_offs26(ctx, jmp_offset); + else + uncond_jump_offs32(ctx, jmp_offset, is_exit); +} + +static inline void emit_tailcall_jump(struct jit_ctx *ctx, u8 cond, enum loongarch_gpr rj, + enum loongarch_gpr rd, int jmp_offset) +{ + if (is_signed_imm16(jmp_offset)) + cond_jump_offs16(ctx, cond, rj, rd, jmp_offset); + else if (is_signed_imm26(jmp_offset)) + cond_jump_offs26(ctx, cond, rj, rd, jmp_offset - 1); + else + cond_jump_offs32(ctx, cond, rj, rd, jmp_offset - 2); +} diff --git a/tools/arch/loongarch/include/uapi/asm/bitsperlong.h b/tools/arch/loongarch/include/uapi/asm/bitsperlong.h new file mode 100644 index 000000000000..d4e32b3d4843 --- /dev/null +++ b/tools/arch/loongarch/include/uapi/asm/bitsperlong.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __ASM_LOONGARCH_BITSPERLONG_H +#define __ASM_LOONGARCH_BITSPERLONG_H + +#define __BITS_PER_LONG (__SIZEOF_POINTER__ * 8) + +#include + +#endif /* __ASM_LOONGARCH_BITSPERLONG_H */ diff --git a/tools/include/uapi/asm/bitsperlong.h b/tools/include/uapi/asm/bitsperlong.h index e8fc1fdfde2a..e21d0185d89c 100644 --- a/tools/include/uapi/asm/bitsperlong.h +++ b/tools/include/uapi/asm/bitsperlong.h @@ -19,6 +19,8 @@ #include "../../../arch/alpha/include/uapi/asm/bitsperlong.h" #elif defined(__sw_64__) #include "../../../arch/sw_64/include/uapi/asm/bitsperlong.h" +#elif defined(__loongarch__) +#include "../../../arch/loongarch/include/uapi/asm/bitsperlong.h" #else #include #endif diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index dc21dc49b426..4644c25c83a8 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -125,6 +125,8 @@ struct seccomp_data { # define __NR_seccomp 277 # elif defined(__csky__) # define __NR_seccomp 277 +# elif defined(__loongarch__) +# define __NR_seccomp 277 # elif defined(__hppa__) # define __NR_seccomp 338 # elif defined(__powerpc__) @@ -1730,6 +1732,10 @@ TEST_F(TRACE_poke, getpid_runs_normally) NT_ARM_SYSTEM_CALL, &__v)); \ } while (0) # define SYSCALL_RET(_regs) (_regs).regs[0] +#elif defined(__loongarch__) +# define ARCH_REGS struct user_pt_regs +# define SYSCALL_NUM(_regs) (_regs).regs[11] +# define SYSCALL_RET(_regs) (_regs).regs[4] #elif defined(__riscv) && __riscv_xlen == 64 # define ARCH_REGS struct user_regs_struct # define SYSCALL_NUM(_regs) (_regs).a7 -- Gitee From 165e9c03259ac899ccd24c1796007ba3ebf5a6a9 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 3 Jun 2021 14:52:25 +0800 Subject: [PATCH 42/59] LoongArch: Add STACKPROTECTOR and STACKVALIDATOR support Change-Id: Ief64bb0e21848587953a7e27809ab38c00e52922 Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 2 ++ arch/loongarch/boot/compressed/decompress.c | 7 ++++ arch/loongarch/include/asm/stackprotector.h | 39 +++++++++++++++++++++ arch/loongarch/kernel/asm-offsets.c | 3 ++ arch/loongarch/kernel/process.c | 6 ++++ arch/loongarch/kernel/switch.S | 5 +++ include/linux/compiler.h | 15 ++++++++ 7 files changed, 77 insertions(+) create mode 100644 arch/loongarch/include/asm/stackprotector.h diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index fbb20a6e8eae..87b7247e94d7 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -93,6 +93,8 @@ config LOONGARCH select HAVE_PERF_USER_STACK_DUMP select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RSEQ + select HAVE_STACKPROTECTOR + select HAVE_STACK_VALIDATION select HAVE_SYSCALL_TRACEPOINTS select HAVE_TIF_NOHZ select HAVE_VIRT_CPU_ACCOUNTING_GEN if 64BIT || !SMP diff --git a/arch/loongarch/boot/compressed/decompress.c b/arch/loongarch/boot/compressed/decompress.c index e3076445d2f4..2dce80144374 100644 --- a/arch/loongarch/boot/compressed/decompress.c +++ b/arch/loongarch/boot/compressed/decompress.c @@ -71,6 +71,13 @@ void error(char *x) #include "../../../../lib/decompress_unzstd.c" #endif +const unsigned long __stack_chk_guard = 0x000a0dff; + +void __stack_chk_fail(void) +{ + error("stack-protector: Kernel stack is corrupted\n"); +} + void decompress_kernel(unsigned long boot_heap_start) { unsigned long zimage_start, zimage_size; diff --git a/arch/loongarch/include/asm/stackprotector.h b/arch/loongarch/include/asm/stackprotector.h new file mode 100644 index 000000000000..c51813a181fb --- /dev/null +++ b/arch/loongarch/include/asm/stackprotector.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * GCC stack protector support. + * + * Stack protector works by putting predefined pattern at the start of + * the stack frame and verifying that it hasn't been overwritten when + * returning from the function. The pattern is called stack canary + * and gcc expects it to be defined by a global variable called + * "__stack_chk_guard" on LoongArch. This unfortunately means that on SMP + * we cannot have a different canary value per task. + */ + +#ifndef _ASM_STACKPROTECTOR_H +#define _ASM_STACKPROTECTOR_H 1 + +#include +#include + +extern unsigned long __stack_chk_guard; + +/* + * Initialize the stackprotector canary value. + * + * NOTE: this must only be called from functions that never return, + * and it must always be inlined. + */ +static __always_inline void boot_init_stack_canary(void) +{ + unsigned long canary; + + /* Try to get a semi random initial value. */ + get_random_bytes(&canary, sizeof(canary)); + canary ^= LINUX_VERSION_CODE; + + current->stack_canary = canary; + __stack_chk_guard = current->stack_canary; +} + +#endif /* _ASM_STACKPROTECTOR_H */ diff --git a/arch/loongarch/kernel/asm-offsets.c b/arch/loongarch/kernel/asm-offsets.c index 95f353c67089..85e29a96a7b4 100644 --- a/arch/loongarch/kernel/asm-offsets.c +++ b/arch/loongarch/kernel/asm-offsets.c @@ -69,6 +69,9 @@ void output_task_defines(void) OFFSET(TASK_FLAGS, task_struct, flags); OFFSET(TASK_MM, task_struct, mm); OFFSET(TASK_PID, task_struct, pid); +#if defined(CONFIG_STACKPROTECTOR) + OFFSET(TASK_STACK_CANARY, task_struct, stack_canary); +#endif DEFINE(TASK_STRUCT_SIZE, sizeof(struct task_struct)); BLANK(); } diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c index 61a37573019c..566909d8d6f3 100644 --- a/arch/loongarch/kernel/process.c +++ b/arch/loongarch/kernel/process.c @@ -181,6 +181,12 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, return 0; } +#ifdef CONFIG_STACKPROTECTOR +#include +unsigned long __stack_chk_guard __read_mostly; +EXPORT_SYMBOL(__stack_chk_guard); +#endif + bool in_task_stack(unsigned long stack, struct task_struct *task, struct stack_info *info) { diff --git a/arch/loongarch/kernel/switch.S b/arch/loongarch/kernel/switch.S index df1c46b6cbe3..db5b3959c664 100644 --- a/arch/loongarch/kernel/switch.S +++ b/arch/loongarch/kernel/switch.S @@ -29,6 +29,11 @@ SYM_FUNC_START(__switch_to) stptr.d ra, a0, THREAD_REG01 stptr.d a3, a0, THREAD_SCHED_RA stptr.d a4, a0, THREAD_SCHED_CFA +#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_SMP) + la t7, __stack_chk_guard + LONG_L t8, a1, TASK_STACK_CANARY + LONG_S t8, t7, 0 +#endif move tp, a2 cpu_restore_nonscratch a1 diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 475d0a3ce059..5bfca2f2112c 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -113,6 +113,20 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, * The __COUNTER__ based labels are a hack to make each instance of the macros * unique, to convince GCC not to merge duplicate inline asm statements. */ +#ifdef __loongarch__ +#define annotate_reachable() ({ \ + asm volatile("%0:\n\t" \ + ".pushsection .discard.reachable\n\t" \ + ".long %0b - .\n\t" \ + ".popsection\n\t" : : "i" (__COUNTER__)); \ +}) +#define annotate_unreachable() ({ \ + asm volatile("%0:\n\t" \ + ".pushsection .discard.unreachable\n\t" \ + ".long %0b - .\n\t" \ + ".popsection\n\t" : : "i" (__COUNTER__)); \ +}) +#else #define annotate_reachable() ({ \ asm volatile("%c0:\n\t" \ ".pushsection .discard.reachable\n\t" \ @@ -125,6 +139,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, ".long %c0b - .\n\t" \ ".popsection\n\t" : : "i" (__COUNTER__)); \ }) +#endif #define ASM_UNREACHABLE \ "999:\n\t" \ ".pushsection .discard.unreachable\n\t" \ -- Gitee From e69c0a9893a6dcd6bb71c0551f51be1eb54fc31e Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sun, 13 Jun 2021 18:13:09 +0800 Subject: [PATCH 43/59] LoongArch: Add orc stack unwinder support Change-Id: I0f2d36d09fab402393a9b4b51ddd4dac8fac3b2c Signed-off-by: Jinyang He Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 1 + arch/loongarch/Kconfig.debug | 15 + arch/loongarch/boot/Makefile | 2 + arch/loongarch/boot/compressed/Makefile | 2 + arch/loongarch/configs/loongson3_defconfig | 1 + arch/loongarch/include/asm/module.h | 6 + arch/loongarch/include/asm/orc_lookup.h | 35 ++ arch/loongarch/include/asm/orc_types.h | 79 ++++ arch/loongarch/include/asm/stackframe.h | 3 + arch/loongarch/include/asm/unwind.h | 16 +- arch/loongarch/include/asm/unwind_hints.h | 94 ++++ arch/loongarch/kernel/Makefile | 4 + arch/loongarch/kernel/entry.S | 5 + arch/loongarch/kernel/genex.S | 4 + arch/loongarch/kernel/module.c | 28 +- arch/loongarch/kernel/process.c | 9 +- arch/loongarch/kernel/setup.c | 3 + arch/loongarch/kernel/stacktrace.c | 70 +++ arch/loongarch/kernel/traps.c | 39 +- arch/loongarch/kernel/unwind_orc.c | 520 +++++++++++++++++++++ arch/loongarch/kernel/vmlinux.lds.S | 3 + arch/loongarch/mm/tlb.c | 14 +- arch/loongarch/mm/tlbex.S | 2 + arch/loongarch/power/Makefile | 2 + arch/loongarch/vdso/Makefile | 1 + 25 files changed, 930 insertions(+), 28 deletions(-) create mode 100644 arch/loongarch/include/asm/orc_lookup.h create mode 100644 arch/loongarch/include/asm/orc_types.h create mode 100644 arch/loongarch/include/asm/unwind_hints.h create mode 100644 arch/loongarch/kernel/unwind_orc.c diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 87b7247e94d7..db5a20e3a74b 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -92,6 +92,7 @@ config LOONGARCH select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_RELIABLE_STACKTRACE select HAVE_RSEQ select HAVE_STACKPROTECTOR select HAVE_STACK_VALIDATION diff --git a/arch/loongarch/Kconfig.debug b/arch/loongarch/Kconfig.debug index 3a0e6385eb32..51639222024c 100644 --- a/arch/loongarch/Kconfig.debug +++ b/arch/loongarch/Kconfig.debug @@ -71,6 +71,21 @@ config UNWINDER_PROLOGUE information is needed, at least the address and length of each function. Some of the addresses it reports may be incorrect. +config UNWINDER_ORC + bool "ORC unwinder" + select STACK_VALIDATION + help + This option enables the ORC (Oops Rewind Capability) unwinder for + unwinding kernel stack traces. It uses a custom data format which is + a simplified version of the DWARF Call Frame Information standard. + + This unwinder is more accurate across interrupt entry frames than the + frame pointer unwinder. It also enables a 5-10% performance + improvement across the entire kernel compared to frame pointers. + + Enabling this option will increase the kernel's runtime memory usage + by roughly 3-5MB, depending on your kernel config. + endchoice config DEBUG_ZBOOT diff --git a/arch/loongarch/boot/Makefile b/arch/loongarch/boot/Makefile index b09257a75756..91ef912e2076 100644 --- a/arch/loongarch/boot/Makefile +++ b/arch/loongarch/boot/Makefile @@ -7,6 +7,8 @@ # Drop some uninteresting sections in the kernel. # This is only relevant for ELF kernels but doesn't hurt a.out # +OBJECT_FILES_NON_STANDARD := y + drop-sections := .comment .note .options strip-flags := $(addprefix --remove-section=,$(drop-sections)) diff --git a/arch/loongarch/boot/compressed/Makefile b/arch/loongarch/boot/compressed/Makefile index 76c5b19cb0e0..e64de8dbbac4 100644 --- a/arch/loongarch/boot/compressed/Makefile +++ b/arch/loongarch/boot/compressed/Makefile @@ -3,6 +3,8 @@ # Author: Huacai Chen # Copyright (C) 2020 Loongson Technology Corporation Limited +OBJECT_FILES_NON_STANDARD := y + include $(srctree)/arch/loongarch/Kbuild.platforms # set the default size of the mallocing area for decompressing diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig index e925e30f74d1..b9ccbce3419c 100644 --- a/arch/loongarch/configs/loongson3_defconfig +++ b/arch/loongarch/configs/loongson3_defconfig @@ -781,3 +781,4 @@ CONFIG_SCHEDSTATS=y # CONFIG_FTRACE is not set CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="e1000e.InterruptThrottleRate=4,4,4,4" +CONFIG_UNWINDER_ORC=y diff --git a/arch/loongarch/include/asm/module.h b/arch/loongarch/include/asm/module.h index 6e6df997ed2b..b2d69083aad6 100644 --- a/arch/loongarch/include/asm/module.h +++ b/arch/loongarch/include/asm/module.h @@ -6,10 +6,16 @@ #define _ASM_MODULE_H #include +#include #define RELA_STACK_DEPTH 16 struct mod_arch_specific { +#ifdef CONFIG_UNWINDER_ORC + unsigned int num_orcs; + int *orc_unwind_ip; + struct orc_entry *orc_unwind; +#endif }; #endif /* _ASM_MODULE_H */ diff --git a/arch/loongarch/include/asm/orc_lookup.h b/arch/loongarch/include/asm/orc_lookup.h new file mode 100644 index 000000000000..f41f1e2f9b67 --- /dev/null +++ b/arch/loongarch/include/asm/orc_lookup.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2017 Josh Poimboeuf + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#ifndef _ASM_ORC_LOOKUP_H +#define _ASM_ORC_LOOKUP_H + +/* + * This is a lookup table for speeding up access to the .orc_unwind table. + * Given an input address offset, the corresponding lookup table entry + * specifies a subset of the .orc_unwind table to search. + * + * Each block represents the end of the previous range and the start of the + * next range. An extra block is added to give the last range an end. + * + * The block size should be a power of 2 to avoid a costly 'div' instruction. + * + * A block size of 256 was chosen because it roughly doubles unwinder + * performance while only adding ~5% to the ORC data footprint. + */ +#define LOOKUP_BLOCK_ORDER 8 +#define LOOKUP_BLOCK_SIZE (1 << LOOKUP_BLOCK_ORDER) + +#ifndef LINKER_SCRIPT + +extern unsigned int orc_lookup[]; +extern unsigned int orc_lookup_end[]; + +#define LOOKUP_START_IP ((unsigned long)_stext) +#define LOOKUP_STOP_IP ((unsigned long)_etext) + +#endif /* LINKER_SCRIPT */ + +#endif /* _ASM_ORC_LOOKUP_H */ diff --git a/arch/loongarch/include/asm/orc_types.h b/arch/loongarch/include/asm/orc_types.h new file mode 100644 index 000000000000..be3a095e69e3 --- /dev/null +++ b/arch/loongarch/include/asm/orc_types.h @@ -0,0 +1,79 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2017 Josh Poimboeuf + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ + +#ifndef _ASM_ORC_TYPES_H +#define _ASM_ORC_TYPES_H + +#include +#include + +/* + * The ORC_REG_* registers are base registers which are used to find other + * registers on the stack. + * + * ORC_REG_PREV_SP, also known as DWARF Call Frame Address (CFA), is the + * address of the previous frame: the caller's SP before it called the current + * function. + * + * ORC_REG_UNDEFINED means the corresponding register's value didn't change in + * the current frame. + * + * The most commonly used base registers are SP and FP -- which the previous SP + * is usually based on -- and PREV_SP and UNDEFINED -- which the previous FP is + * usually based on. + */ +#define ORC_REG_UNDEFINED 0 +#define ORC_REG_PREV_SP 1 +#define ORC_REG_SP 2 +#define ORC_REG_FP 3 +#define ORC_REG_MAX 15 + +/* + * ORC_TYPE_CALL: Indicates that sp_reg+sp_offset resolves to PREV_SP (the + * caller's SP right before it made the call). Used for all callable + * functions, i.e. all C code and all callable asm functions. + * + * ORC_TYPE_REGS: Used in entry code to indicate that sp_reg+sp_offset points + * to a fully populated pt_regs from a syscall, interrupt, or exception. + * + * ORC_TYPE_REGS_IRET: Used in entry code to indicate that sp_reg+sp_offset + * points to the iret return frame. + * + * The UNWIND_HINT macros are used only for the unwind_hint struct. They + * aren't used in struct orc_entry due to size and complexity constraints. + * Objtool converts them to real types when it converts the hints to orc + * entries. + */ +#define ORC_TYPE_CALL 0 +#define ORC_TYPE_REGS 1 +#define ORC_TYPE_REGS_IRET 2 +#define UNWIND_HINT_TYPE_SAVE 3 +#define UNWIND_HINT_TYPE_RESTORE 4 + +#ifndef __ASSEMBLY__ +/* + * This struct is more or less a vastly simplified version of the DWARF Call + * Frame Information standard. It contains only the necessary parts of DWARF + * CFI, simplified for ease of access by the in-kernel unwinder. It tells the + * unwinder how to find the previous SP and FP (and sometimes entry regs) on + * the stack for a given code address. Each instance of the struct corresponds + * to one or more code locations. + */ +struct orc_entry { + signed short sp_offset; + signed short fp_offset; + signed short ra_offset; + unsigned int sp_reg:4; + unsigned int fp_reg:4; + unsigned int ra_reg:4; + unsigned int type:2; + unsigned int end:1; + unsigned int unused:1; +}; + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_ORC_TYPES_H */ diff --git a/arch/loongarch/include/asm/stackframe.h b/arch/loongarch/include/asm/stackframe.h index 4f78e2dca625..d6cd7d92305b 100644 --- a/arch/loongarch/include/asm/stackframe.h +++ b/arch/loongarch/include/asm/stackframe.h @@ -12,6 +12,7 @@ #include #include #include +#include /* Make the addition of cfi info a little easier. */ .macro cfi_rel_offset reg offset=0 docfi=0 @@ -149,6 +150,7 @@ cfi_st u0, PT_R21, \docfi csrrd u0, PERCPU_BASE_KS 9: + UNWIND_HINT_REGS .endm .macro SAVE_ALL docfi=0 @@ -206,6 +208,7 @@ .macro RESTORE_SP_AND_RET docfi=0 cfi_ld sp, PT_R3, \docfi + UNWIND_HINT ertn .endm diff --git a/arch/loongarch/include/asm/unwind.h b/arch/loongarch/include/asm/unwind.h index 71b36ccf786d..4b5794d6a546 100644 --- a/arch/loongarch/include/asm/unwind.h +++ b/arch/loongarch/include/asm/unwind.h @@ -7,6 +7,7 @@ #ifndef _ASM_UNWIND_H #define _ASM_UNWIND_H +#include #include #include @@ -15,10 +16,13 @@ struct unwind_state { struct stack_info stack_info; struct task_struct *task; -#ifdef CONFIG_UNWINDER_PROLOGUE + int graph_idx; +#if defined(CONFIG_UNWINDER_PROLOGUE) unsigned long sp, pc, ra; bool enable; bool first; +#elif defined(CONFIG_UNWINDER_ORC) + unsigned long sp, pc, fp, ra; #else /* CONFIG_UNWINDER_GUESS */ unsigned long sp, pc; bool first; @@ -40,4 +44,14 @@ static inline bool unwind_error(struct unwind_state *state) { return state->error; } + +#ifdef CONFIG_UNWINDER_ORC +void unwind_init(void); +void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, + void *orc, size_t orc_size); +#else +static inline void unwind_init(void) {} +static inline void unwind_module_init(struct module *mod, void *orc_ip, + size_t orc_ip_size, void *orc, size_t orc_size) {} +#endif /* CONFIG_UNWINDER_ORC */ #endif /* _ASM_UNWIND_H */ diff --git a/arch/loongarch/include/asm/unwind_hints.h b/arch/loongarch/include/asm/unwind_hints.h new file mode 100644 index 000000000000..d5b699c2f018 --- /dev/null +++ b/arch/loongarch/include/asm/unwind_hints.h @@ -0,0 +1,94 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Most of this ideas comes from x86. + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_UNWIND_HINTS_H +#define _ASM_UNWIND_HINTS_H + +#include "orc_types.h" + +#ifdef __ASSEMBLY__ + +/* + * In asm, there are two kinds of code: normal C-type callable functions and + * the rest. The normal callable functions can be called by other code, and + * don't do anything unusual with the stack. Such normal callable functions + * are annotated with the ENTRY/ENDPROC macros. Most asm code falls in this + * category. In this case, no special debugging annotations are needed because + * objtool can automatically generate the ORC data for the ORC unwinder to read + * at runtime. + * + * Anything which doesn't fall into the above category, such as syscall and + * interrupt handlers, tends to not be called directly by other functions, and + * often does unusual non-C-function-type things with the stack pointer. Such + * code needs to be annotated such that objtool can understand it. The + * following CFI hint macros are for this type of code. + * + * These macros provide hints to objtool about the state of the stack at each + * instruction. Objtool starts from the hints and follows the code flow, + * making automatic CFI adjustments when it sees pushes and pops, filling out + * the debuginfo as necessary. It will also warn if it sees any + * inconsistencies. + */ +.macro UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=0 type=ORC_TYPE_CALL end=0 +#ifdef CONFIG_STACK_VALIDATION +.Lunwind_hint_ip_\@: + .pushsection .discard.unwind_hints + /* struct unwind_hint */ + .long .Lunwind_hint_ip_\@ - . + .short \sp_offset + .byte \sp_reg + .byte \type + .byte \end + .balign 4 + .popsection +#endif +.endm + +.macro UNWIND_HINT_EMPTY + UNWIND_HINT sp_reg=ORC_REG_UNDEFINED end=1 +.endm + +.macro UNWIND_HINT_REGS base=ORC_REG_SP offset=0 + UNWIND_HINT sp_reg=\base sp_offset=\offset type=ORC_TYPE_REGS +.endm + +.macro UNWIND_HINT_FUNC offset=0 + UNWIND_HINT sp_offset=\offset +.endm + +.macro NOT_SIBLING_CALL_HINT +876: .pushsection .discard.not_sibling_call + .long 876b - . + .popsection +.endm + +#else /* !__ASSEMBLY__ */ + +#define UNWIND_HINT(sp_reg, sp_offset, type, end) \ + "987: \n\t" \ + ".pushsection .discard.unwind_hints\n\t" \ + /* struct unwind_hint */ \ + ".long 987b - .\n\t" \ + ".short " __stringify(sp_offset) "\n\t" \ + ".byte " __stringify(sp_reg) "\n\t" \ + ".byte " __stringify(type) "\n\t" \ + ".byte " __stringify(end) "\n\t" \ + ".balign 4 \n\t" \ + ".popsection\n\t" + +#define UNWIND_HINT_SAVE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_SAVE, 0) + +#define UNWIND_HINT_RESTORE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_RESTORE, 0) + +#define NOT_SIBLING_CALL_HINT \ + "876:\n\t" \ + ".pushsection .discard.not_sibling_call\n\t" \ + ".long 876b - .\n\t" \ + ".popsection\n\t" + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_UNWIND_HINTS_H */ diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index 7b60dc859813..6d36333981c4 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -3,6 +3,9 @@ # Makefile for the Linux/LoongArch kernel. # +OBJECT_FILES_NON_STANDARD_head.o :=y +OBJECT_FILES_NON_STANDARD_relocate_kernel.o :=y + extra-y := head.o vmlinux.lds obj-y += cpu-probe.o elf.o entry.o genex.o idle.o irq.o \ @@ -31,6 +34,7 @@ obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o obj-$(CONFIG_UNWINDER_PROLOGUE) += unwind_prologue.o +obj-$(CONFIG_UNWINDER_ORC) += unwind_orc.o obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_regs.o obj-$(CONFIG_HARDWARE_WATCHPOINTS) += watch.o diff --git a/arch/loongarch/kernel/entry.S b/arch/loongarch/kernel/entry.S index 1803840381d3..145714e35168 100644 --- a/arch/loongarch/kernel/entry.S +++ b/arch/loongarch/kernel/entry.S @@ -14,6 +14,7 @@ #include #include #include +#include .text .cfi_sections .debug_frame @@ -58,6 +59,8 @@ SYM_FUNC_START(handle_syscall) SAVE_STATIC + UNWIND_HINT_REGS + move u0, t0 li.d tp, ~_THREAD_MASK and tp, tp, sp @@ -69,6 +72,7 @@ SYM_FUNC_START(handle_syscall) SYM_FUNC_END(handle_syscall) SYM_CODE_START(ret_from_fork) + UNWIND_HINT_REGS bl schedule_tail # a0 = struct task_struct *prev move a0, sp bl syscall_exit_to_user_mode @@ -78,6 +82,7 @@ SYM_CODE_START(ret_from_fork) SYM_CODE_END(ret_from_fork) SYM_CODE_START(ret_from_kernel_thread) + UNWIND_HINT_REGS bl schedule_tail # a0 = struct task_struct *prev move a0, s1 jirl ra, s0, 0 diff --git a/arch/loongarch/kernel/genex.S b/arch/loongarch/kernel/genex.S index 03a058c48a5f..4ff6ae66f97d 100644 --- a/arch/loongarch/kernel/genex.S +++ b/arch/loongarch/kernel/genex.S @@ -9,6 +9,7 @@ #include #include #include +#include .align 5 SYM_FUNC_START(__arch_cpu_idle) @@ -28,6 +29,7 @@ SYM_FUNC_END(__arch_cpu_idle) SYM_FUNC_START(handle_vint) BACKUP_T0T1 SAVE_ALL + UNWIND_HINT_REGS la.abs t1, __arch_cpu_idle LONG_L t0, sp, PT_ERA /* 32 byte rollback region */ @@ -39,6 +41,7 @@ SYM_FUNC_START(handle_vint) move a1, sp la.abs t0, do_vint jirl ra, t0, 0 + UNWIND_HINT_REGS RESTORE_ALL_AND_RET SYM_FUNC_END(handle_vint) @@ -67,6 +70,7 @@ SYM_FUNC_END(except_vec_cex) move a0, sp la.abs t0, do_\handler jirl ra, t0, 0 + UNWIND_HINT_REGS RESTORE_ALL_AND_RET SYM_FUNC_END(handle_\exception) .endm diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c index 5e702fab4d0f..d8b494c7c3c3 100644 --- a/arch/loongarch/kernel/module.c +++ b/arch/loongarch/kernel/module.c @@ -19,6 +19,8 @@ #include #include +#include + static int rela_stack_push(s64 stack_value, s64 *rela_stack, size_t *rela_stack_top) { if (RELA_STACK_DEPTH <= *rela_stack_top) @@ -663,13 +665,29 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod) { - const Elf_Shdr *s, *se; - const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; + char *secstrings; + const Elf_Shdr *s, *orc = NULL, *orc_ip = NULL, *alt = NULL; + + secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; - for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) { - if (!strcmp(".altinstructions", secstrs + s->sh_name)) - apply_alternatives((void *)s->sh_addr, (void *)s->sh_addr + s->sh_size); + for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { + if (!strcmp(".altinstructions", secstrings + s->sh_name)) + alt = s; + if (!strcmp(".orc_unwind", secstrings + s->sh_name)) + orc = s; + if (!strcmp(".orc_unwind_ip", secstrings + s->sh_name)) + orc_ip = s; } + if (alt) { + /* patch .altinstructions */ + void *aseg = (void *)alt->sh_addr; + apply_alternatives(aseg, aseg + alt->sh_size); + } + + if (orc && orc_ip) + unwind_module_init(mod, (void *)orc_ip->sh_addr, orc_ip->sh_size, + (void *)orc->sh_addr, orc->sh_size); + return 0; } diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c index 566909d8d6f3..217a7c78141f 100644 --- a/arch/loongarch/kernel/process.c +++ b/arch/loongarch/kernel/process.c @@ -270,7 +270,9 @@ unsigned long get_wchan(struct task_struct *task) * Just calculate a simpler wchan value here. * Do nothing here. */ -#else /* CONFIG_UNWINDER_PROLOGUE */ + return 0; +#endif + stack_page_end = stack_page + THREAD_SIZE - 32; frame = thread_saved_fp(task); @@ -284,9 +286,11 @@ unsigned long get_wchan(struct task_struct *task) state.stack_info.begin = stack_page; state.stack_info.end = stack_page_end; state.stack_info.next_sp = 0; + state.pc = pc; +#ifdef CONFIG_UNWINDER_PROLOGUE state.enable = true; state.first = true; - state.pc = pc; +#endif while (in_sched_functions(pc)) { if (!unwind_next_frame(&state)) { @@ -297,7 +301,6 @@ unsigned long get_wchan(struct task_struct *task) } out: -#endif /* CONFIG_UNWINDER_GUESS */ put_task_stack(task); return pc; diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index 1d0c1627e488..f4522706a0be 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -29,6 +29,7 @@ #include #include #include +#include DEFINE_PER_CPU(unsigned long, kernelsp); unsigned long fw_arg0, fw_arg1, fw_arg2, fw_arg3; @@ -625,4 +626,6 @@ void __init setup_arch(char **cmdline_p) prefill_possible_map(); paging_init(); + + unwind_init(); } diff --git a/arch/loongarch/kernel/stacktrace.c b/arch/loongarch/kernel/stacktrace.c index 8a13d1136288..2635d81141e5 100644 --- a/arch/loongarch/kernel/stacktrace.c +++ b/arch/loongarch/kernel/stacktrace.c @@ -104,3 +104,73 @@ void save_stack_trace_tsk(struct task_struct *tsk, save_context_stack(tsk, trace, NULL, consume); } EXPORT_SYMBOL_GPL(save_stack_trace_tsk); + +#ifdef CONFIG_HAVE_RELIABLE_STACKTRACE + +static __always_inline int +__save_stack_trace_reliable(struct stack_trace *trace, + struct task_struct *tsk) +{ + struct unwind_state state; + struct pt_regs dummyregs; + struct pt_regs *regs = &dummyregs; + unsigned long addr; + + if (tsk == current) { + regs->csr_era = (unsigned long)__builtin_return_address(0); + regs->regs[3] = (unsigned long)__builtin_frame_address(0); + } else { + regs->csr_era = thread_saved_ra(tsk); + regs->regs[3] = thread_saved_fp(tsk); + } + + for (unwind_start(&state, tsk, regs); + !unwind_done(&state) && !unwind_error(&state); + unwind_next_frame(&state)) { + + addr = unwind_get_return_address(&state); + + /* + * A NULL or invalid return address probably means there's some + * generated code which __kernel_text_address() doesn't know + * about. + */ + if (!addr) + return -EINVAL; + + if (!consume_entry(trace, addr)) + return -EINVAL; + } + + /* Check for stack corruption */ + if (unwind_error(&state)) + return -EINVAL; + + return 0; +} + +/* + * This function returns an error if it detects any unreliable features of the + * stack. Otherwise it guarantees that the stack trace is reliable. + * + * If the task is not 'current', the caller *must* ensure the task is inactive. + */ +int save_stack_trace_tsk_reliable(struct task_struct *tsk, + struct stack_trace *trace) +{ + int ret; + + /* + * If the task doesn't have a stack (e.g., a zombie), the stack is + * "reliably" empty. + */ + if (!try_get_task_stack(tsk)) + return 0; + + ret = __save_stack_trace_reliable(trace, tsk); + + put_task_stack(tsk); + + return ret; +} +#endif /* CONFIG_HAVE_RELIABLE_STACKTRACE */ diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index 26dae1774980..51c76a927b0c 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -64,6 +64,31 @@ extern asmlinkage void handle_reserved(void); extern asmlinkage void handle_watch(void); extern asmlinkage void handle_vint(void); +void *exception_table[EXCCODE_INT_START] = { + [0 ... EXCCODE_INT_START - 1] = handle_reserved, + + [EXCCODE_TLBI] = handle_tlb_load, + [EXCCODE_TLBL] = handle_tlb_load, + [EXCCODE_TLBS] = handle_tlb_store, + [EXCCODE_TLBM] = handle_tlb_modify, + [EXCCODE_TLBNR] = handle_tlb_protect, + [EXCCODE_TLBNX] = handle_tlb_protect, + [EXCCODE_TLBPE] = handle_tlb_protect, + [EXCCODE_ADE] = handle_ade, + [EXCCODE_ALE] = handle_ale, + [EXCCODE_SYS] = handle_sys, + [EXCCODE_BP] = handle_bp, + [EXCCODE_INE] = handle_ri, + [EXCCODE_IPE] = handle_ri, + [EXCCODE_FPDIS] = handle_fpu, + [EXCCODE_LSXDIS] = handle_lsx, + [EXCCODE_LASXDIS] = handle_lasx, + [EXCCODE_FPE] = handle_fpe, + [EXCCODE_BTDIS] = handle_lbt, + [EXCCODE_WATCH] = handle_watch, +}; +EXPORT_SYMBOL_GPL(exception_table); + static void show_backtrace(struct task_struct *task, const struct pt_regs *regs, const char *loglvl, bool user) { @@ -828,18 +853,8 @@ void __init trap_init(void) for (i = EXCCODE_INT_START; i < EXCCODE_INT_END; i++) set_handler(i * VECSIZE, handle_vint, VECSIZE); - set_handler(EXCCODE_ADE * VECSIZE, handle_ade, VECSIZE); - set_handler(EXCCODE_ALE * VECSIZE, handle_ale, VECSIZE); - set_handler(EXCCODE_SYS * VECSIZE, handle_sys, VECSIZE); - set_handler(EXCCODE_BP * VECSIZE, handle_bp, VECSIZE); - set_handler(EXCCODE_INE * VECSIZE, handle_ri, VECSIZE); - set_handler(EXCCODE_IPE * VECSIZE, handle_ri, VECSIZE); - set_handler(EXCCODE_FPDIS * VECSIZE, handle_fpu, VECSIZE); - set_handler(EXCCODE_LSXDIS * VECSIZE, handle_lsx, VECSIZE); - set_handler(EXCCODE_LASXDIS * VECSIZE, handle_lasx, VECSIZE); - set_handler(EXCCODE_FPE * VECSIZE, handle_fpe, VECSIZE); - set_handler(EXCCODE_BTDIS * VECSIZE, handle_lbt, VECSIZE); - set_handler(EXCCODE_WATCH * VECSIZE, handle_watch, VECSIZE); + for (i = EXCCODE_ADE; i <= EXCCODE_BTDIS; i++) + set_handler(i * VECSIZE, exception_table[i], VECSIZE); cache_error_setup(); diff --git a/arch/loongarch/kernel/unwind_orc.c b/arch/loongarch/kernel/unwind_orc.c new file mode 100644 index 000000000000..6feb9edec81f --- /dev/null +++ b/arch/loongarch/kernel/unwind_orc.c @@ -0,0 +1,520 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Based on arch/x86/kernel/unwind_orc.c + * + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define orc_warn(fmt, ...) \ + printk_deferred_once(KERN_WARNING "WARNING: " fmt, ##__VA_ARGS__) + +#define orc_warn_current(args...) \ +({ \ + if (state->task == current) \ + orc_warn(args); \ +}) + +extern int __start_orc_unwind_ip[]; +extern int __stop_orc_unwind_ip[]; +extern struct orc_entry __start_orc_unwind[]; +extern struct orc_entry __stop_orc_unwind[]; + +static bool orc_init __ro_after_init; +static unsigned int lookup_num_blocks __ro_after_init; + +static int *cur_orc_ip_table = __start_orc_unwind_ip; +static struct orc_entry *cur_orc_table = __start_orc_unwind; + +static inline unsigned long orc_ip(const int *ip) +{ + return (unsigned long)ip + *ip; +} + +static struct orc_entry *__orc_find(int *ip_table, struct orc_entry *u_table, + unsigned int num_entries, unsigned long ip) +{ + int *first = ip_table; + int *last = ip_table + num_entries - 1; + int *mid = first, *found = first; + + if (!num_entries) + return NULL; + + /* + * Do a binary range search to find the rightmost duplicate of a given + * starting address. Some entries are section terminators which are + * "weak" entries for ensuring there are no gaps. They should be + * ignored when they conflict with a real entry. + */ + while (first <= last) { + mid = first + ((last - first) / 2); + + if (orc_ip(mid) <= ip) { + found = mid; + first = mid + 1; + } else + last = mid - 1; + } + + return u_table + (found - ip_table); +} + +#ifdef CONFIG_MODULES +static struct orc_entry *orc_module_find(unsigned long ip) +{ + struct module *mod; + + mod = __module_address(ip); + if (!mod || !mod->arch.orc_unwind || !mod->arch.orc_unwind_ip) + return NULL; + return __orc_find(mod->arch.orc_unwind_ip, mod->arch.orc_unwind, + mod->arch.num_orcs, ip); +} +#else +static struct orc_entry *orc_module_find(unsigned long ip) +{ + return NULL; +} +#endif + +/* Fake frame pointer entry -- used as a fallback for generated code */ +static struct orc_entry orc_fp_entry = { + .type = ORC_TYPE_CALL, + .sp_reg = ORC_REG_FP, + .sp_offset = 16, + .fp_reg = ORC_REG_PREV_SP, + .fp_offset = -16, + .ra_reg = ORC_REG_PREV_SP, + .ra_offset = -8, + .end = 0, +}; + +static struct orc_entry *orc_find(unsigned long ip) +{ + struct orc_entry *orc = NULL; + + /* Although exception occurs, CSR_ERA is not 0. */ + if (ip == 0) + return NULL; + + /* For non-init vmlinux addresses, use the fast lookup table: */ + if (ip >= LOOKUP_START_IP && ip < LOOKUP_STOP_IP) { + unsigned int idx, start, stop; + + idx = (ip - LOOKUP_START_IP) / LOOKUP_BLOCK_SIZE; + + if (unlikely((idx >= lookup_num_blocks - 1))) { + orc_warn("WARNING: bad lookup idx: idx=%u num=%u ip=%pB\n", + idx, lookup_num_blocks, (void *)ip); + return NULL; + } + + start = orc_lookup[idx]; + stop = orc_lookup[idx + 1] + 1; + + if (unlikely((__start_orc_unwind + start >= __stop_orc_unwind) || + (__start_orc_unwind + stop > __stop_orc_unwind))) { + orc_warn("WARNING: bad lookup value: idx=%u num=%u start=%u stop=%u ip=%pB\n", + idx, lookup_num_blocks, start, stop, (void *)ip); + return NULL; + } + + return __orc_find(__start_orc_unwind_ip + start, + __start_orc_unwind + start, stop - start, ip); + } + + /* vmlinux .init slow lookup: */ + if (init_kernel_text(ip)) + return __orc_find(__start_orc_unwind_ip, __start_orc_unwind, + __stop_orc_unwind_ip - __start_orc_unwind_ip, ip); + + /* Module lookup: */ + orc = orc_module_find(ip); + if (orc) + return orc; + + return NULL; +} + +#ifdef CONFIG_MODULES +static DEFINE_MUTEX(sort_mutex); + +static void orc_sort_swap(void *_a, void *_b, int size) +{ + struct orc_entry *orc_a, *orc_b; + struct orc_entry orc_tmp; + int *a = _a, *b = _b, tmp; + int delta = _b - _a; + + /* Swap the .orc_unwind_ip entries: */ + tmp = *a; + *a = *b + delta; + *b = tmp - delta; + + /* Swap the corresponding .orc_unwind entries: */ + orc_a = cur_orc_table + (a - cur_orc_ip_table); + orc_b = cur_orc_table + (b - cur_orc_ip_table); + orc_tmp = *orc_a; + *orc_a = *orc_b; + *orc_b = orc_tmp; +} + +static int orc_sort_cmp(const void *_a, const void *_b) +{ + struct orc_entry *orc_a; + const int *a = _a, *b = _b; + unsigned long a_val = orc_ip(a); + unsigned long b_val = orc_ip(b); + + if (a_val > b_val) + return 1; + if (a_val < b_val) + return -1; + + /* + * The "weak" section terminator entries need to always be on the left + * to ensure the lookup code skips them in favor of real entries. + * These terminator entries exist to handle any gaps created by + * whitelisted .o files which didn't get objtool generation. + */ + orc_a = cur_orc_table + (a - cur_orc_ip_table); + return orc_a->sp_reg == ORC_REG_UNDEFINED && !orc_a->end ? -1 : 1; +} + +void unwind_module_init(struct module *mod, void *_orc_ip, size_t orc_ip_size, + void *_orc, size_t orc_size) +{ + int *orc_ip = _orc_ip; + struct orc_entry *orc = _orc; + unsigned int num_entries = orc_ip_size / sizeof(int); + + WARN_ON_ONCE(orc_ip_size % sizeof(int) != 0 || + orc_size % sizeof(*orc) != 0 || + num_entries != orc_size / sizeof(*orc)); + + /* + * The 'cur_orc_*' globals allow the orc_sort_swap() callback to + * associate an .orc_unwind_ip table entry with its corresponding + * .orc_unwind entry so they can both be swapped. + */ + mutex_lock(&sort_mutex); + cur_orc_ip_table = orc_ip; + cur_orc_table = orc; + sort(orc_ip, num_entries, sizeof(int), orc_sort_cmp, orc_sort_swap); + mutex_unlock(&sort_mutex); + + mod->arch.orc_unwind_ip = orc_ip; + mod->arch.orc_unwind = orc; + mod->arch.num_orcs = num_entries; +} +#endif + +void __init unwind_init(void) +{ + size_t orc_ip_size = (void *)__stop_orc_unwind_ip - (void *)__start_orc_unwind_ip; + size_t orc_size = (void *)__stop_orc_unwind - (void *)__start_orc_unwind; + size_t num_entries = orc_ip_size / sizeof(int); + struct orc_entry *orc; + int i; + + if (!num_entries || orc_ip_size % sizeof(int) != 0 || + orc_size % sizeof(struct orc_entry) != 0 || + num_entries != orc_size / sizeof(struct orc_entry)) { + orc_warn("WARNING: Bad or missing .orc_unwind table. Disabling unwinder.\n"); + return; + } + + /* + * Note, the orc_unwind and orc_unwind_ip tables were already + * sorted at build time via the 'sorttable' tool. + * It's ready for binary search straight away, no need to sort it. + */ + + /* Initialize the fast lookup table: */ + lookup_num_blocks = orc_lookup_end - orc_lookup; + for (i = 0; i < lookup_num_blocks - 1; i++) { + orc = __orc_find(__start_orc_unwind_ip, __start_orc_unwind, + num_entries, + LOOKUP_START_IP + (LOOKUP_BLOCK_SIZE * i)); + if (!orc) { + orc_warn("WARNING: Corrupt .orc_unwind table. Disabling unwinder.\n"); + return; + } + + orc_lookup[i] = orc - __start_orc_unwind; + } + + /* Initialize the ending block: */ + orc = __orc_find(__start_orc_unwind_ip, __start_orc_unwind, num_entries, + LOOKUP_STOP_IP); + if (!orc) { + orc_warn("WARNING: Corrupt .orc_unwind table. Disabling unwinder.\n"); + return; + } + orc_lookup[lookup_num_blocks-1] = orc - __start_orc_unwind; + + orc_init = true; +} + +static inline bool task_on_another_cpu(struct task_struct *task) +{ +#ifdef CONFIG_SMP + return task != current && task->on_cpu; +#else + return false; +#endif +} + +void unwind_start(struct unwind_state *state, struct task_struct *task, + struct pt_regs *regs) +{ + memset(state, 0, sizeof(*state)); + state->task = task; + + if (!orc_init) + goto err; + + if (task_on_another_cpu(task)) + goto err; + + state->pc = regs->csr_era; + state->ra = regs->regs[1]; + state->sp = regs->regs[3]; + state->fp = regs->regs[22]; + + if (get_stack_info(state->sp, state->task, &state->stack_info)) + goto err; + + return; + +err: + state->error = true; + state->stack_info.type = STACK_TYPE_UNKNOWN; +} + +#define INSN_LINK_OFFSET 4 + +extern void *exception_table[]; +extern asmlinkage void handle_vint(void); +extern asmlinkage void handle_reserved(void); + +static inline unsigned long bt_address(unsigned long ra) +{ + extern unsigned long eentry; + + if (__kernel_text_address(ra)) + return ra; + + /* We are in preempt_disable() here */ + if (__module_text_address(ra)) + return ra; + + if (ra >= eentry && ra < eentry + EXCCODE_INT_END * VECSIZE) { + unsigned long type = (ra - eentry) / VECSIZE; + unsigned long offset = (ra - eentry) % VECSIZE; + unsigned long func; + switch (type) { + case 0 ... EXCCODE_INT_START-1: + func = (unsigned long)exception_table[type]; + break; + case EXCCODE_INT_START ... EXCCODE_INT_END-1: + func = (unsigned long)handle_vint; + break; + default: + func = (unsigned long)handle_reserved; + return 0; + } + + return func + offset; + } + + return ra; +} + +static inline bool on_stack(struct stack_info *info, unsigned long addr, + size_t len) +{ + unsigned long begin = info->begin; + unsigned long end = info->end; + + return (info->type != STACK_TYPE_UNKNOWN && + addr >= begin && addr < end && + addr + len > begin && addr + len <= end); +} + +static bool stack_access_ok(struct unwind_state *state, unsigned long addr, + size_t len) +{ + struct stack_info *info = &state->stack_info; + + if (!on_stack(info, addr, len) && + (get_stack_info(addr, state->task, info))) + return false; + + return true; +} + +static bool is_entry_func(unsigned long addr) +{ + extern u32 kernel_entry; + extern u32 kernel_entry_end; + + return addr >= (unsigned long)&kernel_entry && + addr < (unsigned long)&kernel_entry_end; +} + +#define GRAPH_FAKE_OFFSET (sizeof(struct pt_regs) - offsetof(struct pt_regs, regs[1])) + +bool unwind_next_frame(struct unwind_state *state) +{ + struct stack_info *info = &state->stack_info; + struct orc_entry *orc; + struct pt_regs *regs; + unsigned long *p, pc; + + if (unwind_done(state)) + return false; + + /* Don't let modules unload while we're reading their ORC data. */ + preempt_disable(); + + if (is_entry_func(state->pc)) + goto end; + + orc = orc_find(state->pc); + if (!orc) { + orc = &orc_fp_entry; + state->error = true; + } + + switch (orc->sp_reg) { + case ORC_REG_SP: + state->sp = state->sp + orc->sp_offset; + break; + case ORC_REG_FP: + state->sp = state->fp; + break; + default: + orc_warn("unknown SP base reg %d at %pB\n", + orc->sp_reg, (void *)state->pc); + goto err; + } + + switch (orc->fp_reg) { + case ORC_REG_PREV_SP: + p = (unsigned long *)(state->sp + orc->fp_offset); + if (!stack_access_ok(state, (unsigned long)p, sizeof(unsigned long))) + goto err; + + state->fp = *p; + break; + case ORC_REG_UNDEFINED: + /* Nothing. */ + break; + default: + orc_warn("unknown FP base reg %d at %pB\n", + orc->fp_reg, (void *)state->pc); + goto err; + } + + switch (orc->type) { + case ORC_TYPE_CALL: + if (orc->ra_reg == ORC_REG_PREV_SP) { + p = (unsigned long *)(state->sp + orc->ra_offset); + if (!stack_access_ok(state, (unsigned long)p, sizeof(unsigned long))) + goto err; + + pc = ftrace_graph_ret_addr(state->task, &state->graph_idx, + *p, (unsigned long *)(state->sp - GRAPH_FAKE_OFFSET)); + + pc -= INSN_LINK_OFFSET; + } else if (orc->ra_reg == ORC_REG_UNDEFINED) { + if (!state->ra || state->ra == state->pc) + goto err; + + pc = ftrace_graph_ret_addr(state->task, &state->graph_idx, + state->ra, (unsigned long *)(state->sp - GRAPH_FAKE_OFFSET)); + + pc -= INSN_LINK_OFFSET; + state->ra = 0; + } else { + orc_warn("unknown ra base reg %d at %pB\n", + orc->ra_reg, (void *)state->pc); + goto err; + } + break; + case ORC_TYPE_REGS: + if (state->stack_info.type == STACK_TYPE_IRQ && state->sp == info->end) + regs = (struct pt_regs *)info->next_sp; + else + regs = (struct pt_regs *)state->sp; + + if (!stack_access_ok(state, (unsigned long)regs, sizeof(*regs))) + goto err; + + if ((info->end == (unsigned long)regs + sizeof(*regs)) && + !regs->regs[3] && !regs->regs[1]) + goto end; + + if (user_mode(regs)) + goto end; + + pc = regs->csr_era; + if (!__kernel_text_address(pc)) + goto err; + + state->sp = regs->regs[3]; + state->ra = regs->regs[1]; + state->fp = regs->regs[22]; + get_stack_info(state->sp, state->task, info); + + break; + default: + orc_warn("unknown .orc_unwind entry type %d at %pB\n", + orc->type, (void *)state->pc); + goto err; + } + + state->pc = bt_address(pc); + if (!state->pc) { + pr_err("cannot find unwind pc at %pK\n", (void *)pc); + goto err; + } + + preempt_enable(); + return true; + +err: + state->error = true; + +end: + preempt_enable(); + state->stack_info.type = STACK_TYPE_UNKNOWN; + return false; +} + +unsigned long unwind_get_return_address(struct unwind_state *state) +{ + if (unwind_done(state)) + return 0; + + return __kernel_text_address(state->pc) ? state->pc : 0; +} diff --git a/arch/loongarch/kernel/vmlinux.lds.S b/arch/loongarch/kernel/vmlinux.lds.S index d5adef1f7742..a28d1ca2755b 100644 --- a/arch/loongarch/kernel/vmlinux.lds.S +++ b/arch/loongarch/kernel/vmlinux.lds.S @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include #include +#include #define PAGE_SIZE _PAGE_SIZE @@ -57,6 +58,8 @@ SECTIONS RO_DATA(4096) RW_DATA(1 << CONFIG_L1_CACHE_SHIFT, PAGE_SIZE, THREAD_SIZE) + ORC_UNWIND_TABLE + /* We want the small data sections together, so single-instruction offsets can access them all, and initialized data all before uninitialized, so we can shorten the on-disk segment size. */ diff --git a/arch/loongarch/mm/tlb.c b/arch/loongarch/mm/tlb.c index 9994987abe09..fca7f6ac2243 100644 --- a/arch/loongarch/mm/tlb.c +++ b/arch/loongarch/mm/tlb.c @@ -15,6 +15,8 @@ #include #include +extern void *exception_table[]; + void local_flush_tlb_all(void) { invtlb_all(INVTLB_CURRENT_ALL, 0, 0); @@ -251,6 +253,7 @@ static void output_pgtable_bits_defines(void) void setup_tlb_handler(void) { + int i; static int run_once; setup_pw(); @@ -260,13 +263,10 @@ void setup_tlb_handler(void) if (!run_once) { memcpy((void *)tlbrentry, handle_tlb_refill, 0x80); local_flush_icache_range(tlbrentry, tlbrentry + 0x80); - set_handler(EXCCODE_TLBI * VECSIZE, handle_tlb_load, VECSIZE); - set_handler(EXCCODE_TLBL * VECSIZE, handle_tlb_load, VECSIZE); - set_handler(EXCCODE_TLBS * VECSIZE, handle_tlb_store, VECSIZE); - set_handler(EXCCODE_TLBM * VECSIZE, handle_tlb_modify, VECSIZE); - set_handler(EXCCODE_TLBNR * VECSIZE, handle_tlb_protect, VECSIZE); - set_handler(EXCCODE_TLBNX * VECSIZE, handle_tlb_protect, VECSIZE); - set_handler(EXCCODE_TLBPE * VECSIZE, handle_tlb_protect, VECSIZE); + + for (i = EXCCODE_TLBL; i <= EXCCODE_TLBPE; i++) + set_handler(i * VECSIZE, exception_table[i], VECSIZE); + run_once++; } } diff --git a/arch/loongarch/mm/tlbex.S b/arch/loongarch/mm/tlbex.S index c35f26d34f7b..8a9ebae513b9 100644 --- a/arch/loongarch/mm/tlbex.S +++ b/arch/loongarch/mm/tlbex.S @@ -23,6 +23,7 @@ li.w a1, \write la.abs t0, do_page_fault jirl ra, t0, 0 + UNWIND_HINT_REGS RESTORE_ALL_AND_RET SYM_FUNC_END(tlb_do_page_fault_\write) .endm @@ -39,6 +40,7 @@ SYM_FUNC_START(handle_tlb_protect) REG_S a2, sp, PT_BVADDR la.abs t0, do_page_fault jirl ra, t0, 0 + UNWIND_HINT_REGS RESTORE_ALL_AND_RET SYM_FUNC_END(handle_tlb_protect) diff --git a/arch/loongarch/power/Makefile b/arch/loongarch/power/Makefile index 08a200357c4e..04643f29d78e 100644 --- a/arch/loongarch/power/Makefile +++ b/arch/loongarch/power/Makefile @@ -1,3 +1,5 @@ +OBJECT_FILES_NON_STANDARD_suspend_asm.o := y + obj-y += common.o obj-$(CONFIG_SUSPEND) += suspend.o suspend_asm.o diff --git a/arch/loongarch/vdso/Makefile b/arch/loongarch/vdso/Makefile index 6b6e16732c60..444e945c6f77 100644 --- a/arch/loongarch/vdso/Makefile +++ b/arch/loongarch/vdso/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # Objects to go into the VDSO. +OBJECT_FILES_NON_STANDARD := y # Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before # the inclusion of generic Makefile. -- Gitee From c943552f653f1c5a7aa1ab53de54a4c1a71fa36d Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 31 Dec 2020 15:13:33 +0800 Subject: [PATCH 44/59] LoongArch: Add checksum optimization Change-Id: I634781c2276ad78f472bf9fb3b9a85de09ac77bf Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/asm-prototypes.h | 1 + arch/loongarch/include/asm/checksum.h | 218 ++++++++++ arch/loongarch/lib/Makefile | 4 +- arch/loongarch/lib/csum_partial.S | 428 ++++++++++++++++++++ 4 files changed, 650 insertions(+), 1 deletion(-) create mode 100644 arch/loongarch/include/asm/checksum.h create mode 100644 arch/loongarch/lib/csum_partial.S diff --git a/arch/loongarch/include/asm/asm-prototypes.h b/arch/loongarch/include/asm/asm-prototypes.h index a63d09f8ee86..f901ed043c71 100644 --- a/arch/loongarch/include/asm/asm-prototypes.h +++ b/arch/loongarch/include/asm/asm-prototypes.h @@ -1,4 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#include #include #include #include diff --git a/arch/loongarch/include/asm/checksum.h b/arch/loongarch/include/asm/checksum.h new file mode 100644 index 000000000000..f0cda469bbaa --- /dev/null +++ b/arch/loongarch/include/asm/checksum.h @@ -0,0 +1,218 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#ifndef _ASM_CHECKSUM_H +#define _ASM_CHECKSUM_H + +#ifdef CONFIG_GENERIC_CSUM +#include +#else + +#include + +#include + +/* + * computes the checksum of a memory block at buff, length len, + * and adds in "sum" (32-bit) + * + * returns a 32-bit number suitable for feeding into itself + * or csum_tcpudp_magic + * + * this function must be called with even lengths, except + * for the last fragment, which may be odd + * + * it's best to have buff aligned on a 32-bit boundary + */ +__wsum csum_partial(const void *buff, int len, __wsum sum); +__wsum csum_partial_copy(const void *src, void *dst, int len); + +#define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER +static inline +__wsum csum_and_copy_from_user(const void __user *src, void *dst, int len) +{ + might_fault(); + if (!access_ok(src, len)) + return 0; + return csum_partial_copy(src, dst, len); +} + +#define HAVE_CSUM_COPY_USER +static inline +__wsum csum_and_copy_to_user(const void *src, void __user *dst, int len) +{ + might_fault(); + if (!access_ok(dst, len)) + return 0; + return csum_partial_copy(src, dst, len); +} + +/* + * the same as csum_partial, but copies from user space (but on LoongArch + * we have just one address space, so this is identical to the above) + */ +#define _HAVE_ARCH_CSUM_AND_COPY +static inline __wsum csum_partial_copy_nocheck(const void *src, void *dst, int len) +{ + return csum_partial_copy(src, dst, len); +} + +/* + * Fold a partial checksum without adding pseudo headers + */ +static inline __sum16 csum_fold(__wsum csum) +{ + u32 sum = (__force u32)csum; + + sum += (sum << 16); + csum = (sum < csum); + sum >>= 16; + sum += csum; + + return (__force __sum16)~sum; +} +#define csum_fold csum_fold + +/* + * This is a version of ip_compute_csum() optimized for IP headers, + * which always checksum on 4 octet boundaries. + * + * By Jorge Cwik , adapted for linux by + * Arnt Gulbrandsen. + */ +static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) +{ + const unsigned int *word = iph; + const unsigned int *stop = word + ihl; + unsigned int csum; + int carry; + + csum = word[0]; + csum += word[1]; + carry = (csum < word[1]); + csum += carry; + + csum += word[2]; + carry = (csum < word[2]); + csum += carry; + + csum += word[3]; + carry = (csum < word[3]); + csum += carry; + + word += 4; + do { + csum += *word; + carry = (csum < *word); + csum += carry; + word++; + } while (word != stop); + + return csum_fold(csum); +} +#define ip_fast_csum ip_fast_csum + +static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, + __u32 len, __u8 proto, + __wsum sum) +{ + __asm__( +#ifdef CONFIG_64BIT + " add.d %0, %0, %2 \n" + " add.d %0, %0, %3 \n" + " add.d %0, %0, %4 \n" + " slli.d $t7, %0, 32 \n" + " add.d %0, %0, $t7 \n" + " sltu $t7, %0, $t7 \n" + " srai.d %0, %0, 32 \n" + " add.w %0, %0, $t7 \n" +#endif + : "=r" (sum) + : "0" ((__force unsigned long)daddr), + "r" ((__force unsigned long)saddr), + "r" ((proto + len) << 8), + "r" ((__force unsigned long)sum) + : "t7"); + + return sum; +} +#define csum_tcpudp_nofold csum_tcpudp_nofold + +/* + * this routine is used for miscellaneous IP-like checksums, mainly + * in icmp.c + */ +static inline __sum16 ip_compute_csum(const void *buff, int len) +{ + return csum_fold(csum_partial(buff, len, 0)); +} + +#define _HAVE_ARCH_IPV6_CSUM +static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr, + const struct in6_addr *daddr, + __u32 len, __u8 proto, + __wsum sum) +{ + __wsum tmp; + + __asm__( + " add.w %0, %0, %5 # proto (long in network byte order)\n" + " sltu $t7, %0, %5 \n" + " add.w %0, %0, $t7 \n" + + " add.w %0, %0, %6 # csum\n" + " sltu $t7, %0, %6 \n" + " ld.w %1, %2, 0 # four words source address\n" + " add.w %0, %0, $t7 \n" + " add.w %0, %0, %1 \n" + " sltu $t7, %0, %1 \n" + + " ld.w %1, %2, 4 \n" + " add.w %0, %0, $t7 \n" + " add.w %0, %0, %1 \n" + " sltu $t7, %0, %1 \n" + + " ld.w %1, %2, 8 \n" + " add.w %0, %0, $t7 \n" + " add.w %0, %0, %1 \n" + " sltu $t7, %0, %1 \n" + + " ld.w %1, %2, 12 \n" + " add.w %0, %0, $t7 \n" + " add.w %0, %0, %1 \n" + " sltu $t7, %0, %1 \n" + + " ld.w %1, %3, 0 \n" + " add.w %0, %0, $t7 \n" + " add.w %0, %0, %1 \n" + " sltu $t7, %0, %1 \n" + + " ld.w %1, %3, 4 \n" + " add.w %0, %0, $t7 \n" + " add.w %0, %0, %1 \n" + " sltu $t7, %0, %1 \n" + + " ld.w %1, %3, 8 \n" + " add.w %0, %0, $t7 \n" + " add.w %0, %0, %1 \n" + " sltu $t7, %0, %1 \n" + + " ld.w %1, %3, 12 \n" + " add.w %0, %0, $t7 \n" + " add.w %0, %0, %1 \n" + " sltu $t7, %0, %1 \n" + + " add.w %0, %0, $t7 # Add final carry\n" + : "=&r" (sum), "=&r" (tmp) + : "r" (saddr), "r" (daddr), + "0" (htonl(len)), "r" (htonl(proto)), "r" (sum) + : "t7"); + + return csum_fold(sum); +} + +#include +#endif /* CONFIG_GENERIC_CSUM */ + +#endif /* _ASM_CHECKSUM_H */ diff --git a/arch/loongarch/lib/Makefile b/arch/loongarch/lib/Makefile index 2f337487878e..5a654738874a 100644 --- a/arch/loongarch/lib/Makefile +++ b/arch/loongarch/lib/Makefile @@ -4,4 +4,6 @@ # lib-y += delay.o memset.o memcpy.o memmove.o clear_user.o \ - copy_user.o strncpy_user.o strnlen_user.o dump_tlb.o + copy_user.o strncpy_user.o strnlen_user.o csum_partial.o dump_tlb.o + +lib-$(CONFIG_GENERIC_CSUM) := $(filter-out csum_partial.o, $(lib-y)) diff --git a/arch/loongarch/lib/csum_partial.S b/arch/loongarch/lib/csum_partial.S new file mode 100644 index 000000000000..1d504246ecae --- /dev/null +++ b/arch/loongarch/lib/csum_partial.S @@ -0,0 +1,428 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Quick'n'dirty IP checksum ... + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#define LOAD ld.d +#define LOAD32 ld.wu +#define ADD add.d +#define SUB sub.d +#define SLL slli.d +#define SRL srli.d +#define SLLV sll.d +#define SRLV srl.d + +#define NBYTES 8 +#define LOG_NBYTES 3 +#define UNIT(unit) ((unit)*NBYTES) +#define ADDRMASK (NBYTES-1) + +#define _ASM_EXTABLE(from, to) \ + .section __ex_table, "a"; \ + PTR from, to; \ + .previous + +#define ADDC(sum,reg) \ + ADD sum, sum, reg; \ + sltu t8, sum, reg; \ + ADD sum, sum, t8; \ + +#define ADDC32(sum,reg) \ + add.w sum, sum, reg; \ + sltu t8, sum, reg; \ + add.w sum, sum, t8; \ + +#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3) \ + LOAD _t0, src, (offset + UNIT(0)); \ + LOAD _t1, src, (offset + UNIT(1)); \ + LOAD _t2, src, (offset + UNIT(2)); \ + LOAD _t3, src, (offset + UNIT(3)); \ + ADDC(_t0, _t1); \ + ADDC(_t2, _t3); \ + ADDC(sum, _t0); \ + ADDC(sum, _t2) + +/* + * a0: source address + * a1: length of the area to checksum + * a2: partial checksum + */ + +#define src a0 +#define sum t6 + + .text + .align 5 +SYM_FUNC_START(csum_partial) + or sum, zero, zero + or t7, zero, zero + + sltui t8, a1, 0x8 + or t2, a1, zero + bnez t8, .Lsmall_csumcpy /* < 8 bytes to copy */ + + andi t7, src, 0x1 /* odd buffer? */ + +.Lhword_align: + andi t8, src, 0x2 + beqz t7, .Lword_align + + ld.bu t0, src, 0x0 + LONG_ADDIU a1, a1, -1 + slli.w t0, t0, 8 + ADDC(sum, t0) + PTR_ADDIU src, src, 1 + andi t8, src, 0x2 + +.Lword_align: + sltui t4, a1, 56 + beqz t8, .Ldword_align + + ld.hu t0, src, 0x0 + LONG_ADDIU a1, a1, -2 + ADDC(sum, t0) + sltui t4, a1, 56 + PTR_ADDIU src, src, 0x2 + +.Ldword_align: + or t8, a1, zero + bnez t4, .Ldo_end_words + + andi t4, src, 0x4 + andi t8, src, 0x8 + beqz t4, .Lqword_align + + LOAD32 t0, src, 0x0 + LONG_ADDIU a1, a1, -4 + ADDC(sum, t0) + PTR_ADDIU src, src, 4 + andi t8, src, 0x8 + +.Lqword_align: + andi t4, src, 0x10 + beqz t8, .Loword_align + + ld.d t0, src, 0 + LONG_ADDIU a1, a1, -8 + ADDC(sum, t0) + PTR_ADDIU src, src, 8 + andi t4, src, 0x10 + +.Loword_align: + LONG_SRL t8, a1, 0x7 + beqz t4, .Lbegin_movement + + ld.d t0, src, 0x00 + ld.d t1, src, 0x08 + ADDC(sum, t0) + ADDC(sum, t1) + LONG_ADDIU a1, a1, -16 + PTR_ADDIU src, src, 16 + LONG_SRL t8, a1, 0x7 + +.Lbegin_movement: + andi t2, a1, 0x40 + beqz t8, 1f + + or t5, t8, zero +.Lmove_128bytes: + CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4) + CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4) + CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4) + CSUM_BIGCHUNK(src, 0x60, sum, t0, t1, t3, t4) + addi.d t5, t5, -1 + addi.d src, src, 0x80 + bnez t5, .Lmove_128bytes + +1: + or t4, t2, zero + andi t2, a1, 0x20 + beqz t4, 1f + +.Lmove_64bytes: + CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4) + CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4) + PTR_ADDIU src, src, 64 + +1: + andi t8, a1, 0x1c + beqz t2, .Ldo_end_words + +.Lmove_32bytes: + CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4) + andi t8, a1, 0x1c + PTR_ADDIU src, src, 32 + +.Ldo_end_words: + andi t2, a1, 0x3 + beqz t8, .Lsmall_csumcpy + LONG_SRL t8, t8, 0x2 + +.Lend_words: + or t4, t8, zero +1: LOAD32 t0, src, 0x0 + LONG_ADDIU t4, t4, -1 + ADDC(sum, t0) + PTR_ADDIU src, src, 4 + bnez t4, 1b + +/* unknown src alignment and < 8 bytes to go */ +.Lsmall_csumcpy: + or a1, t2, zero + + andi t4, a1, 4 + andi t0, a1, 2 + beqz t4, 1f + + /* Still a full word to go */ + ld.w t1, src, 0x0 + PTR_ADDIU src, src, 4 + slli.d t1, t1, 32 /* clear lower 32bit */ + ADDC(sum, t1) + +1: or t1, zero, zero + or t4, t0, zero + andi t0, a1, 1 + beqz t4, 1f + + /* Still a halfword to go */ + ld.hu t1, src, 0x0 + PTR_ADDIU src, src, 2 + +1: slli.w t1, t1, 16 + beqz t0, 1f + + ld.bu t2, src, 0 + + or t1, t1, t2 + +1: ADDC(sum, t1) + + /* fold checksum */ + slli.d t4, sum, 32 + add.d sum, sum, t4 + sltu t4, sum, t4 + srai.d sum, sum, 32 + add.w sum, sum, t4 + + /* odd buffer alignment? */ + beqz t7, 1f + revb.2h sum, sum + /* Add the passed partial csum. */ +1: ADDC32(sum, a2) + or v0, sum, zero + jirl zero, ra, 0x0 +SYM_FUNC_END(csum_partial) +EXPORT_SYMBOL(csum_partial) + +/* + * checksum and copy routines based on memcpy.S + * + * csum_partial_copy(src, dst, len) + * + * See "Spec" in memcpy.S for details. Unlike __copy_user, all + * function in this file use the standard calling convention. + */ + +#define src a0 +#define dst a1 +#define len a2 +#define rem t4 +#define sum t6 +#define odd t7 + +SYM_FUNC_START(csum_partial_copy_generic) + xor sum, sum, sum + bge zero, len, .Lend + + andi odd, dst, 0x1 + beqz odd, .Leven +1: ld.b t0, src, 0 +2: st.b t0, dst, 0 + slli.d t1, t0, 8 + add.d sum, sum, t1 + addi.d len, len, -1 + addi.d src, src, 1 + addi.d dst, dst, 1 +.Leven: + ori t4, zero, 2 + blt len, t4, .Lendloop /* more than 2 bytes */ + +3: ld.hu t1, src, 0 +4: st.h t1, dst, 0 + addi.d src, src, 2 + addi.d dst, dst, 2 + addi.d len, len, -2 + ADDC32(sum, t1) + bge len, t4, 3b + +.Lendloop: + andi t0, len, 1 + beqz t0, .Lend +5: ld.b t0, src, 0 +6: st.b t0, dst, 0 + ADDC32(sum, t0) +.Lend: + beqz odd, 7f + revb.2h sum, sum +7: + move v0, sum + jirl zero, ra, 0x0 + + .section .fixup, "ax" +8: + move v0, zero + jirl zero, ra, 0x0 + .previous + + _ASM_EXTABLE(1b, 8b) + _ASM_EXTABLE(2b, 8b) + _ASM_EXTABLE(3b, 8b) + _ASM_EXTABLE(4b, 8b) + _ASM_EXTABLE(5b, 8b) + _ASM_EXTABLE(6b, 8b) +SYM_FUNC_END(csum_partial_copy_generic) + +SYM_FUNC_START(csum_partial_copy_fast) + or sum, zero, zero + sltui t2, len, NBYTES + bnez t2, .Lcopy_bytes_checklen +.Lmore_than_NBYTES: + beqz len, .Ldone + andi rem, len, (NBYTES - 1) + /* if rem == len means that len < NBYTES */ + beq rem, len, .Lcopy_bytes +1: /* 8 <= len */ + ld.d t0, src, 0x0 + addi.d src, src, NBYTES + addi.d len, len, -NBYTES +2: st.d t0, dst, 0x0 + ADDC(sum, t0) + addi.d dst, dst, NBYTES + bne rem, len, 1b +.Lcopy_bytes_checklen: + beqz len, .Ldone +.Lcopy_bytes: + /* len < 8 copy one by one */ + or t2, zero, zero /* t2 store len bytes of data */ + or t3, zero, zero /* t3 for increase shift */ + /* copy byte 0 */ +5: ld.b t0, src, 0 + addi.d len, len, -1 +6: st.b t0, dst, 0 + addi.d dst, dst, 1 + sll.d t0, t0, t3 + addi.d t3, t3, 8 + or t2, t2, t0 + beqz len, .Lcopy_bytes_done + /* copy byte 1 */ +7: ld.b t0, src, 1 + addi.d len, len, -1 +8: st.b t0, dst, 0 + addi.d dst, dst, 1 + sll.d t0, t0, t3 + addi.d t3, t3, 8 + or t2, t2, t0 + beqz len, .Lcopy_bytes_done + /* copy byte 2 */ +9: ld.b t0, src, 2 + addi.d len, len, -1 +10: st.b t0, dst, 0 + addi.d dst, dst, 1 + sll.d t0, t0, t3 + addi.d t3, t3, 8 + or t2, t2, t0 + beqz len, .Lcopy_bytes_done + /* copy byte 3 */ +11: ld.b t0, src, 3 + addi.d len, len, -1 +12: st.b t0, dst, 0 + addi.d dst, dst, 1 + sll.d t0, t0, t3 + addi.d t3, t3, 8 + or t2, t2, t0 + beqz len, .Lcopy_bytes_done + /* copy byte 4 */ +13: ld.b t0, src, 4 + addi.d len, len, -1 +14: st.b t0, dst, 0 + addi.d dst, dst, 1 + sll.d t0, t0, t3 + addi.d t3, t3, 8 + or t2, t2, t0 + beqz len, .Lcopy_bytes_done + /* copy byte 5 */ +15: ld.b t0, src, 5 + addi.d len, len, -1 +16: st.b t0, dst, 0 + addi.d dst, dst, 1 + sll.d t0, t0, t3 + addi.d t3, t3, 8 + or t2, t2, t0 + beqz len, .Lcopy_bytes_done + /* copy byte 6 */ +17: ld.b t0, src, 6 + addi.d len, len, -1 +18: st.b t0, dst, 0 + addi.d dst, dst, 1 + sll.d t0, t0, t3 + addi.d t3, t3, 8 + or t2, t2, t0 +.Lcopy_bytes_done: + /* copy one by one then calc csum */ + ADDC(sum, t2) +.Ldone: + /* fold checksum to 32bit */ + slli.d t4, sum, 32 + add.d sum, sum, t4 + sltu t4, sum, t4 + srai.d sum, sum, 32 + add.w sum, sum, t4 +19: + move v0, sum + jirl zero, ra, 0x0 + + .section .fixup, "ax" +20: + move v0, zero + jirl zero, ra, 0x0 + .previous + + _ASM_EXTABLE(1b, 20b) + _ASM_EXTABLE(2b, 20b) + _ASM_EXTABLE(5b, 20b) + _ASM_EXTABLE(6b, 20b) + _ASM_EXTABLE(7b, 20b) + _ASM_EXTABLE(8b, 20b) + _ASM_EXTABLE(9b, 20b) + _ASM_EXTABLE(10b, 20b) + _ASM_EXTABLE(11b, 20b) + _ASM_EXTABLE(12b, 20b) + _ASM_EXTABLE(13b, 20b) + _ASM_EXTABLE(14b, 20b) + _ASM_EXTABLE(15b, 20b) + _ASM_EXTABLE(16b, 20b) + _ASM_EXTABLE(17b, 20b) + _ASM_EXTABLE(18b, 20b) +SYM_FUNC_END(csum_partial_copy_fast) + +SYM_FUNC_START(csum_partial_copy) + ALTERNATIVE "b csum_partial_copy_generic", \ + "b csum_partial_copy_fast", CPU_FEATURE_UAL +SYM_FUNC_END(csum_partial_copy) + +EXPORT_SYMBOL(csum_partial_copy) -- Gitee From b4bc5179efb15c0afc05dd55b4d3c65cdf316309 Mon Sep 17 00:00:00 2001 From: Min Zhou Date: Mon, 19 Jul 2021 17:08:07 +0800 Subject: [PATCH 45/59] LoongArch: crypto: Add crc32 and crc32c hw accelerated module With a blatant copy of some MIPS bits we introduce the crc32 and crc32c hw accelerated module to LoongArch. LoongArch has provided these instructions to calculate crc32 and crc32c: * crc.w.b.w * crc.w.h.w * crc.w.w.w * crc.w.d.w * crcc.w.b.w * crcc.w.h.w * crcc.w.w.w * crcc.w.d.w So we should make use of these instructions to improve the performance of calculation for crc32(c) checksums. As can be seen from the following test results, using hardware instructions of crc32(c) can improve the performance by 58%. Software implement Hardware instructions Buffer size time cost (seconds) time cost (seconds) Improve 100 KB 0.000845 0.000534 59.1% 1 MB 0.007758 0.004836 59.4% 10 MB 0.076593 0.047682 59.4% 100 MB 0.756734 0.479126 58.5% 1000 MB 7.563841 4.778266 58.5% After the hw accelerated module has been registered, we can see its information from /proc/crypto which lists all installed cryptographic ciphers used by kernel, including additional details for each. For example: $ cat /proc/crypto ... name : crc32c driver : crc32c-la-hw module : kernel priority : 300 refcnt : 2 selftest : passed internal : no type : shash blocksize : 1 digestsize : 4 name : crc32 driver : crc32-la-hw module : kernel priority : 300 refcnt : 1 selftest : passed internal : no type : shash blocksize : 1 digestsize : 4 ... Change-Id: I75cf7a4590dd0dd57940d6c911fb567f5c062be0 Signed-off-by: Min Zhou Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 5 + arch/loongarch/Makefile | 2 + arch/loongarch/crypto/Makefile | 6 + arch/loongarch/crypto/crc32.c | 325 +++++++++++++++++++++++++++++++++ 4 files changed, 338 insertions(+) create mode 100644 arch/loongarch/crypto/Makefile create mode 100644 arch/loongarch/crypto/crc32.c diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index db5a20e3a74b..beee2ded84b9 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -109,6 +109,7 @@ config LOONGARCH select PERF_USE_VMALLOC select RTC_LIB select SYSCTL_EXCEPTION_TRACE + select LOONGARCH_CRC32_SUPPORT menu "Machine selection" @@ -161,6 +162,10 @@ config SYS_SUPPORTS_HOTPLUG_CPU config GENERIC_CSUM def_bool y +config LOONGARCH_CRC32_SUPPORT + bool + default y + config SYS_SUPPORTS_RELOCATABLE bool help diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index 54cd8cc48321..e0742a1bc475 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -68,6 +68,8 @@ else KBUILD_IMAGE = vmlinux endif +drivers-$(CONFIG_LOONGARCH_CRC32_SUPPORT) += arch/loongarch/crypto/ + # # Board-dependent options and extra files # diff --git a/arch/loongarch/crypto/Makefile b/arch/loongarch/crypto/Makefile new file mode 100644 index 000000000000..c241e8af20a3 --- /dev/null +++ b/arch/loongarch/crypto/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for LoongArch crypto files.. +# + +obj-$(CONFIG_LOONGARCH_CRC32_SUPPORT) += crc32.o diff --git a/arch/loongarch/crypto/crc32.c b/arch/loongarch/crypto/crc32.c new file mode 100644 index 000000000000..891a18072a2f --- /dev/null +++ b/arch/loongarch/crypto/crc32.c @@ -0,0 +1,325 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * crc32.c - CRC32 and CRC32C using loongarch crc* instructions + * + * Module based on mips/crypto/crc32-mips.c + * + * Copyright (C) 2014 Linaro Ltd + * Copyright (C) 2018 MIPS Tech, LLC + * Copyright (C) 2021 Loongson Technology Corporation Limited + */ + +#include +#include + +#include + +#include + +#define _CRC32(crc, value, size, type) \ +do { \ + __asm__ __volatile__( \ + #type ".w." #size ".w" " %0, %1, %0\n\t" \ + : "+r" (crc) \ + : "r" (value) \ + : "memory"); \ +} while (0) + +#define CRC32(crc, value, size) \ + _CRC32(crc, value, size, crc) + +#define CRC32C(crc, value, size) \ + _CRC32(crc, value, size, crcc) + +static u32 crc32_la_le_hw(u32 crc_, const u8 *p, unsigned int len) +{ + u32 crc = crc_; + +#ifdef CONFIG_64BIT + while (len >= sizeof(u64)) { + u64 value = get_unaligned_le64(p); + + CRC32(crc, value, d); + p += sizeof(u64); + len -= sizeof(u64); + } + + if (len & sizeof(u32)) { +#else /* !CONFIG_64BIT */ + while (len >= sizeof(u32)) { +#endif + u32 value = get_unaligned_le32(p); + + CRC32(crc, value, w); + p += sizeof(u32); + len -= sizeof(u32); + } + + if (len & sizeof(u16)) { + u16 value = get_unaligned_le16(p); + + CRC32(crc, value, h); + p += sizeof(u16); + } + + if (len & sizeof(u8)) { + u8 value = *p++; + + CRC32(crc, value, b); + } + + return crc; +} + +static u32 crc32c_la_le_hw(u32 crc_, const u8 *p, unsigned int len) +{ + u32 crc = crc_; + +#ifdef CONFIG_64BIT + while (len >= sizeof(u64)) { + u64 value = get_unaligned_le64(p); + + CRC32C(crc, value, d); + p += sizeof(u64); + len -= sizeof(u64); + } + + if (len & sizeof(u32)) { +#else /* !CONFIG_64BIT */ + while (len >= sizeof(u32)) { +#endif + u32 value = get_unaligned_le32(p); + + CRC32C(crc, value, w); + p += sizeof(u32); + len -= sizeof(u32); + } + + if (len & sizeof(u16)) { + u16 value = get_unaligned_le16(p); + + CRC32C(crc, value, h); + p += sizeof(u16); + } + + if (len & sizeof(u8)) { + u8 value = *p++; + + CRC32C(crc, value, b); + } + return crc; +} + +#define CHKSUM_BLOCK_SIZE 1 +#define CHKSUM_DIGEST_SIZE 4 + +struct chksum_ctx { + u32 key; +}; + +struct chksum_desc_ctx { + u32 crc; +}; + +static int chksum_init(struct shash_desc *desc) +{ + struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm); + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + ctx->crc = mctx->key; + + return 0; +} + +/* + * Setting the seed allows arbitrary accumulators and flexible XOR policy + * If your algorithm starts with ~0, then XOR with ~0 before you set + * the seed. + */ +static int chksum_setkey(struct crypto_shash *tfm, const u8 *key, + unsigned int keylen) +{ + struct chksum_ctx *mctx = crypto_shash_ctx(tfm); + + if (keylen != sizeof(mctx->key)) + return -EINVAL; + mctx->key = get_unaligned_le32(key); + return 0; +} + +static int chksum_update(struct shash_desc *desc, const u8 *data, + unsigned int length) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + ctx->crc = crc32_la_le_hw(ctx->crc, data, length); + return 0; +} + +static int chksumc_update(struct shash_desc *desc, const u8 *data, + unsigned int length) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + ctx->crc = crc32c_la_le_hw(ctx->crc, data, length); + return 0; +} + +static int chksum_final(struct shash_desc *desc, u8 *out) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + put_unaligned_le32(ctx->crc, out); + return 0; +} + +static int chksumc_final(struct shash_desc *desc, u8 *out) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + put_unaligned_le32(~ctx->crc, out); + return 0; +} + +static int __chksum_finup(u32 crc, const u8 *data, unsigned int len, u8 *out) +{ + put_unaligned_le32(crc32_la_le_hw(crc, data, len), out); + return 0; +} + +static int __chksumc_finup(u32 crc, const u8 *data, unsigned int len, u8 *out) +{ + put_unaligned_le32(~crc32c_la_le_hw(crc, data, len), out); + return 0; +} + +static int chksum_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + return __chksum_finup(ctx->crc, data, len, out); +} + +static int chksumc_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + return __chksumc_finup(ctx->crc, data, len, out); +} + +static int chksum_digest(struct shash_desc *desc, const u8 *data, + unsigned int length, u8 *out) +{ + struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm); + + return __chksum_finup(mctx->key, data, length, out); +} + +static int chksumc_digest(struct shash_desc *desc, const u8 *data, + unsigned int length, u8 *out) +{ + struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm); + + return __chksumc_finup(mctx->key, data, length, out); +} + +static int chksum_cra_init(struct crypto_tfm *tfm) +{ + struct chksum_ctx *mctx = crypto_tfm_ctx(tfm); + + mctx->key = 0; + return 0; +} + +static int chksumc_cra_init(struct crypto_tfm *tfm) +{ + struct chksum_ctx *mctx = crypto_tfm_ctx(tfm); + + mctx->key = ~0; + return 0; +} + +static struct shash_alg crc32_alg = { + .digestsize = CHKSUM_DIGEST_SIZE, + .setkey = chksum_setkey, + .init = chksum_init, + .update = chksum_update, + .final = chksum_final, + .finup = chksum_finup, + .digest = chksum_digest, + .descsize = sizeof(struct chksum_desc_ctx), + .base = { + .cra_name = "crc32", + .cra_driver_name = "crc32-la-hw", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, + .cra_blocksize = CHKSUM_BLOCK_SIZE, + .cra_alignmask = 0, + .cra_ctxsize = sizeof(struct chksum_ctx), + .cra_module = THIS_MODULE, + .cra_init = chksum_cra_init, + } +}; + +static struct shash_alg crc32c_alg = { + .digestsize = CHKSUM_DIGEST_SIZE, + .setkey = chksum_setkey, + .init = chksum_init, + .update = chksumc_update, + .final = chksumc_final, + .finup = chksumc_finup, + .digest = chksumc_digest, + .descsize = sizeof(struct chksum_desc_ctx), + .base = { + .cra_name = "crc32c", + .cra_driver_name = "crc32c-la-hw", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, + .cra_blocksize = CHKSUM_BLOCK_SIZE, + .cra_alignmask = 0, + .cra_ctxsize = sizeof(struct chksum_ctx), + .cra_module = THIS_MODULE, + .cra_init = chksumc_cra_init, + } +}; + +static int __init crc32_mod_init(void) +{ + int err; + + if (!cpu_has(CPU_FEATURE_UAL)) { + return 0; + } + + err = crypto_register_shash(&crc32_alg); + + if (err) + return err; + + err = crypto_register_shash(&crc32c_alg); + + if (err) { + crypto_unregister_shash(&crc32_alg); + return err; + } + + return 0; +} + +static void __exit crc32_mod_exit(void) +{ + if (!cpu_has(CPU_FEATURE_UAL)) { + return; + } + + crypto_unregister_shash(&crc32_alg); + crypto_unregister_shash(&crc32c_alg); +} + +MODULE_DESCRIPTION("CRC32 and CRC32C using loongarch crc* instructions"); +MODULE_LICENSE("GPL v2"); + +module_init(crc32_mod_init); +module_exit(crc32_mod_exit); -- Gitee From 3d00ed5a7a43ef35d98cf04413243fd9165f00ab Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Mon, 12 Apr 2021 11:30:39 +0800 Subject: [PATCH 46/59] LoongArch: Add HIGHMEM and FDT support for future Change-Id: Ice536b1b716aad9bc2f556d63ebf55dd6ab09c5d Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 29 +++ arch/loongarch/Makefile | 8 +- arch/loongarch/boot/dts/Makefile | 4 + arch/loongarch/boot/dts/loongson/Makefile | 4 + .../boot/dts/loongson/loongson3.dtsi | 235 ++++++++++++++++++ .../boot/dts/loongson/loongson3_ls7a.dts | 159 ++++++++++++ arch/loongarch/include/asm/bootinfo.h | 4 + arch/loongarch/include/asm/fixmap.h | 51 ++++ arch/loongarch/include/asm/highmem.h | 53 ++++ arch/loongarch/include/asm/kmap_types.h | 6 + .../include/asm/mach-loongson64/boot_param.h | 2 + arch/loongarch/include/asm/setup.h | 11 + arch/loongarch/kernel/head.S | 14 ++ arch/loongarch/kernel/setup.c | 88 +++++++ arch/loongarch/loongson64/env.c | 4 +- arch/loongarch/loongson64/init.c | 2 + arch/loongarch/loongson64/setup.c | 15 ++ arch/loongarch/mm/pgtable-64.c | 6 + 18 files changed, 693 insertions(+), 2 deletions(-) create mode 100644 arch/loongarch/boot/dts/Makefile create mode 100644 arch/loongarch/boot/dts/loongson/Makefile create mode 100644 arch/loongarch/boot/dts/loongson/loongson3.dtsi create mode 100644 arch/loongarch/boot/dts/loongson/loongson3_ls7a.dts create mode 100644 arch/loongarch/include/asm/highmem.h diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index beee2ded84b9..03c3b219f252 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -133,8 +133,11 @@ config MACH_LOONGSON64 select SYS_SUPPORTS_HOTPLUG_CPU select SYS_SUPPORTS_NUMA select SYS_SUPPORTS_64BIT_KERNEL + select SYS_SUPPORTS_HIGHMEM select SYS_SUPPORTS_RELOCATABLE select ZONE_DMA32 + select USE_OF + select BUILTIN_DTB help This enables the support of Loongson 64-bit family of machines. These machines are based on new Loongson-3 processors (Old Loongson is MIPS @@ -189,6 +192,7 @@ config CPU_LOONGSON64 depends on SYS_HAS_CPU_LOONGSON64 select ARCH_HAS_PHYS_TO_DMA select CPU_SUPPORTS_64BIT_KERNEL + select CPU_SUPPORTS_HIGHMEM select CPU_SUPPORTS_LSX select CPU_SUPPORTS_LASX select GPIOLIB @@ -391,6 +395,22 @@ config CPU_HAS_LASX If unsure, say Y. +# +# - Highmem only makes sense for the 32-bit kernel. +# - We use SYS_SUPPORTS_HIGHMEM to offer highmem only for systems where we +# know they might have memory configurations that could make use of highmem +# support. +# +config HIGHMEM + bool "High Memory Support" + depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM + +config CPU_SUPPORTS_HIGHMEM + bool + +config SYS_SUPPORTS_HIGHMEM + bool + config CPU_SUPPORTS_LSX bool @@ -662,6 +682,15 @@ config SECCOMP If unsure, say Y. Only embedded should say N here. +config USE_OF + bool + select OF + select OF_EARLY_FLATTREE + select IRQ_DOMAIN + +config BUILTIN_DTB + bool + endmenu config ARCH_ENABLE_MEMORY_HOTPLUG diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index e0742a1bc475..4320512e4273 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -8,6 +8,7 @@ archscripts: scripts_basic $(Q)$(MAKE) $(build)=arch/loongarch/boot/tools relocs KBUILD_DEFCONFIG := loongson3_defconfig +KBUILD_DTBS := dtbs # # Select the object file format to substitute into the linker script. @@ -133,7 +134,7 @@ boot-y := vmlinux.bin bootz-y := vmlinuz bootz-y += vmlinuz.bin -all: $(all-y) +all: $(all-y) $(KBUILD_DTBS) # boot $(boot-y): vmlinux FORCE @@ -153,6 +154,9 @@ endif CLEAN_FILES += vmlinux +# device-trees +core-y += arch/loongarch/boot/dts/ + install: $(Q)install -D -m 755 vmlinux $(INSTALL_PATH)/vmlinux-$(KERNELRELEASE) ifdef CONFIG_SYS_SUPPORTS_ZBOOT @@ -172,5 +176,7 @@ define archhelp echo ' vmlinux.bin - Raw binary boot image' echo ' vmlinuz - Compressed boot(zboot) image' echo ' vmlinuz.bin - Raw binary zboot image' + echo ' dtbs - Device-tree blobs for enabled boards' + echo ' dtbs_install - Install dtbs to $(INSTALL_DTBS_PATH)' echo endef diff --git a/arch/loongarch/boot/dts/Makefile b/arch/loongarch/boot/dts/Makefile new file mode 100644 index 000000000000..94c45818e4c9 --- /dev/null +++ b/arch/loongarch/boot/dts/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 +subdir-y += loongson + +obj-$(CONFIG_BUILTIN_DTB) := $(addsuffix /, $(subdir-y)) diff --git a/arch/loongarch/boot/dts/loongson/Makefile b/arch/loongarch/boot/dts/loongson/Makefile new file mode 100644 index 000000000000..9def5f6ee9c6 --- /dev/null +++ b/arch/loongarch/boot/dts/loongson/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 +dtb-$(CONFIG_CPU_LOONGSON64) += loongson3_ls7a.dtb + +obj-y += $(patsubst %.dtb, %.dtb.o, $(dtb-y)) diff --git a/arch/loongarch/boot/dts/loongson/loongson3.dtsi b/arch/loongarch/boot/dts/loongson/loongson3.dtsi new file mode 100644 index 000000000000..2f985a7ddaf2 --- /dev/null +++ b/arch/loongarch/boot/dts/loongson/loongson3.dtsi @@ -0,0 +1,235 @@ +// SPDX-License-Identifier: GPL-2.0 +/ { + /* + * Loongson-3 may have as many as 4 nodes, each node has 4 cores. + * Each core has its own pcache and cores in the same node share scache. + */ + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + compatible = "loongson,loongson3"; + device_type = "cpu"; + reg = <0x0>; + l2-cache = <&vcache0>; + next-level-cache = <&scache0>; + }; + + cpu@1 { + compatible = "loongson,loongson3"; + device_type = "cpu"; + reg = <0x1>; + l2-cache = <&vcache1>; + next-level-cache = <&scache0>; + }; + + cpu@2 { + compatible = "loongson,loongson3"; + device_type = "cpu"; + reg = <0x2>; + l2-cache = <&vcache2>; + next-level-cache = <&scache0>; + }; + + cpu@3 { + compatible = "loongson,loongson3"; + device_type = "cpu"; + reg = <0x3>; + l2-cache = <&vcache3>; + next-level-cache = <&scache0>; + }; + + cpu@4 { + compatible = "loongson,loongson3"; + device_type = "cpu"; + reg = <0x4>; + l2-cache = <&vcache4>; + next-level-cache = <&scache1>; + }; + + cpu@5 { + compatible = "loongson,loongson3"; + device_type = "cpu"; + reg = <0x5>; + l2-cache = <&vcache5>; + next-level-cache = <&scache1>; + }; + + cpu@6 { + compatible = "loongson,loongson3"; + device_type = "cpu"; + reg = <0x6>; + l2-cache = <&vcache6>; + next-level-cache = <&scache1>; + }; + + cpu@7 { + compatible = "loongson,loongson3"; + device_type = "cpu"; + reg = <0x7>; + l2-cache = <&vcache7>; + next-level-cache = <&scache1>; + }; + + cpu@8 { + compatible = "loongson,loongson3"; + device_type = "cpu"; + reg = <0x8>; + l2-cache = <&vcache8>; + next-level-cache = <&scache2>; + }; + + cpu@9 { + compatible = "loongson,loongson3"; + device_type = "cpu"; + reg = <0x9>; + l2-cache = <&vcache9>; + next-level-cache = <&scache2>; + }; + + cpu@a { + compatible = "loongson,loongson3"; + device_type = "cpu"; + reg = <0xa>; + l2-cache = <&vcachea>; + next-level-cache = <&scache2>; + }; + + cpu@b { + compatible = "loongson,loongson3"; + device_type = "cpu"; + reg = <0xb>; + l2-cache = <&vcacheb>; + next-level-cache = <&scache2>; + }; + + cpu@c { + compatible = "loongson,loongson3"; + device_type = "cpu"; + reg = <0xc>; + l2-cache = <&vcachec>; + next-level-cache = <&scache3>; + }; + + cpu@d { + compatible = "loongson,loongson3"; + device_type = "cpu"; + reg = <0xd>; + l2-cache = <&vcached>; + next-level-cache = <&scache3>; + }; + + cpu@e { + compatible = "loongson,loongson3"; + device_type = "cpu"; + reg = <0xe>; + l2-cache = <&vcachee>; + next-level-cache = <&scache3>; + }; + + cpu@f { + compatible = "loongson,loongson3"; + device_type = "cpu"; + reg = <0xf>; + l2-cache = <&vcachef>; + next-level-cache = <&scache3>; + }; + + vcache0: l2-cache0 { + compatible = "cache"; + next-level-cache = <&scache0>; + }; + + vcache1: l2-cache1 { + compatible = "cache"; + next-level-cache = <&scache0>; + }; + + vcache2: l2-cache2 { + compatible = "cache"; + next-level-cache = <&scache0>; + }; + + vcache3: l2-cache3 { + compatible = "cache"; + next-level-cache = <&scache0>; + }; + + vcache4: l2-cache4 { + compatible = "cache"; + next-level-cache = <&scache1>; + }; + + vcache5: l2-cache5 { + compatible = "cache"; + next-level-cache = <&scache1>; + }; + + vcache6: l2-cache6 { + compatible = "cahce"; + next-level-cache = <&scache1>; + }; + + vcache7: l2-cache7 { + compatible = "cache"; + next-level-cache = <&scache1>; + }; + + vcache8: l2-cache8 { + compatible = "cache"; + next-level-cache = <&scache2>; + }; + + vcache9: l2-cache9 { + compatible = "cache"; + next-level-cache = <&scache2>; + }; + + vcachea: l2-cachea { + compatible = "cache"; + next-level-cache = <&scache2>; + }; + + vcacheb: l2-cacheb { + compatible = "cache"; + next-level-cache = <&scache2>; + }; + + vcachec: l2-cachec { + compatible = "cache"; + next-level-cache = <&scache3>; + }; + + vcached: l2-cached { + compatible = "cache"; + next-level-cache = <&scache3>; + }; + + vcachee: l2-cachee { + compatible = "cache"; + next-level-cache = <&scache3>; + }; + + vcachef: l2-cachef { + compatible = "cache"; + next-level-cache = <&scache3>; + }; + + scache0: l3-cache0 { + compatible = "cache"; + }; + + scache1: l3-cache1 { + compatible = "cache"; + }; + + scache2: l3-cache2 { + compatible = "cache"; + }; + + scache3: l3-cache3 { + compatible = "cache"; + }; + }; +}; diff --git a/arch/loongarch/boot/dts/loongson/loongson3_ls7a.dts b/arch/loongarch/boot/dts/loongson/loongson3_ls7a.dts new file mode 100644 index 000000000000..684c58c4f1c1 --- /dev/null +++ b/arch/loongarch/boot/dts/loongson/loongson3_ls7a.dts @@ -0,0 +1,159 @@ +/dts-v1/; +#include "loongson3.dtsi" +/ { + model = "loongson,generic"; + compatible = "loongson,loongson3"; + #address-cells = <2>; + #size-cells = <2>; + + memory { + name = "memory"; + device_type = "memory"; + }; + + cpuic: interrupt-controller { + compatible = "loongson,cpu-interrupt-controller"; + interrupt-controller; + #interrupt-cells = <1>; + }; + + platic: interrupt-controller@1bd00040 { + compatible = "loongson,ls7a-interrupt-controller"; + interrupt-controller; + #interrupt-cells = <1>; + interrupts = <3>; + interrupt-parent = <&cpuic>; + }; + + aliases { + i2c0 = &i2c0; + i2c1 = &i2c1; + i2c2 = &i2c2; + i2c3 = &i2c3; + i2c4 = &i2c4; + i2c5 = &i2c5; + }; + + platform { + compatible = "loongson,nbus", "simple-bus"; + #address-cells = <2>; + #size-cells = <1>; + enable-lpc-irq; + ranges = <0x000 0x00000000 0x000 0x00000000 0x20000000 + 0x000 0x40000000 0x000 0x40000000 0x40000000 + 0xe00 0x00000000 0xe00 0x00000000 0x80000000>; + + uart0: serial@10080000 { + device_type = "serial"; + compatible = "ns16550,loongson"; + reg = <0 0x10080000 0x100>; + clock-frequency = <50000000>; + interrupts = <72>; + interrupt-parent = <&platic>; + no-loopback-test; + }; + + gpio: gpio@100e0000 { + compatible = "loongson,ls7a-gpio"; + reg = <0 0x100e0000 0xc00>; + gpio-controller; + #gpio-cells = <2>; + ngpios = <57>; + conf_offset = <0x800>; + out_offset = <0x900>; + in_offset = <0xa00>; + gpio_base = <16>; + interrupts = <124>; + interrupt-parent = <&platic>; + }; + + i2c0: i2c@10090000 { + compatible = "loongson,ls7a-i2c"; + reg = <0 0x10090000 0x8>; + interrupts = <73>; + interrupt-parent = <&platic>; + #address-cells = <1>; + #size-cells = <0>; + }; + + i2c1: i2c@10090100 { + compatible = "loongson,ls7a-i2c"; + reg = <0 0x10090100 0x8>; + interrupts = <73>; + interrupt-parent = <&platic>; + #address-cells = <1>; + #size-cells = <0>; + }; + + i2c2: i2c@10090200 { + compatible = "loongson,ls7a-i2c"; + reg = <0 0x10090200 0x8>; + interrupts = <73>; + interrupt-parent = <&platic>; + #address-cells = <1>; + #size-cells = <0>; + }; + + i2c3: i2c@10090300 { + compatible = "loongson,ls7a-i2c"; + reg = <0 0x10090300 0x8>; + interrupts = <73>; + interrupt-parent = <&platic>; + #address-cells = <1>; + #size-cells = <0>; + }; + + i2c4: i2c@10090400 { + compatible = "loongson,ls7a-i2c"; + reg = <0 0x10090400 0x8>; + interrupts = <73>; + interrupt-parent = <&platic>; + #address-cells = <1>; + #size-cells = <0>; + }; + + i2c5: i2c@10090500 { + compatible = "loongson,ls7a-i2c"; + reg = <0 0x10090500 0x8>; + interrupts = <73>; + interrupt-parent = <&platic>; + #address-cells = <1>; + #size-cells = <0>; + }; + + rtc0: rtc@100d0100 { + compatible = "loongson,ls7a-rtc"; + reg = <0 0x100d0100 0x100>; + interrupts = <116>; + interrupt-parent = <&platic>; + }; + + pwm0: pwm@100a0000 { + compatible = "loongson,ls7a-pwm"; + reg = <0 0x100a0000 0x10>; + interrupts = <88>; + interrupt-parent = <&platic>; + }; + + pwm1: pwm@100a0100 { + compatible = "loongson,ls7a-pwm"; + reg = <0 0x100a0100 0x10>; + interrupts = <89>; + interrupt-parent = <&platic>; + }; + + pwm2: pwm@100a0200 { + compatible = "loongson,ls7a-pwm"; + reg = <0 0x100a0200 0x10>; + interrupts = <90>; + interrupt-parent = <&platic>; + }; + + pwm3: pwm@100a0300 { + compatible = "loongson,ls7a-pwm"; + reg = <0 0x100a0300 0x10>; + interrupts = <91>; + interrupt-parent = <&platic>; + }; + }; +}; diff --git a/arch/loongarch/include/asm/bootinfo.h b/arch/loongarch/include/asm/bootinfo.h index 90e35850cb26..6d6daaa1a831 100644 --- a/arch/loongarch/include/asm/bootinfo.h +++ b/arch/loongarch/include/asm/bootinfo.h @@ -31,6 +31,10 @@ extern char arcs_cmdline[COMMAND_LINE_SIZE]; */ extern unsigned long fw_arg0, fw_arg1, fw_arg2, fw_arg3; +#ifdef CONFIG_USE_OF +extern unsigned long fw_passed_dtb; +#endif + extern unsigned long initrd_start, initrd_end; /* diff --git a/arch/loongarch/include/asm/fixmap.h b/arch/loongarch/include/asm/fixmap.h index 7bcc0f515ab3..c915a46379b8 100644 --- a/arch/loongarch/include/asm/fixmap.h +++ b/arch/loongarch/include/asm/fixmap.h @@ -14,15 +14,59 @@ #include #include +#ifdef CONFIG_HIGHMEM +#include +#include +#endif + +/* + * Here we define all the compile-time 'special' virtual + * addresses. The point is to have a constant address at + * compile time, but to set the physical address only + * in the boot process. We allocate these special addresses + * from the end of virtual memory (0xfffff000) backwards. + * Also this lets us do fail-safe vmalloc(), we + * can guarantee that these special addresses and + * vmalloc()-ed addresses never overlap. + * + * these 'compile-time allocated' memory buffers are + * fixed-size 4k pages. (or larger if used with an increment + * highger than 1) use fixmap_set(idx,phys) to associate + * physical memory with fixmap indices. + * + * TLB entries of such buffers will not be flushed across + * task switches. + */ #define NR_FIX_BTMAPS 64 +/* + * on UP currently we will have no trace of the fixmap mechanizm, + * no page table allocations, etc. This might change in the + * future, say framebuffers for the console driver(s) could be + * fix-mapped? + */ enum fixed_addresses { FIX_HOLE, +#define FIX_N_COLOURS 8 + FIX_CMAP_BEGIN, + FIX_CMAP_END = FIX_CMAP_BEGIN + (FIX_N_COLOURS * 2), +#ifdef CONFIG_HIGHMEM + /* reserved pte's for temporary kernel mappings */ + FIX_KMAP_BEGIN = FIX_CMAP_END + 1, + FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, +#endif FIX_EARLYCON_MEM_BASE, __end_of_fixed_addresses }; +/* + * used by vmalloc.c. + * + * Leave one empty page between vmalloc'ed areas and + * the start of the fixmap, and leave one page empty + * at the top of mem.. + */ #define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) #define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) #define FIXMAP_PAGE_IO PAGE_KERNEL_SUC @@ -32,4 +76,11 @@ extern void __set_fixmap(enum fixed_addresses idx, #include +/* + * Called from pgtable_init() + */ +extern void fixrange_init(unsigned long start, unsigned long end, + pgd_t *pgd_base); + + #endif diff --git a/arch/loongarch/include/asm/highmem.h b/arch/loongarch/include/asm/highmem.h new file mode 100644 index 000000000000..c555be1008d4 --- /dev/null +++ b/arch/loongarch/include/asm/highmem.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * highmem.h: virtual kernel memory mappings for high memory + * + * Used in CONFIG_HIGHMEM systems for memory pages which + * are not addressable by direct kernel virtual addresses. + * + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#ifndef _ASM_HIGHMEM_H +#define _ASM_HIGHMEM_H + +#ifdef __KERNEL__ + +#include +#include +#include +#include +#include + +/* declarations for highmem.c */ +extern unsigned long highstart_pfn, highend_pfn; + +extern pte_t *pkmap_page_table; + +/* + * Right now we initialize only a single pte table. It can be extended + * easily, subsequent pte tables have to be allocated in one physical + * chunk of RAM. + */ +#define LAST_PKMAP 1024 + +#define LAST_PKMAP_MASK (LAST_PKMAP-1) +#define PKMAP_NR(virt) ((virt-PKMAP_BASE) >> PAGE_SHIFT) +#define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT)) + +extern void *kmap_high(struct page *page); +extern void kunmap_high(struct page *page); + +extern void *kmap(struct page *page); +extern void kunmap(struct page *page); +extern void *kmap_atomic(struct page *page); +extern void *kmap_atomic_pfn(unsigned long pfn); + +#define flush_cache_kmaps() BUG_ON(cpu_has_dc_aliases) + +extern void kmap_init(void); + +#define kmap_prot PAGE_KERNEL + +#endif /* __KERNEL__ */ + +#endif /* _ASM_HIGHMEM_H */ diff --git a/arch/loongarch/include/asm/kmap_types.h b/arch/loongarch/include/asm/kmap_types.h index 5c2173103727..16665dc2431b 100644 --- a/arch/loongarch/include/asm/kmap_types.h +++ b/arch/loongarch/include/asm/kmap_types.h @@ -2,6 +2,12 @@ #ifndef _ASM_KMAP_TYPES_H #define _ASM_KMAP_TYPES_H +#ifdef CONFIG_DEBUG_HIGHMEM +#define __WITH_KM_FENCE +#endif + #include +#undef __WITH_KM_FENCE + #endif diff --git a/arch/loongarch/include/asm/mach-loongson64/boot_param.h b/arch/loongarch/include/asm/mach-loongson64/boot_param.h index 4c1c5fe0ec06..7445366dd703 100644 --- a/arch/loongarch/include/asm/mach-loongson64/boot_param.h +++ b/arch/loongarch/include/asm/mach-loongson64/boot_param.h @@ -101,6 +101,8 @@ struct loongson_system_configuration { u64 vgabios_addr; }; +extern void *loongson_fdt_blob; +extern u32 __dtb_loongson3_ls7a_begin[]; extern struct boot_params *efi_bp; extern struct loongson_board_info b_info; extern struct loongsonlist_mem_map *loongson_mem_map; diff --git a/arch/loongarch/include/asm/setup.h b/arch/loongarch/include/asm/setup.h index ebd9d210f287..e7ad63079c07 100644 --- a/arch/loongarch/include/asm/setup.h +++ b/arch/loongarch/include/asm/setup.h @@ -18,4 +18,15 @@ extern void per_cpu_trap_init(int cpu); extern void set_handler(unsigned long offset, void *addr, unsigned long len); extern void set_merr_handler(unsigned long offset, void *addr, unsigned long len); +#ifdef CONFIG_USE_OF + +struct boot_param_header; + +extern void __dt_setup_arch(void *bph); +extern void device_tree_init(void); + +#else /* CONFIG_OF */ +static inline void device_tree_init(void) { } +#endif /* CONFIG_OF */ + #endif /* __SETUP_H */ diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S index 7c47592d1aa3..e62f4b34bcf5 100644 --- a/arch/loongarch/kernel/head.S +++ b/arch/loongarch/kernel/head.S @@ -36,6 +36,15 @@ SYM_CODE_START(kernel_entry) # kernel entry point la.abs t0, 0f jirl zero, t0, 0 0: +#ifdef CONFIG_USE_OF + + li.d t1, -2 + or t2, a1, zero + beq a0, t1, dtb_found + + li.d t2, 0 +dtb_found: +#endif la t0, __bss_start # clear .bss st.d zero, t0, 0 la t1, __bss_stop - LONGSIZE @@ -53,6 +62,11 @@ SYM_CODE_START(kernel_entry) # kernel entry point la t0, fw_arg3 st.d a3, t0, 0 +#ifdef CONFIG_USE_OF + la t0, fw_passed_dtb + st.d t2, t0, 0 +#endif + /* KScratch3 used for percpu base, initialized as 0 */ csrwr zero, PERCPU_BASE_KS /* GPR21 used for percpu base (runtime), initialized as 0 */ diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index f4522706a0be..f1f0163b4639 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -10,11 +10,14 @@ #include #include #include +#include #include #include #include #include #include +#include +#include #include #include @@ -31,6 +34,8 @@ #include #include +#include + DEFINE_PER_CPU(unsigned long, kernelsp); unsigned long fw_arg0, fw_arg1, fw_arg2, fw_arg3; struct cpuinfo_loongarch cpu_data[NR_CPUS] __read_mostly; @@ -190,6 +195,70 @@ static unsigned long __init init_initrd(void) #endif +static void __init dt_bootmem_init(void) +{ + phys_addr_t ramstart, ramend; + unsigned long start, end; + int i; + + ramstart = memblock_start_of_DRAM(); + ramend = memblock_end_of_DRAM(); + + /* + * Sanity check any INITRD first. We don't take it into account + * for bootmem setup initially, rely on the end-of-kernel-code + * as our memory range starting point. Once bootmem is inited we + * will reserve the area used for the initrd. + */ + init_initrd(); + + /* Reserve memory occupied by kernel. */ + memblock_reserve(__pa_symbol(&_text), + __pa_symbol(&_end) - __pa_symbol(&_text)); + + /* + * Reserve any memory between the start of RAM and PHYS_OFFSET + */ + if (ramstart > PHYS_OFFSET) + memblock_reserve(PHYS_OFFSET, ramstart - PHYS_OFFSET); + + if (PFN_UP(ramstart) > ARCH_PFN_OFFSET) { + pr_info("Wasting %lu bytes for tracking %lu unused pages\n", + (unsigned long)((PFN_UP(ramstart) - ARCH_PFN_OFFSET) * sizeof(struct page)), + (unsigned long)(PFN_UP(ramstart) - ARCH_PFN_OFFSET)); + } + + min_low_pfn = ARCH_PFN_OFFSET; + max_pfn = PFN_DOWN(ramend); + for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, NULL) { + /* + * Skip highmem here so we get an accurate max_low_pfn if low + * memory stops short of high memory. + * If the region overlaps HIGHMEM_START, end is clipped so + * max_pfn excludes the highmem portion. + */ + if (start >= PFN_DOWN(HIGHMEM_START)) + continue; + if (end > PFN_DOWN(HIGHMEM_START)) + end = PFN_DOWN(HIGHMEM_START); + if (end > max_low_pfn) + max_low_pfn = end; + } + + if (min_low_pfn >= max_low_pfn) + panic("Incorrect memory mapping !!!"); + + if (max_pfn > PFN_DOWN(HIGHMEM_START)) { +#ifdef CONFIG_HIGHMEM + highstart_pfn = PFN_DOWN(HIGHMEM_START); + highend_pfn = max_pfn; +#else + max_low_pfn = PFN_DOWN(HIGHMEM_START); + max_pfn = max_low_pfn; +#endif + } +} + static int usermem __initdata; static int __init early_parse_mem(char *p) @@ -469,6 +538,12 @@ static void __init arch_mem_init(char **cmdline_p) check_kernel_sections_mem(); + early_init_fdt_reserve_self(); + early_init_fdt_scan_reserved_mem(); + + if (loongson_fdt_blob) + dt_bootmem_init(); + #ifdef CONFIG_PROC_VMCORE if (setup_elfcorehdr && setup_elfcorehdr_size) { printk(KERN_INFO "kdump reserved memory at %lx-%lx\n", @@ -488,6 +563,8 @@ static void __init arch_mem_init(char **cmdline_p) reserve_oldmem_region(node, start_pfn, end_pfn); } + device_tree_init(); + /* * In order to reduce the possibility of kernel panic when failed to * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate @@ -506,6 +583,8 @@ static void __init arch_mem_init(char **cmdline_p) memblock_reserve(__pa_symbol(&__nosave_begin), __pa_symbol(&__nosave_end) - __pa_symbol(&__nosave_begin)); + fdt_init_reserved_mem(); + memblock_dump_all(); early_memtest(PFN_PHYS(ARCH_PFN_OFFSET), PFN_PHYS(max_low_pfn)); @@ -629,3 +708,12 @@ void __init setup_arch(char **cmdline_p) unwind_init(); } + +#ifdef CONFIG_USE_OF +unsigned long fw_passed_dtb; + +void __init __dt_setup_arch(void *bph) +{ + early_init_dt_scan(bph); +} +#endif diff --git a/arch/loongarch/loongson64/env.c b/arch/loongarch/loongson64/env.c index e8ebcf12802c..acf31fa16c90 100644 --- a/arch/loongarch/loongson64/env.c +++ b/arch/loongarch/loongson64/env.c @@ -21,13 +21,15 @@ struct boot_params *efi_bp; struct loongsonlist_mem_map *loongson_mem_map; struct loongsonlist_vbios *pvbios; struct loongson_system_configuration loongson_sysconf; -EXPORT_SYMBOL(loongson_sysconf); u64 loongson_chipcfg[MAX_PACKAGES]; u64 loongson_chiptemp[MAX_PACKAGES]; u64 loongson_freqctrl[MAX_PACKAGES]; unsigned long long smp_group[MAX_PACKAGES]; +void *loongson_fdt_blob; +EXPORT_SYMBOL(loongson_sysconf); + static void __init register_addrs_set(u64 *registers, const u64 addr, int num) { u64 i; diff --git a/arch/loongarch/loongson64/init.c b/arch/loongarch/loongson64/init.c index 39b4162ccc07..fafd7fd33397 100644 --- a/arch/loongarch/loongson64/init.c +++ b/arch/loongarch/loongson64/init.c @@ -29,6 +29,8 @@ struct loongson_board_info b_info; static const char dmi_empty_string[] = " "; +extern void *loongson_fdt_blob; + static const char *dmi_string_parse(const struct dmi_header *dm, u8 s) { const u8 *bp = ((u8 *) dm) + dm->length; diff --git a/arch/loongarch/loongson64/setup.c b/arch/loongarch/loongson64/setup.c index 701708769bc0..4310f2676a28 100644 --- a/arch/loongarch/loongson64/setup.c +++ b/arch/loongarch/loongson64/setup.c @@ -10,6 +10,8 @@ */ #include #include +#include +#include #include #ifdef CONFIG_VT @@ -27,6 +29,8 @@ const char *get_system_type(void) void __init plat_mem_setup(void) { + if (loongson_fdt_blob) + __dt_setup_arch(loongson_fdt_blob); } static int __init register_gop_device(void) @@ -39,3 +43,14 @@ static int __init register_gop_device(void) return PTR_ERR_OR_ZERO(pd); } subsys_initcall(register_gop_device); + +#define NR_CELLS 6 + +void __init device_tree_init(void) +{ + if (!initial_boot_params) + return; + + if (early_init_dt_verify(initial_boot_params)) + unflatten_and_copy_device_tree(); +} diff --git a/arch/loongarch/mm/pgtable-64.c b/arch/loongarch/mm/pgtable-64.c index 7f3fbe3afc88..116165bd2dd0 100644 --- a/arch/loongarch/mm/pgtable-64.c +++ b/arch/loongarch/mm/pgtable-64.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -102,6 +103,7 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, void __init pagetable_init(void) { + unsigned long vaddr; pgd_t *pgd_base; /* Initialize the entire pgd. */ @@ -114,4 +116,8 @@ void __init pagetable_init(void) pmd_init((unsigned long)invalid_pmd_table, (unsigned long)invalid_pte_table); #endif pgd_base = swapper_pg_dir; + /* + * Fixed mappings: + */ + vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; } -- Gitee From facb87faecce1a3242436ddb9e0e10710dd46a8f Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 31 Dec 2020 15:13:33 +0800 Subject: [PATCH 47/59] LoongArch: Add debug machanism support Debug machanism include: gdb, ftrace, kprobe, uprobe and jump_label Change-Id: I8ac3245be9a3f9999ef179f06f8b431bfc347e25 Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 15 + arch/loongarch/Kconfig.debug | 7 + arch/loongarch/Makefile | 5 + arch/loongarch/include/asm/debug.h | 18 + arch/loongarch/include/asm/ftrace.h | 50 ++ arch/loongarch/include/asm/inst.h | 118 +++++ arch/loongarch/include/asm/jump_label.h | 62 +++ arch/loongarch/include/asm/kgdb.h | 32 ++ arch/loongarch/include/asm/kprobes.h | 83 ++++ arch/loongarch/include/asm/module.h | 12 + arch/loongarch/include/asm/module.lds.h | 9 + arch/loongarch/include/asm/stackframe.h | 4 + arch/loongarch/include/asm/uprobes.h | 40 ++ arch/loongarch/kernel/Makefile | 21 +- arch/loongarch/kernel/alternative.c | 22 - arch/loongarch/kernel/entry.S | 4 + arch/loongarch/kernel/ftrace.c | 97 ++++ arch/loongarch/kernel/ftrace_dyn.c | 355 ++++++++++++++ arch/loongarch/kernel/inst.c | 321 +++++++++++++ arch/loongarch/kernel/jump_label.c | 40 ++ arch/loongarch/kernel/kgdb.c | 598 ++++++++++++++++++++++++ arch/loongarch/kernel/kprobes.c | 420 +++++++++++++++++ arch/loongarch/kernel/mcount.S | 94 ++++ arch/loongarch/kernel/mcount_dyn.S | 162 +++++++ arch/loongarch/kernel/module.c | 18 +- arch/loongarch/kernel/setup.c | 17 + arch/loongarch/kernel/spinlock_test.c | 125 +++++ arch/loongarch/kernel/traps.c | 6 + arch/loongarch/kernel/unaligned.c | 39 ++ arch/loongarch/kernel/unwind_orc.c | 11 + arch/loongarch/kernel/uprobes.c | 239 ++++++++++ arch/loongarch/mm/fault.c | 9 + scripts/recordmcount.c | 40 ++ 33 files changed, 3068 insertions(+), 25 deletions(-) create mode 100644 arch/loongarch/include/asm/debug.h create mode 100644 arch/loongarch/include/asm/ftrace.h create mode 100644 arch/loongarch/include/asm/jump_label.h create mode 100644 arch/loongarch/include/asm/kgdb.h create mode 100644 arch/loongarch/include/asm/kprobes.h create mode 100644 arch/loongarch/include/asm/module.lds.h create mode 100644 arch/loongarch/include/asm/uprobes.h create mode 100644 arch/loongarch/kernel/ftrace.c create mode 100644 arch/loongarch/kernel/ftrace_dyn.c create mode 100644 arch/loongarch/kernel/inst.c create mode 100644 arch/loongarch/kernel/jump_label.c create mode 100644 arch/loongarch/kernel/kgdb.c create mode 100644 arch/loongarch/kernel/kprobes.c create mode 100644 arch/loongarch/kernel/mcount.S create mode 100644 arch/loongarch/kernel/mcount_dyn.S create mode 100644 arch/loongarch/kernel/spinlock_test.c create mode 100644 arch/loongarch/kernel/uprobes.c diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 03c3b219f252..2022a8e5c021 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -36,6 +36,7 @@ config LOONGARCH select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE if !PREEMPTION select ARCH_SUPPORTS_ACPI select ARCH_SUPPORTS_NUMA_BALANCING + select ARCH_SUPPORTS_UPROBES select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF if 64BIT select ARCH_USE_QUEUED_RWLOCKS @@ -66,6 +67,8 @@ config LOONGARCH select HANDLE_DOMAIN_IRQ select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_COMPILER_H + select HAVE_ARCH_JUMP_LABEL + select HAVE_ARCH_KGDB select HAVE_ARCH_MMAP_RND_BITS if MMU select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK @@ -73,17 +76,26 @@ config LOONGARCH select HAVE_ASM_MODVERSIONS select HAVE_CONTEXT_TRACKING select HAVE_COPY_THREAD_TLS + select HAVE_C_RECORDMCOUNT select HAVE_DEBUG_KMEMLEAK select HAVE_DEBUG_STACKOVERFLOW select HAVE_DMA_CONTIGUOUS + select HAVE_DYNAMIC_FTRACE + select HAVE_DYNAMIC_FTRACE_WITH_REGS select HAVE_EBPF_JIT if 64BIT select HAVE_EXIT_THREAD select HAVE_FAST_GUP + select HAVE_FTRACE_MCOUNT_RECORD + select HAVE_FUNCTION_GRAPH_TRACER + select HAVE_FUNCTION_TRACER select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_GENERIC_VDSO select HAVE_IOREMAP_PROT select HAVE_IRQ_EXIT_ON_IRQ_STACK select HAVE_IRQ_TIME_ACCOUNTING + select HAVE_KPROBES + select HAVE_KPROBES_ON_FTRACE + select HAVE_KRETPROBES select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP select HAVE_MOD_ARCH_SPECIFIC @@ -159,6 +171,9 @@ config SCHED_OMIT_FRAME_POINTER bool default y +config ARCH_SUPPORTS_UPROBES + bool + config SYS_SUPPORTS_HOTPLUG_CPU bool diff --git a/arch/loongarch/Kconfig.debug b/arch/loongarch/Kconfig.debug index 51639222024c..300812e5612a 100644 --- a/arch/loongarch/Kconfig.debug +++ b/arch/loongarch/Kconfig.debug @@ -106,3 +106,10 @@ config DEBUG_ZBOOT After the compressed kernel support works, please disable this option to reduce the kernel image size and speed up the booting procedure a little. + +config SPINLOCK_TEST + bool "Enable spinlock timing tests in debugfs" + depends on DEBUG_FS + default n + help + Add several files to the debugfs to test spinlock speed. diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index 4320512e4273..996175d864bb 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -30,6 +30,11 @@ ifneq ($(SUBARCH),$(ARCH)) endif endif +ifdef CONFIG_DYNAMIC_FTRACE + KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY + CC_FLAGS_FTRACE := -fpatchable-function-entry=2 +endif + cflags-y += $(call cc-option, -mno-check-zero-division) ifdef CONFIG_64BIT diff --git a/arch/loongarch/include/asm/debug.h b/arch/loongarch/include/asm/debug.h new file mode 100644 index 000000000000..8395000b575d --- /dev/null +++ b/arch/loongarch/include/asm/debug.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ + +#ifndef __LOONGARCH_ASM_DEBUG_H__ +#define __LOONGARCH_ASM_DEBUG_H__ + +#include + +/* + * loongarch_debugfs_dir corresponds to the "loongarch" directory at the top + * level of the DebugFS hierarchy. LoongArch-specific DebugFS entires should + * be placed beneath this directory. + */ +extern struct dentry *loongarch_debugfs_dir; + +#endif /* __LOONGARCH_ASM_DEBUG_H__ */ diff --git a/arch/loongarch/include/asm/ftrace.h b/arch/loongarch/include/asm/ftrace.h new file mode 100644 index 000000000000..452d468dee74 --- /dev/null +++ b/arch/loongarch/include/asm/ftrace.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive for + * more details. + * + * Author: Huacai Chen + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ + +#ifndef _ASM_LOONGARCH_FTRACE_H +#define _ASM_LOONGARCH_FTRACE_H + +#ifdef CONFIG_FUNCTION_TRACER +#define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */ + +#ifndef __ASSEMBLY__ +#ifndef CONFIG_DYNAMIC_FTRACE +extern void _mcount(void); +#define mcount _mcount +#endif + +#ifdef CONFIG_DYNAMIC_FTRACE +#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR +static inline unsigned long ftrace_call_adjust(unsigned long addr) +{ + return addr; +} + +struct dyn_arch_ftrace { +}; + +struct module_ftrace_tramp { + u32 addu16id; + u32 lu32id; + u32 lu52id; + u32 jirl; +}; + +struct dyn_ftrace; +int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec); +#define ftrace_init_nop ftrace_init_nop + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +#define ARCH_SUPPORTS_FTRACE_OPS 1 +#endif +#endif /* CONFIG_DYNAMIC_FTRACE */ +#endif /* __ASSEMBLY__ */ +#endif /* CONFIG_FUNCTION_TRACER */ +#endif /* _ASM_LOONGARCH_FTRACE_H */ diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h index 463a1edd86e2..ccd57d448e5b 100644 --- a/arch/loongarch/include/asm/inst.h +++ b/arch/loongarch/include/asm/inst.h @@ -10,8 +10,12 @@ */ #ifndef _ASM_INST_H #define _ASM_INST_H +#include #include +#include +#include + #include /* HACHACHAHCAHC ... */ @@ -258,7 +262,95 @@ do { \ #define LOONGARCH_INSN_SIZE sizeof(union loongarch_instruction) +enum loongarch_gpr { + LOONGARCH_GPR_ZERO = 0, + LOONGARCH_GPR_RA = 1, + LOONGARCH_GPR_TP = 2, + LOONGARCH_GPR_SP = 3, + LOONGARCH_GPR_A0 = 4, + LOONGARCH_GPR_A1, + LOONGARCH_GPR_A2, + LOONGARCH_GPR_A3, + LOONGARCH_GPR_A4, + LOONGARCH_GPR_A5, + LOONGARCH_GPR_A6, + LOONGARCH_GPR_A7, + LOONGARCH_GPR_V0 = 4, + LOONGARCH_GPR_V1 = 5, + LOONGARCH_GPR_T0 = 12, + LOONGARCH_GPR_T1, + LOONGARCH_GPR_T2, + LOONGARCH_GPR_T3, + LOONGARCH_GPR_T4, + LOONGARCH_GPR_T5, + LOONGARCH_GPR_T6, + LOONGARCH_GPR_T7, + LOONGARCH_GPR_T8, + LOONGARCH_GPR_FP = 22, + LOONGARCH_GPR_S0 = 23, + LOONGARCH_GPR_S1, + LOONGARCH_GPR_S2, + LOONGARCH_GPR_S3, + LOONGARCH_GPR_S4, + LOONGARCH_GPR_S5, + LOONGARCH_GPR_S6, + LOONGARCH_GPR_S7, + LOONGARCH_GPR_S8, + LOONGARCH_GPR_MAX +}; + #define INSN_NOP 0x03400000 +#define INSN_BREAK 0x0002a000 + +#define ADDR_IMMMASK_ADDU16ID 0x00000000FFFF0000 +#define ADDR_IMMMASK_LU32ID 0x000FFFFF00000000 +#define ADDR_IMMMASK_LU52ID 0xFFF0000000000000 + +#define ADDR_IMMSHIFT_ADDU16ID 16 +#define ADDR_IMMSHIFT_LU32ID 32 +#define ADDR_IMMSHIFT_LU52ID 52 + +#define ADDR_IMM(addr, INSN) ((addr & ADDR_IMMMASK_##INSN) >> ADDR_IMMSHIFT_##INSN) + +static inline bool cond_beqz(struct pt_regs *regs, int rj) +{ + return regs->regs[rj] == 0; +} + +static inline bool cond_bnez(struct pt_regs *regs, int rj) +{ + return regs->regs[rj] != 0; +} + +static inline bool cond_beq(struct pt_regs *regs, int rj, int rd) +{ + return regs->regs[rj] == regs->regs[rd]; +} + +static inline bool cond_bne(struct pt_regs *regs, int rj, int rd) +{ + return regs->regs[rj] != regs->regs[rd]; +} + +static inline bool cond_blt(struct pt_regs *regs, int rj, int rd) +{ + return (long)regs->regs[rj] < (long)regs->regs[rd]; +} + +static inline bool cond_bge(struct pt_regs *regs, int rj, int rd) +{ + return (long)regs->regs[rj] >= (long)regs->regs[rd]; +} + +static inline bool cond_bltu(struct pt_regs *regs, int rj, int rd) +{ + return regs->regs[rj] < regs->regs[rd]; +} + +static inline bool cond_bgeu(struct pt_regs *regs, int rj, int rd) +{ + return regs->regs[rj] >= regs->regs[rd]; +} static inline bool is_branch_insn(union loongarch_instruction insn) { @@ -272,4 +364,30 @@ static inline bool is_pc_insn(union loongarch_instruction insn) insn.reg1i20_format.opcode <= pcaddu18i_op; } +unsigned long bs_dest_16(unsigned long now, unsigned int si); +unsigned long bs_dest_21(unsigned long now, unsigned int h, unsigned int l); +unsigned long bs_dest_26(unsigned long now, unsigned int h, unsigned int l); + +int simu_branch(struct pt_regs *regs, union loongarch_instruction insn); +int simu_pc(struct pt_regs *regs, union loongarch_instruction insn); + +int larch_insn_read(void *addr, u32 *insnp); +int larch_insn_write(void *addr, u32 insn); +int larch_insn_patch_text(void *addr, u32 insn); + +u32 larch_insn_gen_nop(void); +u32 larch_insn_gen_b(unsigned long pc, unsigned long dest); +u32 larch_insn_gen_bl(unsigned long pc, unsigned long dest); + +u32 larch_insn_gen_addu16id(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm); +u32 larch_insn_gen_lu32id(enum loongarch_gpr rd, int imm); +u32 larch_insn_gen_lu52id(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm); + +u32 larch_insn_gen_jirl(enum loongarch_gpr rd, enum loongarch_gpr rj, + unsigned long pc, unsigned long dest); + +u32 larch_insn_gen_or(enum loongarch_gpr rd, enum loongarch_gpr rj, + enum loongarch_gpr rk); +u32 larch_insn_gen_move(enum loongarch_gpr rd, enum loongarch_gpr rj); + #endif /* _ASM_INST_H */ diff --git a/arch/loongarch/include/asm/jump_label.h b/arch/loongarch/include/asm/jump_label.h new file mode 100644 index 000000000000..2ec97c9a6ca0 --- /dev/null +++ b/arch/loongarch/include/asm/jump_label.h @@ -0,0 +1,62 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#ifndef _ASM_LOONGARCH_JUMP_LABEL_H +#define _ASM_LOONGARCH_JUMP_LABEL_H + +#ifndef __ASSEMBLY__ + +#include + +#define JUMP_LABEL_NOP_SIZE 4 + +#ifdef CONFIG_64BIT +#define WORD_INSN ".dword" +#else +#define WORD_INSN ".word" +#endif + +static __always_inline bool arch_static_branch(struct static_key *key, bool branch) +{ + asm_volatile_goto("1:\tnop\n\t" + ".pushsection __jump_table, \"aw\"\n\t" + WORD_INSN " 1b, %l[l_yes], %0\n\t" + ".popsection\n\t" + : : "i" (&((char *)key)[branch]) : : l_yes); + + return false; +l_yes: + return true; +} + +static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) +{ + asm_volatile_goto("1:\tb %l[l_yes]\n\t" + ".pushsection __jump_table, \"aw\"\n\t" + WORD_INSN " 1b, %l[l_yes], %0\n\t" + ".popsection\n\t" + : : "i" (&((char *)key)[branch]) : : l_yes); + + return false; +l_yes: + return true; +} + +#ifdef CONFIG_64BIT +typedef u64 jump_label_t; +#else +typedef u32 jump_label_t; +#endif + +struct jump_entry { + jump_label_t code; + jump_label_t target; + jump_label_t key; +}; + +#endif /* __ASSEMBLY__ */ +#endif /* _ASM_LOONGARCH_JUMP_LABEL_H */ diff --git a/arch/loongarch/include/asm/kgdb.h b/arch/loongarch/include/asm/kgdb.h new file mode 100644 index 000000000000..67f2d3a69eec --- /dev/null +++ b/arch/loongarch/include/asm/kgdb.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_KGDB_H_ +#define __ASM_KGDB_H_ + +#ifdef __KERNEL__ + +#ifdef CONFIG_32BIT +#define KGDB_GDB_REG_SIZE 32 +#define GDB_SIZEOF_REG sizeof(u32) +#else /* CONFIG_CPU_32BIT */ +#define KGDB_GDB_REG_SIZE 64 +#define GDB_SIZEOF_REG sizeof(u64) +#endif + +#define BUFMAX 2048 +#define DBG_ALL_REG_NUM 78 +#define DBG_MAX_REG_NUM 33 +#define NUMREGBYTES (DBG_MAX_REG_NUM * sizeof(GDB_SIZEOF_REG)) +#define NUMCRITREGBYTES (12 * sizeof(GDB_SIZEOF_REG)) +#define BREAK_INSTR_SIZE 4 +#define CACHE_FLUSH_IS_SAFE 0 + +extern void arch_kgdb_breakpoint(void); +extern void *saved_vectors[32]; +extern void handle_exception(struct pt_regs *regs); +extern void breakinst(void); +extern int kgdb_ll_trap(int cmd, const char *str, + struct pt_regs *regs, long err, int trap, int sig); + +#endif /* __KERNEL__ */ + +#endif /* __ASM_KGDB_H_ */ diff --git a/arch/loongarch/include/asm/kprobes.h b/arch/loongarch/include/asm/kprobes.h new file mode 100644 index 000000000000..25e83fa7e3a9 --- /dev/null +++ b/arch/loongarch/include/asm/kprobes.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Kernel Probes (KProbes) + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _ASM_KPROBES_H +#define _ASM_KPROBES_H + +#include + +#ifdef CONFIG_KPROBES +#include +#include + +#include +#include +#include + +#define __ARCH_WANT_KPROBES_INSN_SLOT + +struct kprobe; +struct pt_regs; + +typedef union loongarch_instruction kprobe_opcode_t; + +#define MAX_INSN_SIZE 2 + +#define flush_insn_slot(p) \ +do { \ + if (p->addr) \ + flush_icache_range((unsigned long)p->addr, \ + (unsigned long)p->addr + \ + (MAX_INSN_SIZE * sizeof(kprobe_opcode_t))); \ +} while (0) + + +#define kretprobe_blacklist_size 0 + +void arch_remove_kprobe(struct kprobe *p); +int kprobe_fault_handler(struct pt_regs *regs, int trapnr); + +/* Architecture specific copy of original instruction*/ +struct arch_specific_insn { + /* copy of the original instruction */ + kprobe_opcode_t *insn; +}; + +struct prev_kprobe { + struct kprobe *kp; + unsigned long status; + unsigned long old_SR; + unsigned long saved_SR; + unsigned long saved_era; +}; + +/* per-cpu kprobe control block */ +struct kprobe_ctlblk { + unsigned long kprobe_status; + unsigned long kprobe_old_SR; + unsigned long kprobe_saved_SR; + unsigned long kprobe_saved_era; + /* Per-thread fields, used while emulating branches */ + unsigned long flags; + unsigned long target_era; + struct prev_kprobe prev_kprobe; +}; + +extern int kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data); + +#endif /* CONFIG_KPROBES */ +#endif /* _ASM_KPROBES_H */ diff --git a/arch/loongarch/include/asm/module.h b/arch/loongarch/include/asm/module.h index b2d69083aad6..92ca64238362 100644 --- a/arch/loongarch/include/asm/module.h +++ b/arch/loongarch/include/asm/module.h @@ -16,6 +16,18 @@ struct mod_arch_specific { int *orc_unwind_ip; struct orc_entry *orc_unwind; #endif +#ifdef CONFIG_DYNAMIC_FTRACE + void *ftrace_trampolines[1 + CONFIG_DYNAMIC_FTRACE_WITH_REGS]; +#endif }; +#ifndef CONFIG_DYNAMIC_FTRACE +static inline int apply_ftrace_tramp(struct module *mod, void *addr, size_t size) +{ + return 0; +} +#else +extern int apply_ftrace_tramp(struct module *mod, void *addr, size_t size); +#endif + #endif /* _ASM_MODULE_H */ diff --git a/arch/loongarch/include/asm/module.lds.h b/arch/loongarch/include/asm/module.lds.h new file mode 100644 index 000000000000..c4735a70e5ca --- /dev/null +++ b/arch/loongarch/include/asm/module.lds.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2020-2021 Loongson Technology Corporation Limited */ +SECTIONS { + . = ALIGN(4); + .ftrace_tramp : { + LONG(0); LONG(0); LONG(0); LONG(0); + LONG(0); LONG(0); LONG(0); LONG(0); + } +} diff --git a/arch/loongarch/include/asm/stackframe.h b/arch/loongarch/include/asm/stackframe.h index d6cd7d92305b..440bd918a4ca 100644 --- a/arch/loongarch/include/asm/stackframe.h +++ b/arch/loongarch/include/asm/stackframe.h @@ -150,6 +150,10 @@ cfi_st u0, PT_R21, \docfi csrrd u0, PERCPU_BASE_KS 9: +#ifdef CONFIG_KGDB + li.w t0, CSR_CRMD_WE + csrxchg t0, t0, LOONGARCH_CSR_CRMD +#endif UNWIND_HINT_REGS .endm diff --git a/arch/loongarch/include/asm/uprobes.h b/arch/loongarch/include/asm/uprobes.h new file mode 100644 index 000000000000..3760f9033e5b --- /dev/null +++ b/arch/loongarch/include/asm/uprobes.h @@ -0,0 +1,40 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#ifndef __ASM_UPROBES_H +#define __ASM_UPROBES_H + +#include +#include + +#include +#include + +/* + * We want this to be defined as union loongarch_instruction but that makes the + * generic code blow up. + */ +typedef u32 uprobe_opcode_t; + +#define MAX_UINSN_BYTES 8 +#define UPROBE_XOL_SLOT_BYTES 128 /* Max. cache line size */ + +#define UPROBE_BRK_UPROBE 0x002a000c /* break 12 */ +#define UPROBE_BRK_UPROBE_XOL 0x002a000d /* break 13 */ + +#define UPROBE_SWBP_INSN UPROBE_BRK_UPROBE +#define UPROBE_SWBP_INSN_SIZE 4 + +struct arch_uprobe { + unsigned long resume_era; + u32 insn[2]; + u32 ixol[2]; +}; + +struct arch_uprobe_task { + unsigned long saved_trap_nr; +}; + +#endif /* __ASM_UPROBES_H */ diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index 6d36333981c4..bda9428f32fd 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -12,11 +12,24 @@ obj-y += cpu-probe.o elf.o entry.o genex.o idle.o irq.o \ process.o ptrace.o reset.o setup.o signal.o io.o \ syscall.o time.o topology.o traps.o unaligned.o \ cmdline.o switch.o cacheinfo.o cmpxchg.o vdso.o \ - alternative.o + alternative.o inst.o obj-$(CONFIG_ACPI) += acpi.o obj-$(CONFIG_EFI) += efi.o +ifdef CONFIG_FUNCTION_TRACER + ifndef CONFIG_DYNAMIC_FTRACE + obj-y += mcount.o ftrace.o + CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) + else + obj-y += mcount_dyn.o ftrace_dyn.o + CFLAGS_REMOVE_ftrace_dyn.o = $(CC_FLAGS_FTRACE) + endif + CFLAGS_REMOVE_inst.o = $(CC_FLAGS_FTRACE) + CFLAGS_REMOVE_time.o = $(CC_FLAGS_FTRACE) + CFLAGS_REMOVE_perf_event.o = $(CC_FLAGS_FTRACE) +endif + obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_STACKTRACE) += stacktrace.o @@ -39,4 +52,10 @@ obj-$(CONFIG_UNWINDER_ORC) += unwind_orc.o obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_regs.o obj-$(CONFIG_HARDWARE_WATCHPOINTS) += watch.o +obj-$(CONFIG_KGDB) += kgdb.o +obj-$(CONFIG_KPROBES) += kprobes.o +obj-$(CONFIG_UPROBES) += uprobes.o +obj-$(CONFIG_JUMP_LABEL) += jump_label.o +obj-$(CONFIG_SPINLOCK_TEST) += spinlock_test.o + CPPFLAGS_vmlinux.lds := $(KBUILD_CFLAGS) diff --git a/arch/loongarch/kernel/alternative.c b/arch/loongarch/kernel/alternative.c index 7c6593a1d1e7..1927b82c6ff8 100644 --- a/arch/loongarch/kernel/alternative.c +++ b/arch/loongarch/kernel/alternative.c @@ -51,28 +51,6 @@ do { \ } \ } while (0) -#define __SIGNEX(X, SIDX) ((X) >= (1 << SIDX) ? ~((1 << SIDX) - 1) | (X) : (X)) -#define SIGNEX26(X) __SIGNEX(((unsigned long)(X)), 25) -#define SIGNEX21(X) __SIGNEX(((unsigned long)(X)), 20) -#define SIGNEX20(X) __SIGNEX(((unsigned long)(X)), 19) -#define SIGNEX16(X) __SIGNEX(((unsigned long)(X)), 15) - -static inline unsigned long bs_dest_26(unsigned long now, unsigned int h, unsigned int l) -{ - return now + (SIGNEX26(h << 16 | l) << 2); -} - -static inline unsigned long bs_dest_21(unsigned long now, unsigned int h, - unsigned int l) -{ - return now + (SIGNEX21(h << 16 | l) << 2); -} - -static inline unsigned long bs_dest_16(unsigned long now, unsigned int si) -{ - return now + (SIGNEX16(si) << 2); -} - /* Use this to add nops to a buffer, then text_poke the whole buffer. */ static void __init_or_module add_nops(union loongarch_instruction *insn, int count) diff --git a/arch/loongarch/kernel/entry.S b/arch/loongarch/kernel/entry.S index 145714e35168..0fa8518c812a 100644 --- a/arch/loongarch/kernel/entry.S +++ b/arch/loongarch/kernel/entry.S @@ -59,6 +59,10 @@ SYM_FUNC_START(handle_syscall) SAVE_STATIC +#ifdef CONFIG_KGDB + li.w t1, CSR_CRMD_WE + csrxchg t1, t1, LOONGARCH_CSR_CRMD +#endif UNWIND_HINT_REGS move u0, t0 diff --git a/arch/loongarch/kernel/ftrace.c b/arch/loongarch/kernel/ftrace.c new file mode 100644 index 000000000000..ce125881033d --- /dev/null +++ b/arch/loongarch/kernel/ftrace.c @@ -0,0 +1,97 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Code for replacing ftrace calls with jumps. + * + * Author: Huacai Chen + * Copyright (C) 2020 Loongson Technology Corporation Limited + * + * Thanks goes to Steven Rostedt for writing the original x86 version. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + +/* + * As `call _mcount` follows LoongArch psABI, ra-saved operation and + * stack operation can be found before this insn. + */ +static inline bool is_ra_save_ins(union loongarch_instruction *ip) +{ + /* st.d $ra, $sp, offset */ + return ip->reg2i12_format.opcode == std_op && + ip->reg2i12_format.rj == 3 && + ip->reg2i12_format.rd == 1; +} + +static inline bool is_stack_open_ins(union loongarch_instruction *ip) +{ + /* addi.d $sp, $sp, -imm */ + return ip->reg2i12_format.opcode == addid_op && + ip->reg2i12_format.rj == 3 && + ip->reg2i12_format.rd == 3 && + ip->reg2i12_format.simmediate < 0; +} + +static int ftrace_get_parent_ra_addr(unsigned long insn_addr, int *ra_off) +{ + union loongarch_instruction *insn; + int limit = 32; + + insn = (union loongarch_instruction *)insn_addr; + + do { + insn--; + limit--; + + if (is_ra_save_ins(insn)) + *ra_off = insn->reg2i12_format.simmediate; + + } while (!is_stack_open_ins(insn) && limit); + + if (!limit) + return -EINVAL; + + return 0; +} + +void prepare_ftrace_return(unsigned long self_addr, + unsigned long callsite_sp, unsigned long old) +{ + int ra_off; + unsigned long return_hooker = (unsigned long)&return_to_handler; + + if (unlikely(ftrace_graph_is_dead())) + return; + + if (unlikely(atomic_read(¤t->tracing_graph_pause))) + return; + + if (ftrace_get_parent_ra_addr(self_addr, &ra_off)) + goto out; + + if (!function_graph_enter(old, self_addr, 0, NULL)) { + *(unsigned long *)(callsite_sp + ra_off) = return_hooker; + } + + return; + +out: + ftrace_graph_stop(); + WARN_ON(1); + return; +} +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/loongarch/kernel/ftrace_dyn.c b/arch/loongarch/kernel/ftrace_dyn.c new file mode 100644 index 000000000000..3bb1931d1d96 --- /dev/null +++ b/arch/loongarch/kernel/ftrace_dyn.c @@ -0,0 +1,355 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Based on arch/arm64/kernel/ftrace.c + * + * Copyright (C) 2013 Linaro Limited + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ + +#include +#include +#include + +#include +#include + +static int ftrace_modify_code(unsigned long pc, u32 old, u32 new, + bool validate) +{ + u32 replaced; + + if (validate) { + if (larch_insn_read((void *)pc, &replaced)) + return -EFAULT; + + if (replaced != old) + return -EINVAL; + } + + if (larch_insn_patch_text((void *)pc, new)) + return -EPERM; + + return 0; +} + +int ftrace_update_ftrace_func(ftrace_func_t func) +{ + unsigned long pc; + u32 new; + + pc = (unsigned long)&ftrace_call; + new = larch_insn_gen_bl(pc, (unsigned long)func); + + return ftrace_modify_code(pc, 0, new, false); +} + +/* + * The compiler has inserted 2 NOPs before the regular function prologue. + * T series registers are available and safe because of LoongArch psABI. + * + * At runtime, replace nop with bl to enable ftrace call and replace bl with + * nop to disable ftrace call. The bl requires us to save the original RA value, + * so here it saves RA at t0. + * details are: + * + * | Compiled | Disabled | Enabled | + * +------------+------------------------+------------------------+ + * | nop | move t0, ra | move t0, ra | + * | nop | nop | bl ftrace_caller | + * | func_body | func_body | func_body | + * + * The RA value will be recovered by ftrace_regs_entry, and restored into RA + * before returning to the regular function prologue. When a function is not + * being traced, the move t0, ra is not harmful. + */ + +int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) +{ + unsigned long pc; + u32 old, new; + + pc = rec->ip; + old = larch_insn_gen_nop(); + new = larch_insn_gen_move(LOONGARCH_GPR_T0, LOONGARCH_GPR_RA); + + return ftrace_modify_code(pc, old, new, true); +} + +static inline int __get_mod(struct module **mod, unsigned long addr) +{ + preempt_disable(); + *mod = __module_text_address(addr); + preempt_enable(); + + if (WARN_ON(!(*mod))) + return -EINVAL; + + return 0; +} + +static inline int __get_mod_caller(struct module *mod, + void *caller, unsigned long *addr) +{ + if (caller == ftrace_caller) { + *addr = (unsigned long)mod->arch.ftrace_trampolines[0]; + return 0; + } + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS + if (caller == ftrace_regs_caller) { + *addr = (unsigned long)mod->arch.ftrace_trampolines[1]; + return 0; + } +#endif + + pr_err("ftrace: no trampoline for %ps\n", caller); + return -EINVAL; +} + +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned long pc; + long offset; + u32 old, new; + + pc = rec->ip + LOONGARCH_INSN_SIZE; + offset = (long)pc - (long)addr; + + if (offset < -SZ_128M || offset >= SZ_128M) { + int ret; + struct module *mod; + + ret = __get_mod(&mod, pc); + if (ret) + return ret; + + ret = __get_mod_caller(mod, (void *)addr, &addr); + if (ret) + return ret; + } + + old = larch_insn_gen_nop(); + new = larch_insn_gen_bl(pc, addr); + + return ftrace_modify_code(pc, old, new, true); +} + +int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, + unsigned long addr) +{ + unsigned long pc; + long offset; + u32 old, new; + + pc = rec->ip + LOONGARCH_INSN_SIZE; + offset = (long)pc - (long)addr; + + if (offset < -SZ_128M || offset >= SZ_128M) { + int ret; + struct module *mod; + + ret = __get_mod(&mod, pc); + if (ret) + return ret; + + ret = __get_mod_caller(mod, (void *)addr, &addr); + if (ret) + return ret; + } + + new = larch_insn_gen_nop(); + old = larch_insn_gen_bl(pc, addr); + + return ftrace_modify_code(pc, old, new, true); +} + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, + unsigned long addr) +{ + unsigned long pc; + long offset; + u32 old, new; + + pc = rec->ip + LOONGARCH_INSN_SIZE; + offset = (long)pc - (long)addr; + + if (offset < -SZ_128M || offset >= SZ_128M) { + int ret; + struct module *mod; + + ret = __get_mod(&mod, pc); + if (ret) + return ret; + + ret = __get_mod_caller(mod, (void *)addr, &addr); + if (ret) + return ret; + + ret = __get_mod_caller(mod, (void *)old_addr, &old_addr); + if (ret) + return ret; + } + + old = larch_insn_gen_bl(pc, old_addr); + new = larch_insn_gen_bl(pc, addr); + + return ftrace_modify_code(pc, old, new, true); +} +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ + +void arch_ftrace_update_code(int command) +{ + command |= FTRACE_MAY_SLEEP; + ftrace_modify_all_code(command); +} + +int __init ftrace_dyn_arch_init(void) +{ + return 0; +} + +static inline int patch_tramp(struct module_ftrace_tramp *p, void *addr) +{ + u32 addu16id, lu32id, lu52id, jirl; + enum loongarch_gpr zero = LOONGARCH_GPR_ZERO; + enum loongarch_gpr t1 = LOONGARCH_GPR_T1; + unsigned long pos = (unsigned long)addr; + + addu16id = larch_insn_gen_addu16id(t1, zero, ADDR_IMM(pos, ADDU16ID)); + lu32id = larch_insn_gen_lu32id(t1, ADDR_IMM(pos, LU32ID)); + lu52id = larch_insn_gen_lu52id(t1, t1, ADDR_IMM(pos, LU52ID)); + jirl = larch_insn_gen_jirl(zero, t1, 0, (pos & 0xffff)); + + return larch_insn_patch_text(&p->addu16id, addu16id) || + larch_insn_patch_text(&p->lu32id, lu32id) || + larch_insn_patch_text(&p->lu52id, lu52id) || + larch_insn_patch_text(&p->jirl, jirl); + + return 0; +} + +int apply_ftrace_tramp(struct module *mod, void *addr, size_t size) +{ + int ret; + struct module_ftrace_tramp *p = addr; + + /* + * Add 2 module_ftrace_tramps for calling ftrace_caller + * and ftrace_regs_caller. So that we can only change bl + * to enable or disable dynamic ftrace. + */ + if (WARN_ON(size != 2 * sizeof(struct module_ftrace_tramp))) + return -EINVAL; + + ret = patch_tramp(p, ftrace_caller); + if (ret) + return -EINVAL; + + mod->arch.ftrace_trampolines[0] = p; + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS + ret = patch_tramp(p + 1, ftrace_regs_caller); + if (ret) + return ret; + + mod->arch.ftrace_trampolines[1] = p + 1; +#endif + + return 0; +} + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +extern void ftrace_graph_call(void); + +void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent) +{ + unsigned long return_hooker = (unsigned long)&return_to_handler; + unsigned long old; + + if (unlikely(atomic_read(¤t->tracing_graph_pause))) + return; + + old = *parent; + + if (!function_graph_enter(old, self_addr, 0, parent)) + *parent = return_hooker; +} + +static int ftrace_modify_graph_caller(bool enable) +{ + unsigned long pc, func; + u32 branch, nop; + + pc = (unsigned long)&ftrace_graph_call; + func = (unsigned long)&ftrace_graph_caller; + + branch = larch_insn_gen_b(pc, func); + nop = larch_insn_gen_nop(); + + if (enable) + return ftrace_modify_code(pc, nop, branch, true); + else + return ftrace_modify_code(pc, branch, nop, true); +} + +int ftrace_enable_ftrace_graph_caller(void) +{ + return ftrace_modify_graph_caller(true); +} + +int ftrace_disable_ftrace_graph_caller(void) +{ + return ftrace_modify_graph_caller(false); +} +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + +#ifdef CONFIG_KPROBES_ON_FTRACE +/* Ftrace callback handler for kprobes -- called under preepmt disabed */ +void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *ops, struct pt_regs *regs) +{ + struct kprobe *p; + struct kprobe_ctlblk *kcb; + + p = get_kprobe((kprobe_opcode_t *)ip); + if (unlikely(!p) || kprobe_disabled(p)) + return; + + kcb = get_kprobe_ctlblk(); + if (kprobe_running()) { + kprobes_inc_nmissed_count(p); + } else { + unsigned long orig_ip = regs->csr_era; + + regs->csr_era = ip; + + __this_cpu_write(current_kprobe, p); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + if (!p->pre_handler || !p->pre_handler(p, regs)) { + /* + * Emulate singlestep (and also recover regs->csr_era) + * as if there is a nop. + */ + regs->csr_era = (unsigned long)p->addr + LOONGARCH_INSN_SIZE; + if (unlikely(p->post_handler)) { + kcb->kprobe_status = KPROBE_HIT_SSDONE; + p->post_handler(p, regs, 0); + } + regs->csr_era = orig_ip; + } + /* + * If pre_handler returns !0, it changes regs->ip. We have to + * skip emulating post_handler. + */ + __this_cpu_write(current_kprobe, NULL); + } +} +NOKPROBE_SYMBOL(kprobe_ftrace_handler); + +int arch_prepare_kprobe_ftrace(struct kprobe *p) +{ + p->ainsn.insn = NULL; + return 0; +} +#endif /* CONFIG_KPROBES_ON_FTRACE */ diff --git a/arch/loongarch/kernel/inst.c b/arch/loongarch/kernel/inst.c new file mode 100644 index 000000000000..1a899b1fc7e3 --- /dev/null +++ b/arch/loongarch/kernel/inst.c @@ -0,0 +1,321 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#include +#include + +#include +#include +#include + +enum inst_brk_from_type { + BRK_FROM_GEN_B = 0x100, + BRK_FROM_GEN_BL, + BRK_FROM_GEN_JIRL, +}; + +#define __SIGNEX(X, SIDX) ((X) >= (1 << SIDX) ? ~((1 << SIDX) - 1) | (X) : (X)) +#define SIGNEX26(X) __SIGNEX(((unsigned long)(X)), 25) +#define SIGNEX21(X) __SIGNEX(((unsigned long)(X)), 20) +#define SIGNEX20(X) __SIGNEX(((unsigned long)(X)), 19) +#define SIGNEX16(X) __SIGNEX(((unsigned long)(X)), 15) + +unsigned long bs_dest_26(unsigned long now, unsigned int h, unsigned int l) +{ + return now + (SIGNEX26(h << 16 | l) << 2); +} + +unsigned long bs_dest_21(unsigned long now, unsigned int h, + unsigned int l) +{ + return now + (SIGNEX21(h << 16 | l) << 2); +} + +unsigned long bs_dest_16(unsigned long now, unsigned int si) +{ + return now + (SIGNEX16(si) << 2); +} + +int simu_branch(struct pt_regs *regs, union loongarch_instruction insn) +{ + unsigned int si, si_l, si_h, rd, rj; + unsigned long era = regs->csr_era; + + if (era & 3) + return -EFAULT; + + si_l = insn.reg0i26_format.simmediate_l; + si_h = insn.reg0i26_format.simmediate_h; + switch (insn.reg0i26_format.opcode) { + case b_op: + regs->csr_era = bs_dest_26(era, si_h, si_l); + return 0; + case bl_op: + regs->csr_era = bs_dest_26(era, si_h, si_l); + regs->regs[1] = era + LOONGARCH_INSN_SIZE; + return 0; + } + + si_l = insn.reg1i21_format.simmediate_l; + si_h = insn.reg1i21_format.simmediate_h; + rj = insn.reg1i21_format.rj; + switch (insn.reg1i21_format.opcode) { + case beqz_op: + if (cond_beqz(regs, rj)) + regs->csr_era = bs_dest_21(era, si_h, si_l); + else + regs->csr_era += LOONGARCH_INSN_SIZE; + return 0; + case bnez_op: + if (cond_bnez(regs, rj)) + regs->csr_era = bs_dest_21(era, si_h, si_l); + else + regs->csr_era += LOONGARCH_INSN_SIZE; + return 0; + } + + si = insn.reg2i16_format.simmediate; + rj = insn.reg2i16_format.rj; + rd = insn.reg2i16_format.rd; + switch (insn.reg2i16_format.opcode) { + case beq_op: + if (cond_beq(regs, rj, rd)) + regs->csr_era = bs_dest_16(era, si); + else + regs->csr_era += LOONGARCH_INSN_SIZE; + break; + case bne_op: + if (cond_bne(regs, rj, rd)) + regs->csr_era = bs_dest_16(era, si); + else + regs->csr_era += LOONGARCH_INSN_SIZE; + break; + case blt_op: + if (cond_blt(regs, rj, rd)) + regs->csr_era = bs_dest_16(era, si); + else + regs->csr_era += LOONGARCH_INSN_SIZE; + break; + case bge_op: + if (cond_bge(regs, rj, rd)) + regs->csr_era = bs_dest_16(era, si); + else + regs->csr_era += LOONGARCH_INSN_SIZE; + break; + case bltu_op: + if (cond_bltu(regs, rj, rd)) + regs->csr_era = bs_dest_16(era, si); + else + regs->csr_era += LOONGARCH_INSN_SIZE; + break; + case bgeu_op: + if (cond_bgeu(regs, rj, rd)) + regs->csr_era = bs_dest_16(era, si); + else + regs->csr_era += LOONGARCH_INSN_SIZE; + break; + case jirl_op: + /* Use bs_dest_16() to calc the dest. */ + regs->csr_era = bs_dest_16(regs->regs[rj], si); + regs->regs[rd] = era + LOONGARCH_INSN_SIZE; + break; + default: + return -EINVAL; + } + + return 0; +} + +int simu_pc(struct pt_regs *regs, union loongarch_instruction insn) +{ + unsigned long era = regs->csr_era; + unsigned int rd = insn.reg1i20_format.rd; + unsigned int si = insn.reg1i20_format.simmediate; + + if (era & 3) + return -EFAULT; + + switch (insn.reg1i20_format.opcode) { + case pcaddi_op: + regs->regs[rd] = era + (SIGNEX20(si) << 2); + break; + case pcalau12i_op: + regs->regs[rd] = era + (SIGNEX20(si) << 12); + regs->regs[rd] &= ~((1 << 12) - 1); + break; + case pcaddu12i_op: + regs->regs[rd] = era + (SIGNEX20(si) << 12); + break; + case pcaddu18i_op: + regs->regs[rd] = era + (SIGNEX20(si) << 18); + break; + default: + return -EINVAL; + } + + regs->csr_era += LOONGARCH_INSN_SIZE; + + return 0; +} + +static DEFINE_RAW_SPINLOCK(patch_lock); + +int larch_insn_read(void *addr, u32 *insnp) +{ + int ret; + u32 val; + + ret = copy_from_kernel_nofault(&val, addr, LOONGARCH_INSN_SIZE); + if (!ret) + *insnp = val; + + return ret; +} + +int larch_insn_write(void *addr, u32 insn) +{ + int ret; + unsigned long flags = 0; + + raw_spin_lock_irqsave(&patch_lock, flags); + ret = copy_to_kernel_nofault(addr, &insn, LOONGARCH_INSN_SIZE); + raw_spin_unlock_irqrestore(&patch_lock, flags); + + return ret; +} + +int larch_insn_patch_text(void *addr, u32 insn) +{ + int ret; + u32 *tp = addr; + + if ((unsigned long)tp & 3) + return -EINVAL; + + ret = larch_insn_write(tp, insn); + if (!ret) + flush_icache_range((unsigned long)tp, + (unsigned long)tp + LOONGARCH_INSN_SIZE); + + return ret; +} + +u32 larch_insn_gen_nop(void) +{ + return INSN_NOP; +} + +u32 larch_insn_gen_b(unsigned long pc, unsigned long dest) +{ + unsigned int simmediate_l, simmediate_h; + union loongarch_instruction insn; + long offset = dest - pc; + + if ((offset & 3) || offset < -SZ_128M || offset >= SZ_128M) + return INSN_BREAK | BRK_FROM_GEN_B; + + offset >>= 2; + + simmediate_l = offset & 0xffff; + offset >>= 16; + simmediate_h = offset & 0x3ff; + + insn.reg0i26_format.opcode = b_op; + insn.reg0i26_format.simmediate_l = simmediate_l; + insn.reg0i26_format.simmediate_h = simmediate_h; + + return insn.word; +} + +u32 larch_insn_gen_bl(unsigned long pc, unsigned long dest) +{ + unsigned int simmediate_l, simmediate_h; + union loongarch_instruction insn; + long offset = dest - pc; + + if ((offset & 3) || offset < -SZ_128M || offset >= SZ_128M) + return INSN_BREAK | BRK_FROM_GEN_BL; + + offset >>= 2; + + simmediate_l = offset & 0xffff; + offset >>= 16; + simmediate_h = offset & 0x3ff; + + insn.reg0i26_format.opcode = bl_op; + insn.reg0i26_format.simmediate_l = simmediate_l; + insn.reg0i26_format.simmediate_h = simmediate_h; + + return insn.word; +} + +u32 larch_insn_gen_addu16id(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm) +{ + union loongarch_instruction insn; + + insn.reg2i16_format.opcode = addu16id_op; + insn.reg2i16_format.rd = rd; + insn.reg2i16_format.rj = rj; + insn.reg2i16_format.simmediate = imm; + + return insn.word; +} + +u32 larch_insn_gen_lu32id(enum loongarch_gpr rd, int imm) +{ + union loongarch_instruction insn; + + insn.reg1i20_format.opcode = lu32id_op; + insn.reg1i20_format.rd = rd; + insn.reg1i20_format.simmediate = imm; + + return insn.word; +} + +u32 larch_insn_gen_lu52id(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm) +{ + union loongarch_instruction insn; + + insn.reg2i12_format.opcode = lu52id_op; + insn.reg2i12_format.rd = rd; + insn.reg2i12_format.rj = rj; + insn.reg2i12_format.simmediate = imm; + + return insn.word; +} + +u32 larch_insn_gen_jirl(enum loongarch_gpr rd, enum loongarch_gpr rj, + unsigned long pc, unsigned long dest) +{ + union loongarch_instruction insn; + long offset = dest - pc; + + if ((offset & 3) || offset < -SZ_128K || offset >= SZ_128K) + return INSN_BREAK | BRK_FROM_GEN_JIRL; + + insn.reg2i16_format.opcode = jirl_op; + insn.reg2i16_format.rd = rd; + insn.reg2i16_format.rj = rj; + insn.reg2i16_format.simmediate = offset >> 2; + + return insn.word; +} + +u32 larch_insn_gen_or(enum loongarch_gpr rd, enum loongarch_gpr rj, + enum loongarch_gpr rk) +{ + union loongarch_instruction insn; + + insn.reg3_format.opcode = or_op; + insn.reg3_format.rd = rd; + insn.reg3_format.rj = rj; + insn.reg3_format.rk = rk; + + return insn.word; +} + +u32 larch_insn_gen_move(enum loongarch_gpr rd, enum loongarch_gpr rj) +{ + return larch_insn_gen_or(rd, rj, 0); +} diff --git a/arch/loongarch/kernel/jump_label.c b/arch/loongarch/kernel/jump_label.c new file mode 100644 index 000000000000..951cb884b915 --- /dev/null +++ b/arch/loongarch/kernel/jump_label.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +void arch_jump_label_transform(struct jump_entry *e, + enum jump_label_type type) +{ + + u32 insn, *insn_p; + + insn_p = (u32 *)e->code; + + if (type == JUMP_LABEL_JMP) + insn = larch_insn_gen_b((unsigned long)e->code, (unsigned long)e->target); + else + insn = larch_insn_gen_nop(); + + mutex_lock(&text_mutex); + *insn_p = insn; + + flush_icache_range((unsigned long)insn_p, + (unsigned long)insn_p + sizeof(*insn_p)); + + mutex_unlock(&text_mutex); +} diff --git a/arch/loongarch/kernel/kgdb.c b/arch/loongarch/kernel/kgdb.c new file mode 100644 index 000000000000..c96e8174530a --- /dev/null +++ b/arch/loongarch/kernel/kgdb.c @@ -0,0 +1,598 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Author: Huacai Chen + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ + +#include +#include /* for linux pt_regs struct */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +static int kgdb_watch_dcount; +static int kgdb_watch_icount; +int kgdb_watch_activated; + +int param_set_dcount(const char *val, const struct kernel_param *kp) +{ + int dbcn, d, ret; + ret = kstrtoint(val, 0, &d); + if (ret < 0) + return ret; + dbcn = csr_readl(LOONGARCH_CSR_MWPC) & 0x3f; + if (d > dbcn) + return -EINVAL; + boot_cpu_data.watch_dreg_count = dbcn - d; + *(int *)kp->arg = d; + return 0; +} + +int param_set_icount(const char *val, const struct kernel_param *kp) +{ + int ibcn, d, ret; + ret = kstrtoint(val, 0, &d); + if (ret < 0) + return ret; + ibcn = csr_readl(LOONGARCH_CSR_FWPC) & 0x3f; + if (d > ibcn) + return -EINVAL; + boot_cpu_data.watch_ireg_count = ibcn - d; + *(int *)kp->arg = d; + return 0; +} + +const struct kernel_param_ops param_ops_dcount = { + .set = param_set_dcount, + .get = param_get_int, +}; + +const struct kernel_param_ops param_ops_icount = { + .set = param_set_icount, + .get = param_get_int, +}; + +module_param_cb(kgdb_watch_dcount, ¶m_ops_dcount, &kgdb_watch_dcount, 0644); +module_param_cb(kgdb_watch_icount, ¶m_ops_icount, &kgdb_watch_icount, 0644); + +static struct hard_trap_info { + unsigned char tt; /* Trap type code for LoongArch */ + unsigned char signo; /* Signal that we map this trap into */ +} hard_trap_info[] = { + { 1, SIGBUS }, + { 2, SIGBUS }, + { 3, SIGBUS }, + { 4, SIGBUS }, + { 5, SIGBUS }, + { 6, SIGBUS }, + { 7, SIGBUS }, + { 8, SIGBUS }, + { 9, SIGBUS }, + { 10, SIGBUS }, + { 12, SIGTRAP }, /* break */ + { 13, SIGBUS }, + { 14, SIGBUS }, + { 15, SIGFPE }, + { 16, SIGFPE }, + { 17, SIGFPE }, + { 18, SIGFPE }, + { 0, 0} /* Must be last */ +}; + +struct dbg_reg_def_t dbg_reg_def[DBG_ALL_REG_NUM] = { + { "r0", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[0]) }, + { "r1", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[1]) }, + { "r2", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[2]) }, + { "r3", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[3]) }, + { "r4", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[4]) }, + { "r5", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[5]) }, + { "r6", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[6]) }, + { "r7", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[7]) }, + { "r8", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[8]) }, + { "r9", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[9]) }, + { "r10", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[10]) }, + { "r11", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[11]) }, + { "r12", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[12]) }, + { "r13", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[13]) }, + { "r14", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[14]) }, + { "r15", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[15]) }, + { "r16", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[16]) }, + { "r17", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[17]) }, + { "r18", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[18]) }, + { "r19", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[19]) }, + { "r20", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[20]) }, + { "r21", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[21]) }, + { "r22", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[22]) }, + { "r23", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[23]) }, + { "r24", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[24]) }, + { "r25", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[25]) }, + { "r26", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[26]) }, + { "r27", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[27]) }, + { "r28", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[28]) }, + { "r29", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[29]) }, + { "r30", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[30]) }, + { "r31", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[31]) }, + { "pc", GDB_SIZEOF_REG, offsetof(struct pt_regs, csr_era) }, + { "f0", GDB_SIZEOF_REG, 0 }, + { "f1", GDB_SIZEOF_REG, 1 }, + { "f2", GDB_SIZEOF_REG, 2 }, + { "f3", GDB_SIZEOF_REG, 3 }, + { "f4", GDB_SIZEOF_REG, 4 }, + { "f5", GDB_SIZEOF_REG, 5 }, + { "f6", GDB_SIZEOF_REG, 6 }, + { "f7", GDB_SIZEOF_REG, 7 }, + { "f8", GDB_SIZEOF_REG, 8 }, + { "f9", GDB_SIZEOF_REG, 9 }, + { "f10", GDB_SIZEOF_REG, 10 }, + { "f11", GDB_SIZEOF_REG, 11 }, + { "f12", GDB_SIZEOF_REG, 12 }, + { "f13", GDB_SIZEOF_REG, 13 }, + { "f14", GDB_SIZEOF_REG, 14 }, + { "f15", GDB_SIZEOF_REG, 15 }, + { "f16", GDB_SIZEOF_REG, 16 }, + { "f17", GDB_SIZEOF_REG, 17 }, + { "f18", GDB_SIZEOF_REG, 18 }, + { "f19", GDB_SIZEOF_REG, 19 }, + { "f20", GDB_SIZEOF_REG, 20 }, + { "f21", GDB_SIZEOF_REG, 21 }, + { "f22", GDB_SIZEOF_REG, 22 }, + { "f23", GDB_SIZEOF_REG, 23 }, + { "f24", GDB_SIZEOF_REG, 24 }, + { "f25", GDB_SIZEOF_REG, 25 }, + { "f26", GDB_SIZEOF_REG, 26 }, + { "f27", GDB_SIZEOF_REG, 27 }, + { "f28", GDB_SIZEOF_REG, 28 }, + { "f29", GDB_SIZEOF_REG, 29 }, + { "f30", GDB_SIZEOF_REG, 30 }, + { "f31", GDB_SIZEOF_REG, 31 }, + { "fcc0", 1, 0 }, + { "fcc1", 1, 1 }, + { "fcc2", 1, 2 }, + { "fcc3", 1, 3 }, + { "fcc4", 1, 4 }, + { "fcc5", 1, 5 }, + { "fcc6", 1, 6 }, + { "fcc7", 1, 7 }, + { "fcsr", GDB_SIZEOF_REG, 0 }, + { "scr0", GDB_SIZEOF_REG, offsetof(struct thread_struct, scr0) }, + { "scr1", GDB_SIZEOF_REG, offsetof(struct thread_struct, scr1) }, + { "scr2", GDB_SIZEOF_REG, offsetof(struct thread_struct, scr2) }, + { "scr3", GDB_SIZEOF_REG, offsetof(struct thread_struct, scr2) }, +}; + +int dbg_set_reg(int regno, void *mem, struct pt_regs *regs) +{ + int fp_reg; + + if (regno < 0 || regno >= DBG_ALL_REG_NUM) + return -EINVAL; + + if (dbg_reg_def[regno].offset != -1 && regno < 33) { + memcpy((void *)regs + dbg_reg_def[regno].offset, mem, + dbg_reg_def[regno].size); + } else if (current && dbg_reg_def[regno].offset != -1 && regno < 78) { + /* FP registers 32 -> 77 */ + if (!(regs->csr_euen & CSR_EUEN_FPEN)) + return 0; + if (regno == 72) { + /* Process the fcsr/fsr (register 70) */ + memcpy((void *)¤t->thread.fpu.fcsr, mem, + dbg_reg_def[regno].size); + } else if (regno >= 64 && regno < 72) { + /* Process the fcc */ + fp_reg = dbg_reg_def[regno].offset; + memcpy((char *)¤t->thread.fpu.fcc + fp_reg, mem, + dbg_reg_def[regno].size); + } else if (regno >= 73 && regno < 77) { + /* Process the scr */ + memcpy((void *)¤t->thread + dbg_reg_def[regno].offset, mem, + dbg_reg_def[regno].size); + } else { + fp_reg = dbg_reg_def[regno].offset; + memcpy((void *)¤t->thread.fpu.fpr[fp_reg], mem, + dbg_reg_def[regno].size); + } + + restore_fp(current); + } + + return 0; +} + +char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs) +{ + int fp_reg; + + if (regno >= DBG_ALL_REG_NUM || regno < 0) + return NULL; + + if (dbg_reg_def[regno].offset != -1 && regno < 33) { + /* First 32 registers */ + memcpy(mem, (void *)regs + dbg_reg_def[regno].offset, + dbg_reg_def[regno].size); + } else if (current && dbg_reg_def[regno].offset != -1 && regno < 78) { + /* FP registers 32 -> 77 */ + if (!(regs->csr_euen & CSR_EUEN_FPEN)) + goto out; + save_fp(current); + if (regno == 72) { + /* Process the fcsr/fsr (register 70) */ + memcpy(mem, (void *)¤t->thread.fpu.fcsr, + dbg_reg_def[regno].size); + } else if (regno >= 64 && regno < 72) { + /* Process the fcc */ + fp_reg = dbg_reg_def[regno].offset; + memcpy(mem, (char *)¤t->thread.fpu.fcc + fp_reg, + dbg_reg_def[regno].size); + } else if (regno >= 73 && regno < 77) { + /* Process the scr */ + memcpy(mem, (void *)¤t->thread + dbg_reg_def[regno].offset, + dbg_reg_def[regno].size); + } else { + fp_reg = dbg_reg_def[regno].offset; + memcpy(mem, (void *)¤t->thread.fpu.fpr[fp_reg], + dbg_reg_def[regno].size); + } + } + +out: + return dbg_reg_def[regno].name; + +} + +void arch_kgdb_breakpoint(void) +{ + __asm__ __volatile__( + ".globl breakinst\n\t" + "nop\n" + "breakinst:\tbreak 0\n\t" + "nop\n\t"); +} + +static int compute_signal(int tt) +{ + struct hard_trap_info *ht; + + for (ht = hard_trap_info; ht->tt && ht->signo; ht++) + if (ht->tt == tt) + return ht->signo; + + return SIGTRAP; /* default for things we don't know about */ +} + +/* + * Similar to regs_to_gdb_regs() except that process is sleeping and so + * we may not be able to get all the info. + */ +void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) +{ + int reg; +#if (KGDB_GDB_REG_SIZE == 32) + u32 *ptr = (u32 *)gdb_regs, *gdbregs = ptr; +#else + u64 *ptr = (u64 *)gdb_regs, *gdbregs = ptr; +#endif + + *(ptr++) = 0; + *(ptr++) = p->thread.reg01; + *(ptr++) = (long)p; + *(ptr++) = p->thread.reg03; + for (reg = 4; reg < 23; reg++) + *(ptr++) = 0; + + /* S0 - S8 */ + *(ptr++) = p->thread.reg23; + *(ptr++) = p->thread.reg24; + *(ptr++) = p->thread.reg25; + *(ptr++) = p->thread.reg26; + *(ptr++) = p->thread.reg27; + *(ptr++) = p->thread.reg28; + *(ptr++) = p->thread.reg29; + *(ptr++) = p->thread.reg30; + *(ptr++) = p->thread.reg31; + + /* + * PC + * use return address (RA), i.e. the moment after return from resume() + */ + *(ptr++) = p->thread.reg01; + + ptr = gdbregs + 73; + *(ptr++) = p->thread.scr0; + *(ptr++) = p->thread.scr1; + *(ptr++) = p->thread.scr2; + *(ptr++) = p->thread.scr3; +} + +void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc) +{ + regs->csr_era = pc; +} + +/* + * Calls linux_debug_hook before the kernel dies. If KGDB is enabled, + * then try to fall into the debugger + */ +static int kgdb_loongarch_notify(struct notifier_block *self, unsigned long cmd, + void *ptr) +{ + struct die_args *args = (struct die_args *)ptr; + struct pt_regs *regs = args->regs; + int trap = read_csr_excode(); + +#ifdef CONFIG_KPROBES + /* + * Return immediately if the kprobes fault notifier has set + * DIE_PAGE_FAULT. + */ + if (cmd == DIE_PAGE_FAULT) + return NOTIFY_DONE; +#endif /* CONFIG_KPROBES */ + + /* Userspace events, ignore. */ + if (user_mode(regs)) + return NOTIFY_DONE; + + if (atomic_read(&kgdb_active) != -1) + kgdb_nmicallback(smp_processor_id(), regs); + + if (kgdb_handle_exception(trap, compute_signal(trap), cmd, regs)) + return NOTIFY_DONE; + + if (atomic_read(&kgdb_setting_breakpoint)) + if ((regs->csr_era == (unsigned long)breakinst)) + regs->csr_era += 4; + + /* In SMP mode, __flush_cache_all does IPI */ + local_irq_enable(); + flush_cache_all(); + + return NOTIFY_STOP; +} + +#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP +int kgdb_ll_trap(int cmd, const char *str, + struct pt_regs *regs, long err, int trap, int sig) +{ + struct die_args args = { + .regs = regs, + .str = str, + .err = err, + .trapnr = trap, + .signr = sig, + + }; + + if (!kgdb_io_module_registered) + return NOTIFY_DONE; + + return kgdb_loongarch_notify(NULL, cmd, &args); +} +#endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */ + +static struct notifier_block kgdb_notifier = { + .notifier_call = kgdb_loongarch_notify, +}; + +/* + * Handle the 'c' command + */ +int kgdb_arch_handle_exception(int vector, int signo, int err_code, + char *remcom_in_buffer, char *remcom_out_buffer, + struct pt_regs *regs) +{ + char *ptr; + unsigned long address; + + regs->csr_prmd |= CSR_PRMD_PWE; + + switch (remcom_in_buffer[0]) { + case 'c': + /* handle the optional parameter */ + ptr = &remcom_in_buffer[1]; + if (kgdb_hex2long(&ptr, &address)) + regs->csr_era = address; + + return 0; + } + + return -1; +} + +static struct hw_breakpoint { + unsigned enabled; + unsigned long addr; + int len; + int type; + struct perf_event * __percpu *pev; +} dbreakinfo[NUM_WATCH_REGS], ibreakinfo[NUM_WATCH_REGS]; + +static int +kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) +{ + int i; + struct hw_breakpoint *breakinfo = (bptype == BP_HARDWARE_BREAKPOINT) ? + ibreakinfo : dbreakinfo; + int count = (bptype == BP_HARDWARE_BREAKPOINT) ? kgdb_watch_icount : + kgdb_watch_dcount; + + for (i = 0; i < count; i++) + if (!breakinfo[i].enabled) + break; + if (i == count) + return -1; + + breakinfo[i].type = bptype; + breakinfo[i].len = len; + breakinfo[i].addr = addr; + breakinfo[i].enabled |= 1; + + return 0; +} + + +static int +kgdb_remove_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) +{ + int i; + struct hw_breakpoint *breakinfo = (bptype == BP_HARDWARE_BREAKPOINT) ? + ibreakinfo : dbreakinfo; + int count = (bptype == BP_HARDWARE_BREAKPOINT) ? kgdb_watch_icount : + kgdb_watch_dcount; + + for (i = 0; i < count; i++) + if (breakinfo[i].addr == addr && breakinfo[i].enabled) + break; + if (i == count) + return -1; + + breakinfo[i].enabled &= ~1; + + return 0; +} + +static void kgdb_disable_hw_debug(struct pt_regs *regs) +{ + csr_xchgl(0, CSR_CRMD_WE, LOONGARCH_CSR_CRMD); + regs->csr_prmd &= ~CSR_PRMD_PWE; +} + +static void kgdb_remove_all_hw_break(void) +{ + int i, j, mask; + + for (mask = 0, i = 0, j = boot_cpu_data.watch_ireg_count; + i < kgdb_watch_icount; i++, j++) { + if (!(ibreakinfo[i].enabled & 2)) + continue; + ibreakinfo[i].enabled = 0; + watch_csrwr(0, LOONGARCH_CSR_IB0ADDR + 8 * j); + watch_csrwr(0, LOONGARCH_CSR_IB0MASK + 8 * j); + watch_csrwr(0, LOONGARCH_CSR_IB0ASID + 8 * j); + watch_csrwr(0, LOONGARCH_CSR_IB0CTL + 8 * j); + mask |= 1 << j; + } + watch_csrwr(mask, LOONGARCH_CSR_FWPS); + + for (mask = 0, i = 0, j = boot_cpu_data.watch_dreg_count; i < kgdb_watch_dcount; + i++, j++) { + if (!(dbreakinfo[i].enabled & 2)) + continue; + dbreakinfo[i].enabled = 0; + watch_csrwr(0, LOONGARCH_CSR_DB0ADDR + 8 * j); + watch_csrwr(0, LOONGARCH_CSR_DB0MASK + 8 * j); + watch_csrwr(0, LOONGARCH_CSR_DB0ASID + 8 * j); + watch_csrwr(0, LOONGARCH_CSR_DB0CTL + 8 * j); + mask |= 1 << j; + } + watch_csrwr(mask, LOONGARCH_CSR_MWPS); + + csr_xchgl(0, CSR_CRMD_WE, LOONGARCH_CSR_CRMD); + + kgdb_watch_activated = 0; +} + +static void kgdb_correct_hw_break(void) +{ + int i, j, dbc, activated = 0; + + for (i = 0, j = boot_cpu_data.watch_ireg_count; i < kgdb_watch_icount; i++, j++) { + if ((ibreakinfo[i].enabled & 3) == 2) { + watch_csrwr(0, LOONGARCH_CSR_IB0CTL + 8*j); + ibreakinfo[i].enabled = 0; + continue; + } else if (!ibreakinfo[i].enabled) + continue; + ibreakinfo[i].enabled |= 2; + watch_csrwr(ibreakinfo[i].addr, LOONGARCH_CSR_IB0ADDR + 8*j); + watch_csrwr(0, LOONGARCH_CSR_IB0MASK + 8*j); + watch_csrwr(0, LOONGARCH_CSR_IB0ASID + 8*j); + watch_csrwr(0x1e, LOONGARCH_CSR_IB0CTL + 8*j); + watch_csrwr(0x10000, LOONGARCH_CSR_FWPS); + activated = 1; + } + + for (i = 0, j = boot_cpu_data.watch_dreg_count; i < kgdb_watch_dcount; i++, j++) { + if ((dbreakinfo[i].enabled & 3) == 2) { + watch_csrwr(0, LOONGARCH_CSR_DB0CTL + 8*j); + dbreakinfo[i].enabled = 0; + continue; + } else if (!dbreakinfo[i].enabled) + continue; + dbreakinfo[i].enabled |= 2; + dbc = 0x1e; + switch (dbreakinfo[i].len) { + case 8: + break; + case 4: + dbc |= (1<<10); + break; + case 2: + dbc |= (2<<10); + break; + case 1: + dbc |= (3<<10); + break; + default: + break; + } + + if (dbreakinfo[i].type == BP_WRITE_WATCHPOINT) { + dbc |= 1<<9; + } else if (BP_READ_WATCHPOINT) { + dbc |= 1<<8; + } else { + dbc |= 3<<8; + } + + watch_csrwr(dbreakinfo[i].addr, LOONGARCH_CSR_DB0ADDR + 8*j); + watch_csrwr(0, LOONGARCH_CSR_DB0MASK + 8*j); + watch_csrwr(0, LOONGARCH_CSR_DB0ASID + 8*j); + watch_csrwr(dbc, LOONGARCH_CSR_DB0CTL + 8*j); + activated = 1; + } + + csr_xchgl(activated ? CSR_CRMD_WE : 0, CSR_CRMD_WE, LOONGARCH_CSR_CRMD); + kgdb_watch_activated = activated; +} + +const struct kgdb_arch arch_kgdb_ops = { + .flags = KGDB_HW_BREAKPOINT, + .set_hw_breakpoint = kgdb_set_hw_break, + .remove_hw_breakpoint = kgdb_remove_hw_break, + .disable_hw_break = kgdb_disable_hw_debug, + .remove_all_hw_break = kgdb_remove_all_hw_break, + .correct_hw_break = kgdb_correct_hw_break, + .gdb_bpt_instr = { 0x00, 0x00, break_op >> 1, 0x00 }, +}; + +int kgdb_arch_init(void) +{ + int ibcn, dbcn; + + register_die_notifier(&kgdb_notifier); + dbcn = csr_readl(LOONGARCH_CSR_MWPC) & 0x3f; + ibcn = csr_readl(LOONGARCH_CSR_FWPC) & 0x3f; + boot_cpu_data.watch_dreg_count = dbcn - kgdb_watch_dcount; + boot_cpu_data.watch_ireg_count = ibcn - kgdb_watch_icount; + return 0; +} + +/* + * kgdb_arch_exit - Perform any architecture specific uninitalization. + * + * This function will handle the uninitalization of any architecture + * specific callbacks, for dynamic registration and unregistration. + */ +void kgdb_arch_exit(void) +{ + unregister_die_notifier(&kgdb_notifier); +} diff --git a/arch/loongarch/kernel/kprobes.c b/arch/loongarch/kernel/kprobes.c new file mode 100644 index 000000000000..58996bcf4c56 --- /dev/null +++ b/arch/loongarch/kernel/kprobes.c @@ -0,0 +1,420 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Kernel Probes (KProbes) + * + * arch/loongarch/kernel/kprobes.c + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +static const union loongarch_instruction breakpoint_insn = { + .reg0i15_format = { + .opcode = break_op, + .simmediate = BRK_KPROBE_BP, + } +}; + +static const union loongarch_instruction breakpoint2_insn = { + .reg0i15_format = { + .opcode = break_op, + .simmediate = BRK_KPROBE_SSTEPBP, + } +}; + +DEFINE_PER_CPU(struct kprobe *, current_kprobe); +DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); + +#define SS_FAIL -1 +#define SS_NONE 0 +#define SS_BRANCH 1 +#define SS_PC 2 + +/* + * insn_has_ll_or_sc function checks whether instruction is ll or sc + * one; putting breakpoint on top of atomic ll/sc pair is bad idea; + * so we need to prevent it and refuse kprobes insertion for such + * instructions; cannot do much about breakpoint in the middle of + * ll/sc pair; it is upto user to avoid those places + */ +static int __kprobes insn_has_ll_or_sc(union loongarch_instruction insn) +{ + int ret = 0; + + switch (insn.reg2i14_format.opcode) { + case llw_op: + case lld_op: + case scw_op: + case scd_op: + ret = 1; + break; + } + return ret; +} + +int __kprobes arch_prepare_kprobe(struct kprobe *p) +{ + union loongarch_instruction insn; + int ret = 0; + + insn = p->addr[0]; + + if (insn_has_ll_or_sc(insn)) { + pr_notice("Kprobes for ll and sc instructions are not" + "supported\n"); + ret = -EINVAL; + goto out; + } + + if (insn.reg1i21_format.opcode == bceqz_op) { + pr_notice("Kprobes for bceqz and bcnez instructions are not" + "supported\n"); + ret = -EINVAL; + goto out; + } + + /* insn: must be on special executable page on loongarch. */ + p->ainsn.insn = get_insn_slot(); + if (!p->ainsn.insn) { + ret = -ENOMEM; + goto out; + } + + /* + * In the kprobe->ainsn.insn[] array we store the original + * instruction at index zero and a break trap instruction at + * index one. + */ + memcpy(&p->ainsn.insn[0], p->addr, sizeof(kprobe_opcode_t)); + + p->ainsn.insn[1] = breakpoint2_insn; + p->opcode = *p->addr; + +out: + return ret; +} + +void __kprobes arch_arm_kprobe(struct kprobe *p) +{ + *p->addr = breakpoint_insn; + flush_insn_slot(p); +} + +void __kprobes arch_disarm_kprobe(struct kprobe *p) +{ + *p->addr = p->opcode; + flush_insn_slot(p); +} + +void __kprobes arch_remove_kprobe(struct kprobe *p) +{ + if (p->ainsn.insn) { + free_insn_slot(p->ainsn.insn, 0); + p->ainsn.insn = NULL; + } +} + +static void save_previous_kprobe(struct kprobe_ctlblk *kcb) +{ + kcb->prev_kprobe.kp = kprobe_running(); + kcb->prev_kprobe.status = kcb->kprobe_status; + kcb->prev_kprobe.old_SR = kcb->kprobe_old_SR; + kcb->prev_kprobe.saved_SR = kcb->kprobe_saved_SR; + kcb->prev_kprobe.saved_era = kcb->kprobe_saved_era; +} + +static void restore_previous_kprobe(struct kprobe_ctlblk *kcb) +{ + __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp); + kcb->kprobe_status = kcb->prev_kprobe.status; + kcb->kprobe_old_SR = kcb->prev_kprobe.old_SR; + kcb->kprobe_saved_SR = kcb->prev_kprobe.saved_SR; + kcb->kprobe_saved_era = kcb->prev_kprobe.saved_era; +} + +static void set_current_kprobe(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) +{ + __this_cpu_write(current_kprobe, p); + kcb->kprobe_saved_SR = kcb->kprobe_old_SR = (regs->csr_prmd & CSR_PRMD_PIE); + kcb->kprobe_saved_era = regs->csr_era; +} + +static int prepare_singlestep(struct kprobe *p, struct pt_regs *regs) +{ + regs->csr_prmd &= ~CSR_PRMD_PIE; + + if (is_branch_insn(p->opcode)) { + if (!simu_branch(regs, p->opcode)) + return SS_BRANCH; + } else if (is_pc_insn(p->opcode)) { + if (!simu_pc(regs, p->opcode)) + return SS_PC; + } else { + regs->csr_era = (unsigned long)&p->ainsn.insn[0]; + return SS_NONE; + } + + pr_notice("Kprobes: Error in simulate insn\n"); + regs->csr_era = (unsigned long)&p->ainsn.insn[0]; + return SS_FAIL; +} + +static int __kprobes kprobe_handler(struct pt_regs *regs) +{ + struct kprobe *p; + int ret = 0; + int ss; + kprobe_opcode_t *addr; + struct kprobe_ctlblk *kcb; + + addr = (kprobe_opcode_t *) regs->csr_era; + + /* + * We don't want to be preempted for the entire + * duration of kprobe processing + */ + preempt_disable(); + kcb = get_kprobe_ctlblk(); + + /* Check we're not actually recursing */ + if (kprobe_running()) { + p = get_kprobe(addr); + if (p) { + if (kcb->kprobe_status == KPROBE_HIT_SS && + p->ainsn.insn->word == breakpoint_insn.word) { + regs->csr_prmd &= ~CSR_PRMD_PIE; + regs->csr_prmd |= kcb->kprobe_saved_SR; + goto no_kprobe; + } + /* + * We have reentered the kprobe_handler(), since + * another probe was hit while within the handler. + * We here save the original kprobes variables and + * just single step on the instruction of the new probe + * without calling any user handlers. + */ + save_previous_kprobe(kcb); + set_current_kprobe(p, regs, kcb); + kprobes_inc_nmissed_count(p); + ss = prepare_singlestep(p, regs); + kcb->kprobe_status = KPROBE_REENTER; + if (!SS_NONE) { + restore_previous_kprobe(kcb); + preempt_enable_no_resched(); + } + return 1; + } else if (addr->word != breakpoint_insn.word) { + /* + * The breakpoint instruction was removed by + * another cpu right after we hit, no further + * handling of this interrupt is appropriate + */ + ret = 1; + } + goto no_kprobe; + } + + p = get_kprobe(addr); + if (!p) { + if (addr->word != breakpoint_insn.word) { + /* + * The breakpoint instruction was removed right + * after we hit it. Another cpu has removed + * either a probepoint or a debugger breakpoint + * at this address. In either case, no further + * handling of this interrupt is appropriate. + */ + ret = 1; + } + /* Not one of ours: let kernel handle it */ + goto no_kprobe; + } + + set_current_kprobe(p, regs, kcb); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + + if (p->pre_handler && p->pre_handler(p, regs)) { + /* handler has already set things up, so skip ss setup */ + reset_current_kprobe(); + preempt_enable_no_resched(); + return 1; + } + + ss = prepare_singlestep(p, regs); + + if (ss == SS_NONE) { + kcb->kprobe_status = KPROBE_HIT_SS; + } else if (ss == SS_BRANCH || ss == SS_PC) { + kcb->kprobe_status = KPROBE_HIT_SSDONE; + if (p->post_handler) + p->post_handler(p, regs, 0); + reset_current_kprobe(); + preempt_enable_no_resched(); + } else { + if (p->fault_handler) + p->fault_handler(p, regs, 0); + reset_current_kprobe(); + preempt_enable_no_resched(); + } + + return 1; + +no_kprobe: + preempt_enable_no_resched(); + return ret; + +} + +static inline int post_kprobe_handler(struct pt_regs *regs) +{ + struct kprobe *cur = kprobe_running(); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + if (!cur) + return 0; + + if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { + kcb->kprobe_status = KPROBE_HIT_SSDONE; + cur->post_handler(cur, regs, 0); + } + + regs->csr_era = kcb->kprobe_saved_era + LOONGARCH_INSN_SIZE; + regs->csr_prmd |= kcb->kprobe_saved_SR; + + /* Restore back the original saved kprobes variables and continue. */ + if (kcb->kprobe_status == KPROBE_REENTER) { + restore_previous_kprobe(kcb); + goto out; + } + reset_current_kprobe(); +out: + preempt_enable_no_resched(); + + return 1; +} + +int kprobe_fault_handler(struct pt_regs *regs, int trapnr) +{ + struct kprobe *cur = kprobe_running(); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) + return 1; + + if (kcb->kprobe_status & KPROBE_HIT_SS) { + regs->csr_era = kcb->kprobe_saved_era + LOONGARCH_INSN_SIZE; + regs->csr_prmd |= kcb->kprobe_old_SR; + + reset_current_kprobe(); + preempt_enable_no_resched(); + } + return 0; +} + +/* + * Wrapper routine for handling exceptions. + */ +int __kprobes kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + + struct die_args *args = (struct die_args *)data; + int ret = NOTIFY_DONE; + + switch (val) { + case DIE_BREAK: + if (kprobe_handler(args->regs)) + ret = NOTIFY_STOP; + break; + case DIE_SSTEPBP: + if (post_kprobe_handler(args->regs)) + ret = NOTIFY_STOP; + break; + + case DIE_PAGE_FAULT: + /* kprobe_running() needs smp_processor_id() */ + preempt_disable(); + + if (kprobe_running() + && kprobe_fault_handler(args->regs, args->trapnr)) + ret = NOTIFY_STOP; + preempt_enable(); + break; + default: + break; + } + return ret; +} + +/* + * Function return probe trampoline: + * - init_kprobes() establishes a probepoint here + * - When the probed function returns, this probe causes the + * handlers to fire + */ +static void __used kretprobe_trampoline_holder(void) +{ + asm volatile( + /* Keep the assembler from reordering and placing JR here. */ + "nop\n\t" + ".global kretprobe_trampoline\n" + "kretprobe_trampoline:\n\t" + "nop\n\t" + : : : "memory"); +} + +void kretprobe_trampoline(void); + +void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, + struct pt_regs *regs) +{ + ri->ret_addr = (kprobe_opcode_t *) regs->regs[1]; + ri->fp = NULL; + + /* Replace the return addr with trampoline addr */ + regs->regs[1] = (unsigned long)kretprobe_trampoline; +} + +/* + * Called when the probe at kretprobe trampoline is hit + */ +static int __kprobes trampoline_probe_handler(struct kprobe *p, + struct pt_regs *regs) +{ + instruction_pointer(regs) = __kretprobe_trampoline_handler(regs, + kretprobe_trampoline, NULL); + /* + * By returning a non-zero value, we are telling + * kprobe_handler() that we don't want the post_handler + * to run (and have re-enabled preemption) + */ + return 1; +} + +int __kprobes arch_trampoline_kprobe(struct kprobe *p) +{ + if (p->addr == (kprobe_opcode_t *)kretprobe_trampoline) + return 1; + + return 0; +} + +static struct kprobe trampoline_p = { + .addr = (kprobe_opcode_t *)kretprobe_trampoline, + .pre_handler = trampoline_probe_handler +}; + +int __init arch_init_kprobes(void) +{ + return register_kprobe(&trampoline_p); +} diff --git a/arch/loongarch/kernel/mcount.S b/arch/loongarch/kernel/mcount.S new file mode 100644 index 000000000000..2bbb0b3f101c --- /dev/null +++ b/arch/loongarch/kernel/mcount.S @@ -0,0 +1,94 @@ +/* + * LoongArch specific _mcount support + * + * Author: Huacai Chen + * Copyright (C) 2020-2021 Loongson Technology Corporation Limited + */ + +#include +#include +#include +#include + + .text + +#define MCOUNT_STACK_SIZE (2 * SZREG) +#define MCOUNT_S0_OFFSET (0) +#define MCOUNT_RA_OFFSET (SZREG) + + .macro MCOUNT_SAVE_REGS + PTR_ADDIU sp, sp, -MCOUNT_STACK_SIZE + PTR_S s0, sp, MCOUNT_S0_OFFSET + PTR_S ra, sp, MCOUNT_RA_OFFSET + move s0, a0 + .endm + + .macro MCOUNT_RESTORE_REGS + move a0, s0 + PTR_L ra, sp, MCOUNT_RA_OFFSET + PTR_L s0, sp, MCOUNT_S0_OFFSET + PTR_ADDIU sp, sp, MCOUNT_STACK_SIZE + .endm + + +SYM_FUNC_START(_mcount) + la t1, ftrace_stub + la t2, ftrace_trace_function /* Prepare t2 for (1) */ + PTR_L t2, t2, 0 + beq t1, t2, fgraph_trace + + MCOUNT_SAVE_REGS + + move a0, ra /* arg0: self return address */ + move a1, s0 /* arg1: parent's return address */ + jirl ra, t2, 0 /* (1) call *ftrace_trace_function */ + + MCOUNT_RESTORE_REGS + +fgraph_trace: +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + la t1, ftrace_stub + la t3, ftrace_graph_return + PTR_L t3, t3, 0 + bne t1, t3, ftrace_graph_caller + la t1, ftrace_graph_entry_stub + la t3, ftrace_graph_entry + PTR_L t3, t3, 0 + bne t1, t3, ftrace_graph_caller +#endif + + .globl ftrace_stub +ftrace_stub: + jirl zero, ra, 0 +SYM_FUNC_END(_mcount) +EXPORT_SYMBOL(_mcount) + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +SYM_FUNC_START(ftrace_graph_caller) + MCOUNT_SAVE_REGS + + PTR_ADDIU a0, ra, -4 /* arg0: Callsite self return addr */ + PTR_ADDIU a1, sp, MCOUNT_STACK_SIZE /* arg1: Callsite sp */ + move a2, s0 /* arg2: Callsite parent ra */ + bl prepare_ftrace_return + + MCOUNT_RESTORE_REGS + jirl zero, ra, 0 +SYM_FUNC_END(ftrace_graph_caller) + +SYM_FUNC_START(return_to_handler) + PTR_ADDIU sp, sp, -2 * SZREG + PTR_S v0, sp, 0 + PTR_S v1, sp, SZREG + + bl ftrace_return_to_handler + + /* restore the real parent address: v0 -> ra */ + move ra, v0 + + PTR_L v0, sp, 0 + PTR_L v1, sp, SZREG + PTR_ADDIU sp, sp, 2 * SZREG + jirl zero, ra, 0 +SYM_FUNC_END(return_to_handler) +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/loongarch/kernel/mcount_dyn.S b/arch/loongarch/kernel/mcount_dyn.S new file mode 100644 index 000000000000..c6bbe9a1870c --- /dev/null +++ b/arch/loongarch/kernel/mcount_dyn.S @@ -0,0 +1,162 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Author: Jinyang He + * Author: Huacai Chen + * Copyright (C) 2020-2021 Loongson Technology Corporation Limited + */ + +#include +#include +#include +#include +#include + + .text +/* + * Due to -fpatchable-function-entry=2: the compiler inserted 2 NOPs before the + * regular C function prologue. When PC arrived here, the last 2 instructions + * as follows, + * move t0, ra + * bl callsite (for modules, callsite is a tramplione) + * + * modules tramplione as follows, + * addu16i.d t1, zero, callsite[31:16] + * lu32i.d t1, callsite[51:32] + * lu52i.d t1, t1, callsite[63:52] + * jirl zero, t1, callsite[15:0] >> 2 + * + * See arch/loongarch/kernel/ftrace_dyn.c for details. Here, pay attention to + * that the T series regs are available and safe because each C functions + * follows the LoongArch psABI well. + */ + + .macro ftrace_regs_entry allregs=0 + PTR_ADDIU sp, sp, -PT_SIZE + /* Save trace function ra at PT_ERA */ + PTR_S ra, sp, PT_ERA + /* Save parent ra at PT_R1(RA) */ + PTR_S t0, sp, PT_R1 + PTR_S a0, sp, PT_R4 + PTR_S a1, sp, PT_R5 + PTR_S a2, sp, PT_R6 + PTR_S a3, sp, PT_R7 + PTR_S a4, sp, PT_R8 + PTR_S a5, sp, PT_R9 + PTR_S a6, sp, PT_R10 + PTR_S a7, sp, PT_R11 + PTR_S fp, sp, PT_R22 + + .if \allregs + PTR_S t0, sp, PT_R12 + PTR_S t1, sp, PT_R13 + PTR_S t2, sp, PT_R14 + PTR_S t3, sp, PT_R15 + PTR_S t4, sp, PT_R16 + PTR_S t5, sp, PT_R17 + PTR_S t6, sp, PT_R18 + PTR_S t7, sp, PT_R19 + PTR_S t8, sp, PT_R20 + PTR_S s0, sp, PT_R23 + PTR_S s1, sp, PT_R24 + PTR_S s2, sp, PT_R25 + PTR_S s3, sp, PT_R26 + PTR_S s4, sp, PT_R27 + PTR_S s5, sp, PT_R28 + PTR_S s6, sp, PT_R29 + PTR_S s7, sp, PT_R30 + PTR_S s8, sp, PT_R31 + PTR_S tp, sp, PT_R2 + PTR_S sp, sp, PT_R3 + /* Clear it for later use as a flag sometimes. */ + PTR_S zero, sp, PT_R0 + PTR_S $r21, sp, PT_R21 + .endif + + UNWIND_HINT_REGS + .endm + +SYM_CODE_START(ftrace_caller) + UNWIND_HINT + ftrace_regs_entry allregs=0 + b ftrace_common +SYM_CODE_END(ftrace_caller) + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +SYM_CODE_START(ftrace_regs_caller) + UNWIND_HINT + ftrace_regs_entry allregs=1 + b ftrace_common +SYM_CODE_END(ftrace_regs_caller) +#endif + +SYM_CODE_START(ftrace_common) + UNWIND_HINT_REGS + PTR_ADDIU a0, ra, -8 /* arg0: ip */ + move a1, t0 /* arg1: parent_ip */ + la.pcrel t1, function_trace_op + PTR_L a2, t1, 0 /* arg2: op */ + move a3, sp /* arg3: regs */ + .globl ftrace_call +ftrace_call: + bl ftrace_stub +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + .globl ftrace_graph_call +ftrace_graph_call: + nop /* b ftrace_graph_caller */ +#endif +/* + * As we didn't use S series regs in this assmembly code and all calls + * are C function which will save S series regs by themselves, there is + * no need to restore S series regs. The T series is available and safe + * at the callsite, so there is no need to restore the T series regs. + */ +ftrace_common_return: + PTR_L a0, sp, PT_R4 + PTR_L a1, sp, PT_R5 + PTR_L a2, sp, PT_R6 + PTR_L a3, sp, PT_R7 + PTR_L a4, sp, PT_R8 + PTR_L a5, sp, PT_R9 + PTR_L a6, sp, PT_R10 + PTR_L a7, sp, PT_R11 + PTR_L fp, sp, PT_R22 + PTR_L ra, sp, PT_R1 + PTR_L t0, sp, PT_ERA + PTR_ADDIU sp, sp, PT_SIZE + UNWIND_HINT + jirl zero, t0, 0 +SYM_CODE_END(ftrace_common) + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +SYM_CODE_START(ftrace_graph_caller) + UNWIND_HINT_REGS + PTR_L a0, sp, PT_ERA + PTR_ADDIU a0, a0, -8 /* arg0: self_addr */ + PTR_ADDIU a1, sp, PT_R1 /* arg1: parent */ + bl prepare_ftrace_return + b ftrace_common_return +SYM_CODE_END(ftrace_graph_caller) + +SYM_CODE_START(return_to_handler) + UNWIND_HINT + /* save return value regs */ + PTR_ADDIU sp, sp, -2 * SZREG + PTR_S a0, sp, 0 + PTR_S a1, sp, SZREG + + move a0, zero /* Has no check FP now. */ + bl ftrace_return_to_handler + move ra, a0 /* parent ra */ + + /* restore return value regs */ + PTR_L a0, sp, 0 + PTR_L a1, sp, SZREG + PTR_ADDIU sp, sp, 2 * SZREG + + jirl zero, ra, 0 +SYM_CODE_END(return_to_handler) +#endif + +SYM_FUNC_START(ftrace_stub) + jirl zero, ra, 0 +SYM_FUNC_END(ftrace_stub) diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c index d8b494c7c3c3..cb68b8c3cb4b 100644 --- a/arch/loongarch/kernel/module.c +++ b/arch/loongarch/kernel/module.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -18,7 +19,7 @@ #include #include #include - +#include #include static int rela_stack_push(s64 stack_value, s64 *rela_stack, size_t *rela_stack_top) @@ -666,7 +667,8 @@ int module_finalize(const Elf_Ehdr *hdr, struct module *mod) { char *secstrings; - const Elf_Shdr *s, *orc = NULL, *orc_ip = NULL, *alt = NULL; + const Elf_Shdr *s, *orc = NULL, *orc_ip = NULL, *alt = NULL, + *ftrace_tramp = NULL; secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; @@ -677,6 +679,8 @@ int module_finalize(const Elf_Ehdr *hdr, orc = s; if (!strcmp(".orc_unwind_ip", secstrings + s->sh_name)) orc_ip = s; + if (!strcmp(".ftrace_tramp", secstrings + s->sh_name)) + ftrace_tramp = s; } if (alt) { @@ -689,5 +693,15 @@ int module_finalize(const Elf_Ehdr *hdr, unwind_module_init(mod, (void *)orc_ip->sh_addr, orc_ip->sh_size, (void *)orc->sh_addr, orc->sh_size); + if (ftrace_tramp) { + /* apply dynamic ftrace trampoline */ + int res; + void *fseg = (void *)ftrace_tramp->sh_addr; + + res = apply_ftrace_tramp(mod, fseg, ftrace_tramp->sh_size); + if (res) + return res; + } + return 0; } diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index f1f0163b4639..a4654df1cc30 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -27,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -717,3 +719,18 @@ void __init __dt_setup_arch(void *bph) early_init_dt_scan(bph); } #endif + +#ifdef CONFIG_DEBUG_FS +struct dentry *loongarch_debugfs_dir; +static int __init debugfs_loongarch(void) +{ + struct dentry *d; + + d = debugfs_create_dir("loongarch", NULL); + if (!d) + return -ENOMEM; + loongarch_debugfs_dir = d; + return 0; +} +arch_initcall(debugfs_loongarch); +#endif diff --git a/arch/loongarch/kernel/spinlock_test.c b/arch/loongarch/kernel/spinlock_test.c new file mode 100644 index 000000000000..fbfbc0be4cc8 --- /dev/null +++ b/arch/loongarch/kernel/spinlock_test.c @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include + +static int ss_get(void *data, u64 *val) +{ + ktime_t start, finish; + int loops; + int cont; + DEFINE_RAW_SPINLOCK(ss_spin); + + loops = 1000000; + cont = 1; + + start = ktime_get(); + + while (cont) { + raw_spin_lock(&ss_spin); + loops--; + if (loops == 0) + cont = 0; + raw_spin_unlock(&ss_spin); + } + + finish = ktime_get(); + + *val = ktime_us_delta(finish, start); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_ss, ss_get, NULL, "%llu\n"); + +struct spin_multi_state { + raw_spinlock_t lock; + atomic_t start_wait; + atomic_t enter_wait; + atomic_t exit_wait; + int loops; +}; + +struct spin_multi_per_thread { + struct spin_multi_state *state; + ktime_t start; +}; + +static int multi_other(void *data) +{ + int loops; + int cont; + struct spin_multi_per_thread *pt = data; + struct spin_multi_state *s = pt->state; + + loops = s->loops; + cont = 1; + + atomic_dec(&s->enter_wait); + + while (atomic_read(&s->enter_wait)) + ; /* spin */ + + pt->start = ktime_get(); + + atomic_dec(&s->start_wait); + + while (atomic_read(&s->start_wait)) + ; /* spin */ + + while (cont) { + raw_spin_lock(&s->lock); + loops--; + if (loops == 0) + cont = 0; + raw_spin_unlock(&s->lock); + } + + atomic_dec(&s->exit_wait); + while (atomic_read(&s->exit_wait)) + ; /* spin */ + return 0; +} + +static int multi_get(void *data, u64 *val) +{ + ktime_t finish; + struct spin_multi_state ms; + struct spin_multi_per_thread t1, t2; + + ms.lock = __RAW_SPIN_LOCK_UNLOCKED("multi_get"); + ms.loops = 1000000; + + atomic_set(&ms.start_wait, 2); + atomic_set(&ms.enter_wait, 2); + atomic_set(&ms.exit_wait, 2); + t1.state = &ms; + t2.state = &ms; + + kthread_run(multi_other, &t2, "multi_get"); + + multi_other(&t1); + + finish = ktime_get(); + + *val = ktime_us_delta(finish, t1.start); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_multi, multi_get, NULL, "%llu\n"); + +static int __init spinlock_test(void) +{ + debugfs_create_file("spin_single", S_IRUGO, loongarch_debugfs_dir, NULL, + &fops_ss); + debugfs_create_file("spin_multi", S_IRUGO, loongarch_debugfs_dir, NULL, + &fops_multi); + return 0; +} +device_initcall(spinlock_test); diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index 51c76a927b0c..fda0445ebad1 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -404,6 +404,12 @@ asmlinkage void noinstr do_bp(struct pt_regs *regs) bcode = (opcode & 0x7fff); +#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP + if (kgdb_ll_trap(DIE_TRAP, str, regs, code, current->thread.trap_nr, + SIGTRAP) == NOTIFY_STOP) + return; +#endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */ + /* * notify the kprobe handlers, if instruction is likely to * pertain to them. diff --git a/arch/loongarch/kernel/unaligned.c b/arch/loongarch/kernel/unaligned.c index 88064deb6b68..bf69044391e0 100644 --- a/arch/loongarch/kernel/unaligned.c +++ b/arch/loongarch/kernel/unaligned.c @@ -15,16 +15,31 @@ #include #include #include +#include #include #include #include #include +#include #include #include #include "access-helper.h" +enum { + UNALIGNED_ACTION_QUIET, + UNALIGNED_ACTION_SIGNAL, + UNALIGNED_ACTION_SHOW, +}; +#ifdef CONFIG_DEBUG_FS +static u32 unaligned_instructions; +static u32 unaligned_action; +#else +#define unaligned_action UNALIGNED_ACTION_QUIET +#endif +extern void show_registers(struct pt_regs *regs); + static inline void write_fpr(unsigned int fd, unsigned long value) { #define WRITE_FPR(fd, value) \ @@ -352,6 +367,11 @@ static void emulate_load_store_insn(struct pt_regs *regs, void __user *addr, uns } else goto sigbus; + +#ifdef CONFIG_DEBUG_FS + unaligned_instructions++; +#endif + compute_return_era(regs); return; @@ -379,6 +399,9 @@ asmlinkage void noinstr do_ade(struct pt_regs *regs) { irqentry_state_t state = irqentry_enter(regs); + if (unaligned_action == UNALIGNED_ACTION_SHOW) + show_registers(regs); + die_if_kernel("Kernel ade access", regs); force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)regs->csr_badvaddr); @@ -400,6 +423,10 @@ asmlinkage void noinstr do_ale(struct pt_regs *regs) if (user_mode(regs) && !test_thread_flag(TIF_FIXADE)) goto sigbus; + if (unaligned_action == UNALIGNED_ACTION_SIGNAL) + goto sigbus; + if (unaligned_action == UNALIGNED_ACTION_SHOW) + show_registers(regs); pc = (unsigned int *)exception_era(regs); @@ -414,3 +441,15 @@ asmlinkage void noinstr do_ale(struct pt_regs *regs) out: irqentry_exit(regs, state); } + +#ifdef CONFIG_DEBUG_FS +static int __init debugfs_unaligned(void) +{ + debugfs_create_u32("unaligned_instructions", S_IRUGO, + loongarch_debugfs_dir, &unaligned_instructions); + debugfs_create_u32("unaligned_action", S_IRUGO | S_IWUSR, + loongarch_debugfs_dir, &unaligned_action); + return 0; +} +arch_initcall(debugfs_unaligned); +#endif diff --git a/arch/loongarch/kernel/unwind_orc.c b/arch/loongarch/kernel/unwind_orc.c index 6feb9edec81f..c7f51efbf73d 100644 --- a/arch/loongarch/kernel/unwind_orc.c +++ b/arch/loongarch/kernel/unwind_orc.c @@ -435,6 +435,17 @@ bool unwind_next_frame(struct unwind_state *state) goto err; } +#ifdef CONFIG_FUNCTION_TRACER + if ((unsigned long)&ftrace_call == state->pc) { + regs = (struct pt_regs *)state->sp; + state->pc = regs->csr_era; + state->ra = regs->regs[1]; + state->sp += sizeof(struct pt_regs); + preempt_enable(); + return true; + } +#endif + switch (orc->type) { case ORC_TYPE_CALL: if (orc->ra_reg == ORC_REG_PREV_SP) { diff --git a/arch/loongarch/kernel/uprobes.c b/arch/loongarch/kernel/uprobes.c new file mode 100644 index 000000000000..6a9dc3d3e631 --- /dev/null +++ b/arch/loongarch/kernel/uprobes.c @@ -0,0 +1,239 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +/** + * arch_uprobe_analyze_insn - instruction analysis including validity and fixups. + * @mm: the probed address space. + * @arch_uprobe: the probepoint information. + * @addr: virtual address at which to install the probepoint + * Return 0 on success or a -ve number on error. + */ +int arch_uprobe_analyze_insn(struct arch_uprobe *aup, + struct mm_struct *mm, unsigned long addr) +{ + union loongarch_instruction insn; + + if (addr & 0x03) + return -EINVAL; + + insn.word = aup->insn[0]; + + if (insn.reg1i21_format.opcode == bceqz_op) { + pr_notice("Uprobes for bceqz and bcnez instructions are not" + "supported\n"); + return -EINVAL; + } + + if (is_branch_insn(insn) || is_pc_insn(insn)) { + aup->ixol[0] = larch_insn_gen_nop(); + } else { + aup->ixol[0] = aup->insn[0]; + } + + aup->ixol[1] = UPROBE_BRK_UPROBE_XOL; + + return 0; +} + +/** + * is_trap_insn - check if the instruction is a trap variant + * @insn: instruction to be checked. + * Returns true if @insn is a trap variant. + * + * This definition overrides the weak definition in kernel/events/uprobes.c. + * and is needed for the case where an architecture has multiple trap + * instructions (like PowerPC or MIPS). We treat BREAK just like the more + * modern conditional trap instructions. + */ +bool is_trap_insn(uprobe_opcode_t *insn) +{ + union loongarch_instruction inst; + + inst.word = *insn; + + return (inst.reg0i15_format.opcode == break_op); +} + +#define UPROBE_TRAP_NR ULONG_MAX + +/* + * arch_uprobe_pre_xol - prepare to execute out of line. + * @auprobe: the probepoint information. + * @regs: reflects the saved user state of current task. + */ +int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs) +{ + union loongarch_instruction insn; + struct uprobe_task *utask = current->utask; + + insn.word = aup->insn[0]; + + /* + * Here do emulation if it were branch or pc-relative insn. + */ + if (is_branch_insn(insn)) { + if (!simu_branch(regs, insn)) + return -EFAULT; + aup->resume_era = regs->csr_era; + } else if (is_pc_insn(insn)) { + if (!simu_pc(regs, insn)) + return -EFAULT; + aup->resume_era = regs->csr_era; + } else { + aup->resume_era = regs->csr_era + 4; + } + utask->autask.saved_trap_nr = current->thread.trap_nr; + current->thread.trap_nr = UPROBE_TRAP_NR; + regs->csr_era = current->utask->xol_vaddr; + + return 0; +} + +int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs) +{ + struct uprobe_task *utask = current->utask; + + current->thread.trap_nr = utask->autask.saved_trap_nr; + regs->csr_era = aup->resume_era; + + return 0; +} + +/* + * If xol insn itself traps and generates a signal(Say, + * SIGILL/SIGSEGV/etc), then detect the case where a singlestepped + * instruction jumps back to its own address. It is assumed that anything + * like do_page_fault/do_trap/etc sets thread.trap_nr != -1. + * + * arch_uprobe_pre_xol/arch_uprobe_post_xol save/restore thread.trap_nr, + * arch_uprobe_xol_was_trapped() simply checks that ->trap_nr is not equal to + * UPROBE_TRAP_NR == -1 set by arch_uprobe_pre_xol(). + */ +bool arch_uprobe_xol_was_trapped(struct task_struct *tsk) +{ + if (tsk->thread.trap_nr != UPROBE_TRAP_NR) + return true; + + return false; +} + +int arch_uprobe_exception_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + struct die_args *args = data; + struct pt_regs *regs = args->regs; + + /* regs == NULL is a kernel bug */ + if (WARN_ON(!regs)) + return NOTIFY_DONE; + + /* We are only interested in userspace traps */ + if (!user_mode(regs)) + return NOTIFY_DONE; + + switch (val) { + case DIE_UPROBE: + if (uprobe_pre_sstep_notifier(regs)) + return NOTIFY_STOP; + break; + case DIE_UPROBE_XOL: + if (uprobe_post_sstep_notifier(regs)) + return NOTIFY_STOP; + default: + break; + } + + return 0; +} + +/* + * This function gets called when XOL instruction either gets trapped or + * the thread has a fatal signal. Reset the instruction pointer to its + * probed address for the potential restart or for post mortem analysis. + */ +void arch_uprobe_abort_xol(struct arch_uprobe *aup, + struct pt_regs *regs) +{ + struct uprobe_task *utask = current->utask; + + instruction_pointer_set(regs, utask->vaddr); +} + +unsigned long arch_uretprobe_hijack_return_addr( + unsigned long trampoline_vaddr, struct pt_regs *regs) +{ + unsigned long ra; + + ra = regs->regs[1]; + + /* Replace the return address with the trampoline address */ + regs->regs[1] = trampoline_vaddr; + + return ra; +} + +/** + * set_swbp - store breakpoint at a given address. + * @auprobe: arch specific probepoint information. + * @mm: the probed process address space. + * @vaddr: the virtual address to insert the opcode. + * + * For mm @mm, store the breakpoint instruction at @vaddr. + * Return 0 (success) or a negative errno. + * + * This version overrides the weak version in kernel/events/uprobes.c. + */ +int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, + unsigned long vaddr) +{ + return uprobe_write_opcode(auprobe, mm, vaddr, UPROBE_SWBP_INSN); +} + +void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, + void *src, unsigned long len) +{ + unsigned long kaddr, kstart; + + /* Initialize the slot */ + kaddr = (unsigned long)kmap_atomic(page); + kstart = kaddr + (vaddr & ~PAGE_MASK); + memcpy((void *)kstart, src, len); + flush_icache_range(kstart, kstart + len); + kunmap_atomic((void *)kaddr); +} + +/** + * uprobe_get_swbp_addr - compute address of swbp given post-swbp regs + * @regs: Reflects the saved state of the task after it has hit a breakpoint + * instruction. + * Return the address of the breakpoint instruction. + * + * This overrides the weak version in kernel/events/uprobes.c. + */ +unsigned long uprobe_get_swbp_addr(struct pt_regs *regs) +{ + return instruction_pointer(regs); +} + +/* + * See if the instruction can be emulated. + * Returns true if instruction was emulated, false otherwise. + * + * For now we always emulate so this function just returns 0. + */ +bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + return 0; +} diff --git a/arch/loongarch/mm/fault.c b/arch/loongarch/mm/fault.c index 605579b19a00..5eab7d7ead9b 100644 --- a/arch/loongarch/mm/fault.c +++ b/arch/loongarch/mm/fault.c @@ -135,6 +135,15 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, struct vm_area_struct *vma = NULL; vm_fault_t fault; +#ifdef CONFIG_KPROBES + /* + * This is to notify the fault handler of the kprobes. + */ + if (notify_die(DIE_PAGE_FAULT, "page fault", regs, -1, + current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP) + return; +#endif + /* * We fault-in kernel-space virtual memory on-demand. The * 'reference' page table is init_mm.pgd. diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c index 84ea65aec015..15aca3c8c35a 100644 --- a/scripts/recordmcount.c +++ b/scripts/recordmcount.c @@ -44,6 +44,14 @@ #define R_SW_64_REFQUAD 2 /* Direct 64 bit */ #endif +#ifndef EM_LOONGARCH +#define EM_LOONGARCH 258 +#define R_LARCH_32 1 +#define R_LARCH_64 2 +#define R_LARCH_MARK_LA 20 +#define R_LARCH_SOP_PUSH_PLT_PCREL 29 +#endif + #define R_ARM_PC24 1 #define R_ARM_THM_CALL 10 #define R_ARM_CALL 28 @@ -456,6 +464,28 @@ static int arm64_is_fake_mcount(Elf64_Rel const *rp) return ELF64_R_TYPE(w8(rp->r_info)) != R_AARCH64_CALL26; } +static int LARCH32_is_fake_mcount(Elf32_Rel const *rp) +{ + switch (ELF64_R_TYPE(w(rp->r_info))) { + case R_LARCH_MARK_LA: + case R_LARCH_SOP_PUSH_PLT_PCREL: + return 0; + } + + return 1; +} + +static int LARCH64_is_fake_mcount(Elf64_Rel const *rp) +{ + switch (ELF64_R_TYPE(w(rp->r_info))) { + case R_LARCH_MARK_LA: + case R_LARCH_SOP_PUSH_PLT_PCREL: + return 0; + } + + return 1; +} + /* 64-bit EM_MIPS has weird ELF64_Rela.r_info. * http://techpubs.sgi.com/library/manuals/4000/007-4658-001/pdf/007-4658-001.pdf * We interpret Table 29 Relocation Operation (Elf64_Rel, Elf64_Rela) [p.40] @@ -579,6 +609,7 @@ static int do_file(char const *const fname) break; case EM_IA_64: reltype = R_IA64_IMM64; break; case EM_MIPS: /* reltype: e_class */ break; + case EM_LOONGARCH: /* reltype: e_class */ break; case EM_PPC: reltype = R_PPC_ADDR32; break; case EM_PPC64: reltype = R_PPC64_ADDR64; break; case EM_S390: /* reltype: e_class */ break; @@ -610,6 +641,10 @@ static int do_file(char const *const fname) reltype = R_MIPS_32; is_fake_mcount32 = MIPS32_is_fake_mcount; } + if (w2(ehdr->e_machine) == EM_LOONGARCH) { + reltype = R_LARCH_32; + is_fake_mcount32 = LARCH32_is_fake_mcount; + } if (do32(ehdr, fname, reltype) < 0) goto out; break; @@ -634,6 +669,11 @@ static int do_file(char const *const fname) if (w2(ghdr->e_machine) == EM_SW64) is_fake_mcount64 = MIPS64_is_fake_mcount; + if (w2(ghdr->e_machine) == EM_LOONGARCH) { + reltype = R_LARCH_64; + is_fake_mcount64 = LARCH64_is_fake_mcount; + } + if (do64(ghdr, fname, reltype) < 0) goto out; break; -- Gitee From 800923cef78294c3ff16fcec37225717ae886e35 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 31 Oct 2018 14:13:26 +0800 Subject: [PATCH 48/59] LoongArch: Add efistub support Add efistub basic support for LoongArch. When CONFIG_EFI_STUB is enabled, A file named vmlinuz.efi is created which can be executed by UEFI directly with no need of traditional OS Loader. The EFI boot stub supports multiple initrds, but they must exist on the same partition as the vmlinuz.efi. Command-line arguments for the kernel can be appended after the vmlinuz.efi name when run from the EFI shell, e.g. Shell> vmlinuz.efi console=ttyS0 root=/dev/sdb initrd=initrd.img Change-Id: I24e383676a73bdd2563b12c19928e24de6743346 Signed-off-by: Yun Liu Signed-off-by: Hongchen Zhang Signed-off-by: Huacai Chen --- .gitignore | 1 + arch/loongarch/Kconfig | 8 + arch/loongarch/Makefile | 4 + arch/loongarch/boot/compressed/Makefile | 25 +- arch/loongarch/boot/compressed/efi-entry.c | 24 ++ arch/loongarch/boot/compressed/efi-header.S | 126 ++++++ arch/loongarch/boot/compressed/ld.script | 19 +- arch/loongarch/boot/compressed/string.c | 120 ++++++ drivers/firmware/efi/Kconfig | 4 +- drivers/firmware/efi/libstub/Makefile | 10 + drivers/firmware/efi/libstub/loongarch-stub.c | 362 ++++++++++++++++++ include/linux/pe.h | 1 + 12 files changed, 698 insertions(+), 6 deletions(-) create mode 100644 arch/loongarch/boot/compressed/efi-entry.c create mode 100644 arch/loongarch/boot/compressed/efi-header.S create mode 100644 drivers/firmware/efi/libstub/loongarch-stub.c diff --git a/.gitignore b/.gitignore index 67d2f3503128..cbee16267723 100644 --- a/.gitignore +++ b/.gitignore @@ -61,6 +61,7 @@ modules.order /vmlinux.symvers /vmlinux-gdb.py /vmlinuz +/vmlinuz.efi /System.map /Module.markers /modules.builtin.modinfo diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 2022a8e5c021..7c621e10f93e 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -566,6 +566,14 @@ config EFI resultant kernel should continue to boot on existing non-EFI platforms. +config EFI_STUB + bool "EFI stub support" + default y + depends on EFI + help + This kernel feature allows a vmlinuz.efi to be loaded directly + by EFI firmware without the use of a bootloader. + config SMP bool "Multi-Processing support" depends on SYS_SUPPORTS_SMP diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index 996175d864bb..2ed8ac4ebb3b 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -44,6 +44,9 @@ endif all-y := vmlinux all-$(CONFIG_SYS_SUPPORTS_ZBOOT)+= vmlinuz +all-$(CONFIG_EFI_STUB) += vmlinuz.efi + +vmlinuz.efi: vmlinuz # # GCC uses -G0 -mabicalls -fpic as default. We don't want PIC in the kernel @@ -138,6 +141,7 @@ boot-y := vmlinux.bin # compressed boot image targets (arch/loongarch/boot/compressed/) bootz-y := vmlinuz bootz-y += vmlinuz.bin +bootz-$(CONFIG_EFI_STUB)+= vmlinuz.efi all: $(all-y) $(KBUILD_DTBS) diff --git a/arch/loongarch/boot/compressed/Makefile b/arch/loongarch/boot/compressed/Makefile index e64de8dbbac4..4b4270fc615b 100644 --- a/arch/loongarch/boot/compressed/Makefile +++ b/arch/loongarch/boot/compressed/Makefile @@ -15,8 +15,12 @@ KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_FTRACE), $(KBUILD_CFLAGS)) KBUILD_CFLAGS := $(filter-out -fstack-protector, $(KBUILD_CFLAGS)) +KBUILD_CFLAGS += -fpic + KBUILD_CFLAGS := $(KBUILD_CFLAGS) -D__KERNEL__ \ - -DBOOT_HEAP_SIZE=$(BOOT_HEAP_SIZE) -D"VMLINUX_LOAD_ADDRESS_ULL=$(VMLINUX_LOAD_ADDRESS)ull" + -DBOOT_HEAP_SIZE=$(BOOT_HEAP_SIZE) \ + -D"VMLINUX_LOAD_ADDRESS_ULL=$(VMLINUX_LOAD_ADDRESS)ull" \ + -D"KERNEL_ENTRY=$(VMLINUX_ENTRY_ADDRESS)ull" \ KBUILD_AFLAGS := $(KBUILD_AFLAGS) -D__ASSEMBLY__ \ -DBOOT_HEAP_SIZE=$(BOOT_HEAP_SIZE) \ @@ -84,5 +88,24 @@ vmlinuz: $(src)/ld.script $(vmlinuzobjs-y) $(obj)/calc_vmlinuz_load_addr $(call cmd,zld) $(call cmd,strip) +efiobjs-y := $(obj)/efi-header.o $(obj)/efi-entry.o $(obj)/decompress.o $(obj)/piggy.o \ + $(obj)/string.o $(objtree)/drivers/firmware/efi/libstub/lib.a + +efiobjs-$(CONFIG_DEBUG_ZBOOT) += $(obj)/dbg.o +efiobjs-$(CONFIG_DEBUG_ZBOOT) += $(obj)/uart-16550.o + +targets += $(notdir $(efiobjs-y)) + +quiet_cmd_efild = LD $@ + cmd_efild = $(LD) $(KBUILD_LDFLAGS) -T $< $(efiobjs-y) -o $@ + +quiet_cmd_eficopy = OBJCOPY $@ +cmd_eficopy = $(OBJCOPY) $(OBJCOPYFLAGS) -O binary -R .comment -S $@ $@ + +vmlinuz.efi: $(src)/ld.script $(efiobjs-y) + $(call cmd,efild) + $(call cmd,eficopy) + clean-files += $(objtree)/vmlinuz clean-files += $(objtree)/vmlinuz.bin +clean-files += $(objtree)/vmlinuz.efi diff --git a/arch/loongarch/boot/compressed/efi-entry.c b/arch/loongarch/boot/compressed/efi-entry.c new file mode 100644 index 000000000000..edb32ce588cb --- /dev/null +++ b/arch/loongarch/boot/compressed/efi-entry.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include + +typedef void (*kernel_entry_t)(int argc, char *argv[], struct boot_params *boot_p); + +unsigned char efi_heap[BOOT_HEAP_SIZE]; +kernel_entry_t kernel_entry = (kernel_entry_t)KERNEL_ENTRY; +efi_status_t __efiapi efi_pe_entry(efi_handle_t handle, efi_system_table_t *sys_table); + +efi_status_t start(efi_handle_t handle, efi_system_table_t *sys_table) +{ + /* Config Direct Mapping */ + csr_writeq(CSR_DMW0_INIT, LOONGARCH_CSR_DMWIN0); + csr_writeq(CSR_DMW1_INIT, LOONGARCH_CSR_DMWIN1); + + /* Clear BSS */ + memset(_edata, 0, _end - _edata); + + return efi_pe_entry(handle, sys_table); +} diff --git a/arch/loongarch/boot/compressed/efi-header.S b/arch/loongarch/boot/compressed/efi-header.S new file mode 100644 index 000000000000..13c7e2915a49 --- /dev/null +++ b/arch/loongarch/boot/compressed/efi-header.S @@ -0,0 +1,126 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2013 - 2017 Linaro, Ltd. + * Copyright (C) 2013, 2014 Red Hat, Inc. + * Copyright (C) 2020, 2021 Loongson, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include + + .macro __EFI_PE_HEADER + .long PE_MAGIC +coff_header: + .short IMAGE_FILE_MACHINE_LOONGARCH /* Machine */ + .short section_count /* NumberOfSections */ + .long 0 /* TimeDateStamp */ + .long 0 /* PointerToSymbolTable */ + .long 0 /* NumberOfSymbols */ + .short section_table - optional_header /* SizeOfOptionalHeader */ + .short IMAGE_FILE_DEBUG_STRIPPED | \ + IMAGE_FILE_EXECUTABLE_IMAGE | \ + IMAGE_FILE_LINE_NUMS_STRIPPED /* Characteristics */ + +optional_header: + .short PE_OPT_MAGIC_PE32PLUS /* PE32+ format */ + .byte 0x02 /* MajorLinkerVersion */ + .byte 0x14 /* MinorLinkerVersion */ + .long _data - efi_header_end /* SizeOfCode */ + .long _end - _data /* SizeOfInitializedData */ + .long 0 /* SizeOfUninitializedData */ + .long start - _text /* AddressOfEntryPoint */ + .long efi_header_end - _text /* BaseOfCode */ + +extra_header_fields: + .quad 0 /* ImageBase */ + .long PECOFF_SEGMENT_ALIGN /* SectionAlignment */ + .long PECOFF_FILE_ALIGN /* FileAlignment */ + .short 0 /* MajorOperatingSystemVersion */ + .short 0 /* MinorOperatingSystemVersion */ + .short 0 /* MajorImageVersion */ + .short 0 /* MinorImageVersion */ + .short 0 /* MajorSubsystemVersion */ + .short 0 /* MinorSubsystemVersion */ + .long 0 /* Win32VersionValue */ + + .long _end - _text /* SizeOfImage */ + + /* Everything before the kernel image is considered part of the header */ + .long efi_header_end - _head /* SizeOfHeaders */ + .long 0 /* CheckSum */ + .short IMAGE_SUBSYSTEM_EFI_APPLICATION /* Subsystem */ + .short 0 /* DllCharacteristics */ + .quad 0 /* SizeOfStackReserve */ + .quad 0 /* SizeOfStackCommit */ + .quad 0 /* SizeOfHeapReserve */ + .quad 0 /* SizeOfHeapCommit */ + .long 0 /* LoaderFlags */ + .long (section_table - .) / 8 /* NumberOfRvaAndSizes */ + + .quad 0 /* ExportTable */ + .quad 0 /* ImportTable */ + .quad 0 /* ResourceTable */ + .quad 0 /* ExceptionTable */ + .quad 0 /* CertificationTable */ + .quad 0 /* BaseRelocationTable */ + + /* Section table */ +section_table: + .ascii ".text\0\0\0" + .long _data - efi_header_end /* VirtualSize */ + .long efi_header_end - _text /* VirtualAddress */ + .long _data - efi_header_end /* SizeOfRawData */ + .long efi_header_end - _text /* PointerToRawData */ + + .long 0 /* PointerToRelocations */ + .long 0 /* PointerToLineNumbers */ + .short 0 /* NumberOfRelocations */ + .short 0 /* NumberOfLineNumbers */ + .long IMAGE_SCN_CNT_CODE | \ + IMAGE_SCN_MEM_READ | \ + IMAGE_SCN_MEM_EXECUTE /* Characteristics */ + + .ascii ".data\0\0\0" + .long _end - _data /* VirtualSize */ + .long _data - _text /* VirtualAddress */ + .long _edata - _data /* SizeOfRawData */ + .long _data - _text /* PointerToRawData */ + + .long 0 /* PointerToRelocations */ + .long 0 /* PointerToLineNumbers */ + .short 0 /* NumberOfRelocations */ + .short 0 /* NumberOfLineNumbers */ + .long IMAGE_SCN_CNT_INITIALIZED_DATA | \ + IMAGE_SCN_MEM_READ | \ + IMAGE_SCN_MEM_WRITE /* Characteristics */ + + .org 0x20e + .word kernel_version - 512 - _head + + .set section_count, (. - section_table) / 40 +efi_header_end: + .endm + + .section ".head.text","ax" +_head: + /* "MZ", MS-DOS header */ + .byte 0x4d + .byte 0x5a + .org 0x28 + .ascii "Loongson\0" + .org 0x3c + + /* Offset to the PE header */ + .long pe_header - _head + +pe_header: + __EFI_PE_HEADER + +kernel_version: + .ascii UTS_RELEASE " (" LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ") " UTS_VERSION "\0" diff --git a/arch/loongarch/boot/compressed/ld.script b/arch/loongarch/boot/compressed/ld.script index 2e32d8a72fdf..675ce374a71b 100644 --- a/arch/loongarch/boot/compressed/ld.script +++ b/arch/loongarch/boot/compressed/ld.script @@ -6,6 +6,13 @@ * Copyright (C) 2020 Loongson Technology Corporation Limited */ +/* + * Max avaliable Page Size is 64K, so we set SectionAlignment + * field of EFI application to 64K. + */ +PECOFF_FILE_ALIGN = 0x200; +PECOFF_SEGMENT_ALIGN = 0x10000; + OUTPUT_ARCH(loongarch) ENTRY(start) PHDRS { @@ -14,13 +21,19 @@ PHDRS { SECTIONS { /* Text and read-only data */ - /* . = VMLINUZ_LOAD_ADDRESS; */ + _text = .; + .head.text : { + *(.head.text) + } + .text : { *(.text) *(.rodata) }: text /* End of text section */ + .= ALIGN(PECOFF_SEGMENT_ALIGN); + _data = .; /* Writable data */ .data : { *(.data) @@ -29,7 +42,7 @@ SECTIONS *(.image) __image_end = .; CONSTRUCTORS - . = ALIGN(16); + . = ALIGN(PECOFF_FILE_ALIGN); } _edata = .; @@ -39,7 +52,7 @@ SECTIONS .bss : { *(.bss) } - . = ALIGN(16); + .= ALIGN(PECOFF_SEGMENT_ALIGN); _end = .; /* Sections to be discarded */ diff --git a/arch/loongarch/boot/compressed/string.c b/arch/loongarch/boot/compressed/string.c index 67e2355ae50e..d38ccaece00d 100644 --- a/arch/loongarch/boot/compressed/string.c +++ b/arch/loongarch/boot/compressed/string.c @@ -44,3 +44,123 @@ void __weak *memmove(void *dest, const void *src, size_t n) return dest; } + +int __weak memcmp(const void *cs, const void *ct, size_t count) +{ + int res = 0; + const unsigned char *su1, *su2; + + for (su1 = cs, su2 = ct; 0 < count; ++su1, ++su2, count--) { + res = *su1 - *su2; + if (res != 0) + break; + } + return res; +} + +int __weak strcmp(const char *str1, const char *str2) +{ + int delta = 0; + const unsigned char *s1 = (const unsigned char *)str1; + const unsigned char *s2 = (const unsigned char *)str2; + + while (*s1 || *s2) { + delta = *s1 - *s2; + if (delta) + return delta; + s1++; + s2++; + } + return 0; +} + +size_t __weak strlen(const char *s) +{ + const char *sc; + + for (sc = s; *sc != '\0'; ++sc) + /* nothing */; + return sc - s; +} + +size_t __weak strnlen(const char *s, size_t count) +{ + const char *sc; + + for (sc = s; count-- && *sc != '\0'; ++sc) + /* nothing */; + return sc - s; +} + +char * __weak strnstr(const char *s1, const char *s2, size_t len) +{ + size_t l2; + + l2 = strlen(s2); + if (!l2) + return (char *)s1; + while (len >= l2) { + len--; + if (!memcmp(s1, s2, l2)) + return (char *)s1; + s1++; + } + return NULL; +} + +#undef strcat +char * __weak strcat(char *dest, const char *src) +{ + char *tmp = dest; + + while (*dest) + dest++; + while ((*dest++ = *src++) != '\0') + ; + return tmp; +} + +char * __weak strncat(char *dest, const char *src, size_t count) +{ + char *tmp = dest; + + if (count) { + while (*dest) + dest++; + while ((*dest++ = *src++) != 0) { + if (--count == 0) { + *dest = '\0'; + break; + } + } + } + return tmp; +} + +char * __weak strpbrk(const char *cs, const char *ct) +{ + const char *sc1, *sc2; + + for (sc1 = cs; *sc1 != '\0'; ++sc1) { + for (sc2 = ct; *sc2 != '\0'; ++sc2) { + if (*sc1 == *sc2) + return (char *)sc1; + } + } + return NULL; +} + +char * __weak strsep(char **s, const char *ct) +{ + char *sbegin = *s; + char *end; + + if (sbegin == NULL) + return NULL; + + end = strpbrk(sbegin, ct); + if (end) + *end++ = '\0'; + *s = end; + return sbegin; +} diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig index 825d6619ded6..f5ee72325fda 100644 --- a/drivers/firmware/efi/Kconfig +++ b/drivers/firmware/efi/Kconfig @@ -121,9 +121,9 @@ config EFI_ARMSTUB_DTB_LOADER config EFI_GENERIC_STUB_INITRD_CMDLINE_LOADER bool "Enable the command line initrd loader" if !X86 - depends on EFI_STUB && (EFI_GENERIC_STUB || X86) - default y depends on !RISCV + depends on EFI_STUB && (EFI_GENERIC_STUB || X86 || LOONGARCH) + default y help Select this config option to add support for the initrd= command line parameter, allowing an initrd that resides on the same volume diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index a2ae9c3b9579..f42538ae3234 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -26,6 +26,8 @@ cflags-$(CONFIG_ARM) := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) \ $(call cc-option,-mno-single-pic-base) cflags-$(CONFIG_RISCV) := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) \ -fpic +cflags-$(CONFIG_LOONGARCH) := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) \ + -fpic -fshort-wchar cflags-$(CONFIG_EFI_GENERIC_STUB) += -I$(srctree)/scripts/dtc/libfdt @@ -68,6 +70,8 @@ lib-$(CONFIG_ARM) += arm32-stub.o lib-$(CONFIG_ARM64) += arm64-stub.o lib-$(CONFIG_X86) += x86-stub.o lib-$(CONFIG_RISCV) += riscv-stub.o +lib-$(CONFIG_LOONGARCH) += loongarch-stub.o string.o + CFLAGS_arm32-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET) # Even when -mbranch-protection=none is set, Clang will generate a @@ -95,6 +99,7 @@ STUBCOPY_FLAGS-$(CONFIG_ARM) += --rename-section .data=.data.efistub \ --rename-section .bss=.bss.efistub,load,alloc STUBCOPY_RELOC-$(CONFIG_ARM) := R_ARM_ABS + # # arm64 puts the stub in the kernel proper, which will unnecessarily retain all # code indefinitely unless it is annotated as __init/__initdata/__initconst etc. @@ -123,6 +128,11 @@ STUBCOPY_FLAGS-$(CONFIG_RISCV) += --prefix-alloc-sections=.init \ --prefix-symbols=__efistub_ STUBCOPY_RELOC-$(CONFIG_RISCV) := R_RISCV_HI20 +# For LoongArch, keep all the symbols in .init section and make sure that no +# absolute symbols references doesn't exist. +STUBCOPY_FLAGS-$(CONFIG_LOONGARCH) += --prefix-alloc-sections=.init +STUBCOPY_RELOC-$(CONFIG_LOONGARCH) := R_LARCH_MARK_LA + $(obj)/%.stub.o: $(obj)/%.o FORCE $(call if_changed,stubcopy) diff --git a/drivers/firmware/efi/libstub/loongarch-stub.c b/drivers/firmware/efi/libstub/loongarch-stub.c new file mode 100644 index 000000000000..d0df0e3a107f --- /dev/null +++ b/drivers/firmware/efi/libstub/loongarch-stub.c @@ -0,0 +1,362 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2013,2014 Linaro Limited + * Roy Franz + * Copyright (C) 2020 Loongson, Inc. + * Yun Liu + */ + +#include +#include +#include +#include +#include +#include +#include "efistub.h" + +#define MAX_ARG_COUNT 128 +#define CMDLINE_MAX_SIZE 0x200 + +#define EFI_LOONGSON_BOOT_PARAM_GUID \ + EFI_GUID(0x4660f721, 0x2ec5, 0x416a, 0x89, 0x9a, 0x43, 0x18, 0x02, 0x50, 0xa0, 0xc9) + +typedef struct loongson_mem_map mmap; +typedef void (*kernel_entry_t)(int argc, char *argv[], struct boot_params *boot_p); + +extern char efi_heap[]; +extern kernel_entry_t kernel_entry; +extern void decompress_kernel(unsigned long boot_heap_start); + +static int argc; +static char **argv; +const efi_system_table_t *efi_system_table; +static unsigned int map_entry[LOONGSON3_BOOT_MEM_MAP_MAX]; +static struct efi_mmap mmap_array[EFI_MAX_MEMORY_TYPE][LOONGSON3_BOOT_MEM_MAP_MAX]; + +struct exit_boot_struct { + struct boot_params *bp; + unsigned int *runtime_entry_count; +}; + +unsigned char efi_crc8(char *buff, int size) +{ + int sum, cnt; + + for (sum = 0, cnt = 0; cnt < size; cnt++) + sum = (char) (sum + *(buff + cnt)); + + return (char)(0x100 - sum); +} + +static unsigned long convert_priv_cmdline(char *cmdline_ptr, + unsigned long rd_addr, unsigned long rd_size) +{ + unsigned int rdprev_size; + unsigned int cmdline_size; + efi_status_t status; + char *pstr, *substr; + char *initrd_ptr = NULL; + char convert_str[CMDLINE_MAX_SIZE]; + static char cmdline_array[CMDLINE_MAX_SIZE]; + + cmdline_size = strlen(cmdline_ptr); + snprintf(cmdline_array, CMDLINE_MAX_SIZE, "kernel "); + + initrd_ptr = strstr(cmdline_ptr, "initrd="); + if (!initrd_ptr) { + snprintf(cmdline_array, CMDLINE_MAX_SIZE, "kernel %s", cmdline_ptr); + goto completed; + } + snprintf(convert_str, CMDLINE_MAX_SIZE, " initrd=0x%lx,0x%lx", rd_addr, rd_size); + rdprev_size = cmdline_size - strlen(initrd_ptr); + strncat(cmdline_array, cmdline_ptr, rdprev_size); + + cmdline_ptr = strnstr(initrd_ptr, " ", CMDLINE_MAX_SIZE); + strcat(cmdline_array, convert_str); + if (!cmdline_ptr) + goto completed; + + strcat(cmdline_array, cmdline_ptr); + +completed: + status = efi_allocate_pages((MAX_ARG_COUNT + 1) * (sizeof(char *)), + (unsigned long *)&argv, ULONG_MAX); + if (status != EFI_SUCCESS) { + efi_err("Alloc argv mmap_array error\n"); + return status; + } + + argc = 0; + pstr = cmdline_array; + + substr = strsep(&pstr, " \t"); + while (substr != NULL) { + if (strlen(substr)) { + argv[argc++] = substr; + if (argc == MAX_ARG_COUNT) { + efi_err("Argv mmap_array full!\n"); + break; + } + } + substr = strsep(&pstr, " \t"); + } + + return EFI_SUCCESS; +} + +unsigned int efi_memmap_sort(struct loongsonlist_mem_map *memmap, + unsigned int index, unsigned int mem_type) +{ + unsigned int i, t; + unsigned long msize; + + for (i = 0; i < map_entry[mem_type]; i = t) { + msize = mmap_array[mem_type][i].mem_size; + for (t = i + 1; t < map_entry[mem_type]; t++) { + if (mmap_array[mem_type][i].mem_start + msize < + mmap_array[mem_type][t].mem_start) + break; + + msize += mmap_array[mem_type][t].mem_size; + } + memmap->map[index].mem_type = mem_type; + memmap->map[index].mem_start = mmap_array[mem_type][i].mem_start; + memmap->map[index].mem_size = msize; + memmap->map[index].attribute = mmap_array[mem_type][i].attribute; + index++; + } + + return index; +} + +static efi_status_t mk_mmap(struct efi_boot_memmap *map, struct boot_params *p) +{ + char checksum; + unsigned int i; + unsigned int nr_desc; + unsigned int mem_type; + unsigned long count; + efi_memory_desc_t *mem_desc; + struct loongsonlist_mem_map *mhp = NULL; + + if (!strncmp((char *)p, "BPI", 3)) { + p->systemtable = efi_system_table; + p->flags |= BPI_FLAGS_UEFI_SUPPORTED; + p->extlist_offset = sizeof(*p) + sizeof(unsigned long); + mhp = (struct loongsonlist_mem_map *)((char *)p + p->extlist_offset); + + memcpy(&mhp->header.signature, "MEM", sizeof(unsigned long)); + mhp->header.length = sizeof(*mhp); + mhp->desc_version = *map->desc_ver; + } + if (!(*(map->map_size)) || !(*(map->desc_size)) || !mhp) { + efi_err("get memory info error\n"); + return EFI_INVALID_PARAMETER; + } + nr_desc = *(map->map_size) / *(map->desc_size); + + /* + * According to UEFI SPEC, mmap_buf is the accurate Memory Map + * mmap_array now we can fill platform specific memory structure. + */ + for (i = 0; i < nr_desc; i++) { + mem_desc = (efi_memory_desc_t *)((void *)(*map->map) + (i * (*(map->desc_size)))); + switch (mem_desc->type) { + case EFI_RESERVED_TYPE: + case EFI_RUNTIME_SERVICES_CODE: + case EFI_RUNTIME_SERVICES_DATA: + case EFI_MEMORY_MAPPED_IO: + case EFI_MEMORY_MAPPED_IO_PORT_SPACE: + case EFI_UNUSABLE_MEMORY: + case EFI_PAL_CODE: + mem_type = ADDRESS_TYPE_RESERVED; + break; + + case EFI_ACPI_MEMORY_NVS: + mem_type = ADDRESS_TYPE_NVS; + break; + + case EFI_ACPI_RECLAIM_MEMORY: + mem_type = ADDRESS_TYPE_ACPI; + break; + + case EFI_LOADER_CODE: + case EFI_LOADER_DATA: + case EFI_PERSISTENT_MEMORY: + case EFI_BOOT_SERVICES_CODE: + case EFI_BOOT_SERVICES_DATA: + case EFI_CONVENTIONAL_MEMORY: + mem_type = ADDRESS_TYPE_SYSRAM; + break; + + default: + continue; + } + + mmap_array[mem_type][map_entry[mem_type]].mem_type = mem_type; + mmap_array[mem_type][map_entry[mem_type]].mem_start = + mem_desc->phys_addr & TO_PHYS_MASK; + mmap_array[mem_type][map_entry[mem_type]].mem_size = + mem_desc->num_pages << EFI_PAGE_SHIFT; + mmap_array[mem_type][map_entry[mem_type]].attribute = + mem_desc->attribute; + map_entry[mem_type]++; + } + + count = mhp->map_count; + /* Sort EFI memmap and add to BPI for kernel */ + for (i = 0; i < LOONGSON3_BOOT_MEM_MAP_MAX; i++) { + if (!map_entry[i]) + continue; + count = efi_memmap_sort(mhp, count, i); + } + + mhp->map_count = count; + mhp->header.checksum = 0; + + checksum = efi_crc8((char *)mhp, mhp->header.length); + mhp->header.checksum = checksum; + + return EFI_SUCCESS; +} + +static efi_status_t exit_boot_func(struct efi_boot_memmap *map, void *priv) +{ + efi_status_t status; + struct exit_boot_struct *p = priv; + + status = mk_mmap(map, p->bp); + if (status != EFI_SUCCESS) { + efi_err("Make kernel memory map failed!\n"); + return status; + } + + return EFI_SUCCESS; +} + +static efi_status_t exit_boot_services(struct boot_params *boot_params, void *handle) +{ + unsigned int desc_version; + unsigned int runtime_entry_count = 0; + unsigned long map_sz, key, desc_size, buff_size; + efi_status_t status; + struct efi_boot_memmap map; + struct exit_boot_struct priv; + efi_memory_desc_t *mem_map; + + map.map = &mem_map; + map.map_size = &map_sz; + map.desc_size = &desc_size; + map.desc_ver = &desc_version; + map.key_ptr = &key; + map.buff_size = &buff_size; + status = efi_get_memory_map(&map); + if (status != EFI_SUCCESS) { + efi_err("Unable to retrieve UEFI memory map.\n"); + return status; + } + + priv.bp = boot_params; + priv.runtime_entry_count = &runtime_entry_count; + + /* Might as well exit boot services now */ + status = efi_exit_boot_services(handle, &map, &priv, exit_boot_func); + if (status != EFI_SUCCESS) + return status; + + return EFI_SUCCESS; +} + +/* + * EFI entry point for the LoongArch EFI stub. + */ +efi_status_t __efiapi efi_pe_entry(efi_handle_t handle, efi_system_table_t *sys_table) +{ + efi_status_t status; + unsigned int cmdline_size = 0; + unsigned long initrd_addr = 0; + unsigned long initrd_size = 0; + enum efi_secureboot_mode secure_boot; + char *cmdline_ptr = NULL; + struct boot_params *boot_p; + efi_loaded_image_t *image; + efi_guid_t loaded_image_proto = LOADED_IMAGE_PROTOCOL_GUID; + + efi_system_table = sys_table; + + /* Check if we were booted by the EFI firmware */ + if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) + goto fail; + + /* + * Get a handle to the loaded image protocol. This is used to get + * information about the running image, such as size and the command + * line. + */ + status = sys_table->boottime->handle_protocol(handle, + &loaded_image_proto, (void *)&image); + if (status != EFI_SUCCESS) { + efi_err("Failed to get loaded image protocol\n"); + goto fail; + } + + /* Get the command line from EFI, using the LOADED_IMAGE protocol. */ + cmdline_ptr = efi_convert_cmdline(image, &cmdline_size); + if (!cmdline_ptr) { + efi_err("Getting command line failed!\n"); + goto fail_free_cmdline; + } + +#ifdef CONFIG_CMDLINE_BOOL + if (cmdline_size == 0) + efi_parse_options(CONFIG_CMDLINE); +#endif + if (!IS_ENABLED(CONFIG_CMDLINE_OVERRIDE) && cmdline_size > 0) + efi_parse_options(cmdline_ptr); + + efi_info("Booting Linux Kernel...\n"); + + secure_boot = efi_get_secureboot(); + efi_enable_reset_attack_mitigation(); + + status = efi_load_initrd(image, &initrd_addr, &initrd_size, SZ_4G, ULONG_MAX); + if (status != EFI_SUCCESS) { + efi_err("Failed get initrd addr!\n"); + goto fail_free; + } + + status = convert_priv_cmdline(cmdline_ptr, initrd_addr, initrd_size); + if (status != EFI_SUCCESS) { + efi_err("Covert cmdline failed!\n"); + goto fail_free; + } + + boot_p = get_efi_config_table(EFI_LOONGSON_BOOT_PARAM_GUID); + if (!boot_p) { + efi_err("System table not found BPI!\n"); + goto fail; + } + + status = exit_boot_services(boot_p, handle); + if (status != EFI_SUCCESS) { + efi_err("exit_boot services failed!\n"); + goto fail_free; + } + + decompress_kernel((unsigned long)efi_heap); + + kernel_entry(argc, argv, boot_p); + + return EFI_SUCCESS; + +fail_free: + efi_free(initrd_size, initrd_addr); + +fail_free_cmdline: + efi_free(cmdline_size, (unsigned long)cmdline_ptr); + +fail: + return status; +} diff --git a/include/linux/pe.h b/include/linux/pe.h index daf09ffffe38..f4bb0b6a416d 100644 --- a/include/linux/pe.h +++ b/include/linux/pe.h @@ -65,6 +65,7 @@ #define IMAGE_FILE_MACHINE_SH5 0x01a8 #define IMAGE_FILE_MACHINE_THUMB 0x01c2 #define IMAGE_FILE_MACHINE_WCEMIPSV2 0x0169 +#define IMAGE_FILE_MACHINE_LOONGARCH 0x6264 /* flags */ #define IMAGE_FILE_RELOCS_STRIPPED 0x0001 -- Gitee From 73cf90b2856d6f89d156ffb2a40abb8cb24ed053 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 31 Dec 2020 15:13:33 +0800 Subject: [PATCH 49/59] LoongArch: Add kernel livepatching support Change-Id: I3e25cbb6e0fec883eb35371f3ff28f2ee7107fe3 Signed-off-by: Jinyang He Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 3 +++ arch/loongarch/include/asm/livepatch.h | 16 ++++++++++++++++ arch/loongarch/include/asm/thread_info.h | 1 + kernel/livepatch/core.c | 8 ++++++-- 4 files changed, 26 insertions(+), 2 deletions(-) create mode 100644 arch/loongarch/include/asm/livepatch.h diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 7c621e10f93e..754a11af33cb 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -96,6 +96,7 @@ config LOONGARCH select HAVE_KPROBES select HAVE_KPROBES_ON_FTRACE select HAVE_KRETPROBES + select HAVE_LIVEPATCH select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP select HAVE_MOD_ARCH_SPECIFIC @@ -714,6 +715,8 @@ config USE_OF config BUILTIN_DTB bool +source "kernel/livepatch/Kconfig" + endmenu config ARCH_ENABLE_MEMORY_HOTPLUG diff --git a/arch/loongarch/include/asm/livepatch.h b/arch/loongarch/include/asm/livepatch.h new file mode 100644 index 000000000000..f5c3771348e9 --- /dev/null +++ b/arch/loongarch/include/asm/livepatch.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_LIVEPATCH_H +#define _ASM_LIVEPATCH_H + +#include + +static inline int klp_check_compiler_support(void) +{ + return 0; +} + +static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip) +{ + regs->csr_era = ip; +} +#endif /* _ASM_LIVEPATCH_H */ diff --git a/arch/loongarch/include/asm/thread_info.h b/arch/loongarch/include/asm/thread_info.h index 356974e33671..93404ebc8216 100644 --- a/arch/loongarch/include/asm/thread_info.h +++ b/arch/loongarch/include/asm/thread_info.h @@ -100,6 +100,7 @@ register unsigned long current_stack_pointer __asm__("$r3"); #define TIF_LOAD_WATCH 18 /* If set, load watch registers */ #define TIF_LSX_CTX_LIVE 19 /* LSX context must be preserved */ #define TIF_LASX_CTX_LIVE 20 /* LASX context must be preserved */ +#define TIF_PATCH_PENDING 21 /* pending live patching update */ #define _TIF_SIGPENDING (1<sh_size / sizeof(*relas); i++) { + if (last && relas[i].r_offset == last->r_offset) + continue; + + last = &relas[i]; sym = (Elf_Sym *)sechdrs[symndx].sh_addr + ELF_R_SYM(relas[i].r_info); if (sym->st_shndx != SHN_LIVEPATCH) { pr_err("symbol %s is not marked as a livepatch symbol\n", -- Gitee From 536d3e91563b03e8ee10e412cacef1c225871349 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 31 Dec 2020 15:13:33 +0800 Subject: [PATCH 50/59] LoongArch: Add kernel address sanitizer support Change-Id: I9cbfb0c8b913ff5a5009dd4aff7032ec2dc54246 Signed-off-by: Qing Zhang Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 1 + arch/loongarch/boot/compressed/Makefile | 4 + arch/loongarch/boot/compressed/string.c | 6 + arch/loongarch/include/asm/kasan.h | 123 ++++++++++++++++ arch/loongarch/include/asm/pgtable-64.h | 7 + arch/loongarch/include/asm/string.h | 20 +++ arch/loongarch/kernel/Makefile | 2 + arch/loongarch/kernel/head.S | 4 + arch/loongarch/kernel/setup.c | 4 + arch/loongarch/lib/memcpy.S | 5 +- arch/loongarch/lib/memmove.S | 13 +- arch/loongarch/lib/memset.S | 5 +- arch/loongarch/mm/Makefile | 1 + arch/loongarch/mm/kasan_init.c | 182 ++++++++++++++++++++++++ arch/loongarch/vdso/Makefile | 4 + include/linux/kasan.h | 2 + kernel/Makefile | 4 + mm/kasan/generic.c | 6 + mm/kasan/kasan.h | 6 + 19 files changed, 392 insertions(+), 7 deletions(-) create mode 100644 arch/loongarch/include/asm/kasan.h create mode 100644 arch/loongarch/mm/kasan_init.c diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 754a11af33cb..37c4a11ab008 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -68,6 +68,7 @@ config LOONGARCH select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_COMPILER_H select HAVE_ARCH_JUMP_LABEL + select HAVE_ARCH_KASAN if 64BIT select HAVE_ARCH_KGDB select HAVE_ARCH_MMAP_RND_BITS if MMU select HAVE_ARCH_SECCOMP_FILTER diff --git a/arch/loongarch/boot/compressed/Makefile b/arch/loongarch/boot/compressed/Makefile index 4b4270fc615b..a84409806b6e 100644 --- a/arch/loongarch/boot/compressed/Makefile +++ b/arch/loongarch/boot/compressed/Makefile @@ -10,6 +10,10 @@ include $(srctree)/arch/loongarch/Kbuild.platforms # set the default size of the mallocing area for decompressing BOOT_HEAP_SIZE := 0x400000 +ifdef CONFIG_KASAN +KASAN_SANITIZE := n +endif + # Disable Function Tracer KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_FTRACE), $(KBUILD_CFLAGS)) diff --git a/arch/loongarch/boot/compressed/string.c b/arch/loongarch/boot/compressed/string.c index d38ccaece00d..0ff50b823111 100644 --- a/arch/loongarch/boot/compressed/string.c +++ b/arch/loongarch/boot/compressed/string.c @@ -164,3 +164,9 @@ char * __weak strsep(char **s, const char *ct) *s = end; return sbegin; } + +#ifdef CONFIG_KASAN +extern void *__memset(void *s, int c, size_t n) __alias(memset); +extern void *__memcpy(void *dest, const void *src, size_t n) __alias(memcpy); +extern void *__memmove(void *dest, const void *src, size_t n) __alias(memmove); +#endif diff --git a/arch/loongarch/include/asm/kasan.h b/arch/loongarch/include/asm/kasan.h new file mode 100644 index 000000000000..dfb1bf7a576e --- /dev/null +++ b/arch/loongarch/include/asm/kasan.h @@ -0,0 +1,123 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_KASAN_H +#define __ASM_KASAN_H + +#ifndef __ASSEMBLY__ + +#include +#include +#include +#include + +#ifdef CONFIG_64BIT +#include +#endif + +#define __HAVE_ARCH_SHADOW_MAP + +#define KASAN_SHADOW_SCALE_SHIFT 3 +#define KASAN_SHADOW_OFFSET 0 + +#define XRANGE_SHIFT (48) + +/* Valid address length */ +#define XRANGE_SHADOW_SHIFT (PGDIR_SHIFT + PGD_ORDER + PAGE_SHIFT - 3) +/* Used for taking out the valid address */ +#define XRANGE_SHADOW_MASK GENMASK_ULL(XRANGE_SHADOW_SHIFT - 1, 0) +/* One segment whole address space size */ +#define XRANGE_SIZE (XRANGE_SHADOW_MASK + 1) + +/* 64-bit segment value. */ +#define XKPRANGE_UC_SEG (0x8000) +#define XKPRANGE_CC_SEG (0x9000) +#define XKVRANGE_VC_SEG (0xffff) + +/* Cached */ +#define XKPRANGE_CC_START CAC_BASE +#define XKPRANGE_CC_SIZE XRANGE_SIZE +#define XKPRANGE_CC_KASAN_OFFSET (0) +#define XKPRANGE_CC_SHADOW_SIZE (XKPRANGE_CC_SIZE >> KASAN_SHADOW_SCALE_SHIFT) +#define XKPRANGE_CC_SHADOW_END (XKPRANGE_CC_KASAN_OFFSET + XKPRANGE_CC_SHADOW_SIZE) + +/* UnCached */ +#define XKPRANGE_UC_START UNCAC_BASE +#define XKPRANGE_UC_SIZE XRANGE_SIZE +#define XKPRANGE_UC_KASAN_OFFSET XKPRANGE_CC_SHADOW_END +#define XKPRANGE_UC_SHADOW_SIZE (XKPRANGE_UC_SIZE >> KASAN_SHADOW_SCALE_SHIFT) +#define XKPRANGE_UC_SHADOW_END (XKPRANGE_UC_KASAN_OFFSET + XKPRANGE_UC_SHADOW_SIZE) + +/* VMALLOC (Cached or UnCached) */ +#define XKVRANGE_VC_START VMALLOC_START +#define XKVRANGE_VC_SIZE round_up(VMEMMAP_END - VMALLOC_START + 1, PGDIR_SIZE) +#define XKVRANGE_VC_KASAN_OFFSET XKPRANGE_UC_SHADOW_END +#define XKVRANGE_VC_SHADOW_SIZE (XKVRANGE_VC_SIZE >> KASAN_SHADOW_SCALE_SHIFT) +#define XKVRANGE_VC_SHADOW_END (XKVRANGE_VC_KASAN_OFFSET + XKVRANGE_VC_SHADOW_SIZE) + +/* Kasan shadow memory start right after vmalloc. */ +#define KASAN_SHADOW_START round_up(VMEMMAP_END, PGDIR_SIZE) +#define KASAN_SHADOW_SIZE (XKVRANGE_VC_SHADOW_END - XKPRANGE_CC_KASAN_OFFSET) +#define KASAN_SHADOW_END round_up(KASAN_SHADOW_START + KASAN_SHADOW_SIZE, PGDIR_SIZE) + +#define XKPRANGE_CC_SHADOW_OFFSET (KASAN_SHADOW_START + XKPRANGE_CC_KASAN_OFFSET) +#define XKPRANGE_UC_SHADOW_OFFSET (KASAN_SHADOW_START + XKPRANGE_UC_KASAN_OFFSET) +#define XKVRANGE_VC_SHADOW_OFFSET (KASAN_SHADOW_START + XKVRANGE_VC_KASAN_OFFSET) + +extern bool kasan_early_stage; +extern unsigned char kasan_early_shadow_page[PAGE_SIZE]; + +static inline void *kasan_mem_to_shadow(const void *addr) +{ + if (kasan_early_stage) { + return (void *)(kasan_early_shadow_page); + } else { + unsigned long maddr = (unsigned long)addr; + unsigned long xrange = (maddr >> XRANGE_SHIFT) & 0xffff; + unsigned long offset = 0; + + maddr &= XRANGE_SHADOW_MASK; + switch (xrange) { + case XKPRANGE_CC_SEG: + offset = XKPRANGE_CC_SHADOW_OFFSET; + break; + case XKPRANGE_UC_SEG: + offset = XKPRANGE_UC_SHADOW_OFFSET; + break; + case XKVRANGE_VC_SEG: + offset = XKVRANGE_VC_SHADOW_OFFSET; + break; + default: + WARN_ON(1); + return NULL; + } + + return (void *)((maddr >> KASAN_SHADOW_SCALE_SHIFT) + offset); + } +} + +static inline const void *kasan_shadow_to_mem(const void *shadow_addr) +{ + unsigned long addr = (unsigned long)shadow_addr; + + if (unlikely(addr > KASAN_SHADOW_END) || + unlikely(addr < KASAN_SHADOW_START)) { + WARN_ON(1); + return NULL; + } + + if (addr >= XKVRANGE_VC_SHADOW_OFFSET) + return (void *)(((addr - XKVRANGE_VC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKVRANGE_VC_START); + else if (addr >= XKPRANGE_UC_SHADOW_OFFSET) + return (void *)(((addr - XKPRANGE_UC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKPRANGE_UC_START); + else if (addr >= XKPRANGE_CC_SHADOW_OFFSET) + return (void *)(((addr - XKPRANGE_CC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKPRANGE_CC_START); + else { + WARN_ON(1); + return NULL; + } +} + +void kasan_init(void); +asmlinkage void kasan_early_init(void); + +#endif +#endif diff --git a/arch/loongarch/include/asm/pgtable-64.h b/arch/loongarch/include/asm/pgtable-64.h index cc542a621758..957a31a41b0e 100644 --- a/arch/loongarch/include/asm/pgtable-64.h +++ b/arch/loongarch/include/asm/pgtable-64.h @@ -70,9 +70,16 @@ #define MODULES_END (MODULES_VADDR + SZ_256M) #define VMALLOC_START MODULES_END + +#ifndef CONFIG_KASAN #define VMALLOC_END \ (vm_map_base + \ min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE - VMEMMAP_SIZE) +#else +#define VMALLOC_END \ + (vm_map_base + \ + min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits) / 2) - PMD_SIZE - VMEMMAP_SIZE) +#endif #define vmemmap ((struct page *)((VMALLOC_END + PMD_SIZE) & PMD_MASK)) #define VMEMMAP_END ((unsigned long)vmemmap + VMEMMAP_SIZE - 1) diff --git a/arch/loongarch/include/asm/string.h b/arch/loongarch/include/asm/string.h index 812389888e35..fa4c8e896f02 100644 --- a/arch/loongarch/include/asm/string.h +++ b/arch/loongarch/include/asm/string.h @@ -11,11 +11,31 @@ #define __HAVE_ARCH_MEMSET extern void *memset(void *__s, int __c, size_t __count); +extern void *__memset(void *__s, int __c, size_t __count); #define __HAVE_ARCH_MEMCPY extern void *memcpy(void *__to, __const__ void *__from, size_t __n); +extern void *__memcpy(void *__to, __const__ void *__from, size_t __n); #define __HAVE_ARCH_MEMMOVE extern void *memmove(void *__dest, __const__ void *__src, size_t __n); +extern void *__memmove(void *__dest, __const__ void *__src, size_t __n); + +#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__) + +/* + * For files that are not instrumented (e.g. mm/slub.c) we + * should use not instrumented version of mem* functions. + */ + +#define memset(s, c, n) __memset(s, c, n) +#define memcpy(dst, src, len) __memcpy(dst, src, len) +#define memmove(dst, src, len) __memmove(dst, src, len) + +#ifndef __NO_FORTIFY +#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */ +#endif + +#endif #endif /* _ASM_STRING_H */ diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index bda9428f32fd..3695bd4fd4d5 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -30,6 +30,8 @@ ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_perf_event.o = $(CC_FLAGS_FTRACE) endif +KASAN_SANITIZE_vdso.o := n + obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_STACKTRACE) += stacktrace.o diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S index e62f4b34bcf5..6509db5faadf 100644 --- a/arch/loongarch/kernel/head.S +++ b/arch/loongarch/kernel/head.S @@ -79,6 +79,10 @@ dtb_found: set_saved_sp sp, t0, t1 PTR_ADDIU sp, sp, -4 * SZREG # init stack pointer +#ifdef CONFIG_KASAN + bl kasan_early_init +#endif + #ifdef CONFIG_RELOCATABLE /* Copy kernel and apply the relocations */ bl relocate_kernel diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index a4654df1cc30..d95afa2681eb 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -708,6 +708,10 @@ void __init setup_arch(char **cmdline_p) paging_init(); +#if defined(CONFIG_KASAN) + kasan_init(); +#endif + unwind_init(); } diff --git a/arch/loongarch/lib/memcpy.S b/arch/loongarch/lib/memcpy.S index 586ab4fc7b07..d12bc4f2de2e 100644 --- a/arch/loongarch/lib/memcpy.S +++ b/arch/loongarch/lib/memcpy.S @@ -10,15 +10,18 @@ #include #include -SYM_FUNC_START(memcpy) +SYM_FUNC_START_WEAK(memcpy) +SYM_FUNC_START_ALIAS(__memcpy) /* * Some CPUs support hardware unaligned access */ ALTERNATIVE "b __memcpy_generic", \ "b __memcpy_fast", CPU_FEATURE_UAL SYM_FUNC_END(memcpy) +SYM_FUNC_END_ALIAS(__memcpy) EXPORT_SYMBOL(memcpy) +EXPORT_SYMBOL(__memcpy) /* * void *__memcpy_generic(void *dst, const void *src, size_t n) diff --git a/arch/loongarch/lib/memmove.S b/arch/loongarch/lib/memmove.S index 5ae764586db3..269aab72603b 100644 --- a/arch/loongarch/lib/memmove.S +++ b/arch/loongarch/lib/memmove.S @@ -10,7 +10,8 @@ #include #include -SYM_FUNC_START(memmove) +SYM_FUNC_START_WEAK(memmove) +SYM_FUNC_START_ALIAS(__memmove) blt a0, a1, 1f /* dst < src, memcpy */ blt a1, a0, 3f /* src < dst, rmemcpy */ jr ra /* dst == src, return */ @@ -19,26 +20,28 @@ SYM_FUNC_START(memmove) 1: ori a3, zero, 64 sub.d t0, a1, a0 blt t0, a3, 2f - b memcpy + b __memcpy 2: b __memcpy_generic /* if (dst - src) < 64, copy 1 byte at a time */ 3: ori a3, zero, 64 sub.d t0, a0, a1 blt t0, a3, 4f - b rmemcpy + b __rmemcpy 4: b __rmemcpy_generic SYM_FUNC_END(memmove) +SYM_FUNC_END_ALIAS(__memmove) EXPORT_SYMBOL(memmove) +EXPORT_SYMBOL(__memmove) -SYM_FUNC_START(rmemcpy) +SYM_FUNC_START(__rmemcpy) /* * Some CPUs support hardware unaligned access */ ALTERNATIVE "b __rmemcpy_generic", \ "b __rmemcpy_fast", CPU_FEATURE_UAL -SYM_FUNC_END(rmemcpy) +SYM_FUNC_END(__rmemcpy) /* * void *__rmemcpy_generic(void *dst, const void *src, size_t n) diff --git a/arch/loongarch/lib/memset.S b/arch/loongarch/lib/memset.S index 9fcf0d002ab9..78943fd35533 100644 --- a/arch/loongarch/lib/memset.S +++ b/arch/loongarch/lib/memset.S @@ -16,15 +16,18 @@ bstrins.d \r0, \r0, 63, 32 .endm -SYM_FUNC_START(memset) +SYM_FUNC_START_WEAK(memset) +SYM_FUNC_START_ALIAS(__memset) /* * Some CPUs support hardware unaligned access */ ALTERNATIVE "b __memset_generic", \ "b __memset_fast", CPU_FEATURE_UAL SYM_FUNC_END(memset) +SYM_FUNC_END_ALIAS(__memset) EXPORT_SYMBOL(memset) +EXPORT_SYMBOL(__memset) /* * void *__memset_generic(void *s, int c, size_t n) diff --git a/arch/loongarch/mm/Makefile b/arch/loongarch/mm/Makefile index a3a137709650..fd890d332acf 100644 --- a/arch/loongarch/mm/Makefile +++ b/arch/loongarch/mm/Makefile @@ -8,3 +8,4 @@ obj-y += init.o cache.o tlb.o tlbex.o extable.o \ obj-$(CONFIG_64BIT) += pgtable-64.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o +obj-$(CONFIG_KASAN) += kasan_init.o diff --git a/arch/loongarch/mm/kasan_init.c b/arch/loongarch/mm/kasan_init.c new file mode 100644 index 000000000000..b6f700fe399b --- /dev/null +++ b/arch/loongarch/mm/kasan_init.c @@ -0,0 +1,182 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#define pr_fmt(fmt) "kasan: " fmt +#include +#include +#include + +#include + +#define __pgd_none(early, pgd) (early ? (pgd_val(pgd) == 0) : \ +(__pa(pgd_val(pgd)) == (unsigned long)__pa(kasan_early_shadow_pmd))) + +#define __pmd_none(early, pmd) (early ? (pmd_val(pmd) == 0) : \ +(__pa(pmd_val(pmd)) == (unsigned long)__pa(kasan_early_shadow_pte))) + +#define __pte_none(early, pte) (early ? pte_none(pte) : \ +((pte_val(pte) & _PFN_MASK) == (unsigned long)__pa(kasan_early_shadow_page))) + +bool kasan_early_stage = true; + +/* + * Alloc memory for shadow memory page table. + */ +static phys_addr_t __init kasan_alloc_zeroed_page(int node) +{ + void *p = memblock_alloc_try_nid(PAGE_SIZE, PAGE_SIZE, + __pa(MAX_DMA_ADDRESS), + MEMBLOCK_ALLOC_ACCESSIBLE, node); + return __pa(p); +} + +static pte_t *kasan_pte_offset(pmd_t *pmdp, unsigned long addr, int node, + bool early) +{ + if (__pmd_none(early, READ_ONCE(*pmdp))) { + phys_addr_t pte_phys = early ? + __pa_symbol(kasan_early_shadow_pte) + : kasan_alloc_zeroed_page(node); + if (!early) + memcpy(__va(pte_phys), kasan_early_shadow_pte, + sizeof(kasan_early_shadow_pte)); + + pmd_populate_kernel(NULL, pmdp, (pte_t *)__va(pte_phys)); + } + + return pte_offset_kernel(pmdp, addr); +} + +static inline void kasan_set_pgd(pgd_t *pgdp, pgd_t pgdval) +{ + WRITE_ONCE(*pgdp, pgdval); +} + +static pmd_t *kasan_pmd_offset(pgd_t *pgdp, unsigned long addr, int node, + bool early) +{ + if (__pgd_none(early, READ_ONCE(*pgdp))) { + phys_addr_t pmd_phys = early ? + __pa_symbol(kasan_early_shadow_pmd) + : kasan_alloc_zeroed_page(node); + if (!early) + memcpy(__va(pmd_phys), kasan_early_shadow_pmd, + sizeof(kasan_early_shadow_pmd)); + kasan_set_pgd(pgdp, __pgd((unsigned long)__va(pmd_phys))); + } + + return (pmd_t *)((pmd_t *)pgd_val(*pgdp) + pmd_index(addr)); +} + +static void kasan_pte_populate(pmd_t *pmdp, unsigned long addr, + unsigned long end, int node, bool early) +{ + unsigned long next; + pte_t *ptep = kasan_pte_offset(pmdp, addr, node, early); + + do { + phys_addr_t page_phys = early ? + __pa_symbol(kasan_early_shadow_page) + : kasan_alloc_zeroed_page(node); + next = addr + PAGE_SIZE; + set_pte(ptep, pfn_pte(__phys_to_pfn(page_phys), PAGE_KERNEL)); + } while (ptep++, addr = next, addr != end && __pte_none(early, READ_ONCE(*ptep))); +} + +static void kasan_pmd_populate(pgd_t *pgdp, unsigned long addr, + unsigned long end, int node, bool early) +{ + unsigned long next; + pmd_t *pmdp = kasan_pmd_offset(pgdp, addr, node, early); + + do { + next = pmd_addr_end(addr, end); + kasan_pte_populate(pmdp, addr, next, node, early); + } while (pmdp++, addr = next, addr != end && __pmd_none(early, READ_ONCE(*pmdp))); +} + +static void __init kasan_pgd_populate(unsigned long addr, unsigned long end, + int node, bool early) +{ + unsigned long next; + pgd_t *pgdp; + + pgdp = pgd_offset_k(addr); + + do { + next = pgd_addr_end(addr, end); + kasan_pmd_populate(pgdp, addr, next, node, early); + } while (pgdp++, addr = next, addr != end); + +} + +asmlinkage void __init kasan_early_init(void) +{ + BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE)); + BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE)); +} + +/* Set up full kasan mappings, ensuring that the mapped pages are zeroed */ +static void __init kasan_map_populate(unsigned long start, unsigned long end, + int node) +{ + kasan_pgd_populate(start & PAGE_MASK, PAGE_ALIGN(end), node, false); +} + +static void __init clear_pgds(unsigned long start, + unsigned long end) +{ + for (; start < end; start += PGDIR_SIZE) + kasan_set_pgd((pgd_t *)pgd_offset_k(start), __pgd(0)); +} + +void __init kasan_init(void) +{ + u64 i; + phys_addr_t pa_start, pa_end; + + /* + * PGD was populated as invalid_pmd_table or invalid_pud_table + * in pagetable_init() which depends on how many levels of page + * table you are using, but we had to clean the gpd of kasan + * shadow memory, as the pgd value is none-zero. + * The assertion pgd_none is going to be false and the formal populate + * afterwards is not going to create any new pgd at all. + */ + clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); + + /* Maps everything to a single page of zeroes */ + kasan_pgd_populate(KASAN_SHADOW_START, KASAN_SHADOW_END, NUMA_NO_NODE, + true); + + kasan_early_stage = false; + + /* Populate the linear mapping */ + for_each_mem_range(i, &pa_start, &pa_end) { + void *start = (void *)phys_to_virt(pa_start); + void *end = (void *)phys_to_virt(pa_end); + + if (start >= end) + break; + + kasan_map_populate((unsigned long)kasan_mem_to_shadow(start), + (unsigned long)kasan_mem_to_shadow(end), + NUMA_NO_NODE); + } + + /* + * KAsan may reuse the contents of kasan_zero_pte directly, so we + * should make sure that it maps the zero page read-only. + */ + for (i = 0; i < PTRS_PER_PTE; i++) + set_pte(&kasan_early_shadow_pte[i], + pfn_pte(__phys_to_pfn(__pa_symbol(kasan_early_shadow_page)), + PAGE_KERNEL_RO)); + + memset(kasan_early_shadow_page, 0, PAGE_SIZE); + + /* At this point kasan is fully initialized. Enable error messages */ + init_task.kasan_depth = 0; + pr_info("KernelAddressSanitizer initialized.\n"); +} diff --git a/arch/loongarch/vdso/Makefile b/arch/loongarch/vdso/Makefile index 444e945c6f77..20830916cc5e 100644 --- a/arch/loongarch/vdso/Makefile +++ b/arch/loongarch/vdso/Makefile @@ -2,6 +2,10 @@ # Objects to go into the VDSO. OBJECT_FILES_NON_STANDARD := y +ifdef CONFIG_KASAN +KASAN_SANITIZE := n +endif + # Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before # the inclusion of generic Makefile. ARCH_REL_TYPE_ABS := R_LARCH_32|R_LARCH_64|R_LARCH_MARK_LA|R_LARCH_JUMP_SLOT diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 30d343b4a40a..c07de911fe23 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -29,11 +29,13 @@ extern p4d_t kasan_early_shadow_p4d[MAX_PTRS_PER_P4D]; int kasan_populate_early_shadow(const void *shadow_start, const void *shadow_end); +#ifndef __HAVE_ARCH_SHADOW_MAP static inline void *kasan_mem_to_shadow(const void *addr) { return (void *)((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET; } +#endif /* Enable reporting bugs after kasan_disable_current() */ extern void kasan_enable_current(void); diff --git a/kernel/Makefile b/kernel/Makefile index 455d0b5fbbb0..48a25c5f87c6 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -3,6 +3,10 @@ # Makefile for the linux kernel. # +ifdef CONFIG_KASAN +KASAN_SANITIZE := n +endif + obj-y = fork.o exec_domain.o panic.o \ cpu.o exit.o softirq.o resource.o \ sysctl.o capability.o ptrace.o user.o \ diff --git a/mm/kasan/generic.c b/mm/kasan/generic.c index c4c56ec8a472..04d1d114edca 100644 --- a/mm/kasan/generic.c +++ b/mm/kasan/generic.c @@ -176,10 +176,16 @@ static __always_inline bool check_memory_region_inline(unsigned long addr, if (unlikely(addr + size < addr)) return !kasan_report(addr, size, write, ret_ip); +#ifndef __HAVE_ARCH_SHADOW_MAP if (unlikely((void *)addr < kasan_shadow_to_mem((void *)KASAN_SHADOW_START))) { return !kasan_report(addr, size, write, ret_ip); } +#else + if (unlikely(kasan_mem_to_shadow((void *)addr) == NULL)) { + return !kasan_report(addr, size, write, ret_ip); + } +#endif if (likely(!memory_is_poisoned(addr, size))) return true; diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index ac499456740f..a75b1658f17a 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -139,15 +139,21 @@ struct kasan_alloc_meta *get_alloc_info(struct kmem_cache *cache, struct kasan_free_meta *get_free_info(struct kmem_cache *cache, const void *object); +#ifndef __HAVE_ARCH_SHADOW_MAP static inline const void *kasan_shadow_to_mem(const void *shadow_addr) { return (void *)(((unsigned long)shadow_addr - KASAN_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT); } +#endif static inline bool addr_has_shadow(const void *addr) { +#ifdef __HAVE_ARCH_SHADOW_MAP + return (kasan_mem_to_shadow((void *)addr) != NULL); +#else return (addr >= kasan_shadow_to_mem((void *)KASAN_SHADOW_START)); +#endif } void kasan_poison_shadow(const void *address, size_t size, u8 value); -- Gitee From b060313b89cb4bd9a6c795a74a367e569d950ff9 Mon Sep 17 00:00:00 2001 From: lvjianmin Date: Thu, 29 Oct 2020 16:29:11 +0800 Subject: [PATCH 51/59] LoongArch: Add platform device drivers Change-Id: Icce3a1620624df211920de73387ba7c3ee286f2f Signed-off-by: lvjianmin Signed-off-by: Huacai Chen --- drivers/platform/Kconfig | 3 + drivers/platform/Makefile | 1 + drivers/platform/loongarch/Kconfig | 44 ++ drivers/platform/loongarch/Makefile | 2 + drivers/platform/loongarch/cpu_hwmon.c | 305 ++++++++ drivers/platform/loongarch/generic-laptop.c | 777 ++++++++++++++++++++ 6 files changed, 1132 insertions(+) create mode 100644 drivers/platform/loongarch/Kconfig create mode 100644 drivers/platform/loongarch/Makefile create mode 100644 drivers/platform/loongarch/cpu_hwmon.c create mode 100644 drivers/platform/loongarch/generic-laptop.c diff --git a/drivers/platform/Kconfig b/drivers/platform/Kconfig index 971426bb4302..4b74a8de68ab 100644 --- a/drivers/platform/Kconfig +++ b/drivers/platform/Kconfig @@ -5,6 +5,9 @@ endif if MIPS source "drivers/platform/mips/Kconfig" endif +if LOONGARCH +source "drivers/platform/loongarch/Kconfig" +endif source "drivers/platform/goldfish/Kconfig" diff --git a/drivers/platform/Makefile b/drivers/platform/Makefile index 6fda58c021ca..c36d65ac4c4d 100644 --- a/drivers/platform/Makefile +++ b/drivers/platform/Makefile @@ -4,6 +4,7 @@ # obj-$(CONFIG_X86) += x86/ +obj-$(CONFIG_LOONGARCH) += loongarch/ obj-$(CONFIG_MELLANOX_PLATFORM) += mellanox/ obj-$(CONFIG_MIPS) += mips/ obj-$(CONFIG_OLPC_EC) += olpc/ diff --git a/drivers/platform/loongarch/Kconfig b/drivers/platform/loongarch/Kconfig new file mode 100644 index 000000000000..5b4ec24fea3f --- /dev/null +++ b/drivers/platform/loongarch/Kconfig @@ -0,0 +1,44 @@ +# +# LoongArch Platform Specific Drivers +# + +menuconfig LOONGARCH_PLATFORM_DEVICES + bool "LoongArch Platform Specific Device Drivers" + default y + help + Say Y here to get to see options for device drivers of various + LOONGARCH platforms, including vendor-specific netbook/laptop/desktop + extension and hardware monitor drivers. This option itself does + not add any kernel code. + + If you say N, all options in this submenu will be skipped and disabled. + +if LOONGARCH_PLATFORM_DEVICES + +config CPU_HWMON + tristate "Loongson CPU HWMon Driver" + depends on MACH_LOONGSON64 + select HWMON + default y + help + Loongson-3A/3B/3C CPU Hwmon (temperature sensor) driver. + +config GENERIC_LAPTOP + tristate "Generic Loongson-3A Laptop Driver" + depends on MACH_LOONGSON64 + select BACKLIGHT_LCD_SUPPORT + select LCD_CLASS_DEVICE + select BACKLIGHT_CLASS_DEVICE + select POWER_SUPPLY + select HWMON + select VIDEO_OUTPUT_CONTROL + select INPUT_SPARSEKMAP + select INPUT_EVDEV + select LEDS_CLASS + depends on INPUT + depends on ACPI + default y + help + Loongson-3A/3B/3C family laptops generic driver. + +endif # LOONGARCH_PLATFORM_DEVICES diff --git a/drivers/platform/loongarch/Makefile b/drivers/platform/loongarch/Makefile new file mode 100644 index 000000000000..9d6f69f2319d --- /dev/null +++ b/drivers/platform/loongarch/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_CPU_HWMON) += cpu_hwmon.o +obj-$(CONFIG_GENERIC_LAPTOP) += generic-laptop.o diff --git a/drivers/platform/loongarch/cpu_hwmon.c b/drivers/platform/loongarch/cpu_hwmon.c new file mode 100644 index 000000000000..ea32f844281a --- /dev/null +++ b/drivers/platform/loongarch/cpu_hwmon.c @@ -0,0 +1,305 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#include +#include +#include +#include +#include +#include + +#include +#include + +/* + * Loongson-3 series cpu has two sensors inside, + * each of them from 0 to 255, + * if more than 127, that is dangerous. + * here only provide sensor1 data, because it always hot than sensor0 + */ +int loongson3_cpu_temp(int cpu) +{ + int cputemp; + u32 reg, prid_rev; + + reg = LOONGSON_CHIPTEMP(cpu); + reg = (reg & 0xffff)*731/0x4000 - 273; + cputemp = (int)reg * 1000; + + return cputemp; +} +EXPORT_SYMBOL(loongson3_cpu_temp); + +static int nr_packages; +static struct device *cpu_hwmon_dev; + +static ssize_t get_hwmon_name(struct device *dev, + struct device_attribute *attr, char *buf); +static SENSOR_DEVICE_ATTR(name, S_IRUGO, get_hwmon_name, NULL, 0); + +static struct attribute *cpu_hwmon_attributes[] = { + &sensor_dev_attr_name.dev_attr.attr, + NULL +}; + +/* Hwmon device attribute group */ +static struct attribute_group cpu_hwmon_attribute_group = { + .attrs = cpu_hwmon_attributes, +}; + +/* Hwmon device get name */ +static ssize_t get_hwmon_name(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "cpu-hwmon\n"); +} + +static ssize_t get_cpu_temp(struct device *dev, + struct device_attribute *attr, char *buf); +static ssize_t cpu_temp_label(struct device *dev, + struct device_attribute *attr, char *buf); + +static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, get_cpu_temp, NULL, 1); +static SENSOR_DEVICE_ATTR(temp1_label, S_IRUGO, cpu_temp_label, NULL, 1); +static SENSOR_DEVICE_ATTR(temp2_input, S_IRUGO, get_cpu_temp, NULL, 2); +static SENSOR_DEVICE_ATTR(temp2_label, S_IRUGO, cpu_temp_label, NULL, 2); +static SENSOR_DEVICE_ATTR(temp3_input, S_IRUGO, get_cpu_temp, NULL, 3); +static SENSOR_DEVICE_ATTR(temp3_label, S_IRUGO, cpu_temp_label, NULL, 3); +static SENSOR_DEVICE_ATTR(temp4_input, S_IRUGO, get_cpu_temp, NULL, 4); +static SENSOR_DEVICE_ATTR(temp4_label, S_IRUGO, cpu_temp_label, NULL, 4); +static SENSOR_DEVICE_ATTR(temp5_input, S_IRUGO, get_cpu_temp, NULL, 5); +static SENSOR_DEVICE_ATTR(temp5_label, S_IRUGO, cpu_temp_label, NULL, 5); +static SENSOR_DEVICE_ATTR(temp6_input, S_IRUGO, get_cpu_temp, NULL, 6); +static SENSOR_DEVICE_ATTR(temp6_label, S_IRUGO, cpu_temp_label, NULL, 6); +static SENSOR_DEVICE_ATTR(temp7_input, S_IRUGO, get_cpu_temp, NULL, 7); +static SENSOR_DEVICE_ATTR(temp7_label, S_IRUGO, cpu_temp_label, NULL, 7); +static SENSOR_DEVICE_ATTR(temp8_input, S_IRUGO, get_cpu_temp, NULL, 8); +static SENSOR_DEVICE_ATTR(temp8_label, S_IRUGO, cpu_temp_label, NULL, 8); +static SENSOR_DEVICE_ATTR(temp9_input, S_IRUGO, get_cpu_temp, NULL, 9); +static SENSOR_DEVICE_ATTR(temp9_label, S_IRUGO, cpu_temp_label, NULL, 9); +static SENSOR_DEVICE_ATTR(temp10_input, S_IRUGO, get_cpu_temp, NULL, 10); +static SENSOR_DEVICE_ATTR(temp10_label, S_IRUGO, cpu_temp_label, NULL, 10); +static SENSOR_DEVICE_ATTR(temp11_input, S_IRUGO, get_cpu_temp, NULL, 11); +static SENSOR_DEVICE_ATTR(temp11_label, S_IRUGO, cpu_temp_label, NULL, 11); +static SENSOR_DEVICE_ATTR(temp12_input, S_IRUGO, get_cpu_temp, NULL, 12); +static SENSOR_DEVICE_ATTR(temp12_label, S_IRUGO, cpu_temp_label, NULL, 12); +static SENSOR_DEVICE_ATTR(temp13_input, S_IRUGO, get_cpu_temp, NULL, 13); +static SENSOR_DEVICE_ATTR(temp13_label, S_IRUGO, cpu_temp_label, NULL, 13); +static SENSOR_DEVICE_ATTR(temp14_input, S_IRUGO, get_cpu_temp, NULL, 14); +static SENSOR_DEVICE_ATTR(temp14_label, S_IRUGO, cpu_temp_label, NULL, 14); +static SENSOR_DEVICE_ATTR(temp15_input, S_IRUGO, get_cpu_temp, NULL, 15); +static SENSOR_DEVICE_ATTR(temp15_label, S_IRUGO, cpu_temp_label, NULL, 15); +static SENSOR_DEVICE_ATTR(temp16_input, S_IRUGO, get_cpu_temp, NULL, 16); +static SENSOR_DEVICE_ATTR(temp16_label, S_IRUGO, cpu_temp_label, NULL, 16); + +static const struct attribute *hwmon_cputemp[16][3] = { + { + &sensor_dev_attr_temp1_input.dev_attr.attr, + &sensor_dev_attr_temp1_label.dev_attr.attr, + NULL + }, + { + &sensor_dev_attr_temp2_input.dev_attr.attr, + &sensor_dev_attr_temp2_label.dev_attr.attr, + NULL + }, + { + &sensor_dev_attr_temp3_input.dev_attr.attr, + &sensor_dev_attr_temp3_label.dev_attr.attr, + NULL + }, + { + &sensor_dev_attr_temp4_input.dev_attr.attr, + &sensor_dev_attr_temp4_label.dev_attr.attr, + NULL + }, + { + &sensor_dev_attr_temp5_input.dev_attr.attr, + &sensor_dev_attr_temp5_label.dev_attr.attr, + NULL + }, + { + &sensor_dev_attr_temp6_input.dev_attr.attr, + &sensor_dev_attr_temp6_label.dev_attr.attr, + NULL + }, + { + &sensor_dev_attr_temp7_input.dev_attr.attr, + &sensor_dev_attr_temp7_label.dev_attr.attr, + NULL + }, + { + &sensor_dev_attr_temp8_input.dev_attr.attr, + &sensor_dev_attr_temp8_label.dev_attr.attr, + NULL + }, + { + &sensor_dev_attr_temp9_input.dev_attr.attr, + &sensor_dev_attr_temp9_label.dev_attr.attr, + NULL + }, + { + &sensor_dev_attr_temp10_input.dev_attr.attr, + &sensor_dev_attr_temp10_label.dev_attr.attr, + NULL + }, + { + &sensor_dev_attr_temp11_input.dev_attr.attr, + &sensor_dev_attr_temp11_label.dev_attr.attr, + NULL + }, + { + &sensor_dev_attr_temp12_input.dev_attr.attr, + &sensor_dev_attr_temp12_label.dev_attr.attr, + NULL + }, + { + &sensor_dev_attr_temp13_input.dev_attr.attr, + &sensor_dev_attr_temp13_label.dev_attr.attr, + NULL + }, + { + &sensor_dev_attr_temp14_input.dev_attr.attr, + &sensor_dev_attr_temp14_label.dev_attr.attr, + NULL + }, + { + &sensor_dev_attr_temp15_input.dev_attr.attr, + &sensor_dev_attr_temp15_label.dev_attr.attr, + NULL + }, + { + &sensor_dev_attr_temp16_input.dev_attr.attr, + &sensor_dev_attr_temp16_label.dev_attr.attr, + NULL + } +}; + +static ssize_t cpu_temp_label(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int id = (to_sensor_dev_attr(attr))->index - 1; + return sprintf(buf, "CPU %d Temperature\n", id); +} + +static ssize_t get_cpu_temp(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int id = (to_sensor_dev_attr(attr))->index - 1; + int value = loongson3_cpu_temp(id); + return sprintf(buf, "%d\n", value); +} + +static int create_sysfs_cputemp_files(struct kobject *kobj) +{ + int i, ret = 0; + + for (i = 0; i < nr_packages; i++) + ret = sysfs_create_files(kobj, hwmon_cputemp[i]); + + return ret; +} + +static void remove_sysfs_cputemp_files(struct kobject *kobj) +{ + int i; + + for (i = 0; i < nr_packages; i++) + sysfs_remove_files(kobj, hwmon_cputemp[i]); +} + +static int cpu_initial_threshold = 72000; +static int cpu_thermal_threshold = 96000; +module_param(cpu_thermal_threshold, int, 0644); +MODULE_PARM_DESC(cpu_thermal_threshold, "cpu thermal threshold (96000 (default))"); + +static struct delayed_work thermal_work; + +static void do_thermal_timer(struct work_struct *work) +{ + int i, value, temp_max = 0; + + for (i = 0; i < nr_packages; i++) { + value = loongson3_cpu_temp(i); + if (value > temp_max) + temp_max = value; + } + + if (temp_max <= cpu_thermal_threshold) + schedule_delayed_work(&thermal_work, msecs_to_jiffies(5000)); + else + orderly_poweroff(true); +} + +static int __init loongson_hwmon_init(void) +{ + int i, ret, value, temp_max = 0; + + pr_info("Loongson Hwmon Enter...\n"); + + cpu_hwmon_dev = hwmon_device_register(NULL); + if (IS_ERR(cpu_hwmon_dev)) { + ret = -ENOMEM; + pr_err("hwmon_device_register fail!\n"); + goto fail_hwmon_device_register; + } + + nr_packages = loongson_sysconf.nr_cpus / + loongson_sysconf.cores_per_package; + + ret = sysfs_create_group(&cpu_hwmon_dev->kobj, + &cpu_hwmon_attribute_group); + if (ret) { + pr_err("fail to create loongson hwmon!\n"); + goto fail_sysfs_create_group_hwmon; + } + + ret = create_sysfs_cputemp_files(&cpu_hwmon_dev->kobj); + if (ret) { + pr_err("fail to create cpu temperature interface!\n"); + goto fail_create_sysfs_cputemp_files; + } + + for (i = 0; i < nr_packages; i++) { + value = loongson3_cpu_temp(i); + if (value > temp_max) + temp_max = value; + } + + pr_info("Initial CPU temperature is %d (highest).\n", temp_max); + if (temp_max > cpu_initial_threshold) + cpu_thermal_threshold += temp_max - cpu_initial_threshold; + + INIT_DEFERRABLE_WORK(&thermal_work, do_thermal_timer); + schedule_delayed_work(&thermal_work, msecs_to_jiffies(20000)); + + return ret; + +fail_create_sysfs_cputemp_files: + sysfs_remove_group(&cpu_hwmon_dev->kobj, + &cpu_hwmon_attribute_group); + +fail_sysfs_create_group_hwmon: + hwmon_device_unregister(cpu_hwmon_dev); + +fail_hwmon_device_register: + return ret; +} + +static void __exit loongson_hwmon_exit(void) +{ + cancel_delayed_work_sync(&thermal_work); + remove_sysfs_cputemp_files(&cpu_hwmon_dev->kobj); + sysfs_remove_group(&cpu_hwmon_dev->kobj, + &cpu_hwmon_attribute_group); + hwmon_device_unregister(cpu_hwmon_dev); +} + +module_init(loongson_hwmon_init); +module_exit(loongson_hwmon_exit); + +MODULE_AUTHOR("Yu Xiang "); +MODULE_AUTHOR("Huacai Chen "); +MODULE_DESCRIPTION("Loongson CPU Hwmon driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/platform/loongarch/generic-laptop.c b/drivers/platform/loongarch/generic-laptop.c new file mode 100644 index 000000000000..c1b829238ecc --- /dev/null +++ b/drivers/platform/loongarch/generic-laptop.c @@ -0,0 +1,777 @@ +/* + * Generic Loongson processor based LAPTOP/ALL-IN-ONE driver + * + * Jianmin Lv + * Huacai Chen + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* ACPI HIDs */ +#define LOONGSON_ACPI_HKEY_HID "LOON0000" +#define LOONGSON_ACPI_EC_HID "PNP0C09" + +/**************************************************************************** + * Main driver + */ + +#define ACPI_LAPTOP_VERSION "1.0" +#define ACPI_LAPTOP_NAME "loongson-laptop" +#define ACPI_LAPTOP_DESC "Loongson Laptop/all-in-one ACPI Driver" +#define ACPI_LAPTOP_FILE ACPI_LAPTOP_NAME "_acpi" +#define ACPI_LAPTOP_DRVR_NAME ACPI_LAPTOP_FILE +#define ACPI_LAPTOP_ACPI_EVENT_PREFIX "loongson" +/**************************************************************************** + * Driver-wide structs and misc. variables + */ + +struct generic_struct; + +struct generic_acpi_drv_struct { + u32 type; + acpi_handle *handle; + const struct acpi_device_id *hid; + struct acpi_device *device; + struct acpi_driver *driver; + void (*notify)(struct generic_struct *, u32); +}; + +struct generic_struct { + char *name; + + int (*init)(struct generic_struct *); + + struct generic_acpi_drv_struct *acpi; + + struct { + u8 acpi_driver_registered; + u8 acpi_notify_installed; + } flags; +}; + + +static struct { + u32 input_device_registered:1; +} generic_features; + +static int hotkey_status_get(int *status); + +/**************************************************************************** + **************************************************************************** + * + * ACPI Helpers and device model + * + **************************************************************************** + ****************************************************************************/ + +/************************************************************************* + * ACPI basic handles + */ + +static int acpi_evalf(acpi_handle handle, + int *res, char *method, char *fmt, ...); +static acpi_handle ec_handle; + +#define GENERIC_HANDLE(object, parent, paths...) \ + static acpi_handle object##_handle; \ + static const acpi_handle * const object##_parent __initconst = \ + &parent##_handle; \ + static char *object##_paths[] __initdata = { paths } + +GENERIC_HANDLE(hkey, ec, "\\_SB.HKEY", + "^HKEY", + "HKEY", + ); + +/************************************************************************* + * ACPI device model + */ + +#define GENERIC_ACPIHANDLE_INIT(object) \ + drv_acpi_handle_init(#object, &object##_handle, *object##_parent, \ + object##_paths, ARRAY_SIZE(object##_paths)) + +static void __init drv_acpi_handle_init(const char *name, + acpi_handle *handle, const acpi_handle parent, + char **paths, const int num_paths) +{ + int i; + acpi_status status; + + for (i = 0; i < num_paths; i++) { + status = acpi_get_handle(parent, paths[i], handle); + if (ACPI_SUCCESS(status)) + return; + } + + *handle = NULL; +} +static acpi_status __init generic_acpi_handle_locate_callback(acpi_handle handle, + u32 level, void *context, void **return_value) +{ + *(acpi_handle *)return_value = handle; + + return AE_CTRL_TERMINATE; +} + +static void __init generic_acpi_handle_locate(const char *name, + const char *hid, + acpi_handle *handle) +{ + acpi_status status; + acpi_handle device_found; + + BUG_ON(!name || !hid || !handle); + + memset(&device_found, 0, sizeof(device_found)); + status = acpi_get_devices(hid, generic_acpi_handle_locate_callback, + (void *)name, &device_found); + + *handle = NULL; + + if (ACPI_SUCCESS(status)) + *handle = device_found; +} + +static void dispatch_acpi_notify(acpi_handle handle, u32 event, void *data) +{ + struct generic_struct *sub_driver = data; + + if (!sub_driver || !sub_driver->acpi || !sub_driver->acpi->notify) + return; + sub_driver->acpi->notify(sub_driver, event); +} + +static int __init setup_acpi_notify(struct generic_struct *sub_driver) +{ + acpi_status status; + int rc; + + BUG_ON(!sub_driver->acpi); + + if (!*sub_driver->acpi->handle) + return 0; + + rc = acpi_bus_get_device(*sub_driver->acpi->handle, &sub_driver->acpi->device); + if (rc < 0) { + pr_err("acpi_bus_get_device(%s) failed: %d\n", sub_driver->name, rc); + return -ENODEV; + } + + sub_driver->acpi->device->driver_data = sub_driver; + sprintf(acpi_device_class(sub_driver->acpi->device), "%s/%s", + ACPI_LAPTOP_ACPI_EVENT_PREFIX, + sub_driver->name); + + status = acpi_install_notify_handler(*sub_driver->acpi->handle, + sub_driver->acpi->type, dispatch_acpi_notify, sub_driver); + if (ACPI_FAILURE(status)) { + if (status == AE_ALREADY_EXISTS) { + pr_notice("another device driver is already " + "handling %s events\n", sub_driver->name); + } else { + pr_err("acpi_install_notify_handler(%s) failed: %s\n", + sub_driver->name, acpi_format_exception(status)); + } + return -ENODEV; + } + sub_driver->flags.acpi_notify_installed = 1; + return 0; +} + +static int __init tpacpi_device_add(struct acpi_device *device) +{ + return 0; +} + +static struct input_dev *generic_inputdev; + +#ifdef CONFIG_PM +static int loongson_generic_suspend(struct device *dev) +{ + return 0; +} +static int loongson_generic_resume(struct device *dev) +{ + int status = 0; + struct key_entry ke; + + /* + * Only if the firmware supports SW_LID event model, we can handle the + * event. This is for the consideration of development board without + * EC. + */ + if (test_bit(SW_LID, generic_inputdev->swbit)) { + if (hotkey_status_get(&status)) + return -EIO; + /* + * The input device sw element records the last lid status. + * When the system is awakened by other wake-up sources, + * the lid event will also be reported. The judgment of + * adding SW_LID bit which in sw element can avoid this + * case. + * + * input system will drop lid event when current lid event + * value and last lid status in the same data set,which + * data set inclue zero set and no zero set. so laptop + * driver doesn't report repeated events. + * + * Lid status is generally 0, but hardware exception is + * considered. So add lid status confirmation. + */ + if (test_bit(SW_LID, generic_inputdev->sw) && !(status & (1 << SW_LID))) { + ke.type = KE_SW; + ke.sw.value = (u8)status; + ke.sw.code = SW_LID; + sparse_keymap_report_entry(generic_inputdev, &ke, + 1, true); + } + } + return 0; +} + +static SIMPLE_DEV_PM_OPS(loongson_generic_pm, loongson_generic_suspend, + loongson_generic_resume); +#endif + +static int __init register_generic_subdriver(struct generic_struct *sub_driver) +{ + int rc; + + BUG_ON(!sub_driver->acpi); + + sub_driver->acpi->driver = kzalloc(sizeof(struct acpi_driver), GFP_KERNEL); + if (!sub_driver->acpi->driver) { + pr_err("failed to allocate memory for ibm->acpi->driver\n"); + return -ENOMEM; + } + + sprintf(sub_driver->acpi->driver->name, "%s_%s", ACPI_LAPTOP_NAME, sub_driver->name); + sub_driver->acpi->driver->ids = sub_driver->acpi->hid; + sub_driver->acpi->driver->ops.add = &tpacpi_device_add; +#ifdef CONFIG_PM + sub_driver->acpi->driver->drv.pm = &loongson_generic_pm; +#endif + rc = acpi_bus_register_driver(sub_driver->acpi->driver); + if (rc < 0) { + pr_err("acpi_bus_register_driver(%s) failed: %d\n", + sub_driver->name, rc); + kfree(sub_driver->acpi->driver); + sub_driver->acpi->driver = NULL; + } else if (!rc) + sub_driver->flags.acpi_driver_registered = 1; + + return rc; +} + +/* + * Loongson generic laptop firmware event model + */ + +#define GENERIC_HOTKEY_MAP_MAX 64 +#define METHOD_NAME__KMAP "KMAP" +static struct key_entry hotkey_keycode_map[GENERIC_HOTKEY_MAP_MAX]; +static int hkey_map(void) +{ + struct acpi_buffer buf; + union acpi_object *pack; + acpi_status status; + u32 index; + + buf.length = ACPI_ALLOCATE_BUFFER; + status = acpi_evaluate_object_typed(hkey_handle, METHOD_NAME__KMAP, NULL, &buf, ACPI_TYPE_PACKAGE); + if (status != AE_OK) { + printk(KERN_ERR ": ACPI exception: %s\n", + acpi_format_exception(status)); + return -1; + } + pack = buf.pointer; + for (index = 0; index < pack->package.count; index++) { + union acpi_object *sub_pack = &pack->package.elements[index]; + union acpi_object *element = &sub_pack->package.elements[0]; + + hotkey_keycode_map[index].type = element->integer.value; + element = &sub_pack->package.elements[1]; + hotkey_keycode_map[index].code = element->integer.value; + element = &sub_pack->package.elements[2]; + hotkey_keycode_map[index].keycode = element->integer.value; + } + return 0; +} + +static int hotkey_backlight_set(bool enable) +{ + if (!acpi_evalf(hkey_handle, NULL, "VCBL", "vd", enable ? 1 : 0)) + return -EIO; + + return 0; +} + +static int __init event_init(struct generic_struct *sub_driver) +{ + int ret; + + GENERIC_ACPIHANDLE_INIT(hkey); + ret = hkey_map(); + if (ret) { + printk(KERN_ERR "Fail to parse keymap from DSDT.\n"); + return ret; + } + + ret = sparse_keymap_setup(generic_inputdev, hotkey_keycode_map, NULL); + if (ret) { + printk(KERN_ERR "Fail to setup input device keymap\n"); + input_free_device(generic_inputdev); + + return ret; + } + + /* + * This hotkey driver handle backlight event when + * acpi_video_get_backlight_type() gets acpi_backlight_vendor + */ + if (acpi_video_get_backlight_type() != acpi_backlight_vendor) + hotkey_backlight_set(false); + else + hotkey_backlight_set(true); + + printk("ACPI:enabling firmware HKEY event interface...\n"); + return ret; + +} + +#define GENERIC_EVENT_TYPE_OFF 12 +#define GENERIC_EVENT_MASK 0xFFF +#define TPACPI_MAX_ACPI_ARGS 3 +static int acpi_evalf(acpi_handle handle, + int *res, char *method, char *fmt, ...) +{ + char *fmt0 = fmt; + struct acpi_object_list params; + union acpi_object in_objs[TPACPI_MAX_ACPI_ARGS]; + struct acpi_buffer result, *resultp; + union acpi_object out_obj; + acpi_status status; + va_list ap; + char res_type; + int success; + int quiet; + + if (!*fmt) { + pr_err("acpi_evalf() called with empty format\n"); + return 0; + } + + if (*fmt == 'q') { + quiet = 1; + fmt++; + } else + quiet = 0; + + res_type = *(fmt++); + + params.count = 0; + params.pointer = &in_objs[0]; + + va_start(ap, fmt); + while (*fmt) { + char c = *(fmt++); + switch (c) { + case 'd': /* int */ + in_objs[params.count].integer.value = va_arg(ap, int); + in_objs[params.count++].type = ACPI_TYPE_INTEGER; + break; + /* add more types as needed */ + default: + pr_err("acpi_evalf() called with invalid format character '%c'\n", + c); + va_end(ap); + return 0; + } + } + va_end(ap); + + if (res_type != 'v') { + result.length = sizeof(out_obj); + result.pointer = &out_obj; + resultp = &result; + } else + resultp = NULL; + + status = acpi_evaluate_object(handle, method, ¶ms, resultp); + + switch (res_type) { + case 'd': /* int */ + success = (status == AE_OK && + out_obj.type == ACPI_TYPE_INTEGER); + if (success && res) + *res = out_obj.integer.value; + break; + case 'v': /* void */ + success = status == AE_OK; + break; + /* add more types as needed */ + default: + pr_err("acpi_evalf() called with invalid format character '%c'\n", + res_type); + return 0; + } + + if (!success && !quiet) + pr_err("acpi_evalf(%s, %s, ...) failed: %s\n", + method, fmt0, acpi_format_exception(status)); + + return success; +} + +int ec_get_brightness(void) +{ + int status = 0; + + if (!hkey_handle) + return -ENXIO; + + if (!acpi_evalf(hkey_handle, &status, "ECBG", "d")) + return -EIO; + + if (status < 0) + return status; + + return status; +} +EXPORT_SYMBOL(ec_get_brightness); + +int ec_set_brightness(int level) +{ + + int ret = 0; + if (!hkey_handle) + return -ENXIO; + + if (!acpi_evalf(hkey_handle, NULL, "ECBS", "vd", level)) + ret = -EIO; + + return ret; +} +EXPORT_SYMBOL(ec_set_brightness); + +int ec_backlight_level(u8 level) +{ + int status = 0; + + if (!hkey_handle) + return -ENXIO; + + if (!acpi_evalf(hkey_handle, &status, "ECLL", "d")) + return -EIO; + + if ((status < 0) || (level > status)) + return status; + + if (!acpi_evalf(hkey_handle, &status, "ECSL", "d")) + return -EIO; + + if ((status < 0) || (level < status)) + return status; + + return level; +} +EXPORT_SYMBOL(ec_backlight_level); + +static int loongson_laptop_backlight_update(struct backlight_device *bd) +{ + int lvl = ec_backlight_level(bd->props.brightness); + if (lvl < 0) + return -EIO; + if (ec_set_brightness(lvl)) + return -EIO; + + return 0; +} + +static int loongson_laptop_get_brightness(struct backlight_device *bd) +{ + u8 __maybe_unused level; + + level = ec_get_brightness(); + if (level < 0) + return -EIO; + + return level; +} + +static const struct backlight_ops backlight_laptop_ops = { + .update_status = loongson_laptop_backlight_update, + .get_brightness = loongson_laptop_get_brightness, +}; + +static int laptop_backlight_register(void) +{ + int status = 0; + struct backlight_properties props; + + memset(&props, 0, sizeof(props)); + props.type = BACKLIGHT_RAW; + + if (!acpi_evalf(hkey_handle, &status, "ECLL", "d")) + return -EIO; + + props.max_brightness = status; + props.brightness = 1; + + backlight_device_register("loongson_laptop", + NULL, NULL, &backlight_laptop_ops, &props); + + return 0; +} + +int turn_off_backlight(void) +{ + int status; + union acpi_object arg0 = { ACPI_TYPE_INTEGER }; + struct acpi_object_list args = { 1, &arg0 }; + + arg0.integer.value = 0; + status = acpi_evaluate_object(NULL, "\\BLSW", &args, NULL); + if (ACPI_FAILURE(status)) { + pr_info("Loongson lvds error:0x%x\n", status); + return -ENODEV; + } + return 0; +} + +int turn_on_backlight(void) +{ + int status; + union acpi_object arg0 = { ACPI_TYPE_INTEGER }; + struct acpi_object_list args = { 1, &arg0 }; + + arg0.integer.value = 1; + status = acpi_evaluate_object(NULL, "\\BLSW", &args, NULL); + if (ACPI_FAILURE(status)) { + pr_info("Loongson lvds error:0x%x\n", status); + return -ENODEV; + } + return 0; +} + +static int hotkey_status_get(int *status) +{ + if (!acpi_evalf(hkey_handle, status, "GSWS", "d")) + return -EIO; + + return 0; +} + +static void event_notify(struct generic_struct *sub_driver, u32 event) +{ + struct key_entry *ke = NULL; + int scan_code = event & GENERIC_EVENT_MASK; + int type = (event >> GENERIC_EVENT_TYPE_OFF) & 0xF; + + ke = sparse_keymap_entry_from_scancode(generic_inputdev, scan_code); + if (ke) { + if (type == KE_SW) { + int status = 0; + + if (hotkey_status_get(&status)) + return; + ke->sw.value = !!(status & (1 << ke->sw.code)); + } + sparse_keymap_report_entry(generic_inputdev, ke, 1, true); + } +} + +static const struct acpi_device_id loongson_htk_device_ids[] = { + {LOONGSON_ACPI_HKEY_HID, 0}, + {"", 0}, +}; + +static struct generic_acpi_drv_struct ec_event_acpidriver = { + .hid = loongson_htk_device_ids, + .notify = event_notify, + .handle = &hkey_handle, + .type = ACPI_DEVICE_NOTIFY, +}; + +/**************************************************************************** + **************************************************************************** + * + * Infrastructure + * + **************************************************************************** + ****************************************************************************/ +static void generic_exit(struct generic_struct *sub_driver) +{ + + if (sub_driver->flags.acpi_notify_installed) { + BUG_ON(!sub_driver->acpi); + acpi_remove_notify_handler(*sub_driver->acpi->handle, + sub_driver->acpi->type, + dispatch_acpi_notify); + sub_driver->flags.acpi_notify_installed = 0; + } + + if (sub_driver->flags.acpi_driver_registered) { + BUG_ON(!sub_driver->acpi); + acpi_bus_unregister_driver(sub_driver->acpi->driver); + kfree(sub_driver->acpi->driver); + sub_driver->acpi->driver = NULL; + sub_driver->flags.acpi_driver_registered = 0; + } + +} + +static int __init probe_for_generic(void) +{ + if (acpi_disabled) + return -ENODEV; + + /* The EC handler is required */ + generic_acpi_handle_locate("ec", LOONGSON_ACPI_EC_HID, &ec_handle); + if (!ec_handle) { + pr_err("Not yet supported Loongson Generic Laptop/All-in-one detected!\n"); + return -ENODEV; + } + + return 0; +} +static int __init generic_subdriver_init(struct generic_struct *sub_driver) +{ + int ret; + + BUG_ON(sub_driver == NULL); + + if (sub_driver->init) + sub_driver->init(sub_driver); + + if (sub_driver->acpi) { + if (sub_driver->acpi->hid) { + ret = register_generic_subdriver(sub_driver); + if (ret) + goto err_out; + } + + if (sub_driver->acpi->notify) { + ret = setup_acpi_notify(sub_driver); + if (ret == -ENODEV) { + ret = 0; + goto err_out; + } + if (ret < 0) + goto err_out; + } + } + + return 0; + +err_out: + generic_exit(sub_driver); + return (ret < 0) ? ret : 0; +} + +/* Module init, exit, parameters */ +static struct generic_struct generic_sub_drivers[] __initdata = { + { + .name = "EC Event", + .init = event_init, + .acpi = &ec_event_acpidriver, + }, +}; + +static void generic_acpi_laptop_exit(void); + +static int __init generic_acpi_laptop_init(void) +{ + int ret, i; + int status; + ret = probe_for_generic(); + if (ret) { + generic_acpi_laptop_exit(); + return ret; + } + generic_inputdev = input_allocate_device(); + if (!generic_inputdev) { + pr_err("unable to allocate input device\n"); + generic_acpi_laptop_exit(); + return -ENOMEM; + } + + /* Prepare input device, but don't register */ + generic_inputdev->name = + "Loongson Generic Laptop/All-in-one Extra Buttons"; + generic_inputdev->phys = ACPI_LAPTOP_DRVR_NAME "/input0"; + generic_inputdev->id.bustype = BUS_HOST; + generic_inputdev->dev.parent = NULL; + + /* Init subdrivers */ + for (i = 0; i < ARRAY_SIZE(generic_sub_drivers); i++) { + ret = generic_subdriver_init(&generic_sub_drivers[i]); + if (ret < 0) { + generic_acpi_laptop_exit(); + return ret; + } + } + + ret = input_register_device(generic_inputdev); + if (ret < 0) { + pr_err("unable to register input device\n"); + generic_acpi_laptop_exit(); + return ret; + } + + generic_features.input_device_registered = 1; + + if (acpi_evalf(hkey_handle, &status, "ECBG", "d")) { + pr_info("Loongson Laptop used, init brightness is 0x%x\n", status); + ret = laptop_backlight_register(); + if (ret < 0) + pr_err("Loongson Laptop: laptop-backlight device register failed\n"); + } + + return 0; +} + +static void __exit generic_acpi_laptop_exit(void) +{ + if (generic_inputdev) { + if (generic_features.input_device_registered) + input_unregister_device(generic_inputdev); + else + input_free_device(generic_inputdev); + } +} + +module_init(generic_acpi_laptop_init); +module_exit(generic_acpi_laptop_exit); + +MODULE_ALIAS("platform:acpi-laptop"); +MODULE_AUTHOR("lvjianmin "); +MODULE_DESCRIPTION(ACPI_LAPTOP_DESC); +MODULE_VERSION(ACPI_LAPTOP_VERSION); +MODULE_LICENSE("GPL"); -- Gitee From 84a617c9117dceee7897c0cd6d5bd91c315aeb4f Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 23 Sep 2021 20:22:57 +0800 Subject: [PATCH 52/59] LoongArch: Add LS7A IOMMU support Add driver support for the IOMMU in LS7A. If you need to enable it, please add the loongson_iommu=on kernel parameter. Change-Id: Ia59927ceb1307af5c82623fffcd92584760c0770 Signed-off-by: Huacai Chen --- drivers/iommu/Kconfig | 1 + drivers/iommu/Makefile | 2 +- drivers/iommu/loongson/Kconfig | 14 + drivers/iommu/loongson/Makefile | 2 + drivers/iommu/loongson/iommu.c | 1373 +++++++++++++++++++++++++++++++ drivers/iommu/loongson/iommu.h | 210 +++++ drivers/vfio/Kconfig | 2 +- include/linux/pci_ids.h | 1 + 8 files changed, 1603 insertions(+), 2 deletions(-) create mode 100644 drivers/iommu/loongson/Kconfig create mode 100644 drivers/iommu/loongson/Makefile create mode 100644 drivers/iommu/loongson/iommu.c create mode 100644 drivers/iommu/loongson/iommu.h diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index d97e38bfbe4b..9812e107eb51 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -137,6 +137,7 @@ config MSM_IOMMU source "drivers/iommu/amd/Kconfig" source "drivers/iommu/intel/Kconfig" source "drivers/iommu/sw64/Kconfig" +source "drivers/iommu/loongson/Kconfig" config IRQ_REMAP bool "Support for Interrupt Remapping" diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index 1e7519f56afb..af0da5e0f9a6 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -obj-y += amd/ intel/ arm/ sw64/ +obj-y += amd/ intel/ arm/ sw64/ loongson/ obj-$(CONFIG_IOMMU_API) += iommu.o obj-$(CONFIG_IOMMU_API) += iommu-traces.o obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o diff --git a/drivers/iommu/loongson/Kconfig b/drivers/iommu/loongson/Kconfig new file mode 100644 index 000000000000..7b5d1b947c03 --- /dev/null +++ b/drivers/iommu/loongson/Kconfig @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: GPL-2.0-only +# Loongson IOMMU support +config LOONGSON_IOMMU + bool "Loongson IOMMU support" + select IOMMU_API + select IOMMU_DMA + select IOMMU_DEFAULT_PASSTHROUGH + depends on LOONGARCH + help + With this option you can enable support for Loongson IOMMU hardware in + your system. An IOMMU is a hardware component which provides remapping + of DMA memory accesses from devices. With an IOMMU you can isolate the + DMA memory of different devices and protect the system from misbehaving + device drivers or hardware. diff --git a/drivers/iommu/loongson/Makefile b/drivers/iommu/loongson/Makefile new file mode 100644 index 000000000000..99e38e56ebc2 --- /dev/null +++ b/drivers/iommu/loongson/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_LOONGSON_IOMMU) += iommu.o diff --git a/drivers/iommu/loongson/iommu.c b/drivers/iommu/loongson/iommu.c new file mode 100644 index 000000000000..a73c4326782b --- /dev/null +++ b/drivers/iommu/loongson/iommu.c @@ -0,0 +1,1373 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Loongson IOMMU Driver + * + * Copyright (C) 2020-2021 Loongson Technology Ltd. + * Author: Lv Chen + * Wang Yang + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "iommu.h" + +#define LOOP_TIMEOUT 100000 +#define IOVA_START (SZ_256M) +#define IOVA_END0 (SZ_2G + SZ_256M) + +/* Lock for domain allocing */ +static DEFINE_SPINLOCK(domain_bitmap_lock); + +/* Lock for priv->list */ +static DEFINE_SPINLOCK(loongson_iommu_priv_lock); + +/* Lock for bitmap of page table */ +static DEFINE_SPINLOCK(pgtable_bitmap_lock); + +/* Lock for iommu page table */ +static DEFINE_SPINLOCK(loongson_iommu_pgtlock); + +/* List of all domain privates */ +static LIST_HEAD(loongson_iommu_priv_list); + +/* List of all available dev_data structures */ +static LIST_HEAD(loongson_dev_data_list); + +/* List of shadow page table */ +static LIST_HEAD(iommu_shadow_pglist); + +/* Bitmap of global domains */ +void *loongson_iommu_domain_alloc_bitmap; + +/* Bitmap of devtable and pages for page table */ +void *loongson_iommu_devtable_bitmap; +void *loongson_iommu_pgtable_alloc_bitmap; + +unsigned long iommu_mem_base; +unsigned long iommu_pgt_base; +static struct iommu_ops loongson_iommu_ops; + +int loongson_iommu_disable = 1; + +static void iommu_write_regl(unsigned long off, u32 val) +{ + *(u32 *)(iommu_mem_base + off) = val; + __sync(); +} + +static u32 iommu_read_regl(unsigned long off) +{ + u32 val; + + val = *(u32 *)(iommu_mem_base + off); + __sync(); + return val; +} + +static void iommu_translate_disable(void) +{ + u32 val = iommu_read_regl(LA_IOMMU_EIVDB); + + /* Disable */ + val &= ~(1 << 31); + iommu_write_regl(LA_IOMMU_EIVDB, val); + + /* Write cmd */ + val = iommu_read_regl(LA_IOMMU_CMD); + val &= 0xfffffffc; + iommu_write_regl(LA_IOMMU_CMD, val); +} + +static void iommu_translate_enable(void) +{ + u32 val = 0; + + val = iommu_read_regl(LA_IOMMU_EIVDB); + + /* Enable */ + val |= (1 << 31); + iommu_write_regl(LA_IOMMU_EIVDB, val); + + /* Write cmd */ + val = iommu_read_regl(LA_IOMMU_CMD); + val &= 0xfffffffc; + iommu_write_regl(LA_IOMMU_CMD, val); +} + +static bool loongson_iommu_capable(enum iommu_cap cap) +{ + switch (cap) { + case IOMMU_CAP_CACHE_COHERENCY: + return true; + default: + return false; + } +} + +static loongson_iommu_priv *to_loongson_iommu_priv(struct iommu_domain *dom) +{ + return container_of(dom, loongson_iommu_priv, domain); +} + +/* + * Check whether the system has a priv. + * If yes, it returns 1 and if not, it returns 0 + */ +static int check_has_priv(void) +{ + spin_lock(&loongson_iommu_priv_lock); + while (!list_empty(&loongson_iommu_priv_list)) { + spin_unlock(&loongson_iommu_priv_lock); + return 1; + } + spin_unlock(&loongson_iommu_priv_lock); + + return 0; +} + +static int update_dev_table(u16 domain_id, + struct loongson_iommu_dev_data *dev_data, int flag) +{ + u32 val = 0; + int index; + unsigned short bdf; + + bdf = dev_data->bdf; + + /* Set device table */ + if (flag) { + index = find_first_zero_bit(loongson_iommu_devtable_bitmap, + MAX_ATTACHED_DEV_ID); + if (index < MAX_ATTACHED_DEV_ID) { + __set_bit(index, loongson_iommu_devtable_bitmap); + dev_data->index = index; + } + + val = bdf & 0xffff; + val |= ((domain_id & 0xf) << 16); /* domain id */ + val |= ((index & 0xf) << 24); /* index */ + val |= (0x1 << 20); /* valid */ + val |= (0x1 << 31); /* enable */ + iommu_write_regl(LA_IOMMU_EIVDB, val); + + val = iommu_read_regl(LA_IOMMU_CMD); + val &= 0xfffffffc; + iommu_write_regl(LA_IOMMU_CMD, val); + } else { + /* Flush device table */ + index = dev_data->index; + + val = iommu_read_regl(LA_IOMMU_EIVDB); + val &= ~(0x7fffffff); + val |= ((index & 0xf) << 24); /* index */ + iommu_write_regl(LA_IOMMU_EIVDB, val); + + val = iommu_read_regl(LA_IOMMU_CMD); + val &= 0xfffffffc; + iommu_write_regl(LA_IOMMU_CMD, val); + + if (index < MAX_ATTACHED_DEV_ID) + __clear_bit(index, loongson_iommu_devtable_bitmap); + } + + return 0; +} + +static void flush_iotlb(void) +{ + u32 val, cmd; + + val = iommu_read_regl(LA_IOMMU_VBTC); + val &= ~0x1f; + + /* Flush all tlb */ + val |= 0x5; + iommu_write_regl(LA_IOMMU_VBTC, val); + + cmd = iommu_read_regl(LA_IOMMU_CMD); + cmd &= 0xfffffffc; + iommu_write_regl(LA_IOMMU_CMD, cmd); +} + +static int flush_pgtable_is_busy(void) +{ + u32 val = iommu_read_regl(LA_IOMMU_VBTC); + + return val & IOMMU_PGTABLE_BUSY; +} + +static int __iommu_flush_iotlb_all(loongson_iommu_priv *priv) +{ + u32 retry = 0; + + flush_iotlb(); + while (flush_pgtable_is_busy()) { + if (retry == LOOP_TIMEOUT) { + pr_err("Loongson-IOMMU: iotlb flush busy\n"); + return -EIO; + } + retry++; + udelay(1); + } + iommu_translate_enable(); + + return 0; +} + +static void priv_flush_iotlb_pde(loongson_iommu_priv *priv) +{ + __iommu_flush_iotlb_all(priv); +} + +static void do_attach(struct loongson_iommu_priv *priv, + struct loongson_iommu_dev_data *dev_data) +{ + if (!dev_data->count) + return; + + dev_data->priv = priv; + list_add(&dev_data->list, &priv->dev_list); + priv->dev_cnt += 1; + + update_dev_table(priv->id, dev_data, 1); + if (priv->dev_cnt > 0) + priv_flush_iotlb_pde(priv); +} + +static void do_detach(struct loongson_iommu_priv *priv, + struct loongson_iommu_dev_data *dev_data) +{ + if (dev_data->count) + return; + + list_del(&dev_data->list); + priv->dev_cnt -= 1; + update_dev_table(priv->id, dev_data, 0); + + dev_data->priv = NULL; +} + +static void cleanup_domain(struct loongson_iommu_priv *priv) +{ + struct loongson_iommu_dev_data *entry; + + spin_lock(&priv->devlock); + + while (!list_empty(&priv->dev_list)) { + entry = list_first_entry(&priv->dev_list, + struct loongson_iommu_dev_data, list); + do_detach(priv, entry); + } + + spin_unlock(&priv->devlock); +} + +static int domain_id_alloc(void) +{ + int id = -1; + + spin_lock(&domain_bitmap_lock); + id = find_first_zero_bit(loongson_iommu_domain_alloc_bitmap, MAX_DOMAIN_ID); + if (id < MAX_DOMAIN_ID) + __set_bit(id, loongson_iommu_domain_alloc_bitmap); + else + pr_err("Loongson-IOMMU: Alloc domain id over max domain id\n"); + + spin_unlock(&domain_bitmap_lock); + + return id; +} + +static void domain_id_free(int id) +{ + spin_lock(&domain_bitmap_lock); + if ((id >= 0) && (id < MAX_DOMAIN_ID)) + __clear_bit(id, loongson_iommu_domain_alloc_bitmap); + + spin_unlock(&domain_bitmap_lock); +} + +/* + * * This function adds a private domain to the global domain list + */ +static void add_domain_to_list(struct loongson_iommu_priv *priv) +{ + spin_lock(&loongson_iommu_priv_lock); + list_add(&priv->list, &loongson_iommu_priv_list); + spin_unlock(&loongson_iommu_priv_lock); +} + +static void del_domain_from_list(struct loongson_iommu_priv *priv) +{ + spin_lock(&loongson_iommu_priv_lock); + list_del(&priv->list); + spin_unlock(&loongson_iommu_priv_lock); +} + +static void iommu_pgd_init(void *pgd) +{ + memset(pgd, 0x0, IOMMU_PAGE_SIZE); +} + +static void iommu_pmd_init(void *pmd) +{ + memset(pmd, 0x0, IOMMU_PAGE_SIZE); +} + +static inline int iommu_pgd_none(unsigned long *pgd) +{ + return *pgd == 0x0; +} + +static inline int iommu_pmd_none(unsigned long *pmd) +{ + return *pmd == 0x0; +} + +static shadow_pg_entry *iommu_zalloc_page(struct loongson_iommu_priv *priv) +{ + int index = 0; + unsigned long addr; + iommu_pte *new_pg; + shadow_pg_entry *entry; + + entry = kmalloc(sizeof(*entry), GFP_ATOMIC); + if (!entry) { + kfree(entry); + return NULL; + } + + spin_lock(&pgtable_bitmap_lock); + index = find_first_zero_bit(loongson_iommu_pgtable_alloc_bitmap, + MAX_PAGES_NUM); + if (index < MAX_PAGES_NUM) { + addr = iommu_pgt_base + index * IOMMU_PAGE_SIZE; + new_pg = (iommu_pte *)(addr); + memset(new_pg, 0x0, IOMMU_PAGE_SIZE); + entry->index = index; + + addr = get_zeroed_page(GFP_ATOMIC); + entry->va = (unsigned long *)(addr); + if (!entry->va) { + spin_unlock(&pgtable_bitmap_lock); + pr_err("Loongson-IOMMU: get zeroed page err\n"); + kfree(entry); + return NULL; + } + __set_bit(index, loongson_iommu_pgtable_alloc_bitmap); + + entry->pa = virt_to_phys(entry->va) & IOMMU_PAGE_MASK; + list_add_tail(&entry->pglist, &iommu_shadow_pglist); + + priv->used_pages++; + } else { + pr_err("Loongson-IOMMU: not enough memory for iommu page table\n"); + kfree(entry); + return NULL; + } + spin_unlock(&pgtable_bitmap_lock); + + return entry; +} + +static void iommu_free_page(struct loongson_iommu_priv *priv, shadow_pg_entry *entry) +{ + unsigned long addr; + + spin_lock(&pgtable_bitmap_lock); + if (entry->index < MAX_PAGES_NUM) { + addr = iommu_pgt_base + entry->index * IOMMU_PAGE_SIZE; + memset((void *)(addr), 0x0, IOMMU_PAGE_SIZE); + __clear_bit(entry->index, loongson_iommu_pgtable_alloc_bitmap); + entry->index = -1; + free_page((unsigned long)entry->va); + list_del(&entry->pglist); + kfree(entry); + priv->used_pages--; + } + spin_unlock(&pgtable_bitmap_lock); +} + +static shadow_pg_entry *index_to_pg_entry(int index) +{ + struct shadow_pg_entry *entry; + + list_for_each_entry(entry, &iommu_shadow_pglist, pglist) { + if (entry->index == index) + return entry; + } + + return NULL; +} + +static shadow_pg_entry *pa_to_pg_entry(unsigned long pa) +{ + struct shadow_pg_entry *entry; + + list_for_each_entry(entry, &iommu_shadow_pglist, pglist) { + if (entry->pa == pa) + return entry; + } + + return NULL; +} + +static void free_pagetable(struct loongson_iommu_priv *priv, void *vaddr) +{ + unsigned long *pgd, *shd_pgd_entry, *shd_pmd_entry, *shd_tmp_pmd_entry; + int index, i, j; + shadow_pg_entry *entry, *entry1; + unsigned long pa, tmp; + + pgd = (unsigned long *)vaddr; + index = ((unsigned long)pgd - iommu_pgt_base) / IOMMU_PAGE_SIZE; + entry = index_to_pg_entry(index); + if (!entry) { + pr_err("Loongson-IOMMU: find err index:%d, pgd:0x%lx\n", + index, (unsigned long)pgd); + return; + } + + for (i = 0; i < IOMMU_PTRS_PER_PGD; i++) { + + shd_pgd_entry = entry->va + i; + + if (!IOMMU_PTE_PRESENT(*shd_pgd_entry)) + continue; + + tmp = (*shd_pgd_entry) & IOMMU_PAGE_MASK; + shd_pmd_entry = (unsigned long *)iommu_phys_to_virt(tmp); + + if (!priv->is_hugepage) { + for (j = 0; j < IOMMU_PTRS_PER_PMD; j++) { + + shd_tmp_pmd_entry = shd_pmd_entry + j; + + if (!IOMMU_PTE_PRESENT(*shd_tmp_pmd_entry)) + continue; + + pa = (unsigned long)((*shd_tmp_pmd_entry) & IOMMU_PAGE_MASK); + entry1 = pa_to_pg_entry(pa); + if (!entry1) { + pr_err("Loongson-IOMMU: find err id:%d,pgd:0x%lx\n", + priv->id, (unsigned long)priv->pgd); + continue; + } + iommu_free_page(priv, entry1); + } + } + + /* Free pmd page */ + pa = (unsigned long)((*shd_pgd_entry) & IOMMU_PAGE_MASK); + entry1 = pa_to_pg_entry(pa); + if (!entry1) { + pr_err("Loongson-IOMMU: find err id:%d,pgd:0x%lx\n", + priv->id, (unsigned long)priv->pgd); + continue; + } + iommu_free_page(priv, entry1); + } + + /* Free pgd page */ + iommu_free_page(priv, entry); +} + +static int loongson_iommu_priv_init(struct loongson_iommu_priv *priv) +{ + int index; + unsigned long pgd_pa; + u32 dir_ctrl, pgd_lo, pgd_hi; + shadow_pg_entry *entry; + + spin_lock_init(&priv->devlock); + + /* Alloc pgd page,set base in register */ + spin_lock(&loongson_iommu_pgtlock); + + entry = iommu_zalloc_page(priv); + if (!entry) { + spin_unlock(&loongson_iommu_pgtlock); + pr_err("Loongson-IOMMU: alloc shadow page entry err\n"); + return -ENOMEM; + } + index = entry->index; + priv->pgd = (iommu_pte *)(iommu_pgt_base + index * IOMMU_PAGE_SIZE); + spin_unlock(&loongson_iommu_pgtlock); + + dir_ctrl = (IOMMU_PGDIR_WIDTH << 26) | (IOMMU_PGDIR_SHIFT << 20); + dir_ctrl |= (IOMMU_PMD_WIDTH << 16) | (IOMMU_PMD_SHIFT << 10); + dir_ctrl |= (IOMMU_PTE_WIDTH << 6) | IOMMU_PAGE_SHIFT; + + pgd_pa = iommu_gmem_virt_to_phys((unsigned long)priv->pgd); + + pgd_hi = pgd_pa >> 32; + pgd_lo = pgd_pa & 0xffffffff; + + iommu_write_regl(LA_IOMMU_DIR_CTRL(priv->id), dir_ctrl); + iommu_write_regl(LA_IOMMU_PGD_HI(priv->id), pgd_hi); + iommu_write_regl(LA_IOMMU_PGD_LO(priv->id), pgd_lo); + + INIT_LIST_HEAD(&priv->dev_list); + + /* 0x10000000~0x8fffffff */ + priv->virtio_pgtable = (unsigned long *)__get_free_pages(GFP_KERNEL, 6); + if ((!priv->virtio_pgtable)) { + pr_err("Loongson-IOMMU: get free page err\n"); + goto fail_nomem; + } + memset(priv->virtio_pgtable, 0x0, LA_VIRTIO_PAGE_SIZE * 64); + + iommu_pgd_init(priv->pgd); + iommu_pgd_init(entry->va); + + return 0; + +fail_nomem: + spin_lock(&loongson_iommu_pgtlock); + free_pagetable(priv, priv->pgd); + spin_unlock(&loongson_iommu_pgtlock); + priv->pgd = 0; + + return -ENOMEM; +} + +static loongson_iommu_priv *loongson_iommu_alloc_priv(void) +{ + loongson_iommu_priv *priv; + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + + if (!priv) + return NULL; + + priv->id = domain_id_alloc(); + + if (loongson_iommu_priv_init(priv)) + goto out_err; + + add_domain_to_list(priv); + + return priv; + +out_err: + kfree(priv); + + return NULL; + +} + +static void loongson_iommu_priv_free(struct loongson_iommu_priv *priv) +{ + if (!priv) + return; + + /* 0x10000000~0x8fffffff */ + if (priv->virtio_pgtable) { + free_pages((unsigned long)priv->virtio_pgtable, 6); + priv->virtio_pgtable = NULL; + } + + del_domain_from_list(priv); + + if ((priv->id >= 0) && (priv->id < MAX_DOMAIN_ID)) { + domain_id_free(priv->id); + kfree(priv); + } +} + +static struct iommu_domain *loongson_iommu_domain_alloc(unsigned type) +{ + loongson_iommu_priv *priv; + + switch (type) { + case IOMMU_DOMAIN_UNMANAGED: + priv = loongson_iommu_alloc_priv(); + if (!priv) + return NULL; + + priv->domain.geometry.aperture_start = 0; + priv->domain.geometry.aperture_end = ~0ULL; + priv->domain.geometry.force_aperture = true; + + break; + default: + return NULL; + } + + return &priv->domain; +} + +static void loongson_iommu_domain_free(struct iommu_domain *domain) +{ + + loongson_iommu_priv *priv; + + priv = to_loongson_iommu_priv(domain); + if (!priv) { + pr_info("Loongson-IOMMU: priv is null\n"); + return; + } + + if (priv->dev_cnt > 0) + cleanup_domain(priv); + + spin_lock(&loongson_iommu_pgtlock); + free_pagetable(priv, priv->pgd); + spin_unlock(&loongson_iommu_pgtlock); + + loongson_iommu_priv_free(priv); + __iommu_flush_iotlb_all(priv); + if (!check_has_priv()) + iommu_translate_disable(); +} + +static int iommu_init_device(struct device *dev) +{ + unsigned char busnum; + unsigned short bdf, devid; + struct pci_dev *pdev = to_pci_dev(dev); + struct pci_bus *bus = pdev->bus; + struct loongson_iommu_dev_data *dev_data; + + bdf = pdev->devfn & 0xff; + busnum = bus->number; + if (busnum != 0) { + while (bus->parent->parent) + bus = bus->parent; + bdf = bus->self->devfn & 0xff; + + list_for_each_entry(dev_data, &loongson_dev_data_list, glist) { + if (dev_data->bdf == bdf) { + pr_info("Loonsgon-IOMMU: bdf:0x%x has added\n", bdf); + return 0; + } + } + + } + + dev_data = kzalloc(sizeof(*dev_data), GFP_KERNEL); + if (!dev_data) + return -ENOMEM; + + devid = PCI_DEVID(bus->number, bdf); + + dev_data->bdf = devid; + /* The initial state is 0, and 1 is added only when attach dev */ + dev_data->count = 0; + + dev_iommu_priv_set(dev, dev_data); + list_add_tail(&dev_data->glist, &loongson_dev_data_list); + + return 0; +} + +static struct iommu_device *loongson_iommu_probe_device(struct device *dev) +{ + int ret = 0; + + ret = iommu_init_device(dev); + if (ret < 0) + pr_err("Loongson-IOMMU: unable to alloc memory for dev_data\n"); + + return 0; +} + +static struct iommu_group *loongson_iommu_device_group(struct device *dev) +{ + struct iommu_group *group; + + /* + * We don't support devices sharing stream IDs other than PCI RID + * aliases, since the necessary ID-to-device lookup becomes rather + * impractical given a potential sparse 32-bit stream ID space. + */ + if (dev_is_pci(dev)) + group = pci_device_group(dev); + else + group = generic_device_group(dev); + + return group; +} + +static void loongson_iommu_release_device(struct device *dev) +{ + struct loongson_iommu_dev_data *dev_data; + + dev_data = dev_iommu_priv_get(dev); + if (dev_data) { + list_del(&dev_data->glist); + kfree(dev_data); + } +} + +static struct loongson_iommu_dev_data *iommu_get_devdata(loongson_iommu_priv *priv, + unsigned long bdf) +{ + struct loongson_iommu_dev_data *dev_data; + + /* Find from priv list */ + list_for_each_entry(dev_data, &priv->dev_list, list) { + if (dev_data->bdf == bdf) + return dev_data; + } + + return NULL; +} + +static int loongson_iommu_attach_dev(struct iommu_domain *domain, + struct device *dev) +{ + unsigned short bdf; + struct pci_dev *pdev = to_pci_dev(dev); + struct pci_bus *bus = pdev->bus; + unsigned char busnum = pdev->bus->number; + struct loongson_iommu_dev_data *dev_data; + loongson_iommu_priv *priv = to_loongson_iommu_priv(domain); + + bdf = pdev->devfn & 0xff; + if (busnum != 0) { + while (bus->parent->parent) + bus = bus->parent; + bdf = bus->self->devfn & 0xff; + } + + spin_lock(&priv->devlock); + dev_data = iommu_get_devdata(priv, bdf); + spin_unlock(&priv->devlock); + + if (dev_data) { + dev_data->count++; + pr_info("Loongson-IOMMU: bdf 0x%x devfn %x has attached,count:0x%x\n", + bdf, pdev->devfn, dev_data->count); + return 0; + } + + dev_data = dev_iommu_priv_get(dev); + dev_data->count++; + spin_lock(&priv->devlock); + do_attach(priv, dev_data); + spin_unlock(&priv->devlock); + + return 0; +} + +static void loongson_iommu_detach_dev(struct iommu_domain *domain, + struct device *dev) +{ + unsigned short bdf; + struct pci_dev *pdev = to_pci_dev(dev); + struct pci_bus *bus = pdev->bus; + unsigned char busnum = pdev->bus->number; + struct loongson_iommu_dev_data *dev_data; + loongson_iommu_priv *priv = to_loongson_iommu_priv(domain); + + bdf = pdev->devfn & 0xff; + if (busnum != 0) { + while (bus->parent->parent) + bus = bus->parent; + bdf = bus->self->devfn & 0xff; + } + + spin_lock(&priv->devlock); + dev_data = iommu_get_devdata(priv, bdf); + spin_unlock(&priv->devlock); + + if (dev_data == NULL) { + pr_info("Loongson-IOMMU: bdf 0x%x devfn %x count:0x%x, domainid:%d\n", + bdf, pdev->devfn & 0xff, dev_data->count, priv->id); + return; + } + + dev_data->count--; + + spin_lock(&priv->devlock); + do_detach(priv, dev_data); + spin_unlock(&priv->devlock); +} + +static unsigned long *loongson_iommu_walk_pgd(struct loongson_iommu_priv *priv, + iommu_pte *pgd, unsigned long iova) +{ + int index; + unsigned long va, pa; + unsigned long *pgd_entry, *shd_pgd_entry, *pmd_base; + unsigned long *pmd_entry, *shd_pmd_entry, *pte, *pte_base; + shadow_pg_entry *entry, *entry1, *new_entry; + + index = ((unsigned long)pgd - iommu_pgt_base) / IOMMU_PAGE_SIZE; + entry = index_to_pg_entry(index); + if (!entry) { + pr_err("Loongson-IOMMU: pgd:0x%lx, iova:0x%lx, index:%d\n", + (unsigned long)pgd, iova, index); + return NULL; + } + + va = (unsigned long)entry->va; + shd_pgd_entry = (unsigned long *)iommu_pgd_offset(va, iova); + pgd_entry = (unsigned long *)iommu_pgd_offset((unsigned long)pgd, iova); + + if (priv->is_hugepage) + return pmd_entry; + + if (iommu_pgd_none(shd_pgd_entry)) { + iommu_pte *new_pmd; + + new_entry = iommu_zalloc_page(priv); + if (!new_entry) { + pr_err("Loongson-IOMMU: new_entry alloc err iova:0x%lx\n", + iova); + return NULL; + } + index = new_entry->index; + new_pmd = (iommu_pte *)(iommu_pgt_base + index * IOMMU_PAGE_SIZE); + + iommu_pmd_init(new_entry->va); + iommu_pmd_init(new_pmd); + + /* fill shd_pgd_entry */ + *shd_pgd_entry = new_entry->pa & IOMMU_PAGE_MASK; + *shd_pgd_entry |= IOMMU_PTE_PR | IOMMU_PTE_IR | IOMMU_PTE_IW; + + /* fill gmem pgd_entry */ + *pgd_entry = iommu_gmem_virt_to_phys((unsigned long)new_pmd); + *pgd_entry &= IOMMU_PAGE_MASK; + *pgd_entry |= IOMMU_PTE_PR | IOMMU_PTE_IR | IOMMU_PTE_IW; + } + + shd_pmd_entry = iommu_pmd_offset((unsigned long *)shd_pgd_entry, iova); + + /* *shd_pgd_entry is pmd base , + * so entry1->index is the index of pmd_base in gmem + */ + pa = (unsigned long)(*shd_pgd_entry & IOMMU_PAGE_MASK); + entry1 = pa_to_pg_entry(pa); + index = entry1->index; + if (!entry1) { + pr_err("Loongson-IOMMU: index:%d, id:%d, priv->pgd:0x%lx\n", + index, priv->id, (unsigned long)priv->pgd); + return NULL; + } + pmd_base = (unsigned long *)(iommu_pgt_base + index * IOMMU_PAGE_SIZE); + pmd_entry = pmd_base + iommu_pmd_index(iova); + + if (iommu_pmd_none(shd_pmd_entry)) { + iommu_pte *new_pte; + + new_entry = iommu_zalloc_page(priv); + index = new_entry->index; + if (!new_entry) { + pr_err("Loongson-IOMMU: new_entry alloc err iova:0x%lx\n", + iova); + return NULL; + } + new_pte = (iommu_pte *)(iommu_pgt_base + index * IOMMU_PAGE_SIZE); + + /* fill shd_pmd_entry */ + *shd_pmd_entry = new_entry->pa & IOMMU_PAGE_MASK; + *shd_pmd_entry |= IOMMU_PTE_PR | IOMMU_PTE_IR | IOMMU_PTE_IW; + + /* fill gmem pmd_entry */ + *pmd_entry = iommu_gmem_virt_to_phys((unsigned long)new_pte); + *pmd_entry &= IOMMU_PAGE_MASK; + *pmd_entry |= IOMMU_PTE_PR | IOMMU_PTE_IR | IOMMU_PTE_IW; + } + + /* *shd_pmd_entry is pte base, + * so entry1->index is the index of pte_base in gmem + */ + pa = (unsigned long)(*shd_pmd_entry & IOMMU_PAGE_MASK); + entry1 = pa_to_pg_entry(pa); + index = entry1->index; + if (!entry1) { + pr_err("Loongson-IOMMU: index:%d, id:%d, priv->pgd:0x%lx\n", + index, priv->id, (unsigned long)priv->pgd); + return NULL; + } + /* clear gmem pte */ + pte_base = (unsigned long *)(iommu_pgt_base + index * IOMMU_PAGE_SIZE); + pte = pte_base + iommu_pte_index(iova); + + return pte; +} + +static unsigned long *loongson_iommu_pte_for_iova(struct loongson_iommu_priv *priv, + unsigned long iova) +{ + return loongson_iommu_walk_pgd(priv, priv->pgd, iova); +} + +static unsigned long *iommu_fetch_pte(iommu_pte *pgd, unsigned long iova, bool iommu_is_hugepage) +{ + unsigned long *pgd_entry; + unsigned long *pmd_entry; + + pgd_entry = (unsigned long *)iommu_pgd_offset((unsigned long)pgd, iova); + + if (iommu_is_hugepage || IOMMU_PTE_HUGEPAGE(*pmd_entry)) + return pmd_entry; + + if (!IOMMU_PTE_PRESENT(*pgd_entry)) { + pr_err("Loongson-IOMMU: pmd is not present\n"); + return NULL; + } + + pmd_entry = iommu_pmd_offset((unsigned long *)pgd_entry, iova); + if (!IOMMU_PTE_PRESENT(*pmd_entry)) { + pr_err("Loongson-IOMMU: pte is not present\n"); + return NULL; + } + + return iommu_pte_offset(pmd_entry, iova); +} + +static unsigned long *iommu_fetch_pmd_entry(iommu_pte *pgd, unsigned long iova, bool iommu_is_hugepage) +{ + unsigned long *pgd_entry; + unsigned long *pmd_entry; + + pgd_entry = (unsigned long *)iommu_pgd_offset((unsigned long)pgd, iova); + + if (iommu_is_hugepage) + return pgd_entry; + + if (!IOMMU_PTE_PRESENT(*pgd_entry)) { + pr_err("Loongson-IOMMU: pmd is not present\n"); + return NULL; + } + + pmd_entry = iommu_pmd_offset((unsigned long *)pgd_entry, iova); + if (!IOMMU_PTE_PRESENT(*pmd_entry)) { + pr_err("Loongson-IOMMU: pte is not present\n"); + return NULL; + } + + return pmd_entry; +} + +static int iommu_map_page(struct loongson_iommu_priv *priv, unsigned long iova, + phys_addr_t pa, size_t size, int prot, gfp_t gfp) +{ + + int ret = 0, index; + unsigned long page_size, page_mask; + unsigned long *pte, *shd_pte; + shadow_pg_entry *entry; + + /* 0x10000000~0x8fffffff */ + if ((iova >= IOVA_START) && (iova < IOVA_END0)) { + iova -= IOVA_START; + pte = (unsigned long *)(priv->virtio_pgtable); + while (size > 0) { + pte[iova >> LA_VIRTIO_PAGE_SHIFT] = + pa & LA_VIRTIO_PAGE_MASK; + size -= 0x4000; + iova += 0x4000; + pa += 0x4000; + } + return 0; + } + + index = ((unsigned long)priv->pgd - iommu_pgt_base) / IOMMU_PAGE_SIZE; + + spin_lock(&loongson_iommu_pgtlock); + + entry = index_to_pg_entry(index); + if (!entry) { + pr_err("Loonson-IOMMU: index:%d, iova:0x%lx, size:0x%lx\n", + index, iova, size); + ret = -EFAULT; + goto out; + } + + while (size > 0) { + /* page_size/mask is set to huge page, + * only when iova and size are both aligned to huge page , + */ + + if ((iova | size) & (IOMMU_PMD_SIZE - 1)) { + page_size = IOMMU_PAGE_SIZE; + page_mask = IOMMU_PAGE_MASK; + priv->is_hugepage = 0; + } else { + page_size = IOMMU_PMD_SIZE; + page_mask = IOMMU_PMD_MASK; + priv->is_hugepage = 1; + } + + /* Fetch gmem pte */ + pte = loongson_iommu_pte_for_iova(priv, iova); + + /* Fetch shadow pte */ + shd_pte = iommu_fetch_pte((iommu_pte *)entry->va, iova, priv->is_hugepage); + + if (!shd_pte) { + pr_err("Loongson-IOMMU: fetch shadow pte for iova 0x%lx err\n", + iova); + ret = -EFAULT; + goto out; + } + /* Fill gmem pte */ + *pte = pa & page_mask; + *pte |= IOMMU_PTE_PR | IOMMU_PTE_IR | IOMMU_PTE_IW | (priv->is_hugepage << 1); + + /* Fill shadow pte */ + *shd_pte = pa & page_mask; + *shd_pte |= IOMMU_PTE_PR | IOMMU_PTE_IR | IOMMU_PTE_IW | (priv->is_hugepage << 1); + + size -= page_size; + iova += page_size; + pa += page_size; + } + + if (check_has_priv()) + __iommu_flush_iotlb_all(priv); + + spin_unlock(&loongson_iommu_pgtlock); + + return ret; + +out: + if (check_has_priv()) + __iommu_flush_iotlb_all(priv); + + spin_unlock(&loongson_iommu_pgtlock); + + return ret; + +} + +static size_t iommu_unmap_page(struct loongson_iommu_priv *priv, + unsigned long iova, size_t size) +{ + int index; + unsigned long pa, page_size, page_mask; + unsigned long *pte, *shd_pte, *shd_pmd_entry, *pte_base; + size_t unmap_len = 0; + iommu_pte *va; + shadow_pg_entry *entry, *entry1; + + /* 0x10000000~0x8fffffff */ + if ((iova >= IOVA_START) && (iova < IOVA_END0)) { + iova -= IOVA_START; + pte = (unsigned long *)priv->virtio_pgtable; + while (size > 0) { + pte[iova >> LA_VIRTIO_PAGE_SHIFT] = 0; + size -= 0x4000; + unmap_len += 0x4000; + iova += 0x4000; + } + unmap_len += size; + return unmap_len; + } + + index = ((unsigned long)priv->pgd - iommu_pgt_base) / IOMMU_PAGE_SIZE; + + spin_lock(&loongson_iommu_pgtlock); + + entry = index_to_pg_entry(index); + if (!entry) { + pr_err("Loongson-IOMMU: index:%d, iova:0x%lx,size:0x%lx\n", + index, iova, size); + spin_unlock(&loongson_iommu_pgtlock); + return unmap_len; + } + + while (unmap_len < size) { + if ((iova | size) & (IOMMU_PMD_SIZE - 1)) { + page_size = IOMMU_PAGE_SIZE; + page_mask = IOMMU_PAGE_MASK; + priv->is_hugepage = 0; + } else { + page_size = IOMMU_PMD_SIZE; + page_mask = IOMMU_PMD_MASK; + priv->is_hugepage = 1; + } + + shd_pte = iommu_fetch_pte((iommu_pte *)entry->va, iova, priv->is_hugepage); + + if (shd_pte && IOMMU_PTE_PRESENT(*shd_pte)) { + /* clear shd_pte*/ + va = (iommu_pte *)entry->va; + shd_pmd_entry = iommu_fetch_pmd_entry(va, iova, priv->is_hugepage); + /* *shd_pmd_entry is pte base, + * so entry1->index is the index of pte_base in gmem + */ + pa = (unsigned long)(*shd_pmd_entry & IOMMU_PAGE_MASK); + entry1 = pa_to_pg_entry(pa); + if (!entry1) { + spin_unlock(&loongson_iommu_pgtlock); + return unmap_len; + } + /* clear gmem pte*/ + index = entry1->index; + pte_base = (unsigned long *)(iommu_pgt_base + index * IOMMU_PAGE_SIZE); + pte = pte_base + iommu_pte_index(iova); + *shd_pte = 0ULL; + *pte = 0ULL; + } + + unmap_len += page_size; + iova += page_size; + } + + if (check_has_priv()) + __iommu_flush_iotlb_all(priv); + + spin_unlock(&loongson_iommu_pgtlock); + + return unmap_len; + +} + +static int loongson_iommu_map(struct iommu_domain *domain, unsigned long iova, + phys_addr_t pa, size_t len, int prot, gfp_t gfp) +{ + struct loongson_iommu_priv *priv = to_loongson_iommu_priv(domain); + + return iommu_map_page(priv, iova, pa, len, prot, GFP_KERNEL); +} + +static size_t loongson_iommu_unmap(struct iommu_domain *domain, unsigned long iova, + size_t size, struct iommu_iotlb_gather *gather) +{ + struct loongson_iommu_priv *priv = to_loongson_iommu_priv(domain); + + return iommu_unmap_page(priv, iova, size); +} + +static phys_addr_t loongson_iommu_iova_to_pa(struct iommu_domain *domain, + dma_addr_t iova) +{ + int ret = 0, index; + unsigned long *shd_pte, *pte; + unsigned long pa, offset, tmpva, page_size, page_mask; + shadow_pg_entry *entry; + struct loongson_iommu_priv *priv = to_loongson_iommu_priv(domain); + + /* 0x10000000~0x8fffffff */ + if ((iova >= IOVA_START) && (iova < IOVA_END0)) { + tmpva = iova & LA_VIRTIO_PAGE_MASK; + pte = (unsigned long *)priv->virtio_pgtable; + offset = iova & ((1ULL << LA_VIRTIO_PAGE_SHIFT) - 1); + pa = pte[(tmpva - IOVA_START) >> 14] + offset; + return pa; + } + + index = ((unsigned long)priv->pgd - iommu_pgt_base) / IOMMU_PAGE_SIZE; + + spin_lock(&loongson_iommu_pgtlock); + + entry = index_to_pg_entry(index); + if (!entry) { + pr_err("Loongson-IOMMU: index:%d, iova:0x%llx\n", index, iova); + ret = -EFAULT; + goto out; + } + + shd_pte = iommu_fetch_pte((iommu_pte *)entry->va, iova, 0); + + if (!shd_pte || !IOMMU_PTE_PRESENT(*shd_pte)) { + ret = -EFAULT; + pr_err("Loongson-IOMMU: shadow pte is null or not present\n"); + goto out; + } + + if (IOMMU_PTE_HUGEPAGE(*shd_pte)) { + page_size = IOMMU_PMD_SIZE; + page_mask = IOMMU_PMD_MASK; + } else { + page_size = IOMMU_PAGE_SIZE; + page_mask = IOMMU_PAGE_MASK; + } + + pa = *shd_pte & page_mask; + pa |= (iova & (page_size - 1)); + + if (check_has_priv()) + __iommu_flush_iotlb_all(priv); + + spin_unlock(&loongson_iommu_pgtlock); + return (phys_addr_t)pa; +out: + spin_unlock(&loongson_iommu_pgtlock); + return ret; +} + +static phys_addr_t loongson_iommu_iova_to_phys(struct iommu_domain *domain, + dma_addr_t iova) +{ + phys_addr_t pa; + + pa = loongson_iommu_iova_to_pa(domain, iova); + + return pa; +} + +static void loongson_iommu_flush_iotlb_all(struct iommu_domain *domain) +{ + int ret; + loongson_iommu_priv *priv = to_loongson_iommu_priv(domain); + + spin_lock(&loongson_iommu_pgtlock); + ret = __iommu_flush_iotlb_all(priv); + spin_unlock(&loongson_iommu_pgtlock); +} + +static void loongson_iommu_iotlb_sync(struct iommu_domain *domain, + struct iommu_iotlb_gather *gather) +{ + loongson_iommu_flush_iotlb_all(domain); +} + +static struct iommu_ops loongson_iommu_ops = { + .capable = loongson_iommu_capable, + .domain_alloc = loongson_iommu_domain_alloc, + .domain_free = loongson_iommu_domain_free, + .attach_dev = loongson_iommu_attach_dev, + .detach_dev = loongson_iommu_detach_dev, + .map = loongson_iommu_map, + .unmap = loongson_iommu_unmap, + .iova_to_phys = loongson_iommu_iova_to_phys, + .probe_device = loongson_iommu_probe_device, + .release_device = loongson_iommu_release_device, + .device_group = loongson_iommu_device_group, + .pgsize_bitmap = LA_IOMMU_PGSIZE, + .flush_iotlb_all = loongson_iommu_flush_iotlb_all, + .iotlb_sync = loongson_iommu_iotlb_sync, + +}; + +static int loongson_iommu_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + int ret = 1; + int bitmap_sz = 0; + int tmp; + + iommu_mem_base = (unsigned long)pci_resource_start(pdev, 0); + iommu_mem_base += UNCAC_BASE; + iommu_pgt_base = (unsigned long)pci_resource_start(pdev, 2); + iommu_pgt_base += UNCAC_BASE; + + pci_info(pdev, "iommu memory address:0x%lx,page table address:0x%lx\n", + iommu_mem_base, iommu_pgt_base); + + tmp = MAX_DOMAIN_ID / 8; + bitmap_sz = (MAX_DOMAIN_ID % 8) ? (tmp + 1) : tmp; + loongson_iommu_domain_alloc_bitmap = bitmap_zalloc(bitmap_sz, GFP_KERNEL); + if (loongson_iommu_domain_alloc_bitmap == NULL) { + pr_err("Loongson-IOMMU: domain bitmap alloc err bitmap_sz:%d\n", bitmap_sz); + goto out_err; + } + + tmp = MAX_ATTACHED_DEV_ID / 8; + bitmap_sz = (MAX_ATTACHED_DEV_ID % 8) ? (tmp + 1) : tmp; + loongson_iommu_devtable_bitmap = bitmap_zalloc(bitmap_sz, GFP_KERNEL); + if (loongson_iommu_devtable_bitmap == NULL) { + pr_err("Loongson-IOMMU: devtable bitmap alloc err bitmap_sz:%d\n", bitmap_sz); + goto out_err_1; + } + + tmp = MAX_PAGES_NUM / 8; + bitmap_sz = (MAX_PAGES_NUM % 8) ? (tmp + 1) : tmp; + loongson_iommu_pgtable_alloc_bitmap = bitmap_zalloc(bitmap_sz, GFP_KERNEL); + if (loongson_iommu_pgtable_alloc_bitmap == NULL) { + pr_err("Loongson-IOMMU: pgtable bitmap alloc err bitmap_sz:%d\n", bitmap_sz); + goto out_err_2; + } + + bus_set_iommu(&pci_bus_type, &loongson_iommu_ops); + + return 0; + +out_err_2: + kfree(loongson_iommu_devtable_bitmap); +out_err_1: + kfree(loongson_iommu_domain_alloc_bitmap); +out_err: + + return ret; +} + +static void loongson_iommu_remove(struct pci_dev *pdev) +{ + kfree(loongson_iommu_domain_alloc_bitmap); + loongson_iommu_domain_alloc_bitmap = NULL; + + kfree(loongson_iommu_devtable_bitmap); + loongson_iommu_devtable_bitmap = NULL; + + kfree(loongson_iommu_pgtable_alloc_bitmap); + loongson_iommu_pgtable_alloc_bitmap = NULL; +} + +static int __init loonson_iommu_setup(char *str) +{ + if (!str) + return -EINVAL; + + while (*str) { + if (!strncmp(str, "on", 2)) { + loongson_iommu_disable = 0; + pr_info("IOMMU enabled\n"); + } else if (!strncmp(str, "off", 3)) { + loongson_iommu_disable = 1; + pr_info("IOMMU disabled\n"); + } + str += strcspn(str, ","); + while (*str == ',') + str++; + } + return 0; +} +__setup("loongson_iommu=", loonson_iommu_setup); + +static const struct pci_device_id loongson_iommu_pci_tbl[] = { + { PCI_DEVICE(0x14, 0x7a1f) }, + { 0, } +}; + +static struct pci_driver loongson_iommu_driver = { + .name = "loongson-iommu", + .probe = loongson_iommu_probe, + .remove = loongson_iommu_remove, + .id_table = loongson_iommu_pci_tbl, +}; + +static int __init loongson_iommu_driver_init(void) +{ + int ret = 0; + + if (!loongson_iommu_disable) { + ret = pci_register_driver(&loongson_iommu_driver); + if (ret != 0) { + pr_err("Failed to register IOMMU driver\n"); + return ret; + } + } + + return ret; +} + +static void __exit loongson_iommu_driver_exit(void) +{ + if (!loongson_iommu_disable) + pci_unregister_driver(&loongson_iommu_driver); +} + +module_init(loongson_iommu_driver_init); +module_exit(loongson_iommu_driver_exit); diff --git a/drivers/iommu/loongson/iommu.h b/drivers/iommu/loongson/iommu.h new file mode 100644 index 000000000000..f6b765274022 --- /dev/null +++ b/drivers/iommu/loongson/iommu.h @@ -0,0 +1,210 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Loongson IOMMU Driver + * + * Copyright (C) 2020-2021 Loongson Technology Ltd. + * Author: Lv Chen + * Wang Yang + */ + +#ifndef LOONGSON_IOMMU_H +#define LOONGSON_IOMMU_H + +#include +#include +#include +#include +#include +#include +#include +#include + +#define IOVA_WIDTH 47 + +/* Bit value definition for I/O PTE fields */ +#define IOMMU_PTE_PR (1ULL << 0) /* Present */ +#define IOMMU_PTE_HP (1ULL << 1) /* HugePage */ +#define IOMMU_PTE_IR (1ULL << 2) /* Readable */ +#define IOMMU_PTE_IW (1ULL << 3) /* Writeable */ + +#define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_PR) +#define IOMMU_PTE_HUGEPAGE(pte) ((pte) & IOMMU_PTE_HP) + +#define LA_IOMMU_PGSIZE (SZ_16K | SZ_32M) + +/* IOMMU page table */ +#define IOMMU_PAGE_SHIFT 14 +#define IOMMU_PAGE_SIZE (_AC(1, UL) << IOMMU_PAGE_SHIFT) +#define IOMMU_PAGE_MASK (~(IOMMU_PAGE_SIZE - 1)) +#define IOMMU_PTRS_PER_PTE (IOMMU_PAGE_SIZE >> 3) +#define IOMMU_PMD_SHIFT (IOMMU_PAGE_SHIFT + (IOMMU_PAGE_SHIFT - 3)) +#define IOMMU_PMD_SIZE (1UL << IOMMU_PMD_SHIFT) +#define IOMMU_PMD_MASK (~(IOMMU_PMD_SIZE-1)) +#define IOMMU_PTRS_PER_PMD (IOMMU_PAGE_SIZE >> 3) +#define IOMMU_PGDIR_SHIFT (IOMMU_PMD_SHIFT + (IOMMU_PAGE_SHIFT - 3)) +#define IOMMU_PGDIR_SIZE (1ULL << IOMMU_PGDIR_SHIFT) +#define IOMMU_PGDIR_MASK (~(IOMMU_PGDIR_SIZE-1)) +#define IOMMU_PTRS_PER_PGD (IOMMU_PAGE_SIZE >> 3) +#define IOMMU_PTE_WIDTH (IOMMU_PMD_SHIFT - IOMMU_PAGE_SHIFT) +#define IOMMU_PMD_WIDTH (IOMMU_PGDIR_SHIFT - IOMMU_PMD_SHIFT) +#define IOMMU_PGDIR_WIDTH (IOVA_WIDTH - IOMMU_PGDIR_SHIFT) + +/* Virtio page use size of 16k */ +#define LA_VIRTIO_PAGE_SHIFT 14 +#define LA_VIRTIO_PAGE_SIZE (_AC(1, UL) << LA_VIRTIO_PAGE_SHIFT) +#define LA_VIRTIO_PAGE_MASK (~((1ULL << LA_VIRTIO_PAGE_SHIFT) - 1)) + +/* Bits of iommu map address space field */ +#define LA_IOMMU_PFN_LO 0x0 +#define PFN_LO_SHIFT 12 +#define LA_IOMMU_PFN_HI 0x4 +#define PFN_HI_MASK 0x3ffff +#define LA_IOMMU_VFN_LO 0x8 +#define VFN_LO_SHIFT 12 +#define LA_IOMMU_VFN_HI 0xC +#define VFN_HI_MASK 0x3ffff + +/* wired | index | domain | shift */ +#define LA_IOMMU_WIDS 0x10 +/* valid | busy | tlbar/aw | cmd */ +#define LA_IOMMU_VBTC 0x14 +#define IOMMU_PGTABLE_BUSY (1 << 16) +/* enable |index | valid | domain | bdf */ +#define LA_IOMMU_EIVDB 0x18 +/* enable | valid | cmd */ +#define LA_IOMMU_CMD 0x1C +#define LA_IOMMU_PGD0_LO 0x20 +#define LA_IOMMU_PGD0_HI 0x24 +#define STEP_PGD 0x8 +#define STEP_PGD_SHIFT 3 +#define LA_IOMMU_PGD_LO(domain_id) \ + (LA_IOMMU_PGD0_LO + ((domain_id) << STEP_PGD_SHIFT)) +#define LA_IOMMU_PGD_HI(domain_id) \ + (LA_IOMMU_PGD0_HI + ((domain_id) << STEP_PGD_SHIFT)) + +#define LA_IOMMU_DIR_CTRL0 0xA0 +#define LA_IOMMU_DIR_CTRL1 0xA4 +#define LA_IOMMU_DIR_CTRL(x) (LA_IOMMU_DIR_CTRL0 + ((x) << 2)) + +#define LA_IOMMU_SAFE_BASE_HI 0xE0 +#define LA_IOMMU_SAFE_BASE_LO 0xE4 +#define LA_IOMMU_EX_ADDR_LO 0xE8 +#define LA_IOMMU_EX_ADDR_HI 0xEC + +#define LA_IOMMU_PFM_CNT_EN 0x100 + +#define LA_IOMMU_RD_HIT_CNT_0 0x110 +#define LA_IOMMU_RD_MISS_CNT_O 0x114 +#define LA_IOMMU_WR_HIT_CNT_0 0x118 +#define LA_IOMMU_WR_MISS_CNT_0 0x11C +#define LA_IOMMU_RD_HIT_CNT_1 0x120 +#define LA_IOMMU_RD_MISS_CNT_1 0x124 +#define LA_IOMMU_WR_HIT_CNT_1 0x128 +#define LA_IOMMU_WR_MISS_CNT_1 0x12C +#define LA_IOMMU_RD_HIT_CNT_2 0x130 +#define LA_IOMMU_RD_MISS_CNT_2 0x134 +#define LA_IOMMU_WR_HIT_CNT_2 0x138 +#define LA_IOMMU_WR_MISS_CNT_2 0x13C + +#define MAX_DOMAIN_ID 16 +#define MAX_ATTACHED_DEV_ID 16 +#define MAX_PAGES_NUM (SZ_128M / IOMMU_PAGE_SIZE) + +/* To find an entry in an iommu page table directory */ +#define iommu_pgd_index(addr) \ + (((addr) >> IOMMU_PGDIR_SHIFT) & (IOMMU_PTRS_PER_PGD - 1)) +#define iommu_pmd_index(addr) \ + (((addr) >> IOMMU_PMD_SHIFT) & (IOMMU_PTRS_PER_PMD - 1)) +#define iommu_pte_index(addr) \ + (((addr) >> IOMMU_PAGE_SHIFT) & (IOMMU_PTRS_PER_PTE - 1)) +#define iommu_page_offset(addr) \ + (addr & (IOMMU_PAGE_SIZE - 1)) + +#define iommu_pgd_offset(pgd, addr) (pgd + iommu_pgd_index(addr)) + +/* IOMMU iommu_table entry */ +typedef struct { unsigned long iommu_pte; } iommu_pte; + +static inline void *iommu_gmem_phys_to_virt(unsigned long paddr) +{ + return (void *)(UNCAC_BASE + paddr); +} + +static inline unsigned long iommu_gmem_virt_to_phys(unsigned long va) +{ + return (unsigned long)(va - UNCAC_BASE); +} + +static inline void *iommu_phys_to_virt(unsigned long paddr) +{ + return phys_to_virt(paddr); +} + +static inline u64 iommu_virt_to_phys(void *vaddr) +{ + return (u64)virt_to_phys(vaddr); +} + +static inline unsigned long *iommu_pmd_offset(unsigned long *pgd_entry, + unsigned long addr) +{ + unsigned long pmd_base; + + pmd_base = (*pgd_entry) & IOMMU_PAGE_MASK; + pmd_base = (unsigned long)iommu_phys_to_virt(pmd_base); + + return (unsigned long *)(pmd_base) + iommu_pmd_index(addr); +} + +static inline unsigned long *iommu_pte_offset(unsigned long *pmd_entry, + unsigned long addr) +{ + unsigned long pte_base; + + pte_base = (*pmd_entry) & IOMMU_PAGE_MASK; + pte_base = (unsigned long)iommu_phys_to_virt(pte_base); + + return (unsigned long *)(pte_base) + iommu_pte_index(addr); +} + +/* One vm is equal to a domain,one domain has a priv */ +typedef struct loongson_iommu_priv { + /* For list of all domains */ + struct list_head list; + /* List of all devices in this domain */ + struct list_head dev_list; + struct iommu_domain domain; + struct device *dev; + /* priv dev list lock */ + spinlock_t devlock; + /* 0x10000000~0x8fffffff */ + unsigned long *virtio_pgtable; + short id; + iommu_pte *pgd; + /* devices assigned to this domain */ + unsigned int dev_cnt; + int used_pages; + bool is_hugepage; +} loongson_iommu_priv; + +/* A device for passthrough */ +struct loongson_iommu_dev_data { + struct list_head list; /* for domain->dev_list */ + struct list_head glist; /* for global dev_data_list */ + loongson_iommu_priv *priv; + unsigned short bdf; + int count; + int index; /* index in device table */ +}; + +/* shadow page table entry */ +typedef struct shadow_pg_entry { + struct list_head pglist; /* for iommu_shadow_pglist */ + unsigned long *va; /* virtual address base for shadow page */ + unsigned long pa; /* physical address base for shadow page */ + int index; /* index 128M gmem */ + int dirty; + int present; +} shadow_pg_entry; + +#endif /* LOONGSON_IOMMU_H */ diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index 985bbb09c8d9..39c22118e070 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig @@ -22,7 +22,7 @@ config VFIO_VIRQFD menuconfig VFIO tristate "VFIO Non-Privileged userspace driver framework" select IOMMU_API - select VFIO_IOMMU_TYPE1 if MMU && (X86 || S390 || ARM || ARM64 || SW64) + select VFIO_IOMMU_TYPE1 if MMU && (X86 || S390 || ARM || ARM64 || SW64 || LOONGARCH) help VFIO provides a framework for secure userspace device drivers. See Documentation/driver-api/vfio.rst for more details. diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 1eb125f5d12b..67c1ea372d16 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -160,6 +160,7 @@ #define PCI_DEVICE_ID_LOONGSON_OHCI 0x7a24 #define PCI_DEVICE_ID_LOONGSON_LPC 0x7a0c #define PCI_DEVICE_ID_LOONGSON_DMA 0x7a0f +#define PCI_DEVICE_ID_LOONGSON_IOMMU 0x7a1f #define PCI_VENDOR_ID_TTTECH 0x0357 #define PCI_DEVICE_ID_TTTECH_MC322 0x000a -- Gitee From 8bed3fa58fcf90cea175762339403a03ed67fee5 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Mon, 17 Sep 2018 16:53:45 +0800 Subject: [PATCH 53/59] Loongson: Add LS2H/LS7A i2c/rtc driver support Change-Id: Id3a9db3359affcfce9d8466d60fb7bfbfad58881 Signed-off-by: Huacai Chen --- arch/loongarch/configs/loongson3_defconfig | 2 + drivers/i2c/busses/Kconfig | 7 + drivers/i2c/busses/Makefile | 1 + drivers/i2c/busses/i2c-gpio.c | 39 ++- drivers/i2c/busses/i2c-ls2x.c | 372 +++++++++++++++++++++ drivers/i2c/i2c-core-base.c | 9 +- drivers/rtc/Kconfig | 7 + drivers/rtc/Makefile | 1 + drivers/rtc/rtc-ls2x.c | 327 ++++++++++++++++++ 9 files changed, 763 insertions(+), 2 deletions(-) create mode 100644 drivers/i2c/busses/i2c-ls2x.c create mode 100644 drivers/rtc/rtc-ls2x.c diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig index b9ccbce3419c..ee29270ffbbf 100644 --- a/arch/loongarch/configs/loongson3_defconfig +++ b/arch/loongarch/configs/loongson3_defconfig @@ -546,6 +546,7 @@ CONFIG_RAW_DRIVER=m CONFIG_I2C_CHARDEV=y CONFIG_I2C_PIIX4=y CONFIG_I2C_GPIO=y +CONFIG_I2C_LS2X=y CONFIG_SPI=y CONFIG_GPIO_SYSFS=y CONFIG_GPIO_LOONGSON=y @@ -643,6 +644,7 @@ CONFIG_USB_GADGET=y CONFIG_INFINIBAND=m CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_EFI=y +CONFIG_RTC_DRV_LS2X=y CONFIG_UIO=m CONFIG_UIO_PDRV_GENIRQ=m CONFIG_UIO_DMEM_GENIRQ=m diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 036fdcee5eb3..52dfe1c4aeab 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -866,6 +866,13 @@ config I2C_OWL Say Y here if you want to use the I2C bus controller on the Actions Semiconductor Owl SoC's. +config I2C_LS2X + tristate "Loongson LS2X I2C adapter" + depends on MACH_LOONGSON64 + help + If you say yes to this option, support will be included for the + I2C interface on the Loongson's LS2H/LS2K/LS7A Platform-Bridge. + config I2C_PASEMI tristate "PA Semi SMBus interface" depends on PPC_PASEMI && PCI diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile index e6d5d108e22b..f600bf3a0806 100644 --- a/drivers/i2c/busses/Makefile +++ b/drivers/i2c/busses/Makefile @@ -89,6 +89,7 @@ obj-$(CONFIG_I2C_NPCM7XX) += i2c-npcm7xx.o obj-$(CONFIG_I2C_OCORES) += i2c-ocores.o obj-$(CONFIG_I2C_OMAP) += i2c-omap.o obj-$(CONFIG_I2C_OWL) += i2c-owl.o +obj-$(CONFIG_I2C_LS2X) += i2c-ls2x.o obj-$(CONFIG_I2C_PASEMI) += i2c-pasemi.o obj-$(CONFIG_I2C_PCA_PLATFORM) += i2c-pca-platform.o obj-$(CONFIG_I2C_PMCMSP) += i2c-pmcmsp.o diff --git a/drivers/i2c/busses/i2c-gpio.c b/drivers/i2c/busses/i2c-gpio.c index a4a6825c8758..8f291a58da93 100644 --- a/drivers/i2c/busses/i2c-gpio.c +++ b/drivers/i2c/busses/i2c-gpio.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -318,6 +319,24 @@ static void of_i2c_gpio_get_props(struct device_node *np, of_property_read_bool(np, "i2c-gpio,scl-output-only"); } +static void acpi_i2c_gpio_get_props(struct device *dev, + struct i2c_gpio_platform_data *pdata) +{ + u32 reg; + + device_property_read_u32(dev, "delay-us", &pdata->udelay); + + if (!device_property_read_u32(dev, "timeout-ms", ®)) + pdata->timeout = msecs_to_jiffies(reg); + + pdata->sda_is_open_drain = + device_property_read_bool(dev, "sda-open-drain"); + pdata->scl_is_open_drain = + device_property_read_bool(dev, "scl-open-drain"); + pdata->scl_is_output_only = + device_property_read_bool(dev, "scl-output-only"); +} + static struct gpio_desc *i2c_gpio_get_desc(struct device *dev, const char *con_id, unsigned int index, @@ -363,6 +382,8 @@ static int i2c_gpio_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct device_node *np = dev->of_node; enum gpiod_flags gflags; + acpi_status status; + unsigned long long id; int ret; priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); @@ -375,6 +396,8 @@ static int i2c_gpio_probe(struct platform_device *pdev) if (np) { of_i2c_gpio_get_props(np, pdata); + } else if (ACPI_COMPANION(dev)) { + acpi_i2c_gpio_get_props(dev, pdata); } else { /* * If all platform data settings are zero it is OK @@ -445,7 +468,13 @@ static int i2c_gpio_probe(struct platform_device *pdev) adap->dev.parent = dev; adap->dev.of_node = np; - adap->nr = pdev->id; + if (ACPI_COMPANION(dev)) { + status = acpi_evaluate_integer(ACPI_HANDLE(dev), "_UID", NULL, &id); + if (ACPI_SUCCESS(status) && (id >= 0)) { + adap->nr = id; + } + } else + adap->nr = pdev->id; ret = i2c_bit_add_numbered_bus(adap); if (ret) return ret; @@ -491,10 +520,18 @@ static const struct of_device_id i2c_gpio_dt_ids[] = { MODULE_DEVICE_TABLE(of, i2c_gpio_dt_ids); #endif + +static const struct acpi_device_id i2c_gpio_acpi_match[] = { + {"LOON0005"}, + {} +}; +MODULE_DEVICE_TABLE(acpi, i2c_gpio_acpi_match); + static struct platform_driver i2c_gpio_driver = { .driver = { .name = "i2c-gpio", .of_match_table = of_match_ptr(i2c_gpio_dt_ids), + .acpi_match_table = ACPI_PTR(i2c_gpio_acpi_match), }, .probe = i2c_gpio_probe, .remove = i2c_gpio_remove, diff --git a/drivers/i2c/busses/i2c-ls2x.c b/drivers/i2c/busses/i2c-ls2x.c new file mode 100644 index 000000000000..c59130ff3059 --- /dev/null +++ b/drivers/i2c/busses/i2c-ls2x.c @@ -0,0 +1,372 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Loongson-2H/7A I2C master mode driver + * + * Copyright (C) 2013 Loongson Technology Corporation Limited + * Copyright (C) 2014-2017 Lemote, Inc. + * + * Originally written by liushaozong + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define LS2X_I2C_PRER_LO_REG 0x0 +#define LS2X_I2C_PRER_HI_REG 0x1 +#define LS2X_I2C_CTR_REG 0x2 +#define LS2X_I2C_TXR_REG 0x3 +#define LS2X_I2C_RXR_REG 0x3 +#define LS2X_I2C_CR_REG 0x4 +#define LS2X_I2C_SR_REG 0x4 + +#define CR_START 0x80 +#define CR_STOP 0x40 +#define CR_READ 0x20 +#define CR_WRITE 0x10 +#define CR_ACK 0x8 +#define CR_IACK 0x1 + +#define SR_NOACK 0x80 +#define SR_BUSY 0x40 +#define SR_AL 0x20 +#define SR_TIP 0x2 +#define SR_IF 0x1 + +#define i2c_readb(addr) readb(dev->base + addr) +#define i2c_writeb(val, addr) writeb(val, dev->base + addr) + +#ifdef LS2X_I2C_DEBUG +#define i2c_debug(fmt, args...) printk(KERN_CRIT fmt, ##args) +#else +#define i2c_debug(fmt, args...) +#endif + +struct ls2x_i2c_dev { + spinlock_t lock; + unsigned int suspended:1; + struct device *dev; + void __iomem *base; + int irq; + struct completion cmd_complete; + struct resource *ioarea; + struct i2c_adapter adapter; +}; + +static void i2c_stop(struct ls2x_i2c_dev *dev) +{ +again: + i2c_writeb(CR_STOP, LS2X_I2C_CR_REG); + i2c_readb(LS2X_I2C_SR_REG); + while (i2c_readb(LS2X_I2C_SR_REG) & SR_BUSY) + goto again; +} + +static int i2c_start(struct ls2x_i2c_dev *dev, + int dev_addr, int flags) +{ + int retry = 5; + unsigned char addr = (dev_addr & 0x7f) << 1; + addr |= (flags & I2C_M_RD) ? 1 : 0; + +start: + mdelay(1); + i2c_writeb(addr, LS2X_I2C_TXR_REG); + i2c_debug("%s : i2c device address: 0x%x\n", + __func__, __LINE__, addr); + i2c_writeb((CR_START | CR_WRITE), LS2X_I2C_CR_REG); + while (i2c_readb(LS2X_I2C_SR_REG) & SR_TIP) + ; + + if (i2c_readb(LS2X_I2C_SR_REG) & SR_NOACK) { + i2c_stop(dev); + while (retry--) + goto start; + pr_info("There is no i2c device ack\n"); + return 0; + } + return 1; +} + +static void ls2x_i2c_reginit(struct ls2x_i2c_dev *dev) +{ + i2c_writeb(0, LS2X_I2C_CTR_REG); + i2c_writeb(0x2c, LS2X_I2C_PRER_LO_REG); + i2c_writeb(0x1, LS2X_I2C_PRER_HI_REG); + i2c_writeb(0x80, LS2X_I2C_CTR_REG); +} + +static int i2c_read(struct ls2x_i2c_dev *dev, + unsigned char *buf, int count) +{ + int i; + + for (i = 0; i < count; i++) { + i2c_writeb((i == count - 1) ? + (CR_READ | CR_ACK) : CR_READ, + LS2X_I2C_CR_REG); + while (i2c_readb(LS2X_I2C_SR_REG) & SR_TIP) + ; + buf[i] = i2c_readb(LS2X_I2C_RXR_REG); + i2c_debug("%s : read buf[%d] <= %02x\n", + __func__, __LINE__, i, buf[i]); + } + + return i; +} + +static int i2c_write(struct ls2x_i2c_dev *dev, + unsigned char *buf, int count) +{ + int i; + + for (i = 0; i < count; i++) { + i2c_writeb(buf[i], LS2X_I2C_TXR_REG); + i2c_debug("%s : write buf[%d] => %02x\n", + __func__, __LINE__, i, buf[i]); + i2c_writeb(CR_WRITE, LS2X_I2C_CR_REG); + while (i2c_readb(LS2X_I2C_SR_REG) & SR_TIP) + ; + + if (i2c_readb(LS2X_I2C_SR_REG) & SR_NOACK) { + i2c_debug("%s : device no ack\n", + __func__, __LINE__); + i2c_stop(dev); + return 0; + } + } + + return i; +} + +static int i2c_doxfer(struct ls2x_i2c_dev *dev, + struct i2c_msg *msgs, int num) +{ + struct i2c_msg *m = msgs; + unsigned long flags; + int i; + + spin_lock_irqsave(&dev->lock, flags); + for (i = 0; i < num; i++) { + if (!i2c_start(dev, m->addr, m->flags)) { + spin_unlock_irqrestore(&dev->lock, flags); + return 0; + } + if (m->flags & I2C_M_RD) + i2c_read(dev, m->buf, m->len); + else + i2c_write(dev, m->buf, m->len); + ++m; + } + + i2c_stop(dev); + spin_unlock_irqrestore(&dev->lock, flags); + + return i; +} + +static int i2c_xfer(struct i2c_adapter *adap, + struct i2c_msg *msgs, int num) +{ + int ret; + int retry; + struct ls2x_i2c_dev *dev; + + dev = i2c_get_adapdata(adap); + for (retry = 0; retry < adap->retries; retry++) { + ret = i2c_doxfer(dev, msgs, num); + if (ret != -EAGAIN) + return ret; + + udelay(100); + } + + return -EREMOTEIO; +} + +static unsigned int i2c_func(struct i2c_adapter *adap) +{ + return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL; +} + +static const struct i2c_algorithm ls2x_i2c_algo = { + .master_xfer = i2c_xfer, + .functionality = i2c_func, +}; + +static int ls2x_i2c_probe(struct platform_device *pdev) +{ + struct ls2x_i2c_dev *dev; + struct i2c_adapter *adap; + struct resource *mem, *ioarea; + int r, irq; + + /* NOTE: driver uses the static register mapping */ + mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!mem) { + dev_err(&pdev->dev, "no mem resource?\n"); + return -ENODEV; + } + irq = platform_get_irq(pdev, 0); + if (irq <= 0) { + dev_err(&pdev->dev, "no irq resource?\n"); + return -ENODEV; + } + + ioarea = request_mem_region(mem->start, resource_size(mem), + pdev->name); + if (!ioarea) { + dev_err(&pdev->dev, "I2C region already claimed\n"); + return -EBUSY; + } + + dev = kzalloc(sizeof(struct ls2x_i2c_dev), GFP_KERNEL); + if (!dev) { + r = -ENOMEM; + goto err_release_region; + } + + spin_lock_init(&dev->lock); + dev->dev = &pdev->dev; + dev->irq = irq; + dev->base = ioremap(mem->start, resource_size(mem)); + if (!dev->base) { + r = -ENOMEM; + goto err_free_mem; + } + + platform_set_drvdata(pdev, dev); + + ls2x_i2c_reginit(dev); + + adap = &dev->adapter; + i2c_set_adapdata(adap, dev); + adap->nr = pdev->id; + strlcpy(adap->name, pdev->name, sizeof(adap->name)); + adap->owner = THIS_MODULE; + adap->class = I2C_CLASS_HWMON; + adap->retries = 5; + adap->algo = &ls2x_i2c_algo; + adap->dev.parent = &pdev->dev; + adap->dev.of_node = pdev->dev.of_node; + ACPI_COMPANION_SET(&adap->dev, ACPI_COMPANION(&pdev->dev)); + + /* i2c device drivers may be active on return from add_adapter() */ + r = i2c_add_adapter(adap); + if (r) { + dev_err(dev->dev, "failure adding adapter\n"); + goto err_iounmap; + } + + return 0; + +err_iounmap: + iounmap(dev->base); +err_free_mem: + platform_set_drvdata(pdev, NULL); + kfree(dev); +err_release_region: + release_mem_region(mem->start, resource_size(mem)); + + return r; +} + +static int ls2x_i2c_remove(struct platform_device *pdev) +{ + struct ls2x_i2c_dev *dev = platform_get_drvdata(pdev); + struct resource *mem; + + platform_set_drvdata(pdev, NULL); + i2c_del_adapter(&dev->adapter); + iounmap(dev->base); + kfree(dev); + mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + release_mem_region(mem->start, resource_size(mem)); + return 0; +} + +#ifdef CONFIG_PM +static int ls2x_i2c_suspend_noirq(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct ls2x_i2c_dev *i2c_dev = platform_get_drvdata(pdev); + + i2c_dev->suspended = 1; + + return 0; +} + +static int ls2x_i2c_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct ls2x_i2c_dev *i2c_dev = platform_get_drvdata(pdev); + + i2c_dev->suspended = 0; + ls2x_i2c_reginit(i2c_dev); + + return 0; +} + +static const struct dev_pm_ops ls2x_i2c_dev_pm_ops = { + .suspend_noirq = ls2x_i2c_suspend_noirq, + .resume = ls2x_i2c_resume, +}; + +#define LS2X_DEV_PM_OPS (&ls2x_i2c_dev_pm_ops) +#else +#define LS2X_DEV_PM_OPS NULL +#endif + +#ifdef CONFIG_OF +static struct of_device_id ls2x_i2c_id_table[] = { + {.compatible = "loongson,ls2h-i2c"}, + {.compatible = "loongson,ls2k-i2c"}, + {.compatible = "loongson,ls7a-i2c"}, + {}, +}; +#endif +static const struct acpi_device_id ls2x_i2c_acpi_match[] = { + {"LOON0004"}, + {} +}; +MODULE_DEVICE_TABLE(acpi, ls2x_i2c_acpi_match); + +static struct platform_driver ls2x_i2c_driver = { + .probe = ls2x_i2c_probe, + .remove = ls2x_i2c_remove, + .driver = { + .name = "ls2x-i2c", + .owner = THIS_MODULE, + .pm = LS2X_DEV_PM_OPS, +#ifdef CONFIG_OF + .of_match_table = of_match_ptr(ls2x_i2c_id_table), +#endif + .acpi_match_table = ACPI_PTR(ls2x_i2c_acpi_match), + }, +}; + +static int __init ls2x_i2c_init_driver(void) +{ + return platform_driver_register(&ls2x_i2c_driver); +} +subsys_initcall(ls2x_i2c_init_driver); + +static void __exit ls2x_i2c_exit_driver(void) +{ + platform_driver_unregister(&ls2x_i2c_driver); +} +module_exit(ls2x_i2c_exit_driver); + +MODULE_AUTHOR("Loongson Technology Corporation Limited"); +MODULE_DESCRIPTION("Loongson LS2X I2C bus adapter"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:ls2x-i2c"); diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index 8a996af91c12..b5e410496da6 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -1550,7 +1550,8 @@ static int __i2c_add_numbered_adapter(struct i2c_adapter *adap) int i2c_add_adapter(struct i2c_adapter *adapter) { struct device *dev = &adapter->dev; - int id; + acpi_status status; + unsigned long long id; if (dev->of_node) { id = of_alias_get_id(dev->of_node, "i2c"); @@ -1558,6 +1559,12 @@ int i2c_add_adapter(struct i2c_adapter *adapter) adapter->nr = id; return __i2c_add_numbered_adapter(adapter); } + } else if (dev->parent->fwnode) { + status = acpi_evaluate_integer(ACPI_HANDLE(dev->parent), "_UID", NULL, &id); + if (ACPI_SUCCESS(status) && (id >= 0)) { + adapter->nr = id; + return __i2c_add_numbered_adapter(adapter); + } } mutex_lock(&core_lock); diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index d5512b18a3ae..36d877bff373 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -1320,6 +1320,13 @@ config RTC_DRV_CROS_EC This driver can also be built as a module. If so, the module will be called rtc-cros-ec. +config RTC_DRV_LS2X + tristate "Loongson LS2X RTC" + depends on MACH_LOONGSON64 + help + If you say yes here you get support for the RTC on the + Loongson's LS2H/LS2K/LS7A Platform-Bridge. + comment "on-CPU RTC drivers" config RTC_DRV_ASM9260 diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile index fd1d53e789b7..b86587da86ff 100644 --- a/drivers/rtc/Makefile +++ b/drivers/rtc/Makefile @@ -46,6 +46,7 @@ obj-$(CONFIG_RTC_DRV_CMOS) += rtc-cmos.o obj-$(CONFIG_RTC_DRV_COH901331) += rtc-coh901331.o obj-$(CONFIG_RTC_DRV_CPCAP) += rtc-cpcap.o obj-$(CONFIG_RTC_DRV_CROS_EC) += rtc-cros-ec.o +obj-$(CONFIG_RTC_DRV_LS2X) += rtc-ls2x.o obj-$(CONFIG_RTC_DRV_DA9052) += rtc-da9052.o obj-$(CONFIG_RTC_DRV_DA9055) += rtc-da9055.o obj-$(CONFIG_RTC_DRV_DA9063) += rtc-da9063.o diff --git a/drivers/rtc/rtc-ls2x.c b/drivers/rtc/rtc-ls2x.c new file mode 100644 index 000000000000..30ea8b7418f2 --- /dev/null +++ b/drivers/rtc/rtc-ls2x.c @@ -0,0 +1,327 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Loongson-2H/7A Real Time Clock interface for Linux + * + * Author: Shaozong Liu + * Huacai Chen + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/** + * Loongson-2H/7A rtc register + */ + +#define TOY_TRIM_REG 0x20 +#define TOY_WRITE0_REG 0x24 +#define TOY_WRITE1_REG 0x28 +#define TOY_READ0_REG 0x2c +#define TOY_READ1_REG 0x30 +#define TOY_MATCH0_REG 0x34 +#define TOY_MATCH1_REG 0x38 +#define TOY_MATCH2_REG 0x3c +#define RTC_CTRL_REG 0x40 +#define RTC_TRIM_REG 0x60 +#define RTC_WRITE0_REG 0x64 +#define RTC_READE0_REG 0x68 +#define RTC_MATCH0_REG 0x6c +#define RTC_MATCH1_REG 0x70 +#define RTC_MATCH2_REG 0x74 + +/** + * shift bits and filed mask + */ +#define TOY_MON_MASK 0x3f +#define TOY_DAY_MASK 0x1f +#define TOY_HOUR_MASK 0x1f +#define TOY_MIN_MASK 0x3f +#define TOY_SEC_MASK 0x3f +#define TOY_MSEC_MASK 0xf + +#define TOY_MON_SHIFT 26 +#define TOY_DAY_SHIFT 21 +#define TOY_HOUR_SHIFT 16 +#define TOY_MIN_SHIFT 10 +#define TOY_SEC_SHIFT 4 +#define TOY_MSEC_SHIFT 0 + +/* shift bits for TOY_MATCH */ +#define TOY_MATCH_YEAR_SHIFT 26 +#define TOY_MATCH_MON_SHIFT 22 +#define TOY_MATCH_DAY_SHIFT 17 +#define TOY_MATCH_HOUR_SHIFT 12 +#define TOY_MATCH_MIN_SHIFT 6 +#define TOY_MATCH_SEC_SHIFT 0 + +/* Filed mask bits for TOY_MATCH */ +#define TOY_MATCH_YEAR_MASK 0x3f +#define TOY_MATCH_MON_MASK 0xf +#define TOY_MATCH_DAY_MASK 0x1f +#define TOY_MATCH_HOUR_MASK 0x1f +#define TOY_MATCH_MIN_MASK 0x3f +#define TOY_MATCH_SEC_MASK 0x3f + +/* ACPI and RTC offset */ +#define ACPI_RTC_OFFSET 0x100 + +/* support rtc wakeup */ +#define PM1_STS_FOR_RTC 0x10 +#define RTC_STS_WAKEUP_BIT (0x1 << 10) +#define ACPI_FOR_RTC_WAKEUP_BASE \ + (rtc_reg_base - ACPI_RTC_OFFSET + PM1_STS_FOR_RTC) + +/* interface for rtc read and write */ +#define rtc_write(val, addr) writel(val, rtc_reg_base + (addr)) +#define rtc_read(addr) readl(rtc_reg_base + (addr)) + +DEFINE_SPINLOCK(rtc_lock); + +struct ls2x_rtc_info { + struct platform_device *pdev; + struct rtc_device *rtc_dev; + struct resource *mem_res; + void __iomem *rtc_base; + int irq_base; +}; + +static void __iomem *rtc_reg_base; + +static int ls2x_rtc_read_time(struct device *dev, struct rtc_time *tm) +{ + unsigned int val; + unsigned long flags; + + spin_lock_irqsave(&rtc_lock, flags); + + val = rtc_read(TOY_READ1_REG); + tm->tm_year = val; + val = rtc_read(TOY_READ0_REG); + tm->tm_sec = (val >> TOY_SEC_SHIFT) & TOY_SEC_MASK; + tm->tm_min = (val >> TOY_MIN_SHIFT) & TOY_MIN_MASK; + tm->tm_hour = (val >> TOY_HOUR_SHIFT) & TOY_HOUR_MASK; + tm->tm_mday = (val >> TOY_DAY_SHIFT) & TOY_DAY_MASK; + tm->tm_mon = ((val >> TOY_MON_SHIFT) & TOY_MON_MASK) - 1; + + spin_unlock_irqrestore(&rtc_lock, flags); + + return 0; +} + +static int ls2x_rtc_set_time(struct device *dev, struct rtc_time *tm) +{ + unsigned int val = 0; + unsigned long flags; + + spin_lock_irqsave(&rtc_lock, flags); + + val |= (tm->tm_sec << TOY_SEC_SHIFT); + val |= (tm->tm_min << TOY_MIN_SHIFT); + val |= (tm->tm_hour << TOY_HOUR_SHIFT); + val |= (tm->tm_mday << TOY_DAY_SHIFT); + val |= ((tm->tm_mon + 1) << TOY_MON_SHIFT); + rtc_write(val, TOY_WRITE0_REG); + val = tm->tm_year; + rtc_write(val, TOY_WRITE1_REG); + + spin_unlock_irqrestore(&rtc_lock, flags); + + return 0; +} + +static int ls2x_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) +{ + unsigned int val; + unsigned long flags; + struct rtc_time *tm = &alrm->time; + + spin_lock_irqsave(&rtc_lock, flags); + + val = rtc_read(TOY_MATCH0_REG); + tm->tm_sec = (val >> TOY_MATCH_SEC_SHIFT) & TOY_MATCH_SEC_MASK; + tm->tm_min = (val >> TOY_MATCH_MIN_SHIFT) & TOY_MATCH_MIN_MASK; + tm->tm_hour = (val >> TOY_MATCH_HOUR_SHIFT) & TOY_MATCH_HOUR_MASK; + tm->tm_mday = (val >> TOY_MATCH_DAY_SHIFT) & TOY_MATCH_DAY_MASK; + tm->tm_mon = ((val >> TOY_MATCH_MON_SHIFT) & TOY_MATCH_MON_MASK) - 1; + tm->tm_year = ((val >> TOY_MATCH_YEAR_SHIFT) & TOY_MATCH_YEAR_MASK); + + spin_unlock_irqrestore(&rtc_lock, flags); + + return 0; +} + +static int ls2x_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) +{ + unsigned int val = 0; + unsigned int temp = 0; + unsigned long flags; + struct rtc_time *tm = &alrm->time; + + spin_lock_irqsave(&rtc_lock, flags); + + val |= (tm->tm_sec << TOY_MATCH_SEC_SHIFT); + val |= (tm->tm_min << TOY_MATCH_MIN_SHIFT); + val |= (tm->tm_hour << TOY_MATCH_HOUR_SHIFT); + val |= (tm->tm_mday << TOY_MATCH_DAY_SHIFT); + val |= ((tm->tm_mon + 1) << TOY_MATCH_MON_SHIFT); + val |= ((tm->tm_year & TOY_MATCH_YEAR_MASK) << TOY_MATCH_YEAR_SHIFT); + rtc_write(val, TOY_MATCH0_REG); + + /* enalbe acpi rtc wakeup */ + temp = readl(ACPI_FOR_RTC_WAKEUP_BASE) | RTC_STS_WAKEUP_BIT; + writel(temp, ACPI_FOR_RTC_WAKEUP_BASE); + + spin_unlock_irqrestore(&rtc_lock, flags); + + return 0; +} + +static struct rtc_class_ops ls2x_rtc_ops = { + .read_time = ls2x_rtc_read_time, + .set_time = ls2x_rtc_set_time, + .read_alarm = ls2x_rtc_read_alarm, + .set_alarm = ls2x_rtc_set_alarm, +}; + + +static int ls2x_rtc_probe(struct platform_device *pdev) +{ + struct resource *res, *mem; + struct device *dev = &pdev->dev; + struct rtc_device *rtc; + struct ls2x_rtc_info *info; + + info = kzalloc(sizeof(struct ls2x_rtc_info), GFP_KERNEL); + if (!info) { + pr_debug("%s: no enough memory\n", pdev->name); + return -ENOMEM; + } + + info->pdev = pdev; + info->irq_base = platform_get_irq(pdev, 0); + if (info->irq_base <= 0) { + pr_debug("%s: no irq?\n", pdev->name); + return -ENOENT; + } + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + pr_debug("%s: RTC resource data missing\n", pdev->name); + return -ENOENT; + } + + mem = request_mem_region(res->start, resource_size(res), pdev->name); + if (!mem) { + pr_debug("%s: RTC registers at %x are not free\n", + pdev->name, (unsigned int)res->start); + return -EBUSY; + } + info->mem_res = mem; + + info->rtc_base = ioremap(res->start, resource_size(res)); + if (!info->rtc_base) { + pr_debug("%s: RTC registers can't be mapped\n", pdev->name); + goto fail1; + } + rtc_reg_base = info->rtc_base; + + device_init_wakeup(dev, 1); + + rtc = info->rtc_dev = devm_rtc_device_register(&pdev->dev, pdev->name, + &ls2x_rtc_ops, THIS_MODULE); + + /* There don't need alarm interrupt */ + info->rtc_dev->uie_unsupported = 1; + + if (IS_ERR(info->rtc_dev)) { + pr_debug("%s: can't register RTC device, err %ld\n", + pdev->name, PTR_ERR(rtc)); + goto fail0; + } + + platform_set_drvdata(pdev, info); + dev_set_drvdata(&rtc->dev, info); + + return 0; + +fail0: + iounmap(info->rtc_base); +fail1: + release_resource(mem); + kfree(info); + + return -EIO; +} + +static int ls2x_rtc_remove(struct platform_device *pdev) +{ + struct ls2x_rtc_info *info = platform_get_drvdata(pdev); + struct rtc_device *rtc = info->rtc_dev; + + iounmap(info->rtc_base); + release_resource(dev_get_drvdata(&rtc->dev)); + release_resource(info->mem_res); + kfree(info); + + return 0; +} + +#ifdef CONFIG_OF +static struct of_device_id ls2x_rtc_id_table[] = { + {.compatible = "loongson,ls2h-rtc"}, + {.compatible = "loongson,ls2k-rtc"}, + {.compatible = "loongson,ls7a-rtc"}, + {}, +}; +#endif + +static const struct acpi_device_id ls2x_rtc_acpi_match[] = { + {"LOON0001"}, + {} +}; +MODULE_DEVICE_TABLE(acpi, ls2x_rtc_acpi_match); + +static struct platform_driver ls2x_rtc_driver = { + .probe = ls2x_rtc_probe, + .remove = ls2x_rtc_remove, + .driver = { + .name = "ls2x-rtc", + .owner = THIS_MODULE, +#ifdef CONFIG_OF + .of_match_table = of_match_ptr(ls2x_rtc_id_table), +#endif + .acpi_match_table = ACPI_PTR(ls2x_rtc_acpi_match), + }, +}; + +static int __init rtc_init(void) +{ + return platform_driver_register(&ls2x_rtc_driver); +} + +static void __exit rtc_exit(void) +{ + platform_driver_unregister(&ls2x_rtc_driver); +} + +module_init(rtc_init); +module_exit(rtc_exit); + +MODULE_AUTHOR("Liu Shaozong"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:ls2x-rtc"); -- Gitee From 69f6a2d90969011be05cf1c3303cb70c695aa35a Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 31 Dec 2020 15:13:33 +0800 Subject: [PATCH 54/59] Input: i8042 - Add PNP checking hook for Loongson Add PNP checking related functions for Loongson, so that i8042 driver can work well under the ACPI firmware with PNP typed keyboard and mouse configured in DSDT. Change-Id: I2b24ebe336db20f74e34a164924baafacffa767e Signed-off-by: Jianmin Lv Signed-off-by: Huacai Chen --- drivers/input/serio/i8042-loongsonio.h | 330 +++++++++++++++++++++++++ drivers/input/serio/i8042.h | 2 + 2 files changed, 332 insertions(+) create mode 100644 drivers/input/serio/i8042-loongsonio.h diff --git a/drivers/input/serio/i8042-loongsonio.h b/drivers/input/serio/i8042-loongsonio.h new file mode 100644 index 000000000000..2ea83b14f13d --- /dev/null +++ b/drivers/input/serio/i8042-loongsonio.h @@ -0,0 +1,330 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * i8042-loongsonio.h + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + * Author: Jianmin Lv + * Huacai Chen + */ + +#ifndef _I8042_LOONGSONIO_H +#define _I8042_LOONGSONIO_H + +/* + * Names. + */ + +#define I8042_KBD_PHYS_DESC "isa0060/serio0" +#define I8042_AUX_PHYS_DESC "isa0060/serio1" +#define I8042_MUX_PHYS_DESC "isa0060/serio%d" + +/* + * IRQs. + */ +#define I8042_MAP_IRQ(x) (x) + +#define I8042_KBD_IRQ i8042_kbd_irq +#define I8042_AUX_IRQ i8042_aux_irq + +static int i8042_kbd_irq; +static int i8042_aux_irq; + +/* + * Register numbers. + */ + +#define I8042_COMMAND_REG i8042_command_reg +#define I8042_STATUS_REG i8042_command_reg +#define I8042_DATA_REG i8042_data_reg + +static int i8042_command_reg = 0x64; +static int i8042_data_reg = 0x60; + + +static inline int i8042_read_data(void) +{ + return inb(I8042_DATA_REG); +} + +static inline int i8042_read_status(void) +{ + return inb(I8042_STATUS_REG); +} + +static inline void i8042_write_data(int val) +{ + outb(val, I8042_DATA_REG); +} + +static inline void i8042_write_command(int val) +{ + outb(val, I8042_COMMAND_REG); +} + +#ifdef CONFIG_PNP +#include + +static bool i8042_pnp_kbd_registered; +static unsigned int i8042_pnp_kbd_devices; +static bool i8042_pnp_aux_registered; +static unsigned int i8042_pnp_aux_devices; + +static int i8042_pnp_command_reg; +static int i8042_pnp_data_reg; +static int i8042_pnp_kbd_irq; +static int i8042_pnp_aux_irq; + +static char i8042_pnp_kbd_name[32]; +static char i8042_pnp_aux_name[32]; + +static void i8042_pnp_id_to_string(struct pnp_id *id, char *dst, int dst_size) +{ + strlcpy(dst, "PNP:", dst_size); + + while (id) { + strlcat(dst, " ", dst_size); + strlcat(dst, id->id, dst_size); + id = id->next; + } +} + +static int i8042_pnp_kbd_probe(struct pnp_dev *dev, + const struct pnp_device_id *did) +{ + if (pnp_port_valid(dev, 0) && pnp_port_len(dev, 0) == 1) + i8042_pnp_data_reg = pnp_port_start(dev, 0); + + if (pnp_port_valid(dev, 1) && pnp_port_len(dev, 1) == 1) + i8042_pnp_command_reg = pnp_port_start(dev, 1); + + if (pnp_irq_valid(dev, 0)) + i8042_pnp_kbd_irq = pnp_irq(dev, 0); + + strlcpy(i8042_pnp_kbd_name, did->id, sizeof(i8042_pnp_kbd_name)); + if (strlen(pnp_dev_name(dev))) { + strlcat(i8042_pnp_kbd_name, ":", sizeof(i8042_pnp_kbd_name)); + strlcat(i8042_pnp_kbd_name, pnp_dev_name(dev), + sizeof(i8042_pnp_kbd_name)); + } + i8042_pnp_id_to_string(dev->id, i8042_kbd_firmware_id, + sizeof(i8042_kbd_firmware_id)); + + /* Keyboard ports are always supposed to be wakeup-enabled */ + device_set_wakeup_enable(&dev->dev, true); + + i8042_pnp_kbd_devices++; + return 0; +} + +static int i8042_pnp_aux_probe(struct pnp_dev *dev, + const struct pnp_device_id *did) +{ + if (pnp_port_valid(dev, 0) && pnp_port_len(dev, 0) == 1) + i8042_pnp_data_reg = pnp_port_start(dev, 0); + + if (pnp_port_valid(dev, 1) && pnp_port_len(dev, 1) == 1) + i8042_pnp_command_reg = pnp_port_start(dev, 1); + + if (pnp_irq_valid(dev, 0)) + i8042_pnp_aux_irq = pnp_irq(dev, 0); + + strlcpy(i8042_pnp_aux_name, did->id, sizeof(i8042_pnp_aux_name)); + if (strlen(pnp_dev_name(dev))) { + strlcat(i8042_pnp_aux_name, ":", sizeof(i8042_pnp_aux_name)); + strlcat(i8042_pnp_aux_name, pnp_dev_name(dev), + sizeof(i8042_pnp_aux_name)); + } + i8042_pnp_id_to_string(dev->id, i8042_aux_firmware_id, + sizeof(i8042_aux_firmware_id)); + + i8042_pnp_aux_devices++; + return 0; +} + +static const struct pnp_device_id pnp_kbd_devids[] = { + { .id = "PNP0300", .driver_data = 0 }, + { .id = "PNP0301", .driver_data = 0 }, + { .id = "PNP0302", .driver_data = 0 }, + { .id = "PNP0303", .driver_data = 0 }, + { .id = "PNP0304", .driver_data = 0 }, + { .id = "PNP0305", .driver_data = 0 }, + { .id = "PNP0306", .driver_data = 0 }, + { .id = "PNP0309", .driver_data = 0 }, + { .id = "PNP030a", .driver_data = 0 }, + { .id = "PNP030b", .driver_data = 0 }, + { .id = "PNP0320", .driver_data = 0 }, + { .id = "PNP0343", .driver_data = 0 }, + { .id = "PNP0344", .driver_data = 0 }, + { .id = "PNP0345", .driver_data = 0 }, + { .id = "CPQA0D7", .driver_data = 0 }, + { .id = "", }, +}; +MODULE_DEVICE_TABLE(pnp, pnp_kbd_devids); + +static struct pnp_driver i8042_pnp_kbd_driver = { + .name = "i8042 kbd", + .id_table = pnp_kbd_devids, + .probe = i8042_pnp_kbd_probe, + .driver = { + .probe_type = PROBE_FORCE_SYNCHRONOUS, + .suppress_bind_attrs = true, + }, +}; + +static const struct pnp_device_id pnp_aux_devids[] = { + { .id = "AUI0200", .driver_data = 0 }, + { .id = "FJC6000", .driver_data = 0 }, + { .id = "FJC6001", .driver_data = 0 }, + { .id = "PNP0f03", .driver_data = 0 }, + { .id = "PNP0f0b", .driver_data = 0 }, + { .id = "PNP0f0e", .driver_data = 0 }, + { .id = "PNP0f12", .driver_data = 0 }, + { .id = "PNP0f13", .driver_data = 0 }, + { .id = "PNP0f19", .driver_data = 0 }, + { .id = "PNP0f1c", .driver_data = 0 }, + { .id = "SYN0801", .driver_data = 0 }, + { .id = "", }, +}; +MODULE_DEVICE_TABLE(pnp, pnp_aux_devids); + +static struct pnp_driver i8042_pnp_aux_driver = { + .name = "i8042 aux", + .id_table = pnp_aux_devids, + .probe = i8042_pnp_aux_probe, + .driver = { + .probe_type = PROBE_FORCE_SYNCHRONOUS, + .suppress_bind_attrs = true, + }, +}; + +static void i8042_pnp_exit(void) +{ + if (i8042_pnp_kbd_registered) { + i8042_pnp_kbd_registered = false; + pnp_unregister_driver(&i8042_pnp_kbd_driver); + } + + if (i8042_pnp_aux_registered) { + i8042_pnp_aux_registered = false; + pnp_unregister_driver(&i8042_pnp_aux_driver); + } +} +#ifdef CONFIG_ACPI +#include +#endif +static int __init i8042_pnp_init(void) +{ + char kbd_irq_str[4] = { 0 }, aux_irq_str[4] = { 0 }; + bool pnp_data_busted = false; + int err; + + if (i8042_nopnp) { + pr_info("PNP detection disabled\n"); + return 0; + } + + err = pnp_register_driver(&i8042_pnp_kbd_driver); + if (!err) + i8042_pnp_kbd_registered = true; + + err = pnp_register_driver(&i8042_pnp_aux_driver); + if (!err) + i8042_pnp_aux_registered = true; + + if (!i8042_pnp_kbd_devices && !i8042_pnp_aux_devices) { + i8042_pnp_exit(); + pr_info("PNP: No PS/2 controller found.\n"); +#ifdef CONFIG_ACPI + if (acpi_disabled == 0) + return -ENODEV; +#endif + pr_info("Probing ports directly.\n"); + return 0; + } + + if (i8042_pnp_kbd_devices) + snprintf(kbd_irq_str, sizeof(kbd_irq_str), + "%d", i8042_pnp_kbd_irq); + if (i8042_pnp_aux_devices) + snprintf(aux_irq_str, sizeof(aux_irq_str), + "%d", i8042_pnp_aux_irq); + + pr_info("PNP: PS/2 Controller [%s%s%s] at %#x,%#x irq %s%s%s\n", + i8042_pnp_kbd_name, + (i8042_pnp_kbd_devices && i8042_pnp_aux_devices) ? "," : "", + i8042_pnp_aux_name, + i8042_pnp_data_reg, i8042_pnp_command_reg, + kbd_irq_str, + (i8042_pnp_kbd_devices && i8042_pnp_aux_devices) ? "," : "", + aux_irq_str); + + if (((i8042_pnp_data_reg & ~0xf) == (i8042_data_reg & ~0xf) && + i8042_pnp_data_reg != i8042_data_reg) || + !i8042_pnp_data_reg) { + pr_warn("PNP: PS/2 controller has invalid data port %#x; using default %#x\n", + i8042_pnp_data_reg, i8042_data_reg); + i8042_pnp_data_reg = i8042_data_reg; + pnp_data_busted = true; + } + + if (((i8042_pnp_command_reg & ~0xf) == (i8042_command_reg & ~0xf) && + i8042_pnp_command_reg != i8042_command_reg) || + !i8042_pnp_command_reg) { + pr_warn("PNP: PS/2 controller has invalid command port %#x; using default %#x\n", + i8042_pnp_command_reg, i8042_command_reg); + i8042_pnp_command_reg = i8042_command_reg; + pnp_data_busted = true; + } + + if (!i8042_nokbd && !i8042_pnp_kbd_irq) { + pr_warn("PNP: PS/2 controller doesn't have KBD irq; using default %d\n", + i8042_kbd_irq); + i8042_pnp_kbd_irq = i8042_kbd_irq; + pnp_data_busted = true; + } + + if (!i8042_noaux && !i8042_pnp_aux_irq) { + if (!pnp_data_busted && i8042_pnp_kbd_irq) { + pr_warn("PNP: PS/2 appears to have AUX port disabled, " + "if this is incorrect please boot with i8042.nopnp\n"); + i8042_noaux = true; + } else { + pr_warn("PNP: PS/2 controller doesn't have AUX irq; using default %d\n", + i8042_aux_irq); + i8042_pnp_aux_irq = i8042_aux_irq; + } + } + + i8042_data_reg = i8042_pnp_data_reg; + i8042_command_reg = i8042_pnp_command_reg; + i8042_kbd_irq = i8042_pnp_kbd_irq; + i8042_aux_irq = i8042_pnp_aux_irq; + + return 0; +} + +#else /* !CONFIG_PNP */ +static inline int i8042_pnp_init(void) { return 0; } +static inline void i8042_pnp_exit(void) { } +#endif /* CONFIG_PNP */ + +static int __init i8042_platform_init(void) +{ + int retval; + + i8042_kbd_irq = I8042_MAP_IRQ(1); + i8042_aux_irq = I8042_MAP_IRQ(12); + + retval = i8042_pnp_init(); + if (retval) + return retval; + + return retval; +} + +static inline void i8042_platform_exit(void) +{ + i8042_pnp_exit(); +} + +#endif /* _I8042_LOONGSONIO_H */ diff --git a/drivers/input/serio/i8042.h b/drivers/input/serio/i8042.h index 55381783dc82..166bd69841cf 100644 --- a/drivers/input/serio/i8042.h +++ b/drivers/input/serio/i8042.h @@ -19,6 +19,8 @@ #include "i8042-snirm.h" #elif defined(CONFIG_SPARC) #include "i8042-sparcio.h" +#elif defined(CONFIG_MACH_LOONGSON64) +#include "i8042-loongsonio.h" #elif defined(CONFIG_X86) || defined(CONFIG_IA64) #include "i8042-x86ia64io.h" #else -- Gitee From fca8e6933ff5569aca97d6a1fce3b47bf6658150 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 23 Oct 2021 10:14:30 +0800 Subject: [PATCH 55/59] USB: Fix OHCI/XHCI wakeup problems Change-Id: Ic774294e981d92f30b8c1026d1c0536b91fcf4d3 Signed-off-by: Huacai Chen --- drivers/usb/host/ohci-hub.c | 3 +++ drivers/usb/host/pci-quirks.c | 4 ++++ drivers/usb/host/xhci-pci.c | 10 ++++++++++ drivers/usb/host/xhci.h | 1 + 4 files changed, 18 insertions(+) diff --git a/drivers/usb/host/ohci-hub.c b/drivers/usb/host/ohci-hub.c index 44504c1751e0..865123024568 100644 --- a/drivers/usb/host/ohci-hub.c +++ b/drivers/usb/host/ohci-hub.c @@ -91,6 +91,9 @@ __acquires(ohci->lock) update_done_list(ohci); ohci_work(ohci); + /* All ED unlinks should be finished, no need for SOF interrupts */ + ohci_writel(ohci, OHCI_INTR_SF, &ohci->regs->intrdisable); + /* * Some controllers don't handle "global" suspend properly if * there are unsuspended ports. For these controllers, put all diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c index ef08d68b9714..2975e592e231 100644 --- a/drivers/usb/host/pci-quirks.c +++ b/drivers/usb/host/pci-quirks.c @@ -948,6 +948,10 @@ static void quirk_usb_disable_ehci(struct pci_dev *pdev) * booting from USB disk or using a usb keyboard */ hcc_params = readl(base + EHCI_HCC_PARAMS); + if (pdev->vendor == PCI_VENDOR_ID_LOONGSON && + pdev->device == PCI_DEVICE_ID_LOONGSON_EHCI) + hcc_params &= ~(0xffL << 8); + offset = (hcc_params >> 8) & 0xff; while (offset && --count) { pci_read_config_dword(pdev, offset, &cap); diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 490ce38ae93d..ca8ef977d494 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -274,11 +274,13 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == 0x0014) { xhci->quirks |= XHCI_TRUST_TX_LENGTH; xhci->quirks |= XHCI_ZERO_64B_REGS; + xhci->quirks |= XHCI_LWP_QUIRK; } if (pdev->vendor == PCI_VENDOR_ID_RENESAS && pdev->device == 0x0015) { xhci->quirks |= XHCI_RESET_ON_RESUME; xhci->quirks |= XHCI_ZERO_64B_REGS; + xhci->quirks |= XHCI_LWP_QUIRK; } if (pdev->vendor == PCI_VENDOR_ID_VIA) xhci->quirks |= XHCI_RESET_ON_RESUME; @@ -519,6 +521,10 @@ static void xhci_pci_remove(struct pci_dev *dev) if (xhci->quirks & XHCI_SPURIOUS_WAKEUP) pci_set_power_state(dev, PCI_D3hot); + /* Workaround for decreasing power consumption after S5 */ + if (xhci->quirks & XHCI_LWP_QUIRK) + pci_set_power_state(dev, PCI_D3hot); + usb_hcd_pci_remove(dev); } @@ -664,6 +670,10 @@ static void xhci_pci_shutdown(struct usb_hcd *hcd) /* Yet another workaround for spurious wakeups at shutdown with HSW */ if (xhci->quirks & XHCI_SPURIOUS_WAKEUP) pci_set_power_state(pdev, PCI_D3hot); + + /* Workaround for decreasing power consumption after S5 */ + if (xhci->quirks & XHCI_LWP_QUIRK) + pci_set_power_state(pdev, PCI_D3hot); } #endif /* CONFIG_PM */ diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 679ef073d99d..bbee0c73c606 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1887,6 +1887,7 @@ struct xhci_hcd { #define XHCI_ZHAOXIN_HOST BIT_ULL(41) #define XHCI_ZHAOXIN_TRB_FETCH BIT_ULL(42) #define XHCI_EP_CTX_BROKEN_DCS BIT_ULL(43) +#define XHCI_LWP_QUIRK BIT_ULL(44) unsigned int num_active_eps; unsigned int limit_active_eps; -- Gitee From 99e8f8ece2cf96c4ea66984b5304dfbb1cdaada6 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Mon, 22 Feb 2021 10:53:47 +0800 Subject: [PATCH 56/59] drm/radeon: Workaround radeon driver bug for Loongson Radeon driver can not handle the interrupt is faster than DMA data, so irq handler must update an old ih.rptr value in IH_RB_RPTR register to enable interrupt again when interrupt is faster than DMA data. Change-Id: I1ba74ebe1db2744896f11f16d584a3e5c6df0481 Signed-off-by: Huacai Chen Signed-off-by: Zhijie Zhang --- drivers/gpu/drm/radeon/cik.c | 1 + drivers/gpu/drm/radeon/evergreen.c | 1 + drivers/gpu/drm/radeon/r600.c | 1 + drivers/gpu/drm/radeon/si.c | 1 + 4 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 5c42877fd6fb..de402657091e 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -8108,6 +8108,7 @@ int cik_irq_process(struct radeon_device *rdev) if (queue_thermal) schedule_work(&rdev->pm.dpm.thermal.work); rdev->ih.rptr = rptr; + WREG32(IH_RB_RPTR, rptr); atomic_set(&rdev->ih.lock, 0); /* make sure wptr hasn't changed while processing */ diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 14d90dc376e7..11e3e99a9f01 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -4919,6 +4919,7 @@ int evergreen_irq_process(struct radeon_device *rdev) if (queue_thermal && rdev->pm.dpm_enabled) schedule_work(&rdev->pm.dpm.thermal.work); rdev->ih.rptr = rptr; + WREG32(IH_RB_RPTR, rptr); atomic_set(&rdev->ih.lock, 0); /* make sure wptr hasn't changed while processing */ diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index d9a33ca768f3..cd5418ccf20e 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -4331,6 +4331,7 @@ int r600_irq_process(struct radeon_device *rdev) if (queue_thermal && rdev->pm.dpm_enabled) schedule_work(&rdev->pm.dpm.thermal.work); rdev->ih.rptr = rptr; + WREG32(IH_RB_RPTR, rptr); atomic_set(&rdev->ih.lock, 0); /* make sure wptr hasn't changed while processing */ diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 93dcab548a83..914b861df92c 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -6443,6 +6443,7 @@ int si_irq_process(struct radeon_device *rdev) if (queue_thermal && rdev->pm.dpm_enabled) schedule_work(&rdev->pm.dpm.thermal.work); rdev->ih.rptr = rptr; + WREG32(IH_RB_RPTR, rptr); atomic_set(&rdev->ih.lock, 0); /* make sure wptr hasn't changed while processing */ -- Gitee From 3dcdaeba03cf236432086e0967785ae3b0b95360 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 22 May 2018 16:54:04 +0800 Subject: [PATCH 57/59] drm: Add Loongson LS7A Display-Controller driver Change-Id: Id3c3a7bebf1e95dcc882f69ba20ff3b7e57d275d Signed-off-by: Huacai Chen --- arch/loongarch/configs/loongson3_defconfig | 1 + drivers/gpio/Kconfig | 2 +- drivers/gpio/gpio-loongson.c | 6 +- drivers/gpu/drm/Kconfig | 2 + drivers/gpu/drm/Makefile | 1 + drivers/gpu/drm/loongson/Kconfig | 19 + drivers/gpu/drm/loongson/Makefile | 16 + drivers/gpu/drm/loongson/loongson_connector.c | 243 +++++ drivers/gpu/drm/loongson/loongson_crtc.c | 493 +++++++++++ drivers/gpu/drm/loongson/loongson_cursor.c | 203 +++++ drivers/gpu/drm/loongson/loongson_drv.c | 620 +++++++++++++ drivers/gpu/drm/loongson/loongson_drv.h | 216 +++++ drivers/gpu/drm/loongson/loongson_encoder.c | 105 +++ drivers/gpu/drm/loongson/loongson_i2c.c | 189 ++++ drivers/gpu/drm/loongson/loongson_i2c.h | 43 + drivers/gpu/drm/loongson/loongson_irq.c | 70 ++ drivers/gpu/drm/loongson/loongson_vbios.c | 836 ++++++++++++++++++ drivers/gpu/drm/loongson/loongson_vbios.h | 283 ++++++ 18 files changed, 3344 insertions(+), 4 deletions(-) create mode 100644 drivers/gpu/drm/loongson/Kconfig create mode 100644 drivers/gpu/drm/loongson/Makefile create mode 100644 drivers/gpu/drm/loongson/loongson_connector.c create mode 100644 drivers/gpu/drm/loongson/loongson_crtc.c create mode 100644 drivers/gpu/drm/loongson/loongson_cursor.c create mode 100644 drivers/gpu/drm/loongson/loongson_drv.c create mode 100644 drivers/gpu/drm/loongson/loongson_drv.h create mode 100644 drivers/gpu/drm/loongson/loongson_encoder.c create mode 100644 drivers/gpu/drm/loongson/loongson_i2c.c create mode 100644 drivers/gpu/drm/loongson/loongson_i2c.h create mode 100644 drivers/gpu/drm/loongson/loongson_irq.c create mode 100644 drivers/gpu/drm/loongson/loongson_vbios.c create mode 100644 drivers/gpu/drm/loongson/loongson_vbios.h diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig index ee29270ffbbf..2b3b357dc8bf 100644 --- a/arch/loongarch/configs/loongson3_defconfig +++ b/arch/loongarch/configs/loongson3_defconfig @@ -581,6 +581,7 @@ CONFIG_DRM_AMDGPU_SI=y CONFIG_DRM_AMDGPU_CIK=y CONFIG_DRM_AMDGPU_USERPTR=y CONFIG_DRM_AST=y +CONFIG_DRM_LOONGSON=y CONFIG_FB_EFI=y CONFIG_FB_RADEON=y CONFIG_LCD_PLATFORM=m diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index c91355c89ec6..0cc4e0733d83 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -370,7 +370,7 @@ config GPIO_LOGICVC config GPIO_LOONGSON bool "Loongson-2/3 GPIO support" - depends on CPU_LOONGSON2EF || CPU_LOONGSON64 + depends on CPU_LOONGSON2EF || CPU_LOONGSON64 || LOONGARCH help driver for GPIO functionality on Loongson-2F/3A/3B processors. diff --git a/drivers/gpio/gpio-loongson.c b/drivers/gpio/gpio-loongson.c index a42145873cc9..c47bcd33f1eb 100644 --- a/drivers/gpio/gpio-loongson.c +++ b/drivers/gpio/gpio-loongson.c @@ -22,10 +22,10 @@ #define STLS2F_N_GPIO 4 #define STLS3A_N_GPIO 16 -#ifdef CONFIG_CPU_LOONGSON64 -#define LOONGSON_N_GPIO STLS3A_N_GPIO -#else +#ifdef CONFIG_CPU_LOONGSON2EF #define LOONGSON_N_GPIO STLS2F_N_GPIO +#else +#define LOONGSON_N_GPIO STLS3A_N_GPIO #endif /* diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index ca868271f4c4..af407d01e1c4 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -312,6 +312,8 @@ source "drivers/gpu/drm/udl/Kconfig" source "drivers/gpu/drm/ast/Kconfig" +source "drivers/gpu/drm/loongson/Kconfig" + source "drivers/gpu/drm/mgag200/Kconfig" source "drivers/gpu/drm/armada/Kconfig" diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 81569009f884..e9dd6847c9fa 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -86,6 +86,7 @@ obj-$(CONFIG_DRM_ROCKCHIP) +=rockchip/ obj-$(CONFIG_DRM_GMA500) += gma500/ obj-$(CONFIG_DRM_UDL) += udl/ obj-$(CONFIG_DRM_AST) += ast/ +obj-$(CONFIG_DRM_LOONGSON) +=loongson/ obj-$(CONFIG_DRM_ARMADA) += armada/ obj-$(CONFIG_DRM_ATMEL_HLCDC) += atmel-hlcdc/ obj-y += rcar-du/ diff --git a/drivers/gpu/drm/loongson/Kconfig b/drivers/gpu/drm/loongson/Kconfig new file mode 100644 index 000000000000..0c5f22703b6a --- /dev/null +++ b/drivers/gpu/drm/loongson/Kconfig @@ -0,0 +1,19 @@ + +config DRM_LOONGSON + tristate "DRM support for LS7A Display Controller" + depends on DRM && PCI + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + select DRM_KMS_HELPER + select DRM_KMS_FB_HELPER + select DRM_GEM_CMA_HELPER + select DRM_KMS_CMA_HELPER + default n + help + Support the display controllers found on the Loongson's LS7A + bridge. + + This driver provides no built-in acceleration; acceleration is + performed by Vivante GC1000. This driver provides kernel mode + setting and buffer management to userspace. diff --git a/drivers/gpu/drm/loongson/Makefile b/drivers/gpu/drm/loongson/Makefile new file mode 100644 index 000000000000..a5272137848c --- /dev/null +++ b/drivers/gpu/drm/loongson/Makefile @@ -0,0 +1,16 @@ +# +# Makefile for the drm device driver. This driver provides support for the +# Direct Rendering Infrastructure (DRI) +# + +ccflags-y := -Iinclude/drm +loongson-y := loongson_drv.o \ + loongson_i2c.o \ + loongson_irq.o \ + loongson_crtc.o \ + loongson_encoder.o \ + loongson_connector.o \ + loongson_cursor.o \ + loongson_vbios.o + +obj-$(CONFIG_DRM_LOONGSON) += loongson.o diff --git a/drivers/gpu/drm/loongson/loongson_connector.c b/drivers/gpu/drm/loongson/loongson_connector.c new file mode 100644 index 000000000000..f3cfa37c0e4c --- /dev/null +++ b/drivers/gpu/drm/loongson/loongson_connector.c @@ -0,0 +1,243 @@ +/* + * Copyright (c) 2018 Loongson Technology Co., Ltd. + * Authors: + * Chen Zhu + * Yaling Fang + * Dandan Zhang + * Huacai Chen + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "loongson_drv.h" + +/** + * loongson_connector_best_encoder + * + * @connector: point to the drm_connector structure + * + * Select the best encoder for the given connector. Used by both the helpers in + * drm_atomic_helper_check_modeset() and legacy CRTC helpers + */ +static struct drm_encoder *loongson_connector_best_encoder(struct drm_connector + *connector) +{ + struct drm_encoder *encoder; + + /* There is only one encoder per connector */ + drm_connector_for_each_possible_encoder(connector, encoder) + return encoder; + + return NULL; +} + +/** + * loongson_get_modes + * + * @connetcor: central DRM connector control structure + * + * Fill in all modes currently valid for the sink into the connector->probed_modes list. + * It should also update the EDID property by calling drm_connector_update_edid_property(). + */ +static int loongson_get_modes(struct drm_connector *connector) +{ + int id, ret = 0; + enum loongson_edid_method ledid_method; + struct edid *edid = NULL; + struct loongson_connector *lconnector = to_loongson_connector(connector); + struct i2c_adapter *adapter = lconnector->i2c->adapter; + + id = drm_connector_index(connector); + + ledid_method = lconnector->edid_method; + switch (ledid_method) { + case via_i2c: + case via_encoder: + default: + edid = drm_get_edid(connector, adapter); + break; + case via_vbios: + edid = kmalloc(EDID_LENGTH * 2, GFP_KERNEL); + memcpy(edid, lconnector->vbios_edid, EDID_LENGTH * 2); + } + + if (edid) { + drm_connector_update_edid_property(connector, edid); + ret = drm_add_edid_modes(connector, edid); + kfree(edid); + } else { + ret += drm_add_modes_noedid(connector, 1920, 1080); + drm_set_preferred_mode(connector, 1024, 768); + } + + return ret; +} + +static bool is_connected(struct loongson_connector *lconnector) +{ + unsigned char start = 0x0; + struct i2c_adapter *adapter; + struct i2c_msg msgs = { + .addr = DDC_ADDR, + .len = 1, + .flags = 0, + .buf = &start, + }; + + if (!lconnector->i2c) + return false; + + adapter = lconnector->i2c->adapter; + if (i2c_transfer(adapter, &msgs, 1) < 1) { + DRM_DEBUG_KMS("display-%d not connect\n", lconnector->id); + return false; + } + + return true; +} + +/** + * loongson_connector_detect + * + * @connector: point to drm_connector + * @force: bool + * + * Check to see if anything is attached to the connector. + * The parameter force is set to false whilst polling, + * true when checking the connector due to a user request + */ +static enum drm_connector_status loongson_connector_detect(struct drm_connector + *connector, bool force) +{ + int i, r; + enum loongson_edid_method ledid_method; + enum drm_connector_status ret = connector_status_connected; + struct loongson_connector *lconnector = to_loongson_connector(connector); + + i = drm_connector_index(connector); + + ledid_method = lconnector->edid_method; + DRM_DEBUG("loongson_vga_detect connector_id=%d, ledid_method=%d\n", i, ledid_method); + + if (ledid_method != via_vbios) { + r = pm_runtime_get_sync(connector->dev->dev); + if (r < 0) + return connector_status_disconnected; + + if (is_connected(lconnector)) + ret = connector_status_connected; + else + ret = connector_status_disconnected; + + pm_runtime_mark_last_busy(connector->dev->dev); + pm_runtime_put_autosuspend(connector->dev->dev); + } + + return ret; +} + +/** + * These provide the minimum set of functions required to handle a connector + * + * Helper operations for connectors.These functions are used + * by the atomic and legacy modeset helpers and by the probe helpers. + */ +static const struct drm_connector_helper_funcs loongson_vga_connector_helper_funcs = { + .get_modes = loongson_get_modes, + .best_encoder = loongson_connector_best_encoder, +}; + +/** + * These provide the minimum set of functions required to handle a connector + * + * Control connectors on a given device. + * The functions below allow the core DRM code to control connectors, + * enumerate available modes and so on. + */ +static const struct drm_connector_funcs loongson_vga_connector_funcs = { + .dpms = drm_helper_connector_dpms, + .detect = loongson_connector_detect, + .fill_modes = drm_helper_probe_single_connector_modes, + .destroy = drm_connector_cleanup, + .reset = drm_atomic_helper_connector_reset, + .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, +}; + +static const unsigned short normal_i2c[] = { 0x50, I2C_CLIENT_END }; + +/** + * loongson_connector_init + * + * @dev: drm device + * @connector_id: + * + * Vga is the interface between host and monitor + * This function is to init vga + */ +struct drm_connector *loongson_connector_init(struct drm_device *dev, unsigned int index) +{ + struct i2c_adapter *adapter; + struct i2c_client *ddc_client; + struct drm_connector *connector; + struct loongson_encoder *loongson_encoder; + struct loongson_connector *loongson_connector; + struct loongson_drm_device *ldev = (struct loongson_drm_device *)dev->dev_private; + + const struct i2c_board_info ddc_info = { + .type = "ddc-dev", + .addr = DDC_ADDR, + .flags = I2C_CLASS_DDC, + }; + + loongson_encoder = ldev->mode_info[index].encoder; + adapter = loongson_encoder->i2c->adapter; + ddc_client = i2c_new_client_device(adapter, &ddc_info); + if (IS_ERR(ddc_client)) { + i2c_del_adapter(adapter); + DRM_ERROR("Failed to create standard ddc client\n"); + return NULL; + } + + loongson_connector = kzalloc(sizeof(struct loongson_connector), GFP_KERNEL); + if (!loongson_connector) + return NULL; + + ldev->connector_active0 = 0; + ldev->connector_active1 = 0; + loongson_connector->id = index; + loongson_connector->ldev = ldev; + loongson_connector->type = get_connector_type(ldev, index); + loongson_connector->i2c_id = get_connector_i2cid(ldev, index); + loongson_connector->hotplug = get_hotplug_mode(ldev, index); + loongson_connector->edid_method = get_edid_method(ldev, index); + if (loongson_connector->edid_method == via_vbios) + loongson_connector->vbios_edid = get_vbios_edid(ldev, index); + + loongson_connector->i2c = &ldev->i2c_bus[index]; + if (!loongson_connector->i2c) + DRM_INFO("connector-%d match i2c-%d err\n", index, + loongson_connector->i2c_id); + + connector = &loongson_connector->base; + + drm_connector_helper_add(connector, &loongson_vga_connector_helper_funcs); + + drm_connector_init(dev, connector, + &loongson_vga_connector_funcs, DRM_MODE_CONNECTOR_VGA); + + drm_connector_register(connector); + + return connector; +} diff --git a/drivers/gpu/drm/loongson/loongson_crtc.c b/drivers/gpu/drm/loongson/loongson_crtc.c new file mode 100644 index 000000000000..b6909f9a5f11 --- /dev/null +++ b/drivers/gpu/drm/loongson/loongson_crtc.c @@ -0,0 +1,493 @@ +/* + * Copyright (c) 2018 Loongson Technology Co., Ltd. + * Authors: + * Chen Zhu + * Yaling Fang + * Dandan Zhang + * Huacai Chen + * Jiaxun Yang + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "loongson_drv.h" + +/** + * This file contains setup code for the CRTC + */ + +DEFINE_SPINLOCK(loongson_crtc_lock); + +static int loongson_crtc_enable_vblank(struct drm_crtc *crtc) +{ + struct loongson_crtc *lcrtc = to_loongson_crtc(crtc); + struct loongson_drm_device *ldev = lcrtc->ldev; + + if (lcrtc->crtc_id == 0) { + ldev->int_reg |= (BIT(INT_DVO0_FB_END) << 16); + } else { + ldev->int_reg |= (BIT(INT_DVO1_FB_END) << 16); + } + + spin_lock(&loongson_reglock); + writel(ldev->int_reg, ldev->mmio + FB_INT_REG); + spin_unlock(&loongson_reglock); + + return 0; +} + +static void loongson_crtc_disable_vblank(struct drm_crtc *crtc) +{ + struct loongson_crtc *lcrtc = to_loongson_crtc(crtc); + struct loongson_drm_device *ldev = lcrtc->ldev; + + + if (lcrtc->crtc_id == 0) { + ldev->int_reg &= (~BIT(INT_DVO0_FB_END) << 16); + } else { + ldev->int_reg &= (~BIT(INT_DVO1_FB_END) << 16); + } + + spin_lock(&loongson_reglock); + writel(ldev->int_reg, ldev->mmio + FB_INT_REG); + spin_unlock(&loongson_reglock); +} + +#define PLL_REF_CLK_MHZ 100 +#define PCLK_PRECISION_INDICATOR 10000 + +/** + * cal_freq + * + * @pixclock: unsigned int + * @pll_config: point to the pix_pll structure + * + * Calculate frequency + */ +static unsigned int cal_freq(unsigned int pixclock, struct pix_pll *pll_config) +{ + int i, j, loopc_offset; + unsigned int refc_set[] = {4, 5, 3}; + unsigned int prec_set[] = {1, 5, 10, 50, 100}; /*in 1/PCLK_PRECISION_INDICATOR*/ + unsigned int pstdiv, loopc, refc; + unsigned int precision_req, precision; + unsigned int loopc_min, loopc_max, loopc_mid; + unsigned long long real_dvo, req_dvo; + + /*try precision from high to low*/ + for (j = 0; j < sizeof(prec_set)/sizeof(int); j++) { + precision_req = prec_set[j]; + + /*try each refc*/ + for (i = 0; i < sizeof(refc_set)/sizeof(int); i++) { + refc = refc_set[i]; + loopc_min = (1200 / PLL_REF_CLK_MHZ) * refc; /*1200 / (PLL_REF_CLK_MHZ / refc)*/ + loopc_max = (3200 / PLL_REF_CLK_MHZ) * refc; /*3200 / (PLL_REF_CLK_MHZ / refc)*/ + loopc_mid = (2200 / PLL_REF_CLK_MHZ) * refc; /*(loopc_min + loopc_max) / 2;*/ + loopc_offset = -1; + + /*try each loopc*/ + for (loopc = loopc_mid; (loopc <= loopc_max) && (loopc >= loopc_min); loopc += loopc_offset) { + if (loopc_offset < 0) + loopc_offset = -loopc_offset; + else + loopc_offset = -(loopc_offset+1); + + pstdiv = loopc * PLL_REF_CLK_MHZ * 1000 / refc / pixclock; + if ((pstdiv > 127) || (pstdiv < 1)) + continue; + + /*real_freq is float type which is not available, but read_freq * pstdiv is available.*/ + req_dvo = (pixclock * pstdiv); + real_dvo = (loopc * PLL_REF_CLK_MHZ * 1000 / refc); + precision = abs(real_dvo * PCLK_PRECISION_INDICATOR / req_dvo - PCLK_PRECISION_INDICATOR); + + if (precision < precision_req) { + pll_config->l2_div = pstdiv; + pll_config->l1_loopc = loopc; + pll_config->l1_frefc = refc; + if (j > 1) + printk("Warning: PIX clock precision degraded to %d / %d\n", precision_req, PCLK_PRECISION_INDICATOR); + return 1; + } + } + } + } + return 0; +} + +/** + * config_pll + * + * @pll_base: represent a long type + * @pll_cfg: point to the pix_pll srtucture + * + * Config pll apply to ls7a + */ +static void config_pll(void *pll_base, struct pix_pll *pll_cfg) +{ + unsigned long val; + + /* clear sel_pll_out0 */ + val = readl(pll_base + 0x4); + val &= ~(1UL << 8); + writel(val, pll_base + 0x4); + /* set pll_pd */ + val = readl(pll_base + 0x4); + val |= (1UL << 13); + writel(val, pll_base + 0x4); + /* clear set_pll_param */ + val = readl(pll_base + 0x4); + val &= ~(1UL << 11); + writel(val, pll_base + 0x4); + /* clear old value & config new value */ + val = readl(pll_base + 0x4); + val &= ~(0x7fUL << 0); + val |= (pll_cfg->l1_frefc << 0); /* refc */ + writel(val, pll_base + 0x4); + val = readl(pll_base + 0x0); + val &= ~(0x7fUL << 0); + val |= (pll_cfg->l2_div << 0); /* div */ + val &= ~(0x1ffUL << 21); + val |= (pll_cfg->l1_loopc << 21);/* loopc */ + writel(val, pll_base + 0x0); + /* set set_pll_param */ + val = readl(pll_base + 0x4); + val |= (1UL << 11); + writel(val, pll_base + 0x4); + /* clear pll_pd */ + val = readl(pll_base + 0x4); + val &= ~(1UL << 13); + writel(val, pll_base + 0x4); + /* wait pll lock */ + while (!(readl(pll_base + 0x4) & 0x80)) + cpu_relax(); + /* set sel_pll_out0 */ + val = readl(pll_base + 0x4); + val |= (1UL << 8); + writel(val, pll_base + 0x4); +} + +static void loongson_config_pll(int id, unsigned int pix_freq) +{ + unsigned int out; + struct pix_pll pll_cfg; + + out = cal_freq(pix_freq, &pll_cfg); + if (id == 0) + config_pll(LS7A_PIX0_PLL, &pll_cfg); + else + config_pll(LS7A_PIX1_PLL, &pll_cfg); +} + +/** + * These provide the minimum set of functions required to handle a CRTC + * Each driver is responsible for filling out this structure at startup time + * + * The drm_crtc_funcs structure is the central CRTC management structure + * in the DRM. Each CRTC controls one or more connectors + */ +static const struct drm_crtc_funcs loongson_crtc_funcs = { + .cursor_set2 = loongson_crtc_cursor_set2, + .cursor_move = loongson_crtc_cursor_move, + .destroy = drm_crtc_cleanup, + .set_config = drm_atomic_helper_set_config, + .page_flip = drm_atomic_helper_page_flip, + .reset = drm_atomic_helper_crtc_reset, + .atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_crtc_destroy_state, + .enable_vblank = loongson_crtc_enable_vblank, + .disable_vblank = loongson_crtc_disable_vblank, +}; + +static const uint32_t loongson_formats[] = { + DRM_FORMAT_RGB565, + DRM_FORMAT_RGB888, + DRM_FORMAT_XRGB8888, + DRM_FORMAT_ARGB8888, +}; + +static const uint64_t loongson_format_modifiers[] = { + DRM_FORMAT_MOD_LINEAR, + DRM_FORMAT_MOD_INVALID +}; + +static enum drm_mode_status loongson_crtc_mode_valid(struct drm_crtc *crtc, + const struct drm_display_mode *mode) +{ + int id = crtc->index; + struct drm_device *dev = crtc->dev; + struct loongson_drm_device *ldev = (struct loongson_drm_device *)dev->dev_private; + + if (mode->hdisplay > get_crtc_max_width(ldev, id)) + return MODE_BAD; + if (mode->vdisplay > get_crtc_max_height(ldev, id)) + return MODE_BAD; + if (ldev->num_crtc == 1) { + if (mode->hdisplay % 16) + return MODE_BAD; + } else { + if (mode->hdisplay % 64) + return MODE_BAD; + } + + return MODE_OK; +} + +u32 crtc_read(struct loongson_crtc *lcrtc, u32 offset) +{ + struct loongson_drm_device *ldev = lcrtc->ldev; + return readl(ldev->mmio + offset + (lcrtc->crtc_id * CRTC_REG_OFFSET)); +} + +void crtc_write(struct loongson_crtc *lcrtc, u32 offset, u32 val) +{ + struct loongson_drm_device *ldev = lcrtc->ldev; + writel(val, ldev->mmio + offset + (lcrtc->crtc_id * CRTC_REG_OFFSET)); +} + +static void loongson_crtc_mode_set_nofb(struct drm_crtc *crtc) +{ + unsigned int hr, hss, hse, hfl; + unsigned int vr, vss, vse, vfl; + unsigned int pix_freq; + unsigned long flags; + struct loongson_crtc *lcrtc = to_loongson_crtc(crtc); + struct drm_display_mode *mode = &crtc->state->adjusted_mode; + + hr = mode->hdisplay; + hss = mode->hsync_start; + hse = mode->hsync_end; + hfl = mode->htotal; + + vr = mode->vdisplay; + vss = mode->vsync_start; + vse = mode->vsync_end; + vfl = mode->vtotal; + + pix_freq = mode->clock; + + DRM_DEBUG("crtc_id = %d, hr = %d, hss = %d, hse = %d, hfl = %d, vr = %d, vss = %d, vse = %d, vfl = %d, pix_freq = %d,\n", + lcrtc->crtc_id, hr, hss, hse, hfl, vr, vss, vse, vfl, pix_freq); + + spin_lock_irqsave(&loongson_reglock, flags); + crtc_write(lcrtc, FB_DITCFG_DVO_REG, 0); + crtc_write(lcrtc, FB_DITTAB_LO_DVO_REG, 0); + crtc_write(lcrtc, FB_DITTAB_HI_DVO_REG, 0); + crtc_write(lcrtc, FB_PANCFG_DVO_REG, 0x80001311); + crtc_write(lcrtc, FB_PANTIM_DVO_REG, 0); + + crtc_write(lcrtc, FB_HDISPLAY_DVO_REG, (mode->crtc_htotal << 16) | mode->crtc_hdisplay); + crtc_write(lcrtc, FB_HSYNC_DVO_REG, 0x40000000 | (mode->crtc_hsync_end << 16) | mode->crtc_hsync_start); + + crtc_write(lcrtc, FB_VDISPLAY_DVO_REG, (mode->crtc_vtotal << 16) | mode->crtc_vdisplay); + crtc_write(lcrtc, FB_VSYNC_DVO_REG, 0x40000000 | (mode->crtc_vsync_end << 16) | mode->crtc_vsync_start); + + crtc_write(lcrtc, FB_STRI_DVO_REG, (crtc->primary->state->fb->pitches[0] + 255) & ~255); + + DRM_DEBUG("Stride: %x\n", (crtc->primary->state->fb->pitches[0] + 255) & ~255); + + switch (crtc->primary->state->fb->format->format) { + case DRM_FORMAT_RGB565: + lcrtc->cfg_reg |= 0x3; + crtc_write(lcrtc, FB_CFG_DVO_REG, lcrtc->cfg_reg); + break; + case DRM_FORMAT_RGB888: + default: + lcrtc->cfg_reg |= 0x4; + crtc_write(lcrtc, FB_CFG_DVO_REG, lcrtc->cfg_reg); + break; + } + spin_unlock_irqrestore(&loongson_reglock, flags); + + loongson_config_pll(lcrtc->crtc_id, mode->clock); +} + +static void loongson_crtc_atomic_enable(struct drm_crtc *crtc, + struct drm_crtc_state *old_state) +{ + unsigned long flags; + struct loongson_crtc *lcrtc = to_loongson_crtc(crtc); + + if (lcrtc->cfg_reg & CFG_ENABLE) + goto vblank_on; + + lcrtc->cfg_reg |= CFG_ENABLE; + spin_lock_irqsave(&loongson_reglock, flags); + crtc_write(lcrtc, FB_CFG_DVO_REG, lcrtc->cfg_reg); + spin_unlock_irqrestore(&loongson_reglock, flags); + +vblank_on: + drm_crtc_vblank_on(crtc); +} + +static void loongson_crtc_atomic_disable(struct drm_crtc *crtc, + struct drm_crtc_state *old_state) +{ + unsigned long flags; + struct loongson_crtc *lcrtc = to_loongson_crtc(crtc); + + lcrtc->cfg_reg &= ~CFG_ENABLE; + spin_lock_irqsave(&loongson_reglock, flags); + crtc_write(lcrtc, FB_CFG_DVO_REG, lcrtc->cfg_reg); + spin_unlock_irqrestore(&loongson_reglock, flags); + + spin_lock_irq(&crtc->dev->event_lock); + if (crtc->state->event) { + drm_crtc_send_vblank_event(crtc, crtc->state->event); + crtc->state->event = NULL; + } + spin_unlock_irq(&crtc->dev->event_lock); + + drm_crtc_vblank_off(crtc); +} + +static void loongson_crtc_atomic_flush(struct drm_crtc *crtc, + struct drm_crtc_state *old_crtc_state) +{ + struct drm_pending_vblank_event *event = crtc->state->event; + + if (event) { + crtc->state->event = NULL; + + spin_lock_irq(&crtc->dev->event_lock); + if (drm_crtc_vblank_get(crtc) == 0) + drm_crtc_arm_vblank_event(crtc, event); + else + drm_crtc_send_vblank_event(crtc, event); + spin_unlock_irq(&crtc->dev->event_lock); + } +} + +static void loongson_plane_atomic_update(struct drm_plane *plane, + struct drm_plane_state *old_state) +{ + int id, clonemode; + unsigned int pitch; + unsigned long flags; + struct loongson_crtc *lcrtc; + struct loongson_drm_device *ldev; + struct drm_plane_state *state = plane->state; + + if (!state->crtc || !state->fb) + return; + + pitch = state->fb->pitches[0]; + lcrtc = to_loongson_crtc(state->crtc); + ldev = lcrtc->ldev; + id = lcrtc->crtc_id; + clonemode = clone_mode(ldev); + + /* CRTC1 cloned from CRTC0 in clone mode */ + if (clonemode) + ldev->lcrtc[1].cfg_reg |= CFG_PANELSWITCH; + else + ldev->lcrtc[1].cfg_reg &= ~CFG_PANELSWITCH; + + spin_lock_irqsave(&loongson_reglock, flags); + crtc_write(lcrtc, FB_STRI_DVO_REG, (pitch + 255) & ~255); + if (crtc_read(lcrtc, FB_CFG_DVO_REG) & CFG_FBNUM) + crtc_write(lcrtc, FB_ADDR0_DVO_REG, drm_fb_cma_get_gem_addr(state->fb, state, 0)); + else + crtc_write(lcrtc, FB_ADDR1_DVO_REG, drm_fb_cma_get_gem_addr(state->fb, state, 0)); + + lcrtc->cfg_reg |= CFG_ENABLE; + crtc_write(lcrtc, FB_CFG_DVO_REG, lcrtc->cfg_reg | CFG_FBSWITCH); + if (clonemode) { + if (id) + crtc_write(&ldev->lcrtc[0], FB_CFG_DVO_REG, ldev->lcrtc[0].cfg_reg | CFG_ENABLE); + else + crtc_write(&ldev->lcrtc[1], FB_CFG_DVO_REG, ldev->lcrtc[1].cfg_reg | CFG_ENABLE); + } + spin_unlock_irqrestore(&loongson_reglock, flags); + + udelay(3000); +} + +/** + * These provide the minimum set of functions required to handle a CRTC + * + * The drm_crtc_helper_funcs is a helper operations for CRTC + */ +static const struct drm_crtc_helper_funcs loongson_crtc_helper_funcs = { + .mode_valid = loongson_crtc_mode_valid, + .mode_set_nofb = loongson_crtc_mode_set_nofb, + .atomic_enable = loongson_crtc_atomic_enable, + .atomic_disable = loongson_crtc_atomic_disable, + .atomic_flush = loongson_crtc_atomic_flush, +}; + +static void loongson_plane_destroy(struct drm_plane *plane) +{ + drm_plane_cleanup(plane); +} + +static bool loongson_format_mod_supported(struct drm_plane *plane, + uint32_t format, uint64_t modifier) +{ + return (modifier == DRM_FORMAT_MOD_LINEAR); +} + +static const struct drm_plane_funcs loongson_plane_funcs = { + .atomic_duplicate_state = drm_atomic_helper_plane_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_plane_destroy_state, + .destroy = loongson_plane_destroy, + .disable_plane = drm_atomic_helper_disable_plane, + .reset = drm_atomic_helper_plane_reset, + .update_plane = drm_atomic_helper_update_plane, + .format_mod_supported = loongson_format_mod_supported, +}; + +static const struct drm_plane_helper_funcs loongson_plane_helper_funcs = { + .atomic_update = loongson_plane_atomic_update, +}; + +/** + * loongosn_crtc_init + * + * @ldev: point to the loongson_drm_device structure + * + * Init CRTC + */ +int loongson_crtc_init(struct loongson_drm_device *ldev) +{ + int i, ret; + + for (i = 0; i < ldev->num_crtc; i++) { + ldev->lcrtc[i].ldev = ldev; + ldev->lcrtc[i].crtc_id = i; + + ldev->lcrtc[i].cfg_reg = CFG_RESET; + ldev->lcrtc[i].primary = devm_kzalloc(ldev->dev->dev, sizeof(*ldev->lcrtc[i].primary), GFP_KERNEL); + if (!ldev->lcrtc[i].primary) + return -ENOMEM; + + ret = drm_universal_plane_init(ldev->dev, ldev->lcrtc[i].primary, BIT(i), &loongson_plane_funcs, + loongson_formats, ARRAY_SIZE(loongson_formats), + loongson_format_modifiers, DRM_PLANE_TYPE_PRIMARY, NULL); + if (ret) + return ret; + + drm_plane_helper_add(ldev->lcrtc[i].primary, &loongson_plane_helper_funcs); + + ret = drm_crtc_init_with_planes(ldev->dev, &ldev->lcrtc[i].base, ldev->lcrtc[i].primary, NULL, + &loongson_crtc_funcs, NULL); + if (ret) { + loongson_plane_destroy(ldev->lcrtc[i].primary); + return ret; + } + drm_crtc_helper_add(&ldev->lcrtc[i].base, &loongson_crtc_helper_funcs); + } + + return 0; +} diff --git a/drivers/gpu/drm/loongson/loongson_cursor.c b/drivers/gpu/drm/loongson/loongson_cursor.c new file mode 100644 index 000000000000..e993201e68bc --- /dev/null +++ b/drivers/gpu/drm/loongson/loongson_cursor.c @@ -0,0 +1,203 @@ +/* + * Copyright (c) 2018 Loongson Technology Co., Ltd. + * Authors: + * Chen Zhu + * Yaling Fang + * Dandan Zhang + * Huacai Chen + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include +#include +#include "loongson_drv.h" + +static void flush_scache_range(void *addr, unsigned long size) +{ + int inc; + unsigned long start, end; + + start = (unsigned long)addr; + end = (unsigned long)addr + size; + inc = cpu_scache_line_size(); + + for (; start < end; start += inc) + flush_scache_line(start); +} + +/* + Hide the cursor off screen. We can't disable the cursor hardware because it + takes too long to re-activate and causes momentary corruption +*/ +static void loongson_hide_cursor(struct drm_crtc *crtc) +{ + unsigned long flags; + volatile void __iomem *base; + struct drm_device *dev = crtc->dev; + struct loongson_drm_device *ldev = (struct loongson_drm_device *)dev->dev_private; + struct loongson_crtc *loongson_crtc = to_loongson_crtc(crtc); + unsigned int tmp, crtc_id = loongson_crtc->crtc_id; + + base = ldev->mmio; + tmp = readl(base + FB_CUR_CFG_REG); + tmp &= ~0xff; + if (clone_mode(ldev)) { + spin_lock_irqsave(&loongson_reglock, flags); + writel(tmp | 0x00, base + FB_CUR_CFG_REG); + spin_unlock_irqrestore(&loongson_reglock, flags); + ldev->cursor_showed = false; + } else { + if (ldev->cursor_crtc_id != crtc_id) + return; + + spin_lock_irqsave(&loongson_reglock, flags); + if (crtc_id) { + writel(tmp | 0x10, base + FB_CUR_CFG_REG); + } else { + writel(tmp | 0x00, base + FB_CUR_CFG_REG); + } + spin_unlock_irqrestore(&loongson_reglock, flags); + ldev->cursor_showed = false; + } +} + +static void loongson_show_cursor(struct drm_crtc *crtc) +{ + unsigned long flags; + volatile void __iomem *base; + struct drm_device *dev = crtc->dev; + struct loongson_drm_device *ldev = (struct loongson_drm_device *)dev->dev_private; + struct loongson_crtc *loongson_crtc = to_loongson_crtc(crtc); + unsigned int crtc_id = loongson_crtc->crtc_id; + + base = ldev->mmio; + if (clone_mode(ldev)) { + spin_lock_irqsave(&loongson_reglock, flags); + writel(0x00050202, base + FB_CUR_CFG_REG); + spin_unlock_irqrestore(&loongson_reglock, flags); + ldev->cursor_crtc_id = 0; + ldev->cursor_showed = true; + } else { + if (ldev->cursor_crtc_id == crtc_id) { + spin_lock_irqsave(&loongson_reglock, flags); + if (crtc_id == 0) { + writel(0x00050202, base + FB_CUR_CFG_REG); + } else { + writel(0x00050212, base + FB_CUR_CFG_REG); + } + spin_unlock_irqrestore(&loongson_reglock, flags); + + ldev->cursor_showed = true; + ldev->cursor_crtc_id = crtc_id; + } + } +} + +int loongson_crtc_cursor_set2(struct drm_crtc *crtc, + struct drm_file *file_priv, + uint32_t handle, + uint32_t width, + uint32_t height, + int32_t hot_x, int32_t hot_y) +{ + u32 gpu_addr; + unsigned long flags; + unsigned int crtc_id; + volatile void __iomem *base; + struct drm_gem_object *obj; + struct drm_device *dev = crtc->dev; + struct loongson_crtc *loongson_crtc = to_loongson_crtc(crtc); + struct loongson_drm_device *ldev = (struct loongson_drm_device *)dev->dev_private; + struct drm_gem_cma_object *cma, *cursor = ldev->cursor; + + base = ldev->mmio; + crtc_id = loongson_crtc->crtc_id; + + if ((width != 32 || height != 32) && handle) { + return -EINVAL; + } + + if (!handle || !file_priv) { + loongson_hide_cursor(crtc); + return 0; + } + + obj = drm_gem_object_lookup(file_priv, handle); + if (!obj) + return -ENOENT; + + cma = to_drm_gem_cma_obj(obj); + + flush_scache_range(cma->vaddr, 32*32*4); + memcpy(cursor->vaddr, cma->vaddr, 32*32*4); + + /* Program gpu address of cursor buffer */ + gpu_addr = ldev->cursor->paddr; + spin_lock_irqsave(&loongson_reglock, flags); + writel(gpu_addr, base + FB_CUR_ADDR_REG); + writel(0x00eeeeee, base + FB_CUR_BACK_REG); + writel(0x00aaaaaa, base + FB_CUR_FORE_REG); + spin_unlock_irqrestore(&loongson_reglock, flags); + + loongson_show_cursor(crtc); + + drm_gem_object_put(obj); + + return 0; +} + +int loongson_crtc_cursor_move(struct drm_crtc *crtc, int x, int y) +{ + unsigned long flags; + unsigned int tmp, crtc_id; + int xorign = 0, yorign = 0; + volatile void __iomem *base; + struct loongson_crtc *loongson_crtc = to_loongson_crtc(crtc); + struct loongson_drm_device *ldev = (struct loongson_drm_device *)crtc->dev->dev_private; + + base = ldev->mmio; + crtc_id = loongson_crtc->crtc_id; + + /* upper edge condition */ + yorign = y + crtc->y; + if (yorign < 0) + y = 0; + + /* left edge condition */ + xorign = x + crtc->x; + if (xorign < 0) + x = 0; + + /* move from one crtc to another, check which crtc should he shown + * the x or y < 0, it means the cursor it out of current review, + * && xorign/ yorign > 0, it means the cursor is in the framebuffer + * but not in curren review */ + if ((x < 0 && xorign > 0) || (y < 0 && yorign > 0)) { + if (ldev->cursor_crtc_id == crtc_id && !clone_mode(ldev)) + /*the cursor is not show, so hide if the (x,y) is in active crtc*/ + loongson_hide_cursor(crtc); + return 0; + } + + if (x < 0) + x = 0; + if (y < 0) + y = 0; + + tmp = x & 0xffff; + tmp |= y << 16; + spin_lock_irqsave(&loongson_reglock, flags); + writel(tmp, base + FB_CUR_LOC_ADDR_REG); + spin_unlock_irqrestore(&loongson_reglock, flags); + if (ldev->cursor_crtc_id != crtc_id && !clone_mode(ldev)) { + ldev->cursor_crtc_id = crtc_id; + ldev->cursor_showed = false; + } + if (!ldev->cursor_showed) + loongson_show_cursor(crtc); + + return 0; +} diff --git a/drivers/gpu/drm/loongson/loongson_drv.c b/drivers/gpu/drm/loongson/loongson_drv.c new file mode 100644 index 000000000000..35320f8465e0 --- /dev/null +++ b/drivers/gpu/drm/loongson/loongson_drv.c @@ -0,0 +1,620 @@ +/* + * Copyright (c) 2018 Loongson Technology Co., Ltd. + * Authors: + * Chen Zhu + * Yaling Fang + * Dandan Zhang + * Huacai Chen + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include "loongson_drv.h" + +#define DEVICE_NAME "loongson-drm" +#define DRIVER_NAME "loongson-drm" +#define DRIVER_DESC "Loongson DRM Driver" +#define DRIVER_DATE "20201201" +#define DRIVER_MAJOR 1 +#define DRIVER_MINOR 0 +#define DRIVER_PATCHLEVEL 1 + +static bool poll_connector; +module_param_named(poll, poll_connector, bool, 0600); + +DEFINE_SPINLOCK(loongson_reglock); + +/** + * loongson_mode_funcs---basic driver provided mode setting functions + * + * Some global (i.e. not per-CRTC, connector, etc) mode setting functions that + * involve drivers. + */ +static const struct drm_mode_config_funcs loongson_mode_funcs = { + .fb_create = drm_gem_fb_create, + .atomic_check = drm_atomic_helper_check, + .atomic_commit = drm_atomic_helper_commit, + .output_poll_changed = drm_fb_helper_output_poll_changed +}; + +/** + * loongson_drm_device_init ----init drm device + * + * @dev pointer to drm_device structure + * @flags start up flag + * + * RETURN + * drm device init result + */ +static int loongson_drm_device_init(struct drm_device *dev, uint32_t flags) +{ + int ret = 0; + struct loongson_drm_device *ldev = dev->dev_private; + + ldev->num_crtc = 2; + loongson_vbios_init(ldev); + + /*BAR 0 contains registers */ + ldev->mmio_base = pci_resource_start(ldev->dev->pdev, 0); + ldev->mmio_size = pci_resource_len(ldev->dev->pdev, 0); + + ldev->mmio = pcim_iomap(dev->pdev, 0, 0); + if (ldev->mmio == NULL) + return -ENOMEM; + + DRM_INFO("ldev->mmio_base = 0x%llx, ldev->mmio_size = 0x%llx\n", + ldev->mmio_base, ldev->mmio_size); + + if (!devm_request_mem_region(ldev->dev->dev, ldev->mmio_base, ldev->mmio_size, + "loongson_drmfb_mmio")) { + DRM_ERROR("Can't reserve mmio registers\n"); + return -ENOMEM; + } + + ret = loongson_gpio_init(ldev); + if (ret < 0) + DRM_ERROR("Failed to initialize dc gpios\n"); + + return ret; +} + +/** + * loongson_modeset_init --- init kernel mode setting + * + * @ldev: pointer to loongson_drm_device structure + * + * RETURN + * return init result + */ +int loongson_modeset_init(struct loongson_drm_device *ldev) +{ + int i, ret; + struct drm_encoder *encoder; + struct drm_connector *connector; + + ldev->mode_info[0].mode_config_initialized = true; + ldev->mode_info[1].mode_config_initialized = true; + + ldev->dev->mode_config.max_width = LOONGSON_MAX_FB_WIDTH; + ldev->dev->mode_config.max_height = LOONGSON_MAX_FB_HEIGHT; + + ldev->dev->mode_config.cursor_width = 32; + ldev->dev->mode_config.cursor_height = 32; + + ldev->dev->mode_config.allow_fb_modifiers = true; + + ret = loongson_i2c_init(ldev); + if (ret < 0) { + DRM_ERROR("Failed to initialize i2c\n"); + return ret; + } + + loongson_crtc_init(ldev); + + for (i = 0; i < ldev->num_crtc; i++) { + DRM_DEBUG("loongson drm encoder init\n"); + ldev->mode_info[i].crtc = &ldev->lcrtc[i]; + encoder = loongson_encoder_init(ldev->dev, i); + if (!encoder) { + DRM_ERROR("loongson_encoder_init failed\n"); + return -1; + } + ldev->mode_info[i].encoder = to_loongson_encoder(encoder); + + DRM_DEBUG("loongson drm connector init\n"); + connector = loongson_connector_init(ldev->dev, i); + if (!connector) { + DRM_ERROR("loongson_connector_init failed\n"); + return -1; + } + ldev->mode_info[i].connector = to_loongson_connector(connector); + + drm_connector_attach_encoder(connector, encoder); + if (poll_connector) + connector->polled = DRM_CONNECTOR_POLL_CONNECT | DRM_CONNECTOR_POLL_DISCONNECT; + } + + return 0; +} + +/** + * loongson_modeset_fini --- deinit kernel mode setting + * + * @ldev: pointer to loongson_drm_device structure + * + * RETURN + */ +void loongson_modeset_fini(struct loongson_drm_device *ldev) +{ +} + +/** + * loongson_vga_load - setup chip and create an initial config + * @dev: DRM device + * @flags: startup flags + * + * The driver load routine has to do several things: + * - initialize the memory manager + * - allocate initial config memory + * - setup the DRM framebuffer with the allocated memory + */ +static int loongson_drm_load(struct drm_device *dev, unsigned long flags) +{ + int r, ret, irq; + struct loongson_drm_device *ldev; + + dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(32)); + + ldev = devm_kzalloc(dev->dev, sizeof(struct loongson_drm_device), GFP_KERNEL); + if (ldev == NULL) + return -ENOMEM; + dev->dev_private = (void *)ldev; + ldev->dev = dev; + + ret = loongson_drm_device_init(dev, flags); + DRM_DEBUG("end loongson drm device init.\n"); + + drm_mode_config_init(dev); + dev->mode_config.funcs = (void *)&loongson_mode_funcs; + dev->mode_config.preferred_depth = 24; + dev->mode_config.prefer_shadow = 1; + + irq = dev->pdev->irq; + pci_set_drvdata(dev->pdev, dev); + vga_set_default_device(dev->pdev); + dev_set_drvdata(dev->dev, dev); + + r = drm_irq_install(dev, irq); + if (r) + dev_err(dev->dev, "Fatal error during irq install: %d\n", r); + + r = loongson_modeset_init(ldev); + if (r) + dev_err(dev->dev, "Fatal error during modeset init: %d\n", r); + + ldev->inited = true; + drm_mode_config_reset(dev); + + r = drm_vblank_init(dev, ldev->num_crtc); + if (r) + dev_err(dev->dev, "Fatal error during vblank init: %d\n", r); + + /* Make small buffers to store a hardware cursor (double buffered icon updates) */ + ldev->cursor = drm_gem_cma_create(dev, roundup(32*32*4, PAGE_SIZE)); + + drm_kms_helper_poll_init(dev); + + return 0; +} + +/** + * loongson_drm_unload--release drm resource + * + * @dev: pointer to drm_device + * + */ +static void loongson_drm_unload(struct drm_device *dev) +{ + struct loongson_drm_device *ldev = dev->dev_private; + + if (ldev == NULL) + return; + + loongson_modeset_fini(ldev); + drm_mode_config_cleanup(dev); + dev->dev_private = NULL; + dev_set_drvdata(dev->dev, NULL); + ldev->inited = false; +} + +/** + * loongson_drm_open -Driver callback when a new struct drm_file is opened. + * Useful for setting up driver-private data structures like buffer allocators, + * execution contexts or similar things. + * + * @dev DRM device + * @file DRM file private date + * + * RETURN + * 0 on success, a negative error code on failure, which will be promoted to + * userspace as the result of the open() system call. + */ +static int loongson_drm_open(struct drm_device *dev, struct drm_file *file) +{ + file->driver_priv = NULL; + + DRM_DEBUG("open: dev=%p, file=%p", dev, file); + + return 0; +} + +DEFINE_DRM_GEM_CMA_FOPS(fops); + +/** + * loongson_drm_driver - DRM device structure + * + * .load: driver callback to complete initialization steps after the driver is registered + * .unload:Reverse the effects of the driver load callback + * .open:Driver callback when a new struct drm_file is opened + * .fops:File operations for the DRM device node. + * .gem_free_object:deconstructor for drm_gem_objects + * .dumb_create:This creates a new dumb buffer in the driver’s backing storage manager + * (GEM, TTM or something else entirely) and returns the resulting buffer handle. + * This handle can then be wrapped up into a framebuffer modeset object + * .dumb_map_offset:Allocate an offset in the drm device node’s address space + * to be able to memory map a dumb buffer + * .dump_destroy:This destroys the userspace handle for the given dumb backing storage buffer + */ +static struct drm_driver loongson_drm_driver = { + .driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_HAVE_IRQ | DRIVER_ATOMIC, + .open = loongson_drm_open, + .fops = &fops, + + .dumb_create = drm_gem_cma_dumb_create, + .gem_free_object_unlocked = drm_gem_cma_free_object, + .gem_vm_ops = &drm_gem_cma_vm_ops, + + .prime_handle_to_fd = drm_gem_prime_handle_to_fd, + .prime_fd_to_handle = drm_gem_prime_fd_to_handle, + + .gem_prime_import = drm_gem_prime_import, + .gem_prime_export = drm_gem_prime_export, + + .gem_prime_get_sg_table = drm_gem_cma_prime_get_sg_table, + .gem_prime_import_sg_table = drm_gem_cma_prime_import_sg_table, + .gem_prime_vmap = drm_gem_cma_prime_vmap, + .gem_prime_vunmap = drm_gem_cma_prime_vunmap, + .gem_prime_mmap = drm_gem_cma_prime_mmap, + + .irq_handler = loongson_irq_handler, + .irq_preinstall = loongson_irq_preinstall, + .irq_postinstall = loongson_irq_postinstall, + .irq_uninstall = loongson_irq_uninstall, + .name = DRIVER_NAME, + .desc = DRIVER_DESC, + .date = DRIVER_DATE, + .major = DRIVER_MAJOR, + .minor = DRIVER_MINOR, + .patchlevel = DRIVER_PATCHLEVEL, +}; + +/** + * loongson_drm_pci_devices -- pci device id info + * + * __u32 vendor, device Vendor and device ID or PCI_ANY_ID + * __u32 subvendor, subdevice Subsystem ID's or PCI_ANY_ID + * __u32 class, class_mask (class,subclass,prog-if) triplet + * kernel_ulong_t driver_data Data private to the driver + */ +static struct pci_device_id loongson_drm_pci_devices[] = { + {PCI_DEVICE(PCI_VENDOR_ID_LOONGSON, PCI_DEVICE_ID_LOONGSON_DC1)}, + {0, 0, 0, 0, 0, 0, 0} +}; + +/** + * loongson_drm_pci_register -- add pci device + * + * @pdev PCI device + * @ent pci device id + */ +static int loongson_drm_pci_register(struct pci_dev *pdev, + const struct pci_device_id *ent) + +{ + int ret; + struct drm_device *dev; + + dev = drm_dev_alloc(&loongson_drm_driver, &pdev->dev); + if (IS_ERR(dev)) + return PTR_ERR(dev); + + ret = pci_enable_device(pdev); + if (ret) + goto err_free; + + dev->pdev = pdev; + + loongson_drm_load(dev, 0x0); + + ret = drm_dev_register(dev, 0); + if (ret) + goto err_pdev; + + drm_fbdev_generic_setup(dev, 32); + + return 0; + +err_pdev: + pci_disable_device(pdev); +err_free: + drm_dev_put(dev); + return ret; +} + +/** + * loongson_drm_pci_unregister -- release drm device + * + * @pdev PCI device + */ +static void loongson_drm_pci_unregister(struct pci_dev *pdev) +{ + struct drm_device *dev = pci_get_drvdata(pdev); + loongson_drm_unload(dev); + drm_dev_put(dev); +} + +/* + * Suspend & resume. + */ +/* + * loongson_drm_suspend - initiate device suspend + * + * @pdev: drm dev pointer + * @state: suspend state + * + * Puts the hw in the suspend state (all asics). + * Returns 0 for success or an error on failure. + * Called at driver suspend. + */ +int loongson_drm_suspend(struct drm_device *dev, bool suspend) +{ + struct loongson_drm_device *ldev; + + if (dev == NULL || dev->dev_private == NULL) + return -ENODEV; + + ldev = dev->dev_private; + + drm_kms_helper_poll_disable(dev); + ldev->state = drm_atomic_helper_suspend(dev); + + pci_save_state(dev->pdev); + if (suspend) { + /* Shut down the device */ + pci_disable_device(dev->pdev); + pci_set_power_state(dev->pdev, PCI_D3hot); + } + + console_lock(); + drm_fb_helper_set_suspend(ldev->dev->fb_helper, 1); + console_unlock(); + + return 0; +} + +/* + * * loongson_drm_resume - initiate device suspend + * + * @pdev: drm dev pointer + * @state: suspend state + * + * Puts the hw in the suspend state (all asics). + * Returns 0 for success or an error on failure. + * Called at driver suspend. + */ + +int loongson_drm_resume(struct drm_device *dev, bool resume) +{ + struct loongson_drm_device *ldev = dev->dev_private; + + console_lock(); + + if (resume) { + pci_set_power_state(dev->pdev, PCI_D0); + pci_restore_state(dev->pdev); + if (pci_enable_device(dev->pdev)) { + console_unlock(); + return -1; + } + } + + /* blat the mode back in */ + drm_atomic_helper_resume(dev, ldev->state); + + drm_kms_helper_poll_enable(dev); + + drm_fb_helper_set_suspend(ldev->dev->fb_helper, 0); + + console_unlock(); + + return 0; +} + +/** + * loongson_drm_pm_suspend + * + * @dev pointer to the device + * + * Executed before putting the system into a sleep state in which the + * contents of main memory are preserved. + */ +static int loongson_drm_pm_suspend(struct device *dev) +{ + struct drm_device *drm_dev = dev_get_drvdata(dev); + + return loongson_drm_suspend(drm_dev, true); +} + +/** + * loongson_drm_pm_resume + * + * @dev pointer to the device + * + * Executed after waking the system up from a sleep state in which the + * contents of main memory were preserved. + */ +static int loongson_drm_pm_resume(struct device *dev) +{ + struct drm_device *drm_dev = dev_get_drvdata(dev); + + return loongson_drm_resume(drm_dev, true); +} + +/** + * loongson_drm_pm_freeze + * + * @dev pointer to device + * + * Hibernation-specific, executed before creating a hibernation image. + * Analogous to @suspend(), but it should not enable the device to signal + * wakeup events or change its power state. + */ +static int loongson_drm_pm_freeze(struct device *dev) +{ + struct drm_device *drm_dev = dev_get_drvdata(dev); + + return loongson_drm_suspend(drm_dev, false); +} + +/** + * loongson_drm_pm_draw + * + * @dev pointer to device + * + * Hibernation-specific, executed after creating a hibernation image OR + * if the creation of an image has failed. Also executed after a failing + * attempt to restore the contents of main memory from such an image. + * Undo the changes made by the preceding @freeze(), so the device can be + * operated in the same way as immediately before the call to @freeze(). + */ +static int loongson_drm_pm_thaw(struct device *dev) +{ + struct drm_device *drm_dev = dev_get_drvdata(dev); + + return loongson_drm_resume(drm_dev, false); +} + +#define loongson_drm_pm_poweroff loongson_drm_pm_freeze +#define loongson_drm_pm_restore loongson_drm_pm_resume + +/* + * * struct dev_pm_ops - device PM callbacks + * + *@suspend: Executed before putting the system into a sleep state in which the + * contents of main memory are preserved. + *@resume: Executed after waking the system up from a sleep state in which the + * contents of main memory were preserved. + *@freeze: Hibernation-specific, executed before creating a hibernation image. + * Analogous to @suspend(), but it should not enable the device to signal + * wakeup events or change its power state. The majority of subsystems + * (with the notable exception of the PCI bus type) expect the driver-level + * @freeze() to save the device settings in memory to be used by @restore() + * during the subsequent resume from hibernation. + *@thaw: Hibernation-specific, executed after creating a hibernation image OR + * if the creation of an image has failed. Also executed after a failing + * attempt to restore the contents of main memory from such an image. + * Undo the changes made by the preceding @freeze(), so the device can be + * operated in the same way as immediately before the call to @freeze(). + *@poweroff: Hibernation-specific, executed after saving a hibernation image. + * Analogous to @suspend(), but it need not save the device's settings in + * memory. + *@restore: Hibernation-specific, executed after restoring the contents of main + * memory from a hibernation image, analogous to @resume(). + */ +static const struct dev_pm_ops loongson_drm_pm_ops = { + .suspend = loongson_drm_pm_suspend, + .resume = loongson_drm_pm_resume, + .freeze = loongson_drm_pm_freeze, + .thaw = loongson_drm_pm_thaw, + .poweroff = loongson_drm_pm_poweroff, + .restore = loongson_drm_pm_restore, +}; + +/** + * loongson_drm_pci_driver -- pci driver structure + * + * .id_table : must be non-NULL for probe to be called + * .probe: New device inserted + * .remove: Device removed + * .resume: Device suspended + * .suspend: Device woken up + */ +static struct pci_driver loongson_drm_pci_driver = { + .name = DRIVER_NAME, + .id_table = loongson_drm_pci_devices, + .probe = loongson_drm_pci_register, + .remove = loongson_drm_pci_unregister, + .driver.pm = &loongson_drm_pm_ops, +}; + +/** + * loongson_drm_pci_init() -- kernel module init function + */ +static int __init loongson_drm_init(void) +{ + int ret; + struct pci_dev *pdev = NULL; + + /* If external graphics card exist, use it as default */ + while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev))) { + if (pdev->vendor == PCI_VENDOR_ID_ATI) + return 0; + if (pdev->vendor == 0x1a03) /* ASpeed */ + return 0; + } + + ret = pci_register_driver(&loongson_drm_pci_driver); + + return ret; +} + +/** + * loongson_drm_pci_exit() -- kernel module exit function + */ +static void __exit loongson_drm_exit(void) +{ + pci_unregister_driver(&loongson_drm_pci_driver); +} + +module_init(loongson_drm_init); +module_exit(loongson_drm_exit); + +MODULE_AUTHOR("Chen Zhu "); +MODULE_AUTHOR("Huacai Chen "); +MODULE_DESCRIPTION("Loongson LS7A DRM Driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/loongson/loongson_drv.h b/drivers/gpu/drm/loongson/loongson_drv.h new file mode 100644 index 000000000000..383c147c3ffa --- /dev/null +++ b/drivers/gpu/drm/loongson/loongson_drv.h @@ -0,0 +1,216 @@ +#ifndef __LOONGSON_DRV_H__ +#define __LOONGSON_DRV_H__ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include "loongson_i2c.h" +#include "loongson_vbios.h" + +#define to_loongson_crtc(x) container_of(x, struct loongson_crtc, base) +#define to_loongson_encoder(x) container_of(x, struct loongson_encoder, base) +#define to_loongson_connector(x) container_of(x, struct loongson_connector, base) + +#define LOONGSON_MAX_FB_HEIGHT 4096 +#define LOONGSON_MAX_FB_WIDTH 4096 + +#define CUR_WIDTH_SIZE 32 +#define CUR_HEIGHT_SIZE 32 + +#define LO_OFF 0 +#define HI_OFF 8 + +#define LS7A_PIX0_PLL (void *)TO_UNCAC(LS7A_CHIPCFG_REG_BASE + 0x04b0) +#define LS7A_PIX1_PLL (void *)TO_UNCAC(LS7A_CHIPCFG_REG_BASE + 0x04c0) + +#define CURIOSET_CORLOR 0x4607 +#define CURIOSET_POSITION 0x4608 +#define CURIOLOAD_ARGB 0x4609 +#define CURIOLOAD_IMAGE 0x460A +#define CURIOHIDE_SHOW 0x460B +#define FBEDID_GET 0X860C + +#define CRTC_REG_OFFSET 0x10 + +#define CFG_FMT GENMASK(2, 0) +#define CFG_FBSWITCH BIT(7) +#define CFG_ENABLE BIT(8) +#define CFG_PANELSWITCH BIT(9) +#define CFG_FBNUM_BIT 11 +#define CFG_FBNUM BIT(11) +#define CFG_GAMMAR BIT(12) +#define CFG_RESET BIT(20) + +#define FB_CFG_DVO_REG (0x1240) +#define FB_ADDR0_DVO_REG (0x1260) +#define FB_ADDR1_DVO_REG (0x1580) +#define FB_STRI_DVO_REG (0x1280) +#define FB_DITCFG_DVO_REG (0x1360) +#define FB_DITTAB_LO_DVO_REG (0x1380) +#define FB_DITTAB_HI_DVO_REG (0x13a0) +#define FB_PANCFG_DVO_REG (0x13c0) +#define FB_PANTIM_DVO_REG (0x13e0) +#define FB_HDISPLAY_DVO_REG (0x1400) +#define FB_HSYNC_DVO_REG (0x1420) +#define FB_VDISPLAY_DVO_REG (0x1480) +#define FB_VSYNC_DVO_REG (0x14a0) +#define FB_GAMINDEX_DVO_REG (0x14e0) +#define FB_GAMDATA_DVO_REG (0x1500) + +#define FB_CUR_CFG_REG (0x1520) +#define FB_CUR_ADDR_REG (0x1530) +#define FB_CUR_LOC_ADDR_REG (0x1540) +#define FB_CUR_BACK_REG (0x1550) +#define FB_CUR_FORE_REG (0x1560) +#define FB_INT_REG (0x1570) + +#define INT_DVO1_VSYNC 0 +#define INT_DVO1_HSYNC 1 +#define INT_DVO0_VSYNC 2 +#define INT_DVO0_HSYNC 3 +#define INT_CURSOR_FB_END 4 +#define INT_DVO1_FB_END 5 +#define INT_DVO0_FB_END 6 + +#define MAX_CRTC 2 + +struct pix_pll { + unsigned int l2_div; + unsigned int l1_loopc; + unsigned int l1_frefc; +}; + +struct config_reg { + u8 dev_addr; + u8 reg; + u8 value; +} __packed; + +struct cfg_encoder { + u8 reg_num; + u32 hdisplay; + u32 vdisplay; + struct config_reg config_regs[256]; +}; + +struct loongson_crtc { + struct drm_crtc base; + unsigned int crtc_id; + uint32_t cfg_reg; + struct drm_plane *primary; /* Primary panel belongs to this crtc */ + struct drm_pending_vblank_event *event; + struct loongson_drm_device *ldev; +}; + +struct loongson_encoder { + struct drm_encoder base; + u32 i2c_id; + int encoder_id; + struct loongson_i2c *i2c; + struct loongson_crtc *lcrtc; /* Binding crtc, not actual one */ +}; + +struct loongson_connector { + struct drm_connector base; + u16 id; + u32 type; + u16 i2c_id; + u16 hotplug; + u16 edid_method; + u8 *vbios_edid; + struct loongson_i2c *i2c; + struct loongson_drm_device *ldev; +}; + +struct loongson_mode_info { + bool mode_config_initialized; + struct loongson_crtc *crtc; + struct loongson_encoder *encoder; + struct loongson_connector *connector; +}; + +struct loongson_drm_device { + struct drm_device *dev; + struct drm_atomic_state *state; + + resource_size_t mmio_base; + resource_size_t mmio_size; + void __iomem *mmio; + uint32_t int_reg; + + struct drm_display_mode mode; + struct loongson_mode_info mode_info[2]; + struct drm_gem_cma_object *cursor; + + int num_crtc; + struct loongson_crtc lcrtc[MAX_CRTC]; + struct loongson_i2c i2c_bus[DC_I2C_BUS_MAX]; + + void *vbios; + struct list_head desc_list; + + bool inited; + bool suspended; + bool cursor_showed; + int cursor_crtc_id; + + int connector_active0; + int connector_active1; +}; + +extern spinlock_t loongson_reglock; + +static inline bool clone_mode(struct loongson_drm_device *ldev) +{ + if (ldev->num_crtc < 2) + return true; + if (ldev->mode_info[0].connector->base.status != connector_status_connected) + return true; + if (ldev->mode_info[1].connector->base.status != connector_status_connected) + return true; + if (ldev->lcrtc[0].base.x || ldev->lcrtc[0].base.y) + return false; + if (ldev->lcrtc[1].base.x || ldev->lcrtc[1].base.y) + return false; + + return true; +} + +int loongson_irq_enable_vblank(struct drm_device *dev, unsigned int crtc_id); +void loongson_irq_disable_vblank(struct drm_device *dev, unsigned int crtc_id); +irqreturn_t loongson_irq_handler(int irq, void *arg); +void loongson_irq_preinstall(struct drm_device *dev); +int loongson_irq_postinstall(struct drm_device *dev); +void loongson_irq_uninstall(struct drm_device *dev); + +u32 crtc_read(struct loongson_crtc *lcrtc, u32 offset); +void crtc_write(struct loongson_crtc *lcrtc, u32 offset, u32 val); + +int loongson_gpio_init(struct loongson_drm_device *ldev); +int loongson_crtc_init(struct loongson_drm_device *ldev); +struct drm_encoder *loongson_encoder_init(struct drm_device *dev, unsigned int index); +struct drm_connector *loongson_connector_init(struct drm_device *dev, unsigned int index); + +int loongson_fbdev_init(struct loongson_drm_device *ldev); +void loongson_fbdev_fini(struct loongson_drm_device *ldev); +void loongson_fbdev_restore_mode(struct loongson_drm_device *ldev); + +int loongson_drm_drm_suspend(struct drm_device *dev, bool suspend, + bool fbcon, bool freeze); +int loongson_drm_drm_resume(struct drm_device *dev, bool resume, bool fbcon); + /* loongson_cursor.c */ +int loongson_crtc_cursor_set2(struct drm_crtc *crtc, struct drm_file *file_priv, + uint32_t handle, uint32_t width, uint32_t height, int32_t hot_x, int32_t hot_y); +int loongson_crtc_cursor_move(struct drm_crtc *crtc, int x, int y); + +#endif diff --git a/drivers/gpu/drm/loongson/loongson_encoder.c b/drivers/gpu/drm/loongson/loongson_encoder.c new file mode 100644 index 000000000000..0b3d6b4d388c --- /dev/null +++ b/drivers/gpu/drm/loongson/loongson_encoder.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2018 Loongson Technology Co., Ltd. + * Copyright (C) 2019 Lemote Inc. + * Authors: + * Chen Zhu + * Yaling Fang + * Dandan Zhang + * Huacai Chen + * Jiaxun Yang + */ + +#include +#include "loongson_drv.h" + +/** + * loongson_encoder_destroy + * + * @encoder: encoder object + * + * Clean up encoder resources + */ +static void loongson_encoder_destroy(struct drm_encoder *encoder) +{ + struct loongson_encoder *loongson_encoder = to_loongson_encoder(encoder); + drm_encoder_cleanup(encoder); + kfree(loongson_encoder); +} + +static int loongson_encoder_atomic_check(struct drm_encoder *encoder, + struct drm_crtc_state *crtc_state, + struct drm_connector_state *conn_state) +{ + return 0; +} + +static void loongson_encoder_atomic_mode_set(struct drm_encoder *encoder, + struct drm_crtc_state *crtc_state, + struct drm_connector_state *conn_state) +{ + unsigned long flags; + struct loongson_encoder *lenc = to_loongson_encoder(encoder); + struct loongson_crtc *lcrtc_origin = lenc->lcrtc; + struct loongson_crtc *lcrtc_current = to_loongson_crtc(crtc_state->crtc); + + if (lcrtc_origin->crtc_id != lcrtc_current->crtc_id) + lcrtc_origin->cfg_reg |= CFG_PANELSWITCH; + else + lcrtc_origin->cfg_reg &= ~CFG_PANELSWITCH; + + spin_lock_irqsave(&loongson_reglock, flags); + crtc_write(lcrtc_origin, FB_CFG_DVO_REG, lcrtc_origin->cfg_reg); + spin_unlock_irqrestore(&loongson_reglock, flags); +} + +/** + * These provide the minimum set of functions required to handle a encoder + * + * Helper operations for encoders + */ +static const struct drm_encoder_helper_funcs loongson_encoder_helper_funcs = { + .atomic_check = loongson_encoder_atomic_check, + .atomic_mode_set = loongson_encoder_atomic_mode_set, +}; + +/** + * These provide the minimum set of functions required to handle a encoder + * + * Encoder controls,encoder sit between CRTCs and connectors + */ +static const struct drm_encoder_funcs loongson_encoder_encoder_funcs = { + .destroy = loongson_encoder_destroy, +}; + + +/** + * loongson_encoder_init + * + * @dev: point to the drm_device structure + * + * Init encoder + */ +struct drm_encoder *loongson_encoder_init(struct drm_device *dev, unsigned int index) +{ + struct drm_encoder *encoder; + struct loongson_encoder *loongson_encoder; + struct loongson_drm_device *ldev = dev->dev_private; + + loongson_encoder = kzalloc(sizeof(struct loongson_encoder), GFP_KERNEL); + if (!loongson_encoder) + return NULL; + + loongson_encoder->encoder_id = index; + loongson_encoder->i2c = &ldev->i2c_bus[index]; + loongson_encoder->lcrtc = &ldev->lcrtc[index]; + encoder = &loongson_encoder->base; + encoder->possible_crtcs = BIT(1) | BIT(0); + encoder->possible_clones = BIT(1) | BIT(0); + + drm_encoder_helper_add(encoder, &loongson_encoder_helper_funcs); + drm_encoder_init(dev, encoder, &loongson_encoder_encoder_funcs, + DRM_MODE_ENCODER_DAC, NULL); + + return encoder; +} diff --git a/drivers/gpu/drm/loongson/loongson_i2c.c b/drivers/gpu/drm/loongson/loongson_i2c.c new file mode 100644 index 000000000000..55a59ccfd78e --- /dev/null +++ b/drivers/gpu/drm/loongson/loongson_i2c.c @@ -0,0 +1,189 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include "loongson_i2c.h" +#include "loongson_drv.h" +#include "loongson_vbios.h" + +u32 ls7a_mm_rreg(struct loongson_drm_device *ldev, u32 offset) +{ + return readl(ldev->mmio + offset); +} + +void ls7a_mm_wreg(struct loongson_drm_device *ldev, u32 offset, u32 val) +{ + writel(val, ldev->mmio + offset); +} + +static inline void __dc_gpio_set_dir(struct loongson_drm_device *ldev, + unsigned int pin, int input) +{ + u32 temp; + + temp = ls7a_mm_rreg(ldev, LS7A_DC_GPIO_CFG_OFFSET); + if (input) + temp |= 1UL << pin; + else + temp &= ~(1UL << pin); + ls7a_mm_wreg(ldev, LS7A_DC_GPIO_CFG_OFFSET, temp); +} + +static void __dc_gpio_set_val(struct loongson_drm_device *ldev, unsigned int pin, + int high) +{ + u32 temp; + + temp = ls7a_mm_rreg(ldev, LS7A_DC_GPIO_OUT_OFFSET); + if (high) + temp |= 1UL << pin; + else + temp &= ~(1UL << pin); + ls7a_mm_wreg(ldev, LS7A_DC_GPIO_OUT_OFFSET, temp); +} + +static void loongson_i2c_set_data(void *i2c, int value) +{ + struct loongson_i2c *li2c = i2c; + struct loongson_drm_device *ldev = li2c->ldev; + unsigned int pin = li2c->data; + + if (value) + __dc_gpio_set_dir(ldev, pin, 1); + else { + __dc_gpio_set_val(ldev, pin, 0); + __dc_gpio_set_dir(ldev, pin, 0); + } +} + +static void loongson_i2c_set_clock(void *i2c, int value) +{ + struct loongson_i2c *li2c = i2c; + struct loongson_drm_device *ldev = li2c->ldev; + unsigned int pin = li2c->clock; + + if (value) + __dc_gpio_set_dir(ldev, pin, 1); + else { + __dc_gpio_set_val(ldev, pin, 0); + __dc_gpio_set_dir(ldev, pin, 0); + } +} + +static int loongson_i2c_get_data(void *i2c) +{ + int val; + struct loongson_i2c *li2c = i2c; + struct loongson_drm_device *ldev = li2c->ldev; + unsigned int pin = li2c->data; + + val = ls7a_mm_rreg(ldev, LS7A_DC_GPIO_IN_OFFSET); + + return (val >> pin) & 1; +} + +static int loongson_i2c_get_clock(void *i2c) +{ + int val; + struct loongson_i2c *li2c = i2c; + struct loongson_drm_device *ldev = li2c->ldev; + unsigned int pin = li2c->clock; + + val = ls7a_mm_rreg(ldev, LS7A_DC_GPIO_IN_OFFSET); + + return (val >> pin) & 1; +} + +static int loongson_i2c_create(struct loongson_drm_device *ldev, + struct loongson_i2c *li2c, const char *name) +{ + int ret; + unsigned int i2c_num; + struct i2c_adapter *i2c_adapter; + struct i2c_algo_bit_data *i2c_algo_data; + + i2c_num = li2c->i2c_id; + i2c_adapter = kzalloc(sizeof(struct i2c_adapter), GFP_KERNEL); + if (!i2c_adapter) + return -ENOMEM; + + i2c_algo_data = kzalloc(sizeof(struct i2c_algo_bit_data), GFP_KERNEL); + if (!i2c_algo_data) { + ret = -ENOMEM; + goto free_adapter; + } + + i2c_adapter->owner = THIS_MODULE; + i2c_adapter->class = I2C_CLASS_DDC; + i2c_adapter->algo_data = i2c_algo_data; + i2c_adapter->dev.parent = ldev->dev->dev; + i2c_adapter->nr = -1; + snprintf(i2c_adapter->name, sizeof(i2c_adapter->name), "%s%d", name, + i2c_num); + + li2c->data = i2c_num * 2; + li2c->clock = i2c_num * 2 + 1; + DRM_INFO("Created dc-i2c%d, sda=%d, scl=%d\n", i2c_num, li2c->data, + li2c->clock); + + i2c_algo_data->setsda = loongson_i2c_set_data; + i2c_algo_data->setscl = loongson_i2c_set_clock; + i2c_algo_data->getsda = loongson_i2c_get_data; + i2c_algo_data->getscl = loongson_i2c_get_clock; + i2c_algo_data->udelay = DC_I2C_TON; + i2c_algo_data->timeout = usecs_to_jiffies(2200); /* from VESA */ + + ret = i2c_bit_add_numbered_bus(i2c_adapter); + if (ret) + goto free_algo_data; + + li2c->adapter = i2c_adapter; + i2c_algo_data->data = li2c; + i2c_set_adapdata(li2c->adapter, li2c); + li2c->init = true; + li2c->ldev = ldev; + DRM_INFO("Register i2c algo-bit adapter [%s]\n", i2c_adapter->name); + + return 0; + +free_algo_data: + DRM_ERROR("Failed to register i2c adapter %s\n", i2c_adapter->name); + kfree(i2c_algo_data); +free_adapter: + kfree(i2c_adapter); + + return ret; +} + +int loongson_gpio_init(struct loongson_drm_device *ldev) +{ + int pin; + + /* set gpio dir output 0-3 */ + for (pin = 0; pin < 4; pin++) { + __dc_gpio_set_val(ldev, pin, 0); + __dc_gpio_set_dir(ldev, pin, 0); + } + + return 0; +} + +int loongson_i2c_init(struct loongson_drm_device *ldev) +{ + int ret, i; + + ret = get_loongson_i2c(ldev); + if (ret != true) { + DRM_ERROR("Failed to get i2c_id form vbios\n"); + return -ENODEV; + } + + for (i = 0; i < DC_I2C_BUS_MAX; i++) { + if (!ldev->i2c_bus[i].use) + continue; + ldev->i2c_bus[i].i2c_id = i; + ret = loongson_i2c_create(ldev, &ldev->i2c_bus[i], DC_I2C_NAME); + if (ret) + return ret; + } + + return 0; +} diff --git a/drivers/gpu/drm/loongson/loongson_i2c.h b/drivers/gpu/drm/loongson/loongson_i2c.h new file mode 100644 index 000000000000..b8ca0ea45085 --- /dev/null +++ b/drivers/gpu/drm/loongson/loongson_i2c.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (c) 2020 Loongson Technology Co., Ltd. + * Authors: + * sunhao + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#ifndef __LOONGSON_I2C_H__ +#define __LOONGSON_I2C_H__ + +#include +#include +#include + +/* Modify this marco to config i2c bus speed, bus_freq = 500 / T */ +/* Eg: i2c_bus_freq=100k when T=5 */ +#define DC_I2C_TON 1 +#define DC_I2C_NAME "ls7a_dc_i2c" +#define DC_I2C_BUS_MAX 2 + +/* Loongson 7A display controller proprietary GPIOs */ +#define LS7A_DC_GPIO_CFG_OFFSET (0x1660) +#define LS7A_DC_GPIO_IN_OFFSET (0x1650) +#define LS7A_DC_GPIO_OUT_OFFSET (0x1650) + +struct loongson_drm_device; + +struct loongson_i2c { + struct loongson_drm_device *ldev; + struct i2c_client *ddc_client; + struct i2c_adapter *adapter; + u32 data, clock; + bool use, init; + u32 i2c_id; +}; + +int loongson_i2c_init(struct loongson_drm_device *ldev); + +#endif diff --git a/drivers/gpu/drm/loongson/loongson_irq.c b/drivers/gpu/drm/loongson/loongson_irq.c new file mode 100644 index 000000000000..f6d5642d0f54 --- /dev/null +++ b/drivers/gpu/drm/loongson/loongson_irq.c @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2018 Loongson Technology Co., Ltd. + * Authors: + * Chen Zhu + * Yaling Fang + * Dandan Zhang + * Huacai Chen + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include "loongson_drv.h" + +irqreturn_t loongson_irq_handler(int irq, void *arg) +{ + unsigned int val; + struct drm_device *dev = (struct drm_device *) arg; + struct loongson_drm_device *ldev = dev->dev_private; + volatile void __iomem *base = ldev->mmio; + + val = readl(base + FB_INT_REG); + spin_lock(&loongson_reglock); + writel(0x0, base + FB_INT_REG); + spin_unlock(&loongson_reglock); + + if (val & BIT(INT_DVO0_FB_END)) { + drm_crtc_handle_vblank(&ldev->lcrtc[0].base); + } + + if (val & BIT(INT_DVO1_FB_END)) { + drm_crtc_handle_vblank(&ldev->lcrtc[1].base); + } + + spin_lock(&loongson_reglock); + writel(ldev->int_reg, base + FB_INT_REG); + spin_unlock(&loongson_reglock); + + return IRQ_HANDLED; +} + +void loongson_irq_preinstall(struct drm_device *dev) +{ + unsigned long flags; + struct loongson_drm_device *ldev = dev->dev_private; + volatile void __iomem *base = ldev->mmio; + + /* disable interupt */ + spin_lock_irqsave(&loongson_reglock, flags); + writel(0x0000 << 16, base + FB_INT_REG); + spin_unlock_irqrestore(&loongson_reglock, flags); +} + +int loongson_irq_postinstall(struct drm_device *dev) +{ + return 0; +} + +void loongson_irq_uninstall(struct drm_device *dev) +{ + unsigned long flags; + struct loongson_drm_device *ldev = dev->dev_private; + volatile void __iomem *base = ldev->mmio; + + /* disable interupt */ + spin_lock_irqsave(&loongson_reglock, flags); + writel(0x0000 << 16, base + FB_INT_REG); + spin_unlock_irqrestore(&loongson_reglock, flags); +} diff --git a/drivers/gpu/drm/loongson/loongson_vbios.c b/drivers/gpu/drm/loongson/loongson_vbios.c new file mode 100644 index 000000000000..4a67762fce5f --- /dev/null +++ b/drivers/gpu/drm/loongson/loongson_vbios.c @@ -0,0 +1,836 @@ +/* + * Copyright (c) 2018 Loongson Technology Co., Ltd. + * Authors: + * Chen Zhu + * Yaling Fang + * Dandan Zhang + * Huacai Chen + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include "loongson_drv.h" +#include "loongson_vbios.h" + +#define VBIOS_START 0x1000 +#define VBIOS_SIZE 0x40000 +#define VBIOS_DESC_OFFSET 0x6000 + +static void *get_vbios_from_bios(void) +{ + void *vbios = NULL; + struct loongson_vbios *header; + + vbios = kzalloc(256 * 1024, GFP_KERNEL); + if (!vbios) + return vbios; + + if (!loongson_sysconf.vgabios_addr) { + header = vbios; + header->crtc_num = 2; + return vbios; + } + + memcpy(vbios, (void *)loongson_sysconf.vgabios_addr, VBIOS_SIZE); + + DRM_INFO("Get vbios from bios Success.\n"); + + return vbios; +} + +static u32 get_vbios_version(struct loongson_vbios *vbios) +{ + u32 minor, major, version; + + minor = vbios->version_minor; + major = vbios->version_major; + version = major * 10 + minor; + + return version; +} + +static bool parse_vbios_i2c(struct desc_node *this, struct vbios_cmd *cmd) +{ + bool ret = true; + int i, num, size; + struct vbios_i2c *i2c; + struct vbios_i2c *vbios_i2c = NULL; + struct loongson_i2c *val = (struct loongson_i2c *)cmd->res; + + size = this->desc->size; + vbios_i2c = kzalloc(size, GFP_KERNEL); + if (!vbios_i2c) + return false; + + memset(vbios_i2c, 0xff, size); + memcpy(vbios_i2c, this->data, size); + num = size / sizeof(*vbios_i2c); + + i2c = vbios_i2c; + for (i = 0; (i < num && i < DC_I2C_BUS_MAX); i++) { + val->i2c_id = (u32)i2c->id; + val->use = true; + val++; + i2c++; + } + + kfree(vbios_i2c); + return ret; +} + +static bool parse_vbios_crtc(struct desc_node *this, struct vbios_cmd *cmd) +{ + bool ret = true; + u64 request = (u64)cmd->req; + u32 *val = (u32 *)cmd->res; + struct vbios_crtc crtc; + + memset(&crtc, 0xff, sizeof(crtc)); + memcpy(&crtc, this->data, min_t(u32, this->desc->size, sizeof(crtc))); + + switch (request) { + case VBIOS_CRTC_ID: + *val = crtc.crtc_id; + break; + case VBIOS_CRTC_ENCODER_ID: + *val = crtc.encoder_id; + break; + case VBIOS_CRTC_MAX_FREQ: + *val = crtc.max_freq; + break; + case VBIOS_CRTC_MAX_WIDTH: + *val = crtc.max_width; + break; + case VBIOS_CRTC_MAX_HEIGHT: + *val = crtc.max_height; + break; + case VBIOS_CRTC_IS_VB_TIMING: + *val = crtc.is_vb_timing; + break; + default: + ret = false; + break; + } + + return ret; +} + +static bool parse_vbios_encoder(struct desc_node *this, struct vbios_cmd *cmd) +{ + bool ret = true; + u64 request = (u64)cmd->req; + u32 *val = (u32 *)cmd->res; + struct vbios_encoder encoder; + + memset(&encoder, 0xff, sizeof(encoder)); + memcpy(&encoder, this->data, + min_t(u32, this->desc->size, sizeof(encoder))); + + switch (request) { + case VBIOS_ENCODER_I2C_ID: + *val = encoder.i2c_id; + break; + case VBIOS_ENCODER_CONNECTOR_ID: + *val = encoder.connector_id; + break; + case VBIOS_ENCODER_TYPE: + *val = encoder.type; + break; + case VBIOS_ENCODER_CONFIG_TYPE: + *val = encoder.config_type; + break; + case VBIOS_ENCODER_CHIP: + *val = encoder.chip; + break; + case VBIOS_ENCODER_CHIP_ADDR: + *val = encoder.chip_addr; + break; + default: + ret = false; + break; + } + + return ret; +} + +static bool parse_vbios_cfg_encoder(struct desc_node *this, + struct vbios_cmd *cmd) +{ + bool ret = true; + u64 request = (u64)cmd->req; + u32 *val = (u32 *)cmd->res; + struct cfg_encoder *cfg_encoder; + struct cfg_encoder *cfg; + struct vbios_cfg_encoder *vbios_cfg_encoder; + u32 num, size, i = 0; + + vbios_cfg_encoder = (struct vbios_cfg_encoder *)this->data; + size = sizeof(struct vbios_cfg_encoder); + num = this->desc->size / size; + + switch (request) { + case VBIOS_ENCODER_CONFIG_PARAM: + cfg_encoder = (struct cfg_encoder *)kzalloc( + sizeof(struct cfg_encoder) * num, GFP_KERNEL); + cfg = cfg_encoder; + for (i = 0; i < num; i++) { + cfg->reg_num = vbios_cfg_encoder->reg_num; + cfg->hdisplay = vbios_cfg_encoder->hdisplay; + cfg->vdisplay = vbios_cfg_encoder->vdisplay; + memcpy(&cfg->config_regs, + &vbios_cfg_encoder->config_regs, + sizeof(struct vbios_conf_reg) * 256); + + cfg++; + vbios_cfg_encoder++; + } + cmd->res = (void *)cfg_encoder; + break; + case VBIOS_ENCODER_CONFIG_NUM: + *val = num; + break; + default: + ret = false; + break; + } + + return ret; +} + +static bool parse_vbios_connector(struct desc_node *this, struct vbios_cmd *cmd) +{ + bool ret = true; + u64 request = (u64)cmd->req; + u32 *val = (u32 *)cmd->res; + struct vbios_connector connector; + + memset(&connector, 0xff, sizeof(connector)); + memcpy(&connector, this->data, + min_t(u32, this->desc->size, sizeof(connector))); + + switch (request) { + case VBIOS_CONNECTOR_I2C_ID: + *val = connector.i2c_id; + break; + case VBIOS_CONNECTOR_INTERNAL_EDID: + memcpy((u8 *)(ulong)val, connector.internal_edid, + EDID_LENGTH * 2); + break; + case VBIOS_CONNECTOR_TYPE: + *val = connector.type; + break; + case VBIOS_CONNECTOR_HOTPLUG: + *val = connector.hotplug; + break; + case VBIOS_CONNECTOR_EDID_METHOD: + *val = connector.edid_method; + break; + case VBIOS_CONNECTOR_IRQ_GPIO: + *val = connector.irq_gpio; + break; + case VBIOS_CONNECTOR_IRQ_PLACEMENT: + *val = connector.gpio_placement; + break; + default: + ret = false; + break; + } + + return ret; +} + +static bool parse_vbios_backlight(struct desc_node *this, struct vbios_cmd *cmd) +{ + return 0; +} + +static bool parse_vbios_pwm(struct desc_node *this, struct vbios_cmd *cmd) +{ + bool ret = true; + u64 request = (u64)cmd->req; + u32 *val = (u32 *)cmd->res; + struct vbios_pwm *pwm = (struct vbios_pwm *)this->data; + + switch (request) { + case VBIOS_PWM_ID: + *val = pwm->pwm; + break; + case VBIOS_PWM_PERIOD: + *val = pwm->peroid; + break; + case VBIOS_PWM_POLARITY: + *val = pwm->polarity; + break; + default: + ret = false; + break; + } + + return ret; +} + +static bool parse_vbios_header(struct desc_node *this, struct vbios_cmd *cmd) +{ + return true; +} + +static bool parse_vbios_default(struct desc_node *this, struct vbios_cmd *cmd) +{ + struct vbios_desc *vb_desc; + + vb_desc = this->desc; + DRM_WARN("Current descriptor[T-%d][V-%d] cannot be interprete.\n", + vb_desc->type, vb_desc->ver); + return false; +} + +#define FUNC(t, v, f) \ + { \ + .type = t, .ver = v, .func = f, \ + } + +static struct desc_func tables[] = { + FUNC(desc_header, ver_v1, parse_vbios_header), + FUNC(desc_i2c, ver_v1, parse_vbios_i2c), + FUNC(desc_crtc, ver_v1, parse_vbios_crtc), + FUNC(desc_encoder, ver_v1, parse_vbios_encoder), + FUNC(desc_connector, ver_v1, parse_vbios_connector), + FUNC(desc_cfg_encoder, ver_v1, parse_vbios_cfg_encoder), + FUNC(desc_backlight, ver_v1, parse_vbios_backlight), + FUNC(desc_pwm, ver_v1, parse_vbios_pwm), +}; + +static inline parse_func *get_parse_func(struct vbios_desc *vb_desc) +{ + int i; + u32 type = vb_desc->type; + u32 ver = vb_desc->ver; + parse_func *func = parse_vbios_default; + u32 tt_num = ARRAY_SIZE(tables); + + for (i = 0; i < tt_num; i++) { + if ((tables[i].ver == ver) && (tables[i].type == type)) { + func = tables[i].func; + break; + } + } + + return func; +} + +static inline u32 insert_desc_list(struct loongson_drm_device *ldev, + struct vbios_desc *vb_desc) +{ + struct desc_node *node; + parse_func *func = NULL; + + WARN_ON(!ldev || !vb_desc); + node = (struct desc_node *)kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return -ENOMEM; + + func = get_parse_func(vb_desc); + node->parse = func; + node->desc = (void *)vb_desc; + node->data = ((u8 *)ldev->vbios + vb_desc->offset); + list_add_tail(&node->head, &ldev->desc_list); + + return 0; +} + +static inline void free_desc_list(struct loongson_drm_device *ldev) +{ + struct desc_node *node, *tmp; + + list_for_each_entry_safe (node, tmp, &ldev->desc_list, head) { + list_del(&node->head); + kfree(node); + } +} + +static u32 parse_vbios_desc(struct loongson_drm_device *ldev) +{ + u32 ret = 0; + struct vbios_desc *desc; + enum desc_type type = 0; + u8 *vbios = (u8 *)ldev->vbios; + + WARN_ON(!vbios); + + desc = (struct vbios_desc *)(vbios + VBIOS_DESC_OFFSET); + while (1) { + type = desc->type; + if (type == desc_max) + break; + + ret = insert_desc_list(ldev, desc); + if (ret) + DRM_DEBUG_KMS("Parse T-%d V-%d failed[%d]\n", desc->ver, + desc->type, ret); + + desc++; + } + + return ret; +} + +static inline struct desc_node *get_desc_node(struct loongson_drm_device *ldev, + u16 type, u8 index) +{ + struct desc_node *node, *tmp; + struct vbios_desc *vb_desc; + + list_for_each_entry_safe (node, tmp, &ldev->desc_list, head) { + vb_desc = node->desc; + if (vb_desc->type == type && vb_desc->index == index) + return node; + } + + return NULL; +} + +static bool vbios_get_data(struct loongson_drm_device *ldev, struct vbios_cmd *cmd) +{ + struct desc_node *node; + + WARN_ON(!cmd); + + node = get_desc_node(ldev, cmd->type, cmd->index); + if (node && node->parse) + return node->parse(node, cmd); + + DRM_DEBUG_DRIVER("Failed to get node(%d,%d)\n", cmd->type, cmd->index); + + return false; +} + +u32 get_connector_type(struct loongson_drm_device *ldev, u32 index) +{ + u32 type = -1; + bool ret = false; + struct vbios_cmd vbt_cmd; + + vbt_cmd.index = index; + vbt_cmd.type = desc_connector; + vbt_cmd.req = (void *)(ulong)VBIOS_CONNECTOR_TYPE; + vbt_cmd.res = (void *)(ulong)&type; + ret = vbios_get_data(ldev, &vbt_cmd); + if (!ret) + type = -1; + + return type; +} + +u16 get_connector_i2cid(struct loongson_drm_device *ldev, u32 index) +{ + u16 i2c_id = -1; + bool ret = false; + struct vbios_cmd vbt_cmd; + + vbt_cmd.index = index; + vbt_cmd.type = desc_connector; + vbt_cmd.req = (void *)(ulong)VBIOS_CONNECTOR_I2C_ID; + vbt_cmd.res = (void *)(ulong)&i2c_id; + ret = vbios_get_data(ldev, &vbt_cmd); + if (!ret) + i2c_id = -1; + + return i2c_id; +} + +u32 get_connector_irq_gpio(struct loongson_drm_device *ldev, u32 index) +{ + int ret; + u32 irq_gpio; + struct vbios_cmd vbt_cmd; + + vbt_cmd.index = index; + vbt_cmd.type = desc_connector; + vbt_cmd.req = (void *)(ulong)VBIOS_CONNECTOR_IRQ_GPIO; + vbt_cmd.res = (void *)(ulong)&irq_gpio; + ret = vbios_get_data(ldev, &vbt_cmd); + if (!ret) + return -1; + + return irq_gpio; +} + +enum gpio_placement get_connector_gpio_placement(struct loongson_drm_device *ldev, + u32 index) +{ + int ret; + enum gpio_placement irq_placement; + struct vbios_cmd vbt_cmd; + + vbt_cmd.index = index; + vbt_cmd.type = desc_connector; + vbt_cmd.req = (void *)(ulong)VBIOS_CONNECTOR_IRQ_PLACEMENT; + vbt_cmd.res = (void *)(ulong)&irq_placement; + ret = vbios_get_data(ldev, &vbt_cmd); + if (!ret) + return -1; + + return irq_placement; +} + +u16 get_hotplug_mode(struct loongson_drm_device *ldev, u32 index) +{ + u16 mode = -1; + bool ret = false; + struct vbios_cmd vbt_cmd; + + vbt_cmd.index = index; + vbt_cmd.type = desc_connector; + vbt_cmd.req = (void *)(ulong)VBIOS_CONNECTOR_HOTPLUG; + vbt_cmd.res = (void *)(ulong)&mode; + ret = vbios_get_data(ldev, &vbt_cmd); + if (!ret) + mode = -1; + + return mode; +} + +u16 get_edid_method(struct loongson_drm_device *ldev, u32 index) +{ + bool ret = false; + u16 method = via_null; + struct vbios_cmd vbt_cmd; + + vbt_cmd.index = index; + vbt_cmd.type = desc_connector; + vbt_cmd.req = (void *)(ulong)VBIOS_CONNECTOR_EDID_METHOD; + vbt_cmd.res = (void *)(ulong)&method; + ret = vbios_get_data(ldev, &vbt_cmd); + if (!ret) + method = via_null; + + return method; +} + +u8 *get_vbios_edid(struct loongson_drm_device *ldev, u32 index) +{ + u8 *edid = NULL; + bool ret = false; + struct vbios_cmd vbt_cmd; + + edid = kzalloc(sizeof(u8) * EDID_LENGTH * 2, GFP_KERNEL); + if (!edid) + return edid; + + vbt_cmd.index = index; + vbt_cmd.type = desc_connector; + vbt_cmd.req = (void *)(ulong)VBIOS_CONNECTOR_INTERNAL_EDID; + vbt_cmd.res = (void *)(ulong)edid; + ret = vbios_get_data(ldev, &vbt_cmd); + if (!ret) + return NULL; + + return edid; +} + +u32 get_vbios_pwm(struct loongson_drm_device *ldev, u32 index, u16 request) +{ + u32 value = -1; + bool ret = false; + struct vbios_cmd vbt_cmd; + + vbt_cmd.index = index; + vbt_cmd.type = desc_pwm; + vbt_cmd.req = (void *)(ulong)request; + vbt_cmd.res = (void *)(ulong)&value; + ret = vbios_get_data(ldev, &vbt_cmd); + if (!ret) + value = 0xffffffff; + + return value; +} + +u32 get_crtc_id(struct loongson_drm_device *ldev, u32 index) +{ + u32 crtc_id = 0; + bool ret = false; + struct vbios_cmd vbt_cmd; + + vbt_cmd.index = index; + vbt_cmd.type = desc_crtc; + vbt_cmd.req = (void *)(ulong)VBIOS_CRTC_ID; + vbt_cmd.res = (void *)(ulong)&crtc_id; + ret = vbios_get_data(ldev, &vbt_cmd); + if (!ret) + crtc_id = 0; + + return crtc_id; +} + +u32 get_crtc_max_freq(struct loongson_drm_device *ldev, u32 index) +{ + bool ret = false; + u32 max_freq = 0; + struct vbios_cmd vbt_cmd; + + vbt_cmd.index = index; + vbt_cmd.type = desc_crtc; + vbt_cmd.req = (void *)(ulong)VBIOS_CRTC_MAX_FREQ; + vbt_cmd.res = (void *)(ulong)&max_freq; + ret = vbios_get_data(ldev, &vbt_cmd); + if (!ret) + max_freq = 0; + + return max_freq; +} + +u32 get_crtc_max_width(struct loongson_drm_device *ldev, u32 index) +{ + bool ret = false; + u32 max_width = 0; + struct vbios_cmd vbt_cmd; + + vbt_cmd.index = index; + vbt_cmd.type = desc_crtc; + vbt_cmd.req = (void *)(ulong)VBIOS_CRTC_MAX_WIDTH; + vbt_cmd.res = (void *)(ulong)&max_width; + ret = vbios_get_data(ldev, &vbt_cmd); + if (!ret) + max_width = LOONGSON_MAX_FB_WIDTH; + + return max_width; +} + +u32 get_crtc_max_height(struct loongson_drm_device *ldev, u32 index) +{ + bool ret = false; + u32 max_height = 0; + struct vbios_cmd vbt_cmd; + + vbt_cmd.index = index; + vbt_cmd.type = desc_crtc; + vbt_cmd.req = (void *)(ulong)VBIOS_CRTC_MAX_HEIGHT; + vbt_cmd.res = (void *)(ulong)&max_height; + ret = vbios_get_data(ldev, &vbt_cmd); + if (!ret) + max_height = LOONGSON_MAX_FB_HEIGHT; + + return max_height; +} + +u32 get_crtc_encoder_id(struct loongson_drm_device *ldev, u32 index) +{ + bool ret = false; + u32 encoder_id = 0; + struct vbios_cmd vbt_cmd; + + vbt_cmd.index = index; + vbt_cmd.type = desc_crtc; + vbt_cmd.req = (void *)(ulong)VBIOS_CRTC_ENCODER_ID; + vbt_cmd.res = (void *)(ulong)&encoder_id; + ret = vbios_get_data(ldev, &vbt_cmd); + if (!ret) + encoder_id = 0; + + return encoder_id; +} + +bool get_crtc_is_vb_timing(struct loongson_drm_device *ldev, u32 index) +{ + bool ret = false; + bool vb_timing = false; + struct vbios_cmd vbt_cmd; + + vbt_cmd.index = index; + vbt_cmd.type = desc_crtc; + vbt_cmd.req = (void *)(ulong)VBIOS_CRTC_IS_VB_TIMING; + vbt_cmd.res = (void *)(ulong)&vb_timing; + ret = vbios_get_data(ldev, &vbt_cmd); + if (!ret) + vb_timing = false; + + return vb_timing; +} + +struct crtc_timing *get_crtc_timing(struct loongson_drm_device *ldev, u32 index) +{ + return NULL; +} + +u32 get_encoder_connector_id(struct loongson_drm_device *ldev, u32 index) +{ + bool ret = false; + u32 connector_id = 0; + struct vbios_cmd vbt_cmd; + + vbt_cmd.index = index; + vbt_cmd.type = desc_encoder; + vbt_cmd.req = (void *)(ulong)VBIOS_ENCODER_CONNECTOR_ID; + vbt_cmd.res = (void *)(ulong)&connector_id; + ret = vbios_get_data(ldev, &vbt_cmd); + if (!ret) + connector_id = 0; + + return connector_id; +} + +u32 get_encoder_i2c_id(struct loongson_drm_device *ldev, u32 index) +{ + u32 i2c_id = 0; + bool ret = false; + struct vbios_cmd vbt_cmd; + + vbt_cmd.index = index; + vbt_cmd.type = desc_encoder; + vbt_cmd.req = (void *)(ulong)VBIOS_ENCODER_I2C_ID; + vbt_cmd.res = (void *)(ulong)&i2c_id; + ret = vbios_get_data(ldev, &vbt_cmd); + if (!ret) + i2c_id = 0; + + return i2c_id; +} + +struct cfg_encoder *get_encoder_config(struct loongson_drm_device *ldev, u32 index) +{ + bool ret = false; + struct vbios_cmd vbt_cmd; + struct cfg_encoder *encoder_config = NULL; + + vbt_cmd.index = index; + vbt_cmd.type = desc_cfg_encoder; + vbt_cmd.req = (void *)(ulong)VBIOS_ENCODER_CONFIG_PARAM; + ret = vbios_get_data(ldev, &vbt_cmd); + if (ret) + encoder_config = (struct cfg_encoder *)vbt_cmd.res; + + return encoder_config; +} + +u32 get_encoder_cfg_num(struct loongson_drm_device *ldev, u32 index) +{ + struct vbios_cmd vbt_cmd; + bool ret = false; + u32 cfg_num = 0; + + vbt_cmd.index = index; + vbt_cmd.type = desc_cfg_encoder; + vbt_cmd.req = (void *)(ulong)VBIOS_ENCODER_CONFIG_NUM; + vbt_cmd.res = (void *)(ulong)&cfg_num; + ret = vbios_get_data(ldev, &vbt_cmd); + if (!ret) + cfg_num = 0; + + return cfg_num; +} + +enum encoder_config get_encoder_config_type(struct loongson_drm_device *ldev, + u32 index) +{ + bool ret = false; + enum encoder_config config_type = encoder_bios_config; + struct vbios_cmd vbt_cmd; + + vbt_cmd.index = index; + vbt_cmd.type = desc_encoder; + vbt_cmd.req = (void *)(ulong)VBIOS_ENCODER_CONFIG_TYPE; + vbt_cmd.res = (void *)(ulong)&config_type; + ret = vbios_get_data(ldev, &vbt_cmd); + if (!ret) + config_type = encoder_bios_config; + + return config_type; +} + +enum encoder_object get_encoder_chip(struct loongson_drm_device *ldev, u32 index) +{ + int ret; + enum encoder_object chip; + struct vbios_cmd vbt_cmd; + + vbt_cmd.index = index; + vbt_cmd.type = desc_encoder; + vbt_cmd.req = (void *)(ulong)VBIOS_ENCODER_CHIP; + vbt_cmd.res = (void *)(ulong)&chip; + ret = vbios_get_data(ldev, &vbt_cmd); + if (!ret) + return Unknown; + + return chip; +} + +u8 get_encoder_chip_addr(struct loongson_drm_device *ldev, u32 index) +{ + int ret; + u8 chip_addr; + struct vbios_cmd vbt_cmd; + + vbt_cmd.index = index; + vbt_cmd.type = desc_encoder; + vbt_cmd.req = (void *)(ulong)VBIOS_ENCODER_CHIP_ADDR; + vbt_cmd.res = (void *)(ulong)&chip_addr; + ret = vbios_get_data(ldev, &vbt_cmd); + if (!ret) + return Unknown; + + return chip_addr; +} + +enum encoder_type get_encoder_type(struct loongson_drm_device *ldev, u32 index) +{ + bool ret = false; + enum encoder_type type = encoder_dac; + struct vbios_cmd vbt_cmd; + + vbt_cmd.index = index; + vbt_cmd.type = desc_encoder; + vbt_cmd.req = (void *)(ulong)VBIOS_ENCODER_TYPE; + vbt_cmd.res = (void *)(ulong)&type; + ret = vbios_get_data(ldev, &vbt_cmd); + if (!ret) + type = encoder_dac; + + return type; +} + +bool get_loongson_i2c(struct loongson_drm_device *ldev) +{ + bool ret = false; + struct vbios_cmd vbt_cmd; + + vbt_cmd.index = 0; + vbt_cmd.type = desc_i2c; + vbt_cmd.res = (void *)&ldev->i2c_bus; + ret = vbios_get_data(ldev, &vbt_cmd); + + if (!ret) { + ldev->i2c_bus[0].use = true; + ldev->i2c_bus[1].use = true; + } + + return true; +} + +bool loongson_vbios_init(struct loongson_drm_device *ldev) +{ + void *vbios; + struct loongson_vbios *header; + + vbios = get_vbios_from_bios(); + if (!vbios) + return false; + + header = ldev->vbios = vbios; + ldev->num_crtc = header->crtc_num; + + DRM_INFO("Loongson VBIOS version %d.%d\n", header->version_major, + header->version_minor); + + INIT_LIST_HEAD(&ldev->desc_list); + parse_vbios_desc(ldev); + + return true; +} + +void loongson_vbios_exit(struct loongson_drm_device *ldev) +{ + free_desc_list(ldev); + kfree(ldev->vbios); +} diff --git a/drivers/gpu/drm/loongson/loongson_vbios.h b/drivers/gpu/drm/loongson/loongson_vbios.h new file mode 100644 index 000000000000..25ae943a6c95 --- /dev/null +++ b/drivers/gpu/drm/loongson/loongson_vbios.h @@ -0,0 +1,283 @@ +#ifndef __LOONGSON_VBIOS_H__ +#define __LOONGSON_VBIOS_H__ + +#define VBIOS_PWM_ID 0x0 +#define VBIOS_PWM_PERIOD 0x1 +#define VBIOS_PWM_POLARITY 0x2 + +#define VBIOS_CRTC_ID 0x1 +#define VBIOS_CRTC_ENCODER_ID 0x2 +#define VBIOS_CRTC_MAX_FREQ 0x3 +#define VBIOS_CRTC_MAX_WIDTH 0x4 +#define VBIOS_CRTC_MAX_HEIGHT 0x5 +#define VBIOS_CRTC_IS_VB_TIMING 0x6 + +#define VBIOS_ENCODER_I2C_ID 0x1 +#define VBIOS_ENCODER_CONNECTOR_ID 0x2 +#define VBIOS_ENCODER_TYPE 0x3 +#define VBIOS_ENCODER_CONFIG_TYPE 0x4 +#define VBIOS_ENCODER_CONFIG_PARAM 0x1 +#define VBIOS_ENCODER_CONFIG_NUM 0x2 +#define VBIOS_ENCODER_CHIP 0x05 +#define VBIOS_ENCODER_CHIP_ADDR 0x06 + +#define VBIOS_CONNECTOR_I2C_ID 0x1 +#define VBIOS_CONNECTOR_INTERNAL_EDID 0x2 +#define VBIOS_CONNECTOR_TYPE 0x3 +#define VBIOS_CONNECTOR_HOTPLUG 0x4 +#define VBIOS_CONNECTOR_EDID_METHOD 0x5 +#define VBIOS_CONNECTOR_IRQ_PLACEMENT 0x06 +#define VBIOS_CONNECTOR_IRQ_GPIO 0x07 + +struct desc_node; +struct vbios_cmd; +typedef bool(parse_func)(struct desc_node *, struct vbios_cmd *); + +enum desc_type { + desc_header = 0, + desc_crtc, + desc_encoder, + desc_connector, + desc_i2c, + desc_pwm, + desc_gpio, + desc_backlight, + desc_fan, + desc_irq_vblank, + desc_cfg_encoder, + desc_max = 0xffff +}; + +enum desc_ver { + ver_v1, +}; + +enum hotplug { + disable = 0, + polling, + irq, + hotplug_max = 0xffffffff, +}; + +enum loongson_edid_method { + via_null = 0, + via_i2c, + via_vbios, + via_encoder, + via_max = 0xffffffff, +}; + +enum i2c_type { i2c_cpu, i2c_gpio, i2c_max = -1 }; + +enum vbios_backlight_type { bl_unuse, bl_ec, bl_pwm }; + +enum encoder_config { + encoder_transparent = 0, + encoder_os_config, + encoder_bios_config, + encoder_timing_filling, + encoder_kernel_driver, + encoder_type_max = 0xffffffff, +}; + +enum encoder_type { + encoder_none, + encoder_dac, + encoder_tmds, + encoder_lvds, + encoder_tvdac, + encoder_virtual, + encoder_dsi, + encoder_dpmst, + encoder_dpi +}; + +enum connector_type { + connector_unknown, + connector_vga, + connector_dvii, + connector_dvid, + connector_dvia, + connector_composite, + connector_svideo, + connector_lvds, + connector_component, + connector_9pindin, + connector_displayport, + connector_hdmia, + connector_hdmib, + connector_tv, + connector_edp, + connector_virtual, + connector_dsi, + connector_dpi +}; + +enum gpio_placement { + GPIO_PLACEMENT_LS3A = 0, + GPIO_PLACEMENT_LS7A, +}; + +enum encoder_object { + Unknown = 0x00, + INTERNAL_DVO = 0x01, + INTERNAL_HDMI = 0x02, + VGA_CH7055 = 0x10, + VGA_ADV7125 = 0x11, + DVI_TFP410 = 0x20, + HDMI_IT66121 = 0x30, + HDMI_SIL9022 = 0x31, + HDMI_LT8618 = 0x32, + HDMI_MS7210 = 0x33, + EDP_NCS8805 = 0x40 +}; + +struct loongson_vbios { + char title[16]; + uint32_t version_major; + uint32_t version_minor; + char information[20]; + uint32_t crtc_num; + uint32_t crtc_offset; + uint32_t connector_num; + uint32_t connector_offset; + uint32_t encoder_num; + uint32_t encoder_offset; +} __packed; + +struct vbios_header { + u32 feature; + u8 oem_vendor[32]; + u8 oem_product[32]; + u32 legacy_offset; + u32 legacy_size; + u32 desc_offset; + u32 desc_size; + u32 data_offset; + u32 data_size; +} __packed; + +struct vbios_backlight { + u32 feature; + u8 used; + enum vbios_backlight_type type; +} __packed; + +struct vbios_i2c { + u32 feature; + u16 id; + enum i2c_type type; +} __packed; + +struct vbios_pwm { + u32 feature; + u8 pwm; + u8 polarity; + u32 peroid; +} __packed; + +struct vbios_desc { + u16 type; + u8 ver; + u8 index; + u32 offset; + u32 size; + u64 ext[2]; +} __packed; + +struct vbios_cmd { + u8 index; + enum desc_type type; + u64 *req; + void *res; +}; + +struct desc_func { + enum desc_type type; + u16 ver; + s8 *name; + u8 index; + parse_func *func; +}; + +struct desc_node { + struct list_head head; + u8 *data; + struct vbios_desc *desc; + parse_func *parse; +}; + +struct vbios_crtc { + u32 feature; + u32 crtc_id; + u32 encoder_id; + u32 max_freq; + u32 max_width; + u32 max_height; + bool is_vb_timing; +} __packed; + +struct vbios_encoder { + u32 feature; + u32 i2c_id; + u32 connector_id; + enum encoder_type type; + enum encoder_config config_type; + enum encoder_object chip; + u8 chip_addr; +} __packed; + +struct vbios_connector { + u32 feature; + u32 i2c_id; + u8 internal_edid[256]; + enum connector_type type; + enum hotplug hotplug; + enum loongson_edid_method edid_method; + u32 irq_gpio; + enum gpio_placement gpio_placement; +} __packed; + +struct vbios_conf_reg { + u8 dev_addr; + u8 reg; + u8 value; +} __packed; + +struct vbios_cfg_encoder { + u32 hdisplay; + u32 vdisplay; + u8 reg_num; + struct vbios_conf_reg config_regs[256]; +} __packed; + +bool loongson_vbios_init(struct loongson_drm_device *ldev); +void loongson_vbios_exit(struct loongson_drm_device *ldev); +u32 get_connector_type(struct loongson_drm_device *ldev, u32 index); +u16 get_connector_i2cid(struct loongson_drm_device *ldev, u32 index); +u16 get_hotplug_mode(struct loongson_drm_device *ldev, u32 index); +u8 *get_vbios_edid(struct loongson_drm_device *ldev, u32 index); +u16 get_edid_method(struct loongson_drm_device *ldev, u32 index); +u32 get_vbios_pwm(struct loongson_drm_device *ldev, u32 index, u16 request); +u32 get_crtc_id(struct loongson_drm_device *ldev, u32 index); +u32 get_crtc_max_freq(struct loongson_drm_device *ldev, u32 index); +u32 get_crtc_max_width(struct loongson_drm_device *ldev, u32 index); +u32 get_crtc_max_height(struct loongson_drm_device *ldev, u32 index); +u32 get_crtc_encoder_id(struct loongson_drm_device *ldev, u32 index); +bool get_crtc_is_vb_timing(struct loongson_drm_device *ldev, u32 index); + +struct crtc_timing *get_crtc_timing(struct loongson_drm_device *ldev, u32 index); + +bool get_loongson_i2c(struct loongson_drm_device *ldev); +u32 get_encoder_i2c_id(struct loongson_drm_device *ldev, u32 index); +u32 get_encoder_connector_id(struct loongson_drm_device *ldev, u32 index); +enum encoder_config get_encoder_config_type(struct loongson_drm_device *ldev, u32 index); +enum encoder_type get_encoder_type(struct loongson_drm_device *ldev, u32 index); +struct cfg_encoder *get_encoder_config(struct loongson_drm_device *ldev, u32 index); +u32 get_encoder_cfg_num(struct loongson_drm_device *ldev, u32 index); +enum encoder_object get_encoder_chip(struct loongson_drm_device *ldev, u32 index); +u8 get_encoder_chip_addr(struct loongson_drm_device *ldev, u32 index); +u32 get_connector_irq_gpio(struct loongson_drm_device *ldev, u32 index); +enum gpio_placement get_connector_gpio_placement(struct loongson_drm_device *ldev, + u32 index); +#endif -- Gitee From 3c40bc7df271cb73bf6587e22a08e0d3c4f9ea51 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 1 Dec 2016 09:51:35 +0800 Subject: [PATCH 58/59] alsa: Add Loongson LS7A HD-Audio support Change-Id: I364548a8f859d14897dc50ebf0b6d1dec798072a Signed-off-by: Huacai Chen --- arch/loongarch/configs/loongson3_defconfig | 1 + sound/hda/hdac_bus.c | 22 +- sound/hda/hdac_controller.c | 57 +- sound/hda/hdac_stream.c | 63 +- sound/pci/hda/Kconfig | 11 + sound/pci/hda/Makefile | 2 + sound/pci/hda/hda_controller.c | 41 +- sound/pci/hda/hda_controller.h | 5 +- sound/pci/hda/hda_loongson.c | 841 +++++++++++++++++++++ sound/pci/hda/patch_realtek.c | 12 +- 10 files changed, 1018 insertions(+), 37 deletions(-) create mode 100644 sound/pci/hda/hda_loongson.c diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig index 2b3b357dc8bf..794d46aa8377 100644 --- a/arch/loongarch/configs/loongson3_defconfig +++ b/arch/loongarch/configs/loongson3_defconfig @@ -597,6 +597,7 @@ CONFIG_SND_SEQ_DUMMY=m CONFIG_SND_BT87X=m CONFIG_SND_BT87X_OVERCLOCK=y CONFIG_SND_HDA_INTEL=y +CONFIG_SND_HDA_LOONGSON=y CONFIG_SND_HDA_HWDEP=y CONFIG_SND_HDA_INPUT_BEEP=y CONFIG_SND_HDA_PATCH_LOADER=y diff --git a/sound/hda/hdac_bus.c b/sound/hda/hdac_bus.c index 9766f6af8743..0b1279d46f81 100644 --- a/sound/hda/hdac_bus.c +++ b/sound/hda/hdac_bus.c @@ -11,6 +11,7 @@ #include #include "local.h" #include "trace.h" +#include "../pci/hda/hda_controller.h" static void snd_hdac_bus_process_unsol_events(struct work_struct *work); @@ -108,6 +109,7 @@ int snd_hdac_bus_exec_verb_unlocked(struct hdac_bus *bus, unsigned int addr, { unsigned int tmp; int err; + struct azx *chip = bus_to_azx(bus); if (cmd == ~0) return -EINVAL; @@ -116,15 +118,19 @@ int snd_hdac_bus_exec_verb_unlocked(struct hdac_bus *bus, unsigned int addr, *res = -1; else if (bus->sync_write) res = &tmp; - for (;;) { - trace_hda_send_cmd(bus, cmd); + if (chip->driver_caps & AZX_DCAPS_LS2X_WORKAROUND) err = bus->ops->command(bus, cmd); - if (err != -EAGAIN) - break; - /* process pending verbs */ - err = bus->ops->get_response(bus, addr, &tmp); - if (err) - break; + else { + for (;;) { + trace_hda_send_cmd(bus, cmd); + err = bus->ops->command(bus, cmd); + if (err != -EAGAIN) + break; + /* process pending verbs */ + err = bus->ops->get_response(bus, addr, &tmp); + if (err) + break; + } } if (!err && res) { err = bus->ops->get_response(bus, addr, res); diff --git a/sound/hda/hdac_controller.c b/sound/hda/hdac_controller.c index 522d1897659c..e35ae58e738f 100644 --- a/sound/hda/hdac_controller.c +++ b/sound/hda/hdac_controller.c @@ -10,6 +10,7 @@ #include #include #include "local.h" +#include "../pci/hda/hda_controller.h" /* clear CORB read pointer properly */ static void azx_clear_corbrp(struct hdac_bus *bus) @@ -42,6 +43,8 @@ static void azx_clear_corbrp(struct hdac_bus *bus) */ void snd_hdac_bus_init_cmd_io(struct hdac_bus *bus) { + struct azx *chip = bus_to_azx(bus); + WARN_ON_ONCE(!bus->rb.area); spin_lock_irq(&bus->reg_lock); @@ -58,11 +61,15 @@ void snd_hdac_bus_init_cmd_io(struct hdac_bus *bus) /* reset the corb hw read pointer */ snd_hdac_chip_writew(bus, CORBRP, AZX_CORBRP_RST); - if (!bus->corbrp_self_clear) + if (chip->driver_caps & AZX_DCAPS_LS2X_WORKAROUND) + snd_hdac_chip_writew(bus, CORBRP, 0); + else if (!bus->corbrp_self_clear) azx_clear_corbrp(bus); /* enable corb dma */ snd_hdac_chip_writeb(bus, CORBCTL, AZX_CORBCTL_RUN); + if (chip->driver_caps & AZX_DCAPS_LS2X_WORKAROUND) + snd_hdac_chip_readb(bus, CORBCTL); /* RIRB set up */ bus->rirb.addr = bus->rb.addr + 2048; @@ -79,7 +86,11 @@ void snd_hdac_bus_init_cmd_io(struct hdac_bus *bus) /* set N=1, get RIRB response interrupt for new entry */ snd_hdac_chip_writew(bus, RINTCNT, 1); /* enable rirb dma and response irq */ - snd_hdac_chip_writeb(bus, RIRBCTL, AZX_RBCTL_DMA_EN | AZX_RBCTL_IRQ_EN); + if (chip->driver_caps & AZX_DCAPS_LS2X_WORKAROUND) { + snd_hdac_chip_writeb(bus, RIRBCTL, AZX_RBCTL_DMA_EN); + snd_hdac_chip_readb(bus, RIRBCTL); + } else + snd_hdac_chip_writeb(bus, RIRBCTL, AZX_RBCTL_DMA_EN | AZX_RBCTL_IRQ_EN); /* Accept unsolicited responses */ snd_hdac_chip_updatel(bus, GCTL, AZX_GCTL_UNSOL, AZX_GCTL_UNSOL); spin_unlock_irq(&bus->reg_lock); @@ -132,6 +143,18 @@ static unsigned int azx_command_addr(u32 cmd) return addr; } +static unsigned int azx_response_addr(u32 res) +{ + unsigned int addr = res & 0xf; + + if (addr >= AZX_MAX_CODECS) { + snd_BUG(); + addr = 0; + } + + return addr; +} + /** * snd_hdac_bus_send_cmd - send a command verb via CORB * @bus: HD-audio core bus @@ -207,13 +230,8 @@ void snd_hdac_bus_update_rirb(struct hdac_bus *bus) rp = bus->rirb.rp << 1; /* an RIRB entry is 8-bytes */ res_ex = le32_to_cpu(bus->rirb.buf[rp + 1]); res = le32_to_cpu(bus->rirb.buf[rp]); - addr = res_ex & 0xf; - if (addr >= HDA_MAX_CODECS) { - dev_err(bus->dev, - "spurious response %#x:%#x, rp = %d, wp = %d", - res, res_ex, bus->rirb.rp, wp); - snd_BUG(); - } else if (res_ex & AZX_RIRB_EX_UNSOL_EV) + addr = azx_response_addr(res_ex); + if (res_ex & AZX_RIRB_EX_UNSOL_EV) snd_hdac_bus_queue_event(bus, res, res_ex); else if (bus->rirb.cmds[addr]) { bus->rirb.res[addr] = res; @@ -485,16 +503,24 @@ static void azx_int_disable(struct hdac_bus *bus) static void azx_int_clear(struct hdac_bus *bus) { struct hdac_stream *azx_dev; + struct azx *chip = bus_to_azx(bus); /* clear stream status */ - list_for_each_entry(azx_dev, &bus->stream_list, list) - snd_hdac_stream_writeb(azx_dev, SD_STS, SD_INT_MASK); + list_for_each_entry(azx_dev, &bus->stream_list, list) { + if (chip->driver_caps & AZX_DCAPS_LS2X_WORKAROUND) + snd_hdac_stream_updateb(azx_dev, SD_STS, 0, 0); + else + snd_hdac_stream_writeb(azx_dev, SD_STS, SD_INT_MASK); + } /* clear STATESTS */ snd_hdac_chip_writew(bus, STATESTS, STATESTS_INT_MASK); /* clear rirb status */ - snd_hdac_chip_writeb(bus, RIRBSTS, RIRB_INT_MASK); + if (chip->driver_caps & AZX_DCAPS_LS2X_WORKAROUND) + snd_hdac_chip_updateb(bus, RIRBSTS, ~RIRB_INT_MASK, 0); + else + snd_hdac_chip_writeb(bus, RIRBSTS, RIRB_INT_MASK); /* clear int status */ snd_hdac_chip_writel(bus, INTSTS, AZX_INT_CTRL_EN | AZX_INT_ALL_STREAM); @@ -575,11 +601,16 @@ int snd_hdac_bus_handle_stream_irq(struct hdac_bus *bus, unsigned int status, struct hdac_stream *azx_dev; u8 sd_status; int handled = 0; + struct azx *chip = bus_to_azx(bus); list_for_each_entry(azx_dev, &bus->stream_list, list) { if (status & azx_dev->sd_int_sta_mask) { sd_status = snd_hdac_stream_readb(azx_dev, SD_STS); - snd_hdac_stream_writeb(azx_dev, SD_STS, SD_INT_MASK); + if (chip->driver_caps & AZX_DCAPS_LS2X_WORKAROUND) { + snd_hdac_stream_writeb(azx_dev, SD_STS, sd_status); + snd_hdac_stream_readb(azx_dev, SD_STS); + } else + snd_hdac_stream_writeb(azx_dev, SD_STS, SD_INT_MASK); handled |= 1 << azx_dev->index; if (!azx_dev->substream || !azx_dev->running || !(sd_status & SD_INT_COMPLETE)) diff --git a/sound/hda/hdac_stream.c b/sound/hda/hdac_stream.c index ce77a5320163..50e7211a6556 100644 --- a/sound/hda/hdac_stream.c +++ b/sound/hda/hdac_stream.c @@ -12,6 +12,7 @@ #include #include #include "trace.h" +#include "../pci/hda/hda_controller.h" /** * snd_hdac_get_stream_stripe_ctl - get stripe control value @@ -83,12 +84,13 @@ EXPORT_SYMBOL_GPL(snd_hdac_stream_init); void snd_hdac_stream_start(struct hdac_stream *azx_dev, bool fresh_start) { struct hdac_bus *bus = azx_dev->bus; + struct azx *chip = bus_to_azx(bus); int stripe_ctl; trace_snd_hdac_stream_start(bus, azx_dev); azx_dev->start_wallclk = snd_hdac_chip_readl(bus, WALLCLK); - if (!fresh_start) + if (!fresh_start && !(chip->driver_caps & AZX_DCAPS_LS2X_WORKAROUND)) azx_dev->start_wallclk -= azx_dev->period_wallclk; /* enable SIE */ @@ -101,11 +103,19 @@ void snd_hdac_stream_start(struct hdac_stream *azx_dev, bool fresh_start) stripe_ctl = snd_hdac_get_stream_stripe_ctl(bus, azx_dev->substream); else stripe_ctl = 0; - snd_hdac_stream_updateb(azx_dev, SD_CTL_3B, SD_CTL_STRIPE_MASK, + if (chip->driver_caps & AZX_DCAPS_LS2X_WORKAROUND) + snd_hdac_stream_updatel(azx_dev, SD_CTL_3B, SD_CTL_STRIPE_MASK, + stripe_ctl); + else + snd_hdac_stream_updateb(azx_dev, SD_CTL_3B, SD_CTL_STRIPE_MASK, stripe_ctl); } /* set DMA start and interrupt mask */ - snd_hdac_stream_updateb(azx_dev, SD_CTL, + if (chip->driver_caps & AZX_DCAPS_LS2X_WORKAROUND) + snd_hdac_stream_updatel(azx_dev, SD_CTL, + 0, SD_CTL_DMA_START | SD_INT_MASK); + else + snd_hdac_stream_updateb(azx_dev, SD_CTL, 0, SD_CTL_DMA_START | SD_INT_MASK); azx_dev->running = true; } @@ -117,11 +127,31 @@ EXPORT_SYMBOL_GPL(snd_hdac_stream_start); */ void snd_hdac_stream_clear(struct hdac_stream *azx_dev) { - snd_hdac_stream_updateb(azx_dev, SD_CTL, - SD_CTL_DMA_START | SD_INT_MASK, 0); - snd_hdac_stream_writeb(azx_dev, SD_STS, SD_INT_MASK); /* to be sure */ - if (azx_dev->stripe) - snd_hdac_stream_updateb(azx_dev, SD_CTL_3B, SD_CTL_STRIPE_MASK, 0); + int stream; + struct azx *chip = bus_to_azx(azx_dev->bus); + struct snd_pcm_substream *substream = azx_dev->substream; + + if (chip->driver_caps & AZX_DCAPS_LS2X_WORKAROUND) { + snd_hdac_stream_updatel(azx_dev, SD_CTL, + SD_CTL_DMA_START | SD_INT_MASK, 0); + snd_hdac_stream_writeb(azx_dev, SD_STS, SD_INT_MASK); /* to be sure */ + if (azx_dev->stripe) + snd_hdac_stream_updatel(azx_dev, SD_CTL_3B, SD_CTL_STRIPE_MASK, 0); + + if (!substream) + stream_to_azx_dev(azx_dev)->fix_prvpos = 0; + else { + stream = substream->stream; + stream_to_azx_dev(azx_dev)->fix_prvpos = + chip->get_position[stream](chip, stream_to_azx_dev(azx_dev)); + } + } else { + snd_hdac_stream_updateb(azx_dev, SD_CTL, + SD_CTL_DMA_START | SD_INT_MASK, 0); + snd_hdac_stream_writeb(azx_dev, SD_STS, SD_INT_MASK); /* to be sure */ + if (azx_dev->stripe) + snd_hdac_stream_updateb(azx_dev, SD_CTL_3B, SD_CTL_STRIPE_MASK, 0); + } azx_dev->running = false; } EXPORT_SYMBOL_GPL(snd_hdac_stream_clear); @@ -151,6 +181,10 @@ void snd_hdac_stream_reset(struct hdac_stream *azx_dev) unsigned char val; int timeout; int dma_run_state; + struct azx *chip = bus_to_azx(azx_dev->bus); + + if (chip->driver_caps & AZX_DCAPS_LS2X_WORKAROUND) + goto out; snd_hdac_stream_clear(azx_dev); @@ -182,6 +216,7 @@ void snd_hdac_stream_reset(struct hdac_stream *azx_dev) break; } while (--timeout); +out: /* reset first position - may not be synced with hw at this time */ if (azx_dev->posbuf) *azx_dev->posbuf = 0; @@ -197,6 +232,7 @@ int snd_hdac_stream_setup(struct hdac_stream *azx_dev) struct hdac_bus *bus = azx_dev->bus; struct snd_pcm_runtime *runtime; unsigned int val; + struct azx *chip = bus_to_azx(bus); if (azx_dev->substream) runtime = azx_dev->substream->runtime; @@ -213,7 +249,13 @@ int snd_hdac_stream_setup(struct hdac_stream *azx_dev) snd_hdac_stream_writel(azx_dev, SD_CTL, val); /* program the length of samples in cyclic buffer */ - snd_hdac_stream_writel(azx_dev, SD_CBL, azx_dev->bufsize); + if (chip->driver_caps & AZX_DCAPS_LS2X_WORKAROUND) { + if (azx_dev->substream->stream == SNDRV_PCM_STREAM_PLAYBACK) + snd_hdac_stream_writel(azx_dev, SD_CBL, azx_dev->bufsize - 64); + else + snd_hdac_stream_writel(azx_dev, SD_CBL, azx_dev->bufsize - 16); + } else + snd_hdac_stream_writel(azx_dev, SD_CBL, azx_dev->bufsize); /* program the stream format */ /* this value needs to be the same as the one programmed */ @@ -419,6 +461,7 @@ int snd_hdac_stream_setup_periods(struct hdac_stream *azx_dev) __le32 *bdl; int i, ofs, periods, period_bytes; int pos_adj, pos_align; + struct azx *chip = bus_to_azx(bus); /* reset BDL address */ snd_hdac_stream_writel(azx_dev, SD_BDLPL, 0); @@ -433,6 +476,8 @@ int snd_hdac_stream_setup_periods(struct hdac_stream *azx_dev) azx_dev->frags = 0; pos_adj = bus->bdl_pos_adj; + if (chip->driver_caps & AZX_DCAPS_LS2X_WORKAROUND) + pos_adj = 0; if (!azx_dev->no_period_wakeup && pos_adj > 0) { pos_align = pos_adj; pos_adj = (pos_adj * runtime->rate + 47999) / 48000; diff --git a/sound/pci/hda/Kconfig b/sound/pci/hda/Kconfig index 90759391cbac..77ee0b088050 100644 --- a/sound/pci/hda/Kconfig +++ b/sound/pci/hda/Kconfig @@ -42,6 +42,17 @@ config SND_HDA_TEGRA To compile this driver as a module, choose M here: the module will be called snd-hda-tegra. +config SND_HDA_LOONGSON + tristate "Loongson HD Audio" + depends on MACH_LOONGSON64 + select SND_HDA + help + Say Y here to include support for Loongson's LS7A "High + Definition Audio" controller. + + To compile this driver as a module, choose M here: the module + will be called snd-hda-loongson. + if SND_HDA config SND_HDA_HWDEP diff --git a/sound/pci/hda/Makefile b/sound/pci/hda/Makefile index b57432f00056..2b6d1b32409c 100644 --- a/sound/pci/hda/Makefile +++ b/sound/pci/hda/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 snd-hda-intel-objs := hda_intel.o snd-hda-tegra-objs := hda_tegra.o +snd-hda-loongson-objs := hda_loongson.o snd-hda-codec-y := hda_bind.o hda_codec.o hda_jack.o hda_auto_parser.o hda_sysfs.o snd-hda-codec-y += hda_controller.o @@ -48,3 +49,4 @@ obj-$(CONFIG_SND_HDA_CODEC_HDMI) += snd-hda-codec-hdmi.o # when built in kernel obj-$(CONFIG_SND_HDA_INTEL) += snd-hda-intel.o obj-$(CONFIG_SND_HDA_TEGRA) += snd-hda-tegra.o +obj-$(CONFIG_SND_HDA_LOONGSON) += snd-hda-loongson.o diff --git a/sound/pci/hda/hda_controller.c b/sound/pci/hda/hda_controller.c index 3b17af0dac2c..e287845b7979 100644 --- a/sound/pci/hda/hda_controller.c +++ b/sound/pci/hda/hda_controller.c @@ -295,6 +295,23 @@ unsigned int azx_get_position(struct azx *chip, int stream = substream->stream; int delay = 0; + if (chip->driver_caps & AZX_DCAPS_LS2X_WORKAROUND) { + pos = chip->get_position[stream](chip, azx_dev); + + if (pos >= azx_dev->fix_prvpos) { + pos = pos - azx_dev->fix_prvpos; + pos %= azx_dev->core.bufsize; + } else { + if (azx_dev->fix_prvpos > azx_dev->core.bufsize) + pos = (0x100000000ULL + pos-azx_dev->fix_prvpos) + % azx_dev->core.bufsize; + else + pos = pos + azx_dev->core.bufsize - azx_dev->fix_prvpos; + } + + return pos; + } + if (chip->get_position[stream]) pos = chip->get_position[stream](chip, azx_dev); else /* use the position buffer as default */ @@ -920,6 +937,8 @@ static int azx_send_cmd(struct hdac_bus *bus, unsigned int val) if (chip->disabled) return 0; + if (chip->driver_caps & AZX_DCAPS_LS2X_WORKAROUND) + udelay(500); if (chip->single_cmd) return azx_single_send_cmd(bus, val); else @@ -1088,8 +1107,9 @@ static void stream_update(struct hdac_bus *bus, struct hdac_stream *s) irqreturn_t azx_interrupt(int irq, void *dev_id) { struct azx *chip = dev_id; + struct hdac_stream *azx_dev; struct hdac_bus *bus = azx_bus(chip); - u32 status; + u32 i = 0, status = 0; bool active, handled = false; int repeat = 0; /* count for avoiding endless loop */ @@ -1105,8 +1125,18 @@ irqreturn_t azx_interrupt(int irq, void *dev_id) goto unlock; do { - status = azx_readl(chip, INTSTS); - if (status == 0 || status == 0xffffffff) + if (chip->driver_caps & AZX_DCAPS_LS2X_WORKAROUND) { + list_for_each_entry(azx_dev, &bus->stream_list, list) { + status |= (snd_hdac_stream_readb(azx_dev, SD_STS) & SD_INT_MASK) ? + (1 << i) : 0; + i++; + } + status |= (status & ~0) ? (1 << 31) : 0; + } else + status = azx_readl(chip, INTSTS); + + if (status == 0 || + (status == 0xffffffff && !(chip->driver_caps & AZX_DCAPS_LS2X_WORKAROUND))) break; handled = true; @@ -1124,7 +1154,10 @@ irqreturn_t azx_interrupt(int irq, void *dev_id) * remain unserviced by IRQ, eventually falling back * to polling mode in azx_rirb_get_response. */ - azx_writeb(chip, RIRBSTS, RIRB_INT_MASK); + if (chip->driver_caps & AZX_DCAPS_LS2X_WORKAROUND) + azx_writeb(chip, RIRBSTS, status & RIRB_INT_MASK); + else + azx_writeb(chip, RIRBSTS, RIRB_INT_MASK); active = true; if (status & RIRB_INT_RESPONSE) { if ((chip->driver_caps & AZX_DCAPS_CTX_WORKAROUND) || diff --git a/sound/pci/hda/hda_controller.h b/sound/pci/hda/hda_controller.h index c18aa6b654c6..05f342099932 100644 --- a/sound/pci/hda/hda_controller.h +++ b/sound/pci/hda/hda_controller.h @@ -28,7 +28,7 @@ #else #define AZX_DCAPS_I915_COMPONENT 0 /* NOP */ #endif -/* 14 unused */ +#define AZX_DCAPS_LS2X_WORKAROUND (1 << 14) /* Loongson-2H workaround */ #define AZX_DCAPS_CTX_WORKAROUND (1 << 15) /* X-Fi workaround */ #define AZX_DCAPS_POSFIX_LPIB (1 << 16) /* Use LPIB as default */ #define AZX_DCAPS_AMD_WORKAROUND (1 << 17) /* AMD-specific workaround */ @@ -64,6 +64,9 @@ struct azx_dev { * when link position is not greater than FIFO size */ unsigned int insufficient:1; + + /* For Loongson */ + unsigned int fix_prvpos; }; #define azx_stream(dev) (&(dev)->core) diff --git a/sound/pci/hda/hda_loongson.c b/sound/pci/hda/hda_loongson.c new file mode 100644 index 000000000000..9ea5d4f1a1b8 --- /dev/null +++ b/sound/pci/hda/hda_loongson.c @@ -0,0 +1,841 @@ +/* + * + * hda_loongson.c - Implementation of primary alsa driver code base + * for Intel HD Audio. + * + * Copyright (c) 2004 Intel Corporation. All rights reserved. + * + * Copyright (c) 2004 Takashi Iwai + * PeiSen Hou + * + * Copyright (c) 2014 Huacai Chen + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * CONTACTS: + * + * Matt Jared matt.jared@intel.com + * Andy Kopp andy.kopp@intel.com + * Dan Kogan dan.d.kogan@intel.com + * + * CHANGES: + * + * 2004.12.01 Major rewrite by tiwai, merged the work of pshou + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include "hda_controller.h" + +#define LS7A_NUM_CAPTURE 4 +#define LS7A_NUM_PLAYBACK 4 + +static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; +static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; +static bool enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP; +static char *model[SNDRV_CARDS]; +static int bdl_pos_adj[SNDRV_CARDS] = {[0 ... (SNDRV_CARDS-1)] = -1}; +static int probe_only[SNDRV_CARDS]; +static int jackpoll_ms[SNDRV_CARDS] = {[0 ... (SNDRV_CARDS-1)] = 1200}; +static bool single_cmd; +static int enable_msi = -1; +#ifdef CONFIG_SND_HDA_INPUT_BEEP +static bool beep_mode[SNDRV_CARDS] = {[0 ... (SNDRV_CARDS-1)] = + CONFIG_SND_HDA_INPUT_BEEP_MODE}; +#endif + +module_param_array(index, int, NULL, 0444); +MODULE_PARM_DESC(index, "Index value for Intel HD audio interface."); +module_param_array(id, charp, NULL, 0444); +MODULE_PARM_DESC(id, "ID string for Intel HD audio interface."); +module_param_array(enable, bool, NULL, 0444); +MODULE_PARM_DESC(enable, "Enable Intel HD audio interface."); +module_param_array(model, charp, NULL, 0444); +MODULE_PARM_DESC(model, "Use the given board model."); +module_param_array(bdl_pos_adj, int, NULL, 0644); +MODULE_PARM_DESC(bdl_pos_adj, "BDL position adjustment offset."); +module_param_array(probe_only, int, NULL, 0444); +MODULE_PARM_DESC(probe_only, "Only probing and no codec initialization."); +module_param_array(jackpoll_ms, int, NULL, 0444); +MODULE_PARM_DESC(jackpoll_ms, "Ms between polling for jack events (default = 0, using unsol events only)"); +module_param(single_cmd, bool, 0444); +MODULE_PARM_DESC(single_cmd, "Use single command to communicate with codecs " + "(for debugging only)."); +module_param(enable_msi, bint, 0444); +MODULE_PARM_DESC(enable_msi, "Enable Message Signaled Interrupt (MSI)"); +#ifdef CONFIG_SND_HDA_INPUT_BEEP +module_param_array(beep_mode, bool, NULL, 0444); +MODULE_PARM_DESC(beep_mode, "Select HDA Beep registration mode " + "(0=off, 1=on) (default=1)."); +#endif + +#ifdef CONFIG_PM +static int param_set_xint(const char *val, const struct kernel_param *kp); +static struct kernel_param_ops param_ops_xint = { + .set = param_set_xint, + .get = param_get_int, +}; +#define param_check_xint param_check_int + +static int power_save = CONFIG_SND_HDA_POWER_SAVE_DEFAULT; +module_param(power_save, xint, 0644); +MODULE_PARM_DESC(power_save, "Automatic power-saving timeout " + "(in second, 0 = disable)."); + +/* reset the HD-audio controller in power save mode. + * this may give more power-saving, but will take longer time to + * wake up. + */ +static bool power_save_controller = 1; +module_param(power_save_controller, bool, 0644); +MODULE_PARM_DESC(power_save_controller, "Reset controller in power save mode."); +#else +#define power_save 0 +#endif /* CONFIG_PM */ + +static int align_buffer_size = -1; +module_param(align_buffer_size, bint, 0644); +MODULE_PARM_DESC(align_buffer_size, + "Force buffer and period sizes to be multiple of 128 bytes."); + +MODULE_LICENSE("GPL"); +MODULE_SUPPORTED_DEVICE("{Loongson, LS7A}"); +MODULE_DESCRIPTION("Loongson LS7A HDA driver"); + +/* driver types */ +enum { + AZX_DRIVER_LS7A, + AZX_NUM_DRIVERS, /* keep this as last entry */ +}; + +static char *driver_short_names[] = { + [AZX_DRIVER_LS7A] = "HD-Audio Loongson", +}; + +struct hda_loongson { + struct azx chip; + + /* for pending irqs */ + struct work_struct irq_pending_work; + + /* sync probing */ + struct completion probe_wait; + struct work_struct probe_work; + + /* card list (for power_save trigger) */ + struct list_head list; + + /* extra flags */ + unsigned int irq_pending_warned:1; + unsigned int probe_continued:1; +}; + +static int azx_acquire_irq(struct azx *chip, int do_disconnect); + +static int azx_position_ok(struct azx *chip, struct azx_dev *azx_dev); + +/* called from IRQ */ +static int azx_position_check(struct azx *chip, struct azx_dev *azx_dev) +{ + struct hda_loongson *hda = container_of(chip, struct hda_loongson, chip); + int ok; + + ok = azx_position_ok(chip, azx_dev); + if (ok == 1) { + azx_dev->irq_pending = 0; + return ok; + } else if (ok == 0) { + /* bogus IRQ, process it later */ + azx_dev->irq_pending = 1; + schedule_work(&hda->irq_pending_work); + } + return 0; +} + +/* + * Check whether the current DMA position is acceptable for updating + * periods. Returns non-zero if it's OK. + * + * Many HD-audio controllers appear pretty inaccurate about + * the update-IRQ timing. The IRQ is issued before actually the + * data is processed. So, we need to process it afterwords in a + * workqueue. + */ +static int azx_position_ok(struct azx *chip, struct azx_dev *azx_dev) +{ + return 1; /* OK, it's fine */ +} + +/* + * The work for pending PCM period updates. + */ +static void azx_irq_pending_work(struct work_struct *work) +{ + struct hda_loongson *hda = container_of(work, struct hda_loongson, irq_pending_work); + struct azx *chip = &hda->chip; + struct hdac_bus *bus = azx_bus(chip); + struct hdac_stream *s; + int pending, ok; + + if (!hda->irq_pending_warned) { + dev_info(chip->card->dev, + "IRQ timing workaround is activated for card #%d. Suggest a bigger bdl_pos_adj.\n", + chip->card->number); + hda->irq_pending_warned = 1; + } + + for (;;) { + pending = 0; + spin_lock_irq(&bus->reg_lock); + list_for_each_entry(s, &bus->stream_list, list) { + struct azx_dev *azx_dev = stream_to_azx_dev(s); + if (!azx_dev->irq_pending || + !s->substream || + !s->running) + continue; + ok = azx_position_ok(chip, azx_dev); + if (ok > 0) { + azx_dev->irq_pending = 0; + spin_unlock(&bus->reg_lock); + snd_pcm_period_elapsed(s->substream); + spin_lock(&bus->reg_lock); + } else if (ok < 0) { + pending = 0; /* too early */ + } else + pending++; + } + spin_unlock_irq(&bus->reg_lock); + if (!pending) + return; + msleep(1); + } +} + +/* clear irq_pending flags and assure no on-going workq */ +static void azx_clear_irq_pending(struct azx *chip) +{ + struct hdac_bus *bus = azx_bus(chip); + struct hdac_stream *s; + + spin_lock_irq(&bus->reg_lock); + list_for_each_entry(s, &bus->stream_list, list) { + struct azx_dev *azx_dev = stream_to_azx_dev(s); + azx_dev->irq_pending = 0; + } + spin_unlock_irq(&bus->reg_lock); +} + +static int azx_acquire_irq(struct azx *chip, int do_disconnect) +{ + struct hdac_bus *bus = azx_bus(chip); + int irq; + + if (chip->pci) + irq = chip->pci->irq; + else + irq = platform_get_irq(to_platform_device(chip->card->dev), 0); + + if (request_irq(irq, azx_interrupt, chip->msi ? 0 : IRQF_SHARED, + KBUILD_MODNAME, chip)) { + dev_err(chip->card->dev, + "unable to grab IRQ %d, disabling device\n", irq); + if (do_disconnect) + snd_card_disconnect(chip->card); + return -1; + } + bus->irq = irq; + return 0; +} + +#ifdef CONFIG_PM +static DEFINE_MUTEX(card_list_lock); +static LIST_HEAD(card_list); + +static void azx_add_card_list(struct azx *chip) +{ + struct hda_loongson *hda = container_of(chip, struct hda_loongson, chip); + mutex_lock(&card_list_lock); + list_add(&hda->list, &card_list); + mutex_unlock(&card_list_lock); +} + +static void azx_del_card_list(struct azx *chip) +{ + struct hda_loongson *hda = container_of(chip, struct hda_loongson, chip); + mutex_lock(&card_list_lock); + list_del_init(&hda->list); + mutex_unlock(&card_list_lock); +} + +/* trigger power-save check at writing parameter */ +static int param_set_xint(const char *val, const struct kernel_param *kp) +{ + struct hda_loongson *hda; + struct azx *chip; + int prev = power_save; + int ret = param_set_int(val, kp); + + if (ret || prev == power_save) + return ret; + + mutex_lock(&card_list_lock); + list_for_each_entry(hda, &card_list, list) { + chip = &hda->chip; + if (!hda->probe_continued || chip->disabled) + continue; + snd_hda_set_power_save(&chip->bus, power_save * 1000); + } + mutex_unlock(&card_list_lock); + return 0; +} +#else +#define azx_add_card_list(chip) /* NOP */ +#define azx_del_card_list(chip) /* NOP */ +#endif /* CONFIG_PM */ + +#if defined(CONFIG_PM_SLEEP) +/* + * power management + */ +static int azx_suspend(struct device *dev) +{ + struct snd_card *card = dev_get_drvdata(dev); + struct azx *chip = card->private_data; + struct hdac_bus *bus; + + if (chip->disabled) + return 0; + + bus = azx_bus(chip); + snd_power_change_state(card, SNDRV_CTL_POWER_D3hot); + azx_clear_irq_pending(chip); + azx_stop_chip(chip); + azx_enter_link_reset(chip); + if (bus->irq >= 0) { + free_irq(bus->irq, chip); + bus->irq = -1; + } + return 0; +} + +static int azx_resume(struct device *dev) +{ + struct snd_card *card = dev_get_drvdata(dev); + struct azx *chip = card->private_data; + + if (chip->disabled) + return 0; + + chip->msi = 0; + if (azx_acquire_irq(chip, 1) < 0) + return -EIO; + + azx_init_chip(chip, true); + + snd_power_change_state(card, SNDRV_CTL_POWER_D0); + return 0; +} +#endif /* CONFIG_PM_SLEEP */ + +#ifdef CONFIG_PM +static int azx_runtime_suspend(struct device *dev) +{ + struct snd_card *card = dev_get_drvdata(dev); + struct azx *chip = card->private_data; + + if (chip->disabled) + return 0; + + if (!azx_has_pm_runtime(chip)) + return 0; + + /* enable controller wake up event */ + azx_writew(chip, WAKEEN, azx_readw(chip, WAKEEN) | + STATESTS_INT_MASK); + + azx_stop_chip(chip); + azx_enter_link_reset(chip); + azx_clear_irq_pending(chip); + return 0; +} + +static int azx_runtime_resume(struct device *dev) +{ + struct snd_card *card = dev_get_drvdata(dev); + struct azx *chip = card->private_data; + struct hda_codec *codec; + int status; + + if (chip->disabled) + return 0; + + if (!azx_has_pm_runtime(chip)) + return 0; + + /* Read STATESTS before controller reset */ + status = azx_readw(chip, STATESTS); + + azx_init_chip(chip, true); + + if (status) { + list_for_each_codec(codec, &chip->bus) + if (status & (1 << codec->addr)) + schedule_delayed_work(&codec->jackpoll_work, + codec->jackpoll_interval); + } + + /* disable controller Wake Up event*/ + azx_writew(chip, WAKEEN, azx_readw(chip, WAKEEN) & + ~STATESTS_INT_MASK); + + return 0; +} + +static int azx_runtime_idle(struct device *dev) +{ + struct snd_card *card = dev_get_drvdata(dev); + struct azx *chip = card->private_data; + + if (chip->disabled) + return 0; + + if (!power_save_controller || !azx_has_pm_runtime(chip) || + azx_bus(chip)->codec_powered) + return -EBUSY; + + return 0; +} + +#endif /* CONFIG_PM */ + +#ifdef CONFIG_PM +static const struct dev_pm_ops azx_pm = { + SET_SYSTEM_SLEEP_PM_OPS(azx_suspend, azx_resume) + SET_RUNTIME_PM_OPS(azx_runtime_suspend, azx_runtime_resume, azx_runtime_idle) +}; + +#define AZX_PM_OPS &azx_pm +#else +#define AZX_PM_OPS NULL +#endif /* CONFIG_PM */ + +static int azx_probe_continue(struct azx *chip); + +/* + * destructor + */ +static int azx_free(struct azx *chip) +{ + struct device *snddev = chip->card->dev; + struct hda_loongson *hda = container_of(chip, struct hda_loongson, chip); + struct hdac_bus *bus = azx_bus(chip); + + if (azx_has_pm_runtime(chip) && chip->running) + pm_runtime_get_noresume(snddev); + + azx_del_card_list(chip); + + complete_all(&hda->probe_wait); + + if (bus->chip_init) { + azx_clear_irq_pending(chip); + azx_stop_all_streams(chip); + azx_stop_chip(chip); + } + + if (bus->irq >= 0) + free_irq(bus->irq, (void *)chip); + if (bus->remap_addr) + iounmap(bus->remap_addr); + + azx_free_stream_pages(chip); + azx_free_streams(chip); + snd_hdac_bus_exit(bus); + kfree(hda); + + return 0; +} + +static int azx_dev_disconnect(struct snd_device *device) +{ + struct azx *chip = device->device_data; + + chip->bus.shutdown = 1; + return 0; +} + +static int azx_dev_free(struct snd_device *device) +{ + return azx_free(device->device_data); +} + +/* + * constructor + */ +static const struct hda_controller_ops loongson_hda_ops; + +static int azx_create(struct snd_card *card, struct pci_dev *pcidev, + struct platform_device *platdev, int dev, + unsigned int driver_caps, struct azx **rchip) +{ + static struct snd_device_ops ops = { + .dev_disconnect = azx_dev_disconnect, + .dev_free = azx_dev_free, + }; + struct hda_loongson *hda; + struct azx *chip; + int err; + + *rchip = NULL; + + hda = kzalloc(sizeof(*hda), GFP_KERNEL); + if (!hda) { + dev_err(card->dev, "Cannot allocate hda\n"); + return -ENOMEM; + } + + chip = &hda->chip; + mutex_init(&chip->open_mutex); + chip->card = card; + chip->pci = pcidev; + chip->ops = &loongson_hda_ops; + chip->driver_caps = driver_caps; + chip->driver_type = driver_caps & 0xff; + chip->dev_index = dev; + if (jackpoll_ms[dev] >= 50 && jackpoll_ms[dev] <= 60000) + chip->jackpoll_interval = msecs_to_jiffies(jackpoll_ms[dev]); + INIT_LIST_HEAD(&chip->pcm_list); + INIT_WORK(&hda->irq_pending_work, azx_irq_pending_work); + INIT_LIST_HEAD(&hda->list); + init_completion(&hda->probe_wait); + + chip->get_position[0] = chip->get_position[1] = azx_get_pos_lpib; + + chip->snoop = false; + chip->single_cmd = single_cmd; + azx_bus(chip)->codec_mask = chip->codec_probe_mask = 0x1; + + if (bdl_pos_adj[dev] < 0) { + switch (chip->driver_type) { + case AZX_DRIVER_LS7A: + bdl_pos_adj[dev] = 1; + break; + default: + bdl_pos_adj[dev] = 32; + break; + } + } + chip->bdl_pos_adj = bdl_pos_adj[dev]; + + err = azx_bus_init(chip, model[dev]); + if (err < 0) { + kfree(hda); + return err; + } + + err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, chip, &ops); + if (err < 0) { + dev_err(card->dev, "Error creating device [card]!\n"); + azx_free(chip); + return err; + } + + *rchip = chip; + + return 0; +} + +static int azx_first_init(struct azx *chip) +{ + int dev = chip->dev_index; + struct snd_card *card = chip->card; + struct hdac_bus *bus = azx_bus(chip); + int err; + unsigned short gcap; + + bus->addr = pci_resource_start(chip->pci, 0); + bus->remap_addr = ioremap(bus->addr, + pci_resource_end(chip->pci, 0) - pci_resource_start(chip->pci, 0) + 1); + if (bus->remap_addr == NULL) { + dev_err(card->dev, "ioremap error\n"); + return -ENXIO; + } + + chip->msi = 0; + + if (azx_acquire_irq(chip, 0) < 0) + return -EBUSY; + + synchronize_irq(bus->irq); + + gcap = azx_readw(chip, GCAP); + dev_dbg(card->dev, "chipset global capabilities = 0x%x\n", gcap); + + /* disable 64bit DMA address on some devices */ + if (chip->driver_caps & AZX_DCAPS_NO_64BIT) { + dev_dbg(card->dev, "Disabling 64bit DMA\n"); + gcap &= ~AZX_GCAP_64OK; + } + + /* disable buffer size rounding to 128-byte multiples if supported */ + if (align_buffer_size >= 0) + chip->align_buffer_size = !!align_buffer_size; + else { + if (chip->driver_caps & AZX_DCAPS_NO_ALIGN_BUFSIZE) + chip->align_buffer_size = 0; + else + chip->align_buffer_size = 1; + } + + /* allow 64bit DMA address if supported by H/W */ + if ((gcap & AZX_GCAP_64OK) && !dma_set_mask(chip->card->dev, DMA_BIT_MASK(64))) + dma_set_coherent_mask(chip->card->dev, DMA_BIT_MASK(64)); + else { + dma_set_mask(chip->card->dev, DMA_BIT_MASK(32)); + dma_set_coherent_mask(chip->card->dev, DMA_BIT_MASK(32)); + } + + /* read number of streams from GCAP register instead of using + * hardcoded value + */ + chip->capture_streams = (gcap >> 8) & 0x0f; + chip->playback_streams = (gcap >> 12) & 0x0f; + if (!chip->playback_streams && !chip->capture_streams) { + /* gcap didn't give any info, switching to old method */ + chip->capture_streams = LS7A_NUM_CAPTURE; + chip->playback_streams = LS7A_NUM_PLAYBACK; + } + chip->capture_index_offset = 0; + chip->playback_index_offset = chip->capture_streams; + chip->num_streams = chip->playback_streams + chip->capture_streams; + + /* initialize streams */ + err = azx_init_streams(chip); + if (err < 0) + return err; + chip->playback_streams = chip->capture_streams = 1; /* Loongson */ + + err = azx_alloc_stream_pages(chip); + if (err < 0) + return err; + + /* initialize chip */ + azx_init_chip(chip, (probe_only[dev] & 2) == 0); + + /* codec detection */ + if (!azx_bus(chip)->codec_mask) { + dev_err(card->dev, "no codecs found!\n"); + return -ENODEV; + } + + strcpy(card->driver, "HDA-Loongson"); + strlcpy(card->shortname, driver_short_names[chip->driver_type], + sizeof(card->shortname)); + snprintf(card->longname, sizeof(card->longname), + "%s at 0x%lx irq %i", + card->shortname, bus->addr, bus->irq); + + return 0; +} + +static const struct hda_controller_ops loongson_hda_ops = { + .position_check = azx_position_check, +}; + +/* number of codec slots for each chipset: 0 = default slots (i.e. 4) */ +static unsigned int azx_max_codecs[AZX_NUM_DRIVERS] = {}; + +static int azx_probe_continue(struct azx *chip) +{ + struct hda_loongson *hda = container_of(chip, struct hda_loongson, chip); + int dev = chip->dev_index; + int err; + struct device *snddev = chip->card->dev; + + hda->probe_continued = 1; + + err = azx_first_init(chip); + if (err < 0) + goto out_free; + +#ifdef CONFIG_SND_HDA_INPUT_BEEP + chip->beep_mode = beep_mode[dev]; +#endif + + /* create codec instances */ + err = azx_probe_codecs(chip, azx_max_codecs[chip->driver_type]); + if (err < 0) + goto out_free; + + if ((probe_only[dev] & 1) == 0) { + err = azx_codec_configure(chip); + if (err < 0) + goto out_free; + } + + err = snd_card_register(chip->card); + if (err < 0) + goto out_free; + + chip->running = 1; + azx_add_card_list(chip); +#ifdef CONFIG_PM + pm_runtime_forbid(snddev); + pm_runtime_set_active(snddev); +#endif + snd_hda_set_power_save(&chip->bus, power_save * 1000); + if (azx_has_pm_runtime(chip)) + pm_runtime_put_noidle(snddev); + +out_free: + complete_all(&hda->probe_wait); + return err; +} + +static const struct pci_device_id azx_ids[] = { + {PCI_DEVICE(PCI_VENDOR_ID_LOONGSON, PCI_DEVICE_ID_LOONGSON_HDA)}, + {} +}; + +MODULE_DEVICE_TABLE(pci, azx_ids); + +static int azx_pci_probe(struct pci_dev *pdev, const struct pci_device_id *pid) +{ + int ret; + bool probe_now; + static int dev; + struct snd_card *card; + struct azx *chip; + struct hda_loongson *hda; + + /* Enable device in PCI config */ + ret = pci_enable_device(pdev); + if (ret < 0) { + printk(KERN_ERR "Loongson HDA (%s): Cannot enable PCI device\n", + pci_name(pdev)); + goto out; + } + + /* request the mem regions */ + ret = pci_request_region(pdev, 0, "Loongson HDA"); + if (ret < 0) { + printk(KERN_ERR "Loongson HDA (%s): cannot request region 0.\n", + pci_name(pdev)); + goto out; + } + + if (dev >= SNDRV_CARDS) + return -ENODEV; + if (!enable[dev]) { + dev++; + return -ENOENT; + } + + ret = snd_card_new(&pdev->dev, index[dev], id[dev], THIS_MODULE, + 0, &card); + if (ret < 0) { + dev_err(&pdev->dev, "Error creating card!\n"); + return ret; + } + + ret = azx_create(card, pdev, NULL, dev, AZX_DRIVER_LS7A | AZX_DCAPS_LS2X_WORKAROUND, &chip); + if (ret < 0) + goto out_free; + card->private_data = chip; + hda = container_of(chip, struct hda_loongson, chip); + + dev_set_drvdata(&pdev->dev, card); + + probe_now = !chip->disabled; + + if (probe_now) { + ret = azx_probe_continue(chip); + if (ret < 0) + goto out_free; + } + + dev++; + if (chip->disabled) + complete_all(&hda->probe_wait); + return 0; + +out_free: + snd_card_free(card); +out: + return ret; +} + +static void azx_pci_remove(struct pci_dev *pdev) +{ + snd_card_free(dev_get_drvdata(&pdev->dev)); +} + +static void azx_pci_shutdown(struct pci_dev *pdev) +{ + struct snd_card *card = dev_get_drvdata(&pdev->dev); + struct azx *chip; + + if (!card) + return; + chip = card->private_data; + if (chip && chip->running) + azx_stop_chip(chip); +} + +/* pci_driver definition */ +static struct pci_driver azx_pci_driver = { + .name = "loongson-audio", + .id_table = azx_ids, + .probe = azx_pci_probe, + .remove = azx_pci_remove, + .shutdown = azx_pci_shutdown, + .driver.pm = AZX_PM_OPS, +}; + +static int __init alsa_card_azx_init(void) +{ + int ret; + + ret = pci_register_driver(&azx_pci_driver); + if (ret) + pr_err("hda azx pci driver register\n"); + + return ret; +} + +static void __exit alsa_card_azx_exit(void) +{ + pci_unregister_driver(&azx_pci_driver); +} + +module_init(alsa_card_azx_init) +module_exit(alsa_card_azx_exit) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 14ce48f1a8e4..4284bdd9933a 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -25,6 +25,7 @@ #include "hda_auto_parser.h" #include "hda_jack.h" #include "hda_generic.h" +#include "hda_controller.h" /* keep halting ALC5505 DSP, for power saving */ #define HALT_REALTEK_ALC5505 @@ -296,6 +297,13 @@ static void alc_fixup_micmute_led(struct hda_codec *codec, snd_hda_gen_add_micmute_led_cdev(codec, NULL); } +int has_loongson_workaround(struct hda_codec *codec) +{ + struct azx *chip = bus_to_azx(&codec->bus->core); + + return chip->driver_caps & AZX_DCAPS_LS2X_WORKAROUND; +} + /* * Fix hardware PLL issue * On some codecs, the analog PLL gating control must be off while @@ -635,10 +643,10 @@ static int alc_auto_parse_customize_define(struct hda_codec *codec) goto do_sku; } - if (!codec->bus->pci) + if (!codec->bus->pci && !has_loongson_workaround(codec)) return -1; ass = codec->core.subsystem_id & 0xffff; - if (ass != codec->bus->pci->subsystem_device && (ass & 1)) + if (codec->bus->pci && ass != codec->bus->pci->subsystem_device && (ass & 1)) goto do_sku; nid = 0x1d; -- Gitee From f8dfe8d8b4ef018a2f4a2c16d1b9aeabdb2fb813 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Mon, 22 Nov 2021 11:13:42 +0800 Subject: [PATCH 59/59] stmmac: pci: Add dwmac support for Loongson Change-Id: I6d0de0aebff3d4f1a58fb348dac66ec65804b8c4 Signed-off-by: Qing Zhang Signed-off-by: Huacai Chen --- drivers/net/ethernet/stmicro/stmmac/Kconfig | 9 + drivers/net/ethernet/stmicro/stmmac/Makefile | 1 + .../ethernet/stmicro/stmmac/dwmac-loongson.c | 222 ++++++++++++++++++ 3 files changed, 232 insertions(+) create mode 100644 drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index 53f14c5a9e02..13eda0c37f7c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -230,6 +230,15 @@ config DWMAC_INTEL This selects the Intel platform specific bus support for the stmmac driver. This driver is used for Intel Quark/EHL/TGL. +config DWMAC_LOONGSON + tristate "Loongson PCI DWMAC support" + default MACH_LOONGSON64 + depends on STMMAC_ETH && PCI + depends on COMMON_CLK + help + This selects the LOONGSON PCI bus support for the stmmac driver, + Support for ethernet controller on Loongson-2K1000 SoC and LS7A1000 bridge. + config STMMAC_PCI tristate "STMMAC PCI bus support" depends on STMMAC_ETH && PCI diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile index 24e6145d4eae..11ea4569c43d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Makefile +++ b/drivers/net/ethernet/stmicro/stmmac/Makefile @@ -34,4 +34,5 @@ dwmac-altr-socfpga-objs := altr_tse_pcs.o dwmac-socfpga.o obj-$(CONFIG_STMMAC_PCI) += stmmac-pci.o obj-$(CONFIG_DWMAC_INTEL) += dwmac-intel.o +obj-$(CONFIG_DWMAC_LOONGSON) += dwmac-loongson.o stmmac-pci-objs:= stmmac_pci.o diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c new file mode 100644 index 000000000000..ecf759ee1c9f --- /dev/null +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c @@ -0,0 +1,222 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020, Loongson Corporation + */ + +#include +#include +#include +#include +#include +#include "stmmac.h" + +static int loongson_default_data(struct plat_stmmacenet_data *plat) +{ + plat->clk_csr = 2; /* clk_csr_i = 20-35MHz & MDC = clk_csr_i/16 */ + plat->has_gmac = 1; + plat->force_sf_dma_mode = 1; + + /* Set default value for multicast hash bins */ + plat->multicast_filter_bins = HASH_TABLE_SIZE; + + /* Set default value for unicast filter entries */ + plat->unicast_filter_entries = 1; + + /* Set the maxmtu to a default of JUMBO_LEN */ + plat->maxmtu = JUMBO_LEN; + + /* Set default number of RX and TX queues to use */ + plat->tx_queues_to_use = 1; + plat->rx_queues_to_use = 1; + + /* Disable Priority config by default */ + plat->tx_queues_cfg[0].use_prio = false; + plat->rx_queues_cfg[0].use_prio = false; + + /* Disable RX queues routing by default */ + plat->rx_queues_cfg[0].pkt_route = 0x0; + + /* Default to phy auto-detection */ + plat->phy_addr = -1; + + plat->dma_cfg->pbl = 32; + plat->dma_cfg->pblx8 = true; + + plat->multicast_filter_bins = 256; + return 0; +} + +static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct plat_stmmacenet_data *plat; + struct stmmac_resources res; + struct device_node *np; + int ret, i, phy_mode; + bool mdio = false; + + np = dev_of_node(&pdev->dev); + + if (!np) { + pr_info("dwmac_loongson_pci: No OF node\n"); + return -ENODEV; + } + + if (!of_device_is_compatible(np, "loongson, pci-gmac")) { + pr_info("dwmac_loongson_pci: Incompatible OF node\n"); + return -ENODEV; + } + + plat = devm_kzalloc(&pdev->dev, sizeof(*plat), GFP_KERNEL); + if (!plat) + return -ENOMEM; + + if (plat->mdio_node) { + dev_err(&pdev->dev, "Found MDIO subnode\n"); + mdio = true; + } + + if (mdio) { + plat->mdio_bus_data = devm_kzalloc(&pdev->dev, + sizeof(*plat->mdio_bus_data), + GFP_KERNEL); + if (!plat->mdio_bus_data) + return -ENOMEM; + plat->mdio_bus_data->needs_reset = true; + } + + plat->dma_cfg = devm_kzalloc(&pdev->dev, sizeof(*plat->dma_cfg), GFP_KERNEL); + if (!plat->dma_cfg) + return -ENOMEM; + + /* Enable pci device */ + ret = pci_enable_device(pdev); + if (ret) { + dev_err(&pdev->dev, "%s: ERROR: failed to enable device\n", __func__); + return ret; + } + + /* Get the base address of device */ + for (i = 0; i < PCI_STD_NUM_BARS; i++) { + if (pci_resource_len(pdev, i) == 0) + continue; + ret = pcim_iomap_regions(pdev, BIT(0), pci_name(pdev)); + if (ret) + return ret; + break; + } + + plat->bus_id = of_alias_get_id(np, "ethernet"); + if (plat->bus_id < 0) + plat->bus_id = pci_dev_id(pdev); + + phy_mode = device_get_phy_mode(&pdev->dev); + if (phy_mode < 0) { + dev_err(&pdev->dev, "phy_mode not found\n"); + return phy_mode; + } + + plat->phy_interface = phy_mode; + plat->interface = PHY_INTERFACE_MODE_GMII; + + pci_set_master(pdev); + + loongson_default_data(plat); + pci_enable_msi(pdev); + memset(&res, 0, sizeof(res)); + res.addr = pcim_iomap_table(pdev)[0]; + + res.irq = of_irq_get_byname(np, "macirq"); + if (res.irq < 0) { + dev_err(&pdev->dev, "IRQ macirq not found\n"); + ret = -ENODEV; + } + + res.wol_irq = of_irq_get_byname(np, "eth_wake_irq"); + if (res.wol_irq < 0) { + dev_info(&pdev->dev, "IRQ eth_wake_irq not found, using macirq\n"); + res.wol_irq = res.irq; + } + + res.lpi_irq = of_irq_get_byname(np, "eth_lpi"); + if (res.lpi_irq < 0) { + dev_err(&pdev->dev, "IRQ eth_lpi not found\n"); + ret = -ENODEV; + } + + return stmmac_dvr_probe(&pdev->dev, plat, &res); +} + +static void loongson_dwmac_remove(struct pci_dev *pdev) +{ + int i; + + stmmac_dvr_remove(&pdev->dev); + + for (i = 0; i < PCI_STD_NUM_BARS; i++) { + if (pci_resource_len(pdev, i) == 0) + continue; + pcim_iounmap_regions(pdev, BIT(i)); + break; + } + + pci_disable_device(pdev); +} + +static int __maybe_unused loongson_dwmac_suspend(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + int ret; + + ret = stmmac_suspend(dev); + if (ret) + return ret; + + ret = pci_save_state(pdev); + if (ret) + return ret; + + pci_disable_device(pdev); + pci_wake_from_d3(pdev, true); + return 0; +} + +static int __maybe_unused loongson_dwmac_resume(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + int ret; + + pci_restore_state(pdev); + pci_set_power_state(pdev, PCI_D0); + + ret = pci_enable_device(pdev); + if (ret) + return ret; + + pci_set_master(pdev); + + return stmmac_resume(dev); +} + +static SIMPLE_DEV_PM_OPS(loongson_dwmac_pm_ops, loongson_dwmac_suspend, + loongson_dwmac_resume); + +static const struct pci_device_id loongson_dwmac_id_table[] = { + { PCI_VDEVICE(LOONGSON, 0x7a03) }, + {} +}; +MODULE_DEVICE_TABLE(pci, loongson_dwmac_id_table); + +struct pci_driver loongson_dwmac_driver = { + .name = "dwmac-loongson-pci", + .id_table = loongson_dwmac_id_table, + .probe = loongson_dwmac_probe, + .remove = loongson_dwmac_remove, + .driver = { + .pm = &loongson_dwmac_pm_ops, + }, +}; + +module_pci_driver(loongson_dwmac_driver); + +MODULE_DESCRIPTION("Loongson DWMAC PCI driver"); +MODULE_AUTHOR("Qing Zhang "); +MODULE_LICENSE("GPL v2"); -- Gitee