diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 83697bf28c02a45e4c5c07c91ab34a935684cb9c..14a0123acc30d8e9f63648174c84663e4a4cc7b3 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -1233,6 +1233,7 @@ CONFIG_LRU_GEN=y CONFIG_ARM64_HAFT=y CONFIG_ARCH_SUPPORTS_PER_VMA_LOCK=y CONFIG_PER_VMA_LOCK=y +CONFIG_GMEM=y CONFIG_LOCK_MM_AND_FIND_VMA=y CONFIG_IOMMU_MM_DATA=y # CONFIG_ASCEND_FEATURES is not set @@ -7022,6 +7023,13 @@ CONFIG_CPU_INSPECTOR_ATF=m CONFIG_ROH=m CONFIG_ROH_HNS=m CONFIG_ARM_SPE_MEM_SAMPLING=y + +# +# remote pager device +# +CONFIG_REMOTE_PAGER=m +CONFIG_REMOTE_PAGER_MASTER=m +# end of remote pager device # end of Device Drivers # diff --git a/arch/arm64/include/asm/rsi_cmds.h b/arch/arm64/include/asm/rsi_cmds.h index e6a211001bd38edbb8fa3922b17ec59e6d12bbc4..ccdeffcefbfff33b53b9541a452222d43ae4ab26 100644 --- a/arch/arm64/include/asm/rsi_cmds.h +++ b/arch/arm64/include/asm/rsi_cmds.h @@ -9,6 +9,7 @@ #include #include +#include "string.h" #define RSI_GRANULE_SHIFT 12 #define RSI_GRANULE_SIZE (_AC(1, UL) << RSI_GRANULE_SHIFT) diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig index 4050098c07757c8d479f13d67deb38276c36b1a0..e9445d996e462887be865a5f95d4d57d380b1ffc 100644 --- a/arch/x86/configs/openeuler_defconfig +++ b/arch/x86/configs/openeuler_defconfig @@ -1204,6 +1204,7 @@ CONFIG_LRU_GEN=y # CONFIG_LRU_GEN_STATS is not set CONFIG_ARCH_SUPPORTS_PER_VMA_LOCK=y CONFIG_PER_VMA_LOCK=y +CONFIG_GMEM=y CONFIG_LOCK_MM_AND_FIND_VMA=y CONFIG_IOMMU_MM_DATA=y CONFIG_PAGE_CACHE_LIMIT=y @@ -8209,6 +8210,13 @@ CONFIG_INTEL_TH_PTI=m # # CONFIG_CPU_INSPECT is not set # end of CPU Inspect + +# +# remote pager device +# +CONFIG_REMOTE_PAGER=m +CONFIG_REMOTE_PAGER_MASTER=m +# end of remote pager device # end of Device Drivers # diff --git a/drivers/base/node.c b/drivers/base/node.c index 4d588f4658c85cc1471da691fecbe744811812b4..b9e095cf349822c6ddb97271d2b32fd1a227fd36 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -931,6 +931,9 @@ static struct node_attr node_state_attr[] = { [N_CPU] = _NODE_ATTR(has_cpu, N_CPU), [N_GENERIC_INITIATOR] = _NODE_ATTR(has_generic_initiator, N_GENERIC_INITIATOR), +#ifdef CONFIG_GMEM + [N_HETEROGENEOUS] = _NODE_ATTR(has_hetero_memory, N_HETEROGENEOUS), +#endif }; static struct attribute *node_state_attrs[] = { @@ -943,6 +946,9 @@ static struct attribute *node_state_attrs[] = { &node_state_attr[N_MEMORY].attr.attr, &node_state_attr[N_CPU].attr.attr, &node_state_attr[N_GENERIC_INITIATOR].attr.attr, +#ifdef CONFIG_GMEM + &node_state_attr[N_HETEROGENEOUS].attr.attr, +#endif NULL }; diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 8a691365061c1704ebdbbb2e5fbd08c99b5f1d36..84faaddafddfeed4a92dc9bab090def9ce174ac9 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -698,6 +698,9 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR [ilog2(VM_UFFD_MINOR)] = "ui", #endif /* CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */ +#ifdef CONFIG_GMEM + [ilog2(VM_PEER_SHARED)] = "ps", +#endif #ifdef CONFIG_X86_USER_SHADOW_STACK [ilog2(VM_SHADOW_STACK)] = "ss", #endif diff --git a/include/linux/device.h b/include/linux/device.h index 54a4967c496cd562715a592931144acfc35f0169..94262735406a44bf96461e304b0dc60ecee20c42 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -826,7 +826,13 @@ struct device { KABI_RESERVE(2) KABI_RESERVE(3) #endif + 
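/*
 * Illustrative note (not part of this patch): under CONFIG_GMEM the reserved
 * KABI slot 4 becomes a back-pointer from struct device to its GMEM device.
 * A peripheral driver would be expected to fill it after registering with
 * GMEM, roughly as sketched below (function and variable names are
 * hypothetical; example_mmu stands for the driver's struct gm_mmu table):
 *
 *	struct gm_dev *gmdev;
 *
 *	if (gm_dev_create(&example_mmu, drv_data, GM_DEV_CAP_PEER, &gmdev))
 *		return -ENOMEM;
 *	gmdev->dma_dev = dev;		// used for dma_map_page() in fault paths
 *	gm_dev_register_hnode(gmdev);	// allocates and registers an h-NUMA node
 *	dev->gm_dev = gmdev;		// the KABI slot introduced here
 */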
+#ifdef CONFIG_GMEM + KABI_USE(4, void *gm_dev) +#else KABI_RESERVE(4) +#endif + KABI_RESERVE(5) KABI_RESERVE(6) KABI_RESERVE(7) diff --git a/include/linux/gmem.h b/include/linux/gmem.h new file mode 100644 index 0000000000000000000000000000000000000000..23e87f2d7fe317230ade7b02df0babc4fc037252 --- /dev/null +++ b/include/linux/gmem.h @@ -0,0 +1,449 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Generalized Memory Management. + * + * Copyright (C) 2023- Huawei, Inc. + * Author: Weixi Zhu + * + */ +#ifndef _GMEM_H +#define _GMEM_H + +#include + +struct hnode; + +/* + * enum gm_ret - The return value of GMEM KPI that can be used to tell + * the core VM or peripheral driver whether the GMEM KPI was + * executed successfully. + * + * @GM_RET_SUCCESS: The invoked GMEM KPI behaved as expected. + * @GM_RET_FAILURE_UNKNOWN: The GMEM KPI failed with unknown reason. + * Any external status related to this KPI invocation changes must be rolled back. + */ +enum gm_ret { + GM_RET_SUCCESS = 0, + GM_RET_NOMEM, + GM_RET_PAGE_EXIST, + GM_RET_DMA_ERROR, + GM_RET_MIGRATING, + GM_RET_FAILURE_UNKNOWN, + GM_RET_UNIMPLEMENTED, +}; + +/* + * Defines a contiguous range of virtual addresses inside a struct gm_as + * As an analogy, this is conceptually similar as virtual_address_struct + */ +struct gm_region { + unsigned long start_va; + unsigned long end_va; + struct rb_node node; + struct gm_as *as; /* The address space that it belongs to */ + + /* Do we need another list_node to maintain a tailQ of allocated VMAs inside a gm_as? */ + struct list_head mapping_set_link; + + void (*callback_op)(void *args); + void *cb_args; +}; + +/* This holds a list of regions that must not be concurrently manipulated. */ +struct gm_mapping_set { + unsigned int region_cnt; + struct list_head gm_region_list; +}; + +/** + * enum gm_mmu_mode - defines the method to share a physical page table. + * + * @GM_MMU_MODE_SHARE: Literally share a physical page table with another + * attached device's MMU. Nothing is guaranteed about the allocated address. + * @GM_MMU_MODE_COHERENT_EXCLUSIVE: Maintain a coherent page table that holds + * exclusive mapping entries, so that device memory accesses can trigger fault-driven + * migration for automatic data locality optimizations. + * @GM_MMU_MODE_REPLICATE: Maintain a coherent page table that replicates physical + * mapping entries whenever a physical mapping is installed inside the address space, so + * that it may minimize the page faults to be triggered by this device. + */ +enum gm_mmu_mode { + GM_MMU_MODE_SHARE, + GM_MMU_MODE_COHERENT_EXCLUSIVE, + GM_MMU_MODE_REPLICATE, +}; + +/* + * This is the parameter list of peer_map/unmap mmu operations. + * if device should copy data to/from host, set copy and dma_addr + */ +struct gm_fault_t { + struct mm_struct *mm; + struct gm_dev *dev; + unsigned long pfn; + unsigned long va; + unsigned long size; + unsigned long prot; + bool copy; + dma_addr_t dma_addr; + int behavior; +}; + +enum gm_memcpy_kind { + GM_MEMCPY_INIT, + GM_MEMCPY_H2H, + GM_MEMCPY_H2D, + GM_MEMCPY_D2H, + GM_MEMCPY_D2D, + GM_MEMCPY_KIND_INVALID, +}; + +struct gm_memcpy_t { + struct mm_struct *mm; + struct gm_dev *dev; + dma_addr_t src; + dma_addr_t dest; + + size_t size; + enum gm_memcpy_kind kind; +}; + +/** + * + * This struct defines a series of MMU functions registered by a peripheral + * device that is to be invoked by GMEM. + * + * pmap is an opaque pointer that identifies a physical page table of a device. 
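 *
 * As an illustrative sketch (assumed usage, not taken from this patch), the
 * opaque cookie produced by pmap_create() is the one later handed back to the
 * mapping and TLB-invalidation hooks:
 *
 *	void *pmap;
 *
 *	dev->mmu->pmap_create(dev, &pmap);
 *	dev->mmu->pmap_enter(pmap, va, HPAGE_SIZE, pa, prot);
 *	dev->mmu->tlb_invl(pmap, va, HPAGE_SIZE);
 *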
+ * A physical page table holds the physical mappings that can be interpreted by + * the hardware MMU. + */ +struct gm_mmu { + /* + * Each bit indicates a supported page size for page-based TLB. + * Currently we do not consider range TLBs. + */ + unsigned long pgsize_bitmap; + + /* + * cookie identifies the type of the MMU. If two gm_mmu shares the same cookie, + * then it means their page table formats are compatible. + * In that case, they can share the same void *pmap as the input arg. + */ + unsigned long cookie; + + /* Synchronize VMA in a peer OS to interact with the host OS */ + enum gm_ret (*peer_va_alloc_fixed)(struct gm_fault_t *gmf); + enum gm_ret (*peer_va_free)(struct gm_fault_t *gmf); + + /* Create physical mappings on peer host. + * If copy is set, copy data [dma_addr, dma_addr + size] to peer host + */ + enum gm_ret (*peer_map)(struct gm_fault_t *gmf); + /* + * Destroy physical mappings on peer host. + * If copy is set, copy data back to [dma_addr, dma_addr + size] + */ + enum gm_ret (*peer_unmap)(struct gm_fault_t *gmf); + + enum gm_ret (*import_phys_mem)(struct mm_struct *mm, int hnid, unsigned long page_cnt); + + /* Create or destroy a device's physical page table. */ + enum gm_ret (*pmap_create)(struct gm_dev *dev, void **pmap); + enum gm_ret (*pmap_destroy)(void *pmap); + + /* Create or destroy a physical mapping of a created physical page table */ + enum gm_ret (*pmap_enter)(void *pmap, unsigned long va, unsigned long size, + unsigned long pa, unsigned long prot); + enum gm_ret (*pmap_release)(void *pmap, unsigned long va, unsigned long size); + + /* Change the protection of a virtual page */ + enum gm_ret (*pmap_protect)(void *pmap, unsigned long va, unsigned long size, + unsigned long new_prot); + + /* Invalidation functions of the MMU TLB */ + enum gm_ret (*tlb_invl)(void *pmap, unsigned long va, unsigned long size); + enum gm_ret (*tlb_invl_coalesced)(void *pmap, struct list_head *mappings); + + // copy one area of memory from device to host or from host to device + enum gm_ret (*peer_hmemcpy)(struct gm_memcpy_t *gmc); +}; + +/** + * unsigned long defines a composable flag to describe the capabilities of a device. + * + * @GM_DEV_CAP_REPLAYABLE: Memory accesses can be replayed to recover page faults. + * @GM_DEV_CAP_PEER: The device has its own VMA/PA management, controlled by another peer OS + */ +#define GM_DEV_CAP_REPLAYABLE 0x00000001 +#define GM_DEV_CAP_PEER 0x00000010 + +#define gm_dev_is_peer(dev) (((dev)->capability & GM_DEV_CAP_PEER) != 0) + +struct gm_context { + struct gm_as *as; + struct gm_dev *dev; + void *pmap; + /* + * consider a better container to maintain multiple ctx inside a device or multiple ctx + * inside a va space. + * A device may simultaneously have multiple contexts for time-sliced ctx switching + */ + struct list_head gm_dev_link; + + /* A va space may have multiple gm_context */ + struct list_head gm_as_link; +}; +#define get_gm_context(head) (list_entry((head)->prev, struct gm_context, ctx_link)) + +struct gm_dev { + int id; + + /* identifies the device capability + * For example, whether the device supports page faults or whether it has its + * own OS that manages the VA and PA resources. + */ + unsigned long capability; + struct gm_mmu *mmu; + void *dev_data; + /* + * TODO: Use a better container of struct gm_context to support time-sliced context switch. + * A collection of device contexts. If the device does not support time-sliced context + * switch, then the size of the collection should never be greater than one. 
+ * We need to think about what operators should the container be optimized for. + * A list, a radix-tree or what? What would gm_dev_activate require? + * Are there any accelerators that are really going to support time-sliced context switch? + */ + struct gm_context *current_ctx; + + struct list_head gm_ctx_list; + + /* Add tracking of registered device local physical memory. */ + nodemask_t registered_hnodes; + struct device *dma_dev; + + struct gm_mapping *gm_mapping; +}; + +#define GM_MAPPING_CPU 0x10 /* Determines whether page is a pointer or a pfn number. */ +#define GM_MAPPING_DEVICE 0x20 +#define GM_MAPPING_NOMAP 0x40 +#define GM_MAPPING_PINNED 0x80 +#define GM_MAPPING_WILLNEED 0x100 + +#define GM_MAPPING_TYPE_MASK (GM_MAPPING_CPU | GM_MAPPING_DEVICE | GM_MAPPING_NOMAP) + +/* Records the status of a page-size physical page */ +struct gm_mapping { + unsigned int flag; + + union { + struct page *page; /* CPU node */ + struct gm_page *gm_page; /* hetero-node */ + }; + + struct gm_dev *dev; + struct mutex lock; +}; + +static inline void gm_mapping_flags_set(struct gm_mapping *gm_mapping, int flags) +{ + if (flags & GM_MAPPING_TYPE_MASK) + gm_mapping->flag &= ~GM_MAPPING_TYPE_MASK; + + gm_mapping->flag |= flags; +} + +static inline void gm_mapping_flags_clear(struct gm_mapping *gm_mapping, int flags) +{ + gm_mapping->flag &= ~flags; +} + +static inline bool gm_mapping_cpu(struct gm_mapping *gm_mapping) +{ + return !!(gm_mapping->flag & GM_MAPPING_CPU); +} + +static inline bool gm_mapping_device(struct gm_mapping *gm_mapping) +{ + return !!(gm_mapping->flag & GM_MAPPING_DEVICE); +} + +static inline bool gm_mapping_nomap(struct gm_mapping *gm_mapping) +{ + return !!(gm_mapping->flag & GM_MAPPING_NOMAP); +} + +#define test_gm_mapping_mapped_on_node(i) { /* implement this */ } +#define set_gm_mapping_mapped_on_node(i) { /* implement this */ } +#define unset_gm_mapping_mapped_on_node(i) { /* implement this */ } + +/* GMEM Device KPI */ +extern enum gm_ret gm_dev_create(struct gm_mmu *mmu, void *dev_data, unsigned long cap, + struct gm_dev **new_dev); +extern enum gm_ret gm_dev_switch(struct gm_dev *dev, struct gm_as *as); +extern enum gm_ret gm_dev_detach(struct gm_dev *dev, struct gm_as *as); +extern int gm_dev_register_hnode(struct gm_dev *dev); +enum gm_ret gm_dev_fault_locked(struct mm_struct *mm, unsigned long addr, + struct gm_dev *dev, int behavior); +vm_fault_t gm_host_fault_locked(struct vm_fault *vmf, unsigned int order); + +/* GMEM address space KPI */ +extern enum gm_ret gm_as_create(unsigned long begin, unsigned long end, enum gm_as_alloc policy, + unsigned long cache_quantum, struct gm_as **new_as); +extern enum gm_ret gm_as_destroy(struct gm_as *as); +extern enum gm_ret gm_as_attach(struct gm_as *as, struct gm_dev *dev, enum gm_mmu_mode mode, + bool activate, struct gm_context **out_ctx); +extern unsigned long gm_as_alloc(struct gm_as *as, unsigned long hint, unsigned long size, + unsigned long align, unsigned long no_cross, unsigned long max_va, + struct gm_region **new_region); + +extern int hmadvise_inner(int hnid, unsigned long start, size_t len_in, int behavior); +extern int hmemcpy(int hnid, unsigned long dest, unsigned long src, size_t size); + +enum gmem_stats_item { + NR_PAGE_MIGRATING_H2D, + NR_PAGE_MIGRATING_D2H, + NR_GMEM_STAT_ITEMS +}; + +extern void gmem_stats_counter(enum gmem_stats_item item, int val); +extern void gmem_stats_counter_show(void); + +/* h-NUMA topology */ +struct hnode { + unsigned int id; + struct gm_dev *dev; + + struct task_struct *swapd_task; 
+ + struct list_head freelist; + struct list_head activelist; + spinlock_t freelist_lock; + spinlock_t activelist_lock; + atomic_t nr_free_pages; + atomic_t nr_active_pages; + + unsigned long max_memsize; + + bool import_failed; +}; + +static inline void hnode_active_pages_inc(struct hnode *hnode) +{ + atomic_inc(&hnode->nr_active_pages); +} + +static inline void hnode_active_pages_dec(struct hnode *hnode) +{ + atomic_dec(&hnode->nr_active_pages); +} + +static inline void hnode_free_pages_inc(struct hnode *hnode) +{ + atomic_inc(&hnode->nr_free_pages); +} + +static inline void hnode_free_pages_dec(struct hnode *hnode) +{ + atomic_dec(&hnode->nr_free_pages); +} + +static inline bool is_hnode(int node) +{ + return (node < MAX_NUMNODES) && !node_isset(node, node_possible_map) && + node_isset(node, hnode_map); +} + +static inline int get_hnuma_id(struct gm_dev *gm_dev) +{ + return first_node(gm_dev->registered_hnodes); +} + +void __init hnuma_init(void); +unsigned int alloc_hnode_id(void); +void free_hnode_id(unsigned int nid); +struct hnode *get_hnode(unsigned int hnid); +struct gm_dev *get_gm_dev(unsigned int nid); +void hnode_init(struct hnode *hnode, unsigned int hnid, struct gm_dev *dev); +void hnode_deinit(unsigned int hnid, struct gm_dev *dev); + +struct gm_page { + struct list_head gm_page_list; + + unsigned long flags; + unsigned long dev_pfn; + unsigned long dev_dma_addr; + unsigned int hnid; + + /* + * The same functionality as rmap, we need know which process + * maps to this gm_page with which virtual address. + * */ + unsigned long va; + struct mm_struct *mm; + spinlock_t rmap_lock; + + unsigned int flag; + atomic_t refcount; +}; + +#define GM_PAGE_EVICTING 0x1 +#define GM_PAGE_PINNED 0x2 + +static inline void gm_page_flags_set(struct gm_page *gm_page, int flags) +{ + gm_page->flag |= flags; +} + +static inline void gm_page_flags_clear(struct gm_page *gm_page, int flags) +{ + gm_page->flag &= ~flags; +} + +static inline bool gm_page_evicting(struct gm_page *gm_page) +{ + return !!(gm_page->flag & GM_PAGE_EVICTING); +} + +static inline bool gm_page_pinned(struct gm_page *gm_page) +{ + return !!(gm_page->flag & GM_PAGE_PINNED); +} + +#define NUM_IMPORT_PAGES 16 + +int __init gm_page_cachep_init(void); +void gm_page_cachep_destroy(void); +struct gm_page *alloc_gm_page_struct(void); +void hnode_freelist_add(struct hnode *hnode, struct gm_page *gm_page); +void hnode_activelist_add(struct hnode *hnode, struct gm_page *gm_page); +void hnode_activelist_del(struct hnode *hnode, struct gm_page *gm_page); +void hnode_activelist_del_and_add(struct hnode *hnode, struct gm_page *gm_page); +void mark_gm_page_active(struct gm_page *gm_page); +void mark_gm_page_pinned(struct gm_page *gm_page); +void mark_gm_page_unpinned(struct gm_page *gm_page); +void gm_page_add_rmap(struct gm_page *gm_page, struct mm_struct *mm, unsigned long va); +void gm_page_remove_rmap(struct gm_page *gm_page); +int gm_add_pages(unsigned int hnid, struct list_head *pages); +void gm_free_page(struct gm_page *gm_page); +struct gm_page *gm_alloc_page(struct mm_struct *mm, struct hnode *hnode); + +static inline void get_gm_page(struct gm_page *gm_page) +{ + atomic_inc(&gm_page->refcount); +} + +static inline void put_gm_page(struct gm_page *gm_page) +{ + if (atomic_dec_and_test(&gm_page->refcount)) + gm_free_page(gm_page); +} + +int hnode_init_sysfs(unsigned int hnid); +int gm_init_sysfs(void); +void gm_deinit_sysfs(void); + +#define gmem_err(fmt, ...) 
\ + ((void)pr_err("[gmem]" fmt "\n", ##__VA_ARGS__)) + +#endif /* _GMEM_H */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 1f36bf9ee02f7e9ae1a3d9cc8e16666d0260d663..5850701096ca0bdffcb315e8cf736240abd07731 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -342,6 +342,12 @@ extern unsigned int kobjsize(const void *objp); #define VM_HIGH_ARCH_3 BIT(VM_HIGH_ARCH_BIT_3) #define VM_HIGH_ARCH_4 BIT(VM_HIGH_ARCH_BIT_4) #define VM_HIGH_ARCH_5 BIT(VM_HIGH_ARCH_BIT_5) + +#ifdef CONFIG_GMEM +#define VM_PEER_SHARED BIT(56) +#else +#define VM_PEER_SHARED VM_NONE +#endif #endif /* CONFIG_ARCH_USES_HIGH_VMA_FLAGS */ #ifdef CONFIG_ARCH_HAS_PKEYS @@ -3404,6 +3410,12 @@ unsigned long randomize_page(unsigned long start, unsigned long range); extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); +#ifdef CONFIG_GMEM +extern unsigned long get_unmapped_area_aligned(struct file *file, + unsigned long addr, unsigned long len, unsigned long pgoff, + unsigned long flags, unsigned long align); +#endif + extern unsigned long mmap_region(struct file *file, unsigned long addr, unsigned long len, vm_flags_t vm_flags, unsigned long pgoff, struct list_head *uf); @@ -4213,4 +4225,28 @@ void vma_pgtable_walk_end(struct vm_area_struct *vma); /* added to mm.h to avoid every caller adding new header file */ #include + +#ifdef CONFIG_GMEM +DECLARE_STATIC_KEY_FALSE(gmem_status); + +static inline bool gmem_is_enabled(void) +{ + return static_branch_likely(&gmem_status); +} + +static inline bool vma_is_peer_shared(struct vm_area_struct *vma) +{ + if (!gmem_is_enabled()) + return false; + + return !!(vma->vm_flags & VM_PEER_SHARED); +} +#else +static inline bool gmem_is_enabled(void) { return false; } +static inline bool vma_is_peer_shared(struct vm_area_struct *vma) +{ + return false; +} +#endif + #endif /* _LINUX_MM_H */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 64c38b09e18d5579dd362cc160f68d6535c70428..f012f7c7c4d4a11c5532e33bf4331ce114687233 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -612,6 +612,74 @@ struct vm_userfaultfd_ctx { struct vm_userfaultfd_ctx {}; #endif /* CONFIG_USERFAULTFD */ +#ifdef CONFIG_GMEM +/* + * Defines a centralized logical mapping table that reflects the mapping information + * regardless of the underlying arch-specific MMUs. + * The implementation of this data structure borrows the VM_OBJECT from FreeBSD as well + * as the filemap address_space struct from Linux page cache. + * Only VMAs point to VM_OBJECTs and maintain logical mappings, because we assume that + * the coordiantion between page tables must happen with CPU page table involved. That + * is to say, a generalized process unit must involve in a UVA-programming model, otherwise + * there is no point to support UVA programming. + * However, a VMA only needs to maintain logical mappings if the process has been + * attached to a GMEM VA space. In normal cases, a CPU process does not need it. (unless + * we later build a reservation system on top of the logical mapping tables to support + * reservation-based superpages and rangeTLBs). + * A GM_REGION does not need to maintain logical mappings. In the case that a device wants + * to support its private address space with local physical memory, GMEM should forward address + * space management to the core VM, using VMAs, instead of using GM_REGIONs. 
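 *
 * As an illustrative sketch (assumed usage, not taken from this patch), code
 * that consults the logical page table is expected to check
 * vma_is_peer_shared() first, since ordinary VMAs never populate vm_obj:
 *
 *	if (vma_is_peer_shared(vma)) {
 *		struct gm_mapping *gm_mapping =
 *			vm_object_lookup(vma->vm_obj, addr & ~(HPAGE_SIZE - 1));
 *		...	// gm_mapping records whether addr lives on the CPU or a device
 *	}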
+ */ +struct vm_object { + spinlock_t lock; + struct vm_area_struct *vma; + + /* + * The logical_page_table is a container that holds the mapping + * information between a VA and a struct page. + */ + struct xarray *logical_page_table; + atomic_t nr_pages; + + /* + * a vm object might be referred by multiple VMAs to share + * memory. + */ + atomic_t ref_count; +}; + +#define GMEM_MMAP_RETRY_TIMES 10 /* gmem retry times before OOM */ + +/** + * enum gm_as_alloc - defines different allocation policy for virtual addresses. + * + * @GM_AS_ALLOC_DEFAULT: An object cache is applied to accelerate VA allocations. + * @GM_AS_ALLOC_FIRSTFIT: Prefer allocation efficiency. + * @GM_AS_ALLOC_BESTFIT: Prefer space efficiency. + * @GM_AS_ALLOC_NEXTFIT: Perform an address-ordered search for free addresses, + * beginning where the previous search ended. + */ +enum gm_as_alloc { + GM_AS_ALLOC_DEFAULT = 0, + GM_AS_ALLOC_FIRSTFIT, + GM_AS_ALLOC_BESTFIT, + GM_AS_ALLOC_NEXTFIT, +}; + +/* Defines an address space. */ +struct gm_as { + spinlock_t rbtree_lock; /* spinlock of struct gm_as */ + struct rb_root rbroot; /*root of gm_region_t */ + enum gm_as_alloc policy; + unsigned long start_va; + unsigned long end_va; + /* defines the VA unit size if an object cache is applied */ + unsigned long cache_quantum; + /* tracks device contexts attached to this va space, using gm_as_link */ + struct list_head gm_ctx_list; +}; +#endif + struct anon_vma_name { struct kref kref; /* The name needs to be at the end because it is dynamically sized. */ @@ -735,7 +803,11 @@ struct vm_area_struct { #ifdef CONFIG_SHARE_POOL struct sp_area *spa; #endif +#ifdef CONFIG_GMEM + KABI_USE(1, struct vm_object *vm_obj) +#else KABI_RESERVE(1) +#endif KABI_RESERVE(2) KABI_RESERVE(3) KABI_RESERVE(4) @@ -1016,7 +1088,11 @@ struct mm_struct { #else KABI_RESERVE(1) #endif +#ifdef CONFIG_GMEM + KABI_USE(2, struct gm_as *gm_as) +#else KABI_RESERVE(2) +#endif KABI_RESERVE(3) KABI_RESERVE(4) KABI_RESERVE(5) diff --git a/include/linux/mman.h b/include/linux/mman.h index 8ddca62d6460bd461b8afff731bb64a5203b822a..30ec68346f6b0409155afbf32aa3d40e8afb305b 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -55,7 +55,8 @@ | MAP_32BIT \ | MAP_ABOVE4G \ | MAP_HUGE_2MB \ - | MAP_HUGE_1GB) + | MAP_HUGE_1GB \ + | MAP_PEER_SHARED) extern int sysctl_overcommit_memory; extern int sysctl_overcommit_ratio; diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index 8d07116caaf1b037c3121bd8ca5011dd4568cdc2..f005f3d903aedc52d0d9423f3077b6cfedd10865 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -407,6 +407,11 @@ enum node_states { N_MEMORY, /* The node has memory(regular, high, movable) */ N_CPU, /* The node has one or more cpus */ N_GENERIC_INITIATOR, /* The node has one or more Generic Initiators */ +#ifdef CONFIG_GMEM +#ifndef __GENKSYMS__ + N_HETEROGENEOUS, /* The node has heterogeneous memory */ +#endif +#endif NR_NODE_STATES }; @@ -536,6 +541,13 @@ static inline int node_random(const nodemask_t *maskp) #define for_each_node(node) for_each_node_state(node, N_POSSIBLE) #define for_each_online_node(node) for_each_node_state(node, N_ONLINE) +#ifdef CONFIG_GMEM +/* For h-NUMA topology */ +#define hnode_map node_states[N_HETEROGENEOUS] +#define num_hnodes() num_node_state(N_HETEROGENEOUS) +#define for_each_hnode(node) for_each_node_state(node, N_HETEROGENEOUS) +#endif + /* * For nodemask scratch area. 
* NODEMASK_ALLOC(type, name) allocates an object with a specified type and diff --git a/include/linux/remote_pager/msg_chan.h b/include/linux/remote_pager/msg_chan.h new file mode 100644 index 0000000000000000000000000000000000000000..a8049def052d6686a59474846b83c59576cd2263 --- /dev/null +++ b/include/linux/remote_pager/msg_chan.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __RPG_MSG_CHAN_H__ +#define __RPG_MSG_CHAN_H__ + +#include + +/* + * struct phys_channel_ops - Channel physical layer ops + * @open: Open the communication channel of node nid and alloc physical resources, + * returns the channel ID + * @notify: Notify peer of chan_id to receive messages + * @copy_to: Copy the msg_data message from origin to peer + * @copy_from: Copy the msg_data message from peer to origin + * @close: Close channel and free physical resources + */ +struct phys_channel_ops { + char *name; + int (*open)(int nid); + int (*notify)(int chan_id); + int (*copy_to)(int chan_id, void *msg_data, size_t msg_len, int flags); + int (*copy_from)(int chan_id, void *buf, size_t len, int flags); + int (*migrate_page)(void *peer_addr, struct page *local_page, size_t size, int dir); + int (*close)(int chan_id); +}; + +int msg_layer_install_phy_ops(struct phys_channel_ops *ops, int default_chan_id); +int msg_layer_uninstall_phy_ops(struct phys_channel_ops *ops); + +#define log_err(fmt, ...) pr_err("[%s:%d]" fmt, __func__, __LINE__, ##__VA_ARGS__) +#define log_info(fmt, ...) pr_info("[%s:%d]" fmt, __func__, __LINE__, ##__VA_ARGS__) + +#define MSG_CMD_START 0x1 +#define MSG_CMD_IRQ_END 0x2 +#define MSG_CMD_FIFO_NO_MEM 0x3 +#define MSG_CMD_CHANN_OPEN 0x4 + +#define CHAN_STAT_ENABLE 1 +#define CHAN_STAT_DISABLE 0 + +#define TO_PEER 0 +#define FROM_PEER 1 + +#endif diff --git a/include/linux/vm_object.h b/include/linux/vm_object.h new file mode 100644 index 0000000000000000000000000000000000000000..480bb12fb6a351bad891295380bd44bd4801fe8a --- /dev/null +++ b/include/linux/vm_object.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _VM_OBJECT_H +#define _VM_OBJECT_H + +#include +#include + +#ifdef CONFIG_GMEM +/* vm_object KPI */ +int __init vm_object_init(void); +struct vm_object *vm_object_create(struct vm_area_struct *vma); +void vm_object_drop_locked(struct vm_area_struct *vma); +void dup_vm_object(struct vm_area_struct *dst, struct vm_area_struct *src, bool dst_peer_shared); +void vm_object_adjust(struct vm_area_struct *vma, unsigned long start, + unsigned long end); +void vm_object_merge(struct vm_area_struct *vma, unsigned long addr); +void vm_object_split(struct vm_area_struct *old_vma, struct vm_area_struct *new_vma); +void dup_peer_shared_vma(struct vm_area_struct *vma); + +struct gm_mapping *alloc_gm_mapping(void); +struct gm_mapping *vm_object_lookup(struct vm_object *obj, unsigned long va); +void vm_object_mapping_create(struct vm_object *obj, unsigned long start); +void free_gm_mappings(struct vm_area_struct *vma); +#else +static inline void __init vm_object_init(void) {} +static inline struct vm_object *vm_object_create(struct vm_area_struct *vma) { return NULL; } +static inline void vm_object_drop_locked(struct vm_area_struct *vma) {} +static inline void dup_vm_object(struct vm_area_struct *dst, + struct vm_area_struct *src, bool dst_peer_shared) {} +static inline void dup_peer_shared_vma(struct vm_area_struct *vma) {} +static inline void vm_object_adjust(struct vm_area_struct *vma, unsigned long start, + unsigned long end) {} + +static inline struct gm_mapping 
*alloc_gm_mapping(void) { return NULL; } +static inline struct gm_mapping *vm_object_lookup(struct vm_object *obj, + unsigned long va) { return NULL; } +static inline void vm_object_mapping_create(struct vm_object *obj, + unsigned long start) {} +static inline void free_gm_mappings(struct vm_area_struct *vma) {} +#endif + +#endif /* _VM_OBJECT_H */ diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h index 14e5498efd7acab203c0d43e48e0536ed52ffead..19e22492a85b8d24e9d094b90a759e38ccb24154 100644 --- a/include/uapi/asm-generic/mman-common.h +++ b/include/uapi/asm-generic/mman-common.h @@ -33,6 +33,8 @@ #define MAP_UNINITIALIZED 0x4000000 /* For anonymous mmap, memory could be * uninitialized */ +#define MAP_PEER_SHARED 0x1000000 + /* * Flags for mlock */ @@ -79,6 +81,12 @@ #define MADV_COLLAPSE 25 /* Synchronous hugepage collapse */ +/* for hmadvise */ +#define MADV_GMEM_BASE 0x1000 +#define MADV_PREFETCH MADV_GMEM_BASE /* prefetch pages for hNUMA node */ +#define MADV_PINNED (MADV_GMEM_BASE+1) /* pin these pages */ +#define MADV_PINNED_REMOVE (MADV_GMEM_BASE+2) /* unpin these pages */ + #define MADV_ETMEM_BASE 0x1100 #define MADV_SWAPFLAG MADV_ETMEM_BASE /* for memory to be swap out */ #define MADV_SWAPFLAG_REMOVE (MADV_SWAPFLAG + 1) diff --git a/kernel/fork.c b/kernel/fork.c index 78663ca681600ff7b78150acb521d115e3f1f1a9..7c7f87bd1110a0f5654db12f7dcd94ac3a7a72af 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -99,6 +99,11 @@ #include #include #include + +#ifdef CONFIG_GMEM +#include +#endif + #ifdef CONFIG_QOS_SCHED_SMART_GRID #include #endif @@ -110,10 +115,15 @@ #include #include #include + #ifdef CONFIG_FAST_SYSCALL #include #endif +#ifdef CONFIG_GMEM +#include +#endif + #include #define CREATE_TRACE_POINTS @@ -526,6 +536,10 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig) vma_numab_state_init(new); dup_anon_vma_name(orig, new); +#ifdef CONFIG_GMEM + dup_peer_shared_vma(new); +#endif + return new; } @@ -551,6 +565,10 @@ static void vm_area_free_rcu_cb(struct rcu_head *head) void vm_area_free(struct vm_area_struct *vma) { +#ifdef CONFIG_GMEM + if (vma_is_peer_shared(vma)) + vm_object_drop_locked(vma); +#endif #ifdef CONFIG_PER_VMA_LOCK call_rcu(&vma->vm_rcu, vm_area_free_rcu_cb); #else @@ -1766,7 +1784,9 @@ static struct mm_struct *dup_mm(struct task_struct *tsk, err = dup_mmap(mm, oldmm); if (err) goto free_pt; - +#ifdef CONFIG_GMEM + mm->gm_as = NULL; +#endif mm->hiwater_rss = get_mm_rss(mm); mm->hiwater_vm = mm->total_vm; diff --git a/mm/Kconfig b/mm/Kconfig index bdd8372552ffd0fd17a1c879c5fe1545f99f0f0c..829a0d6a0fb5e50caef77fe02423ecd053119b1c 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -1480,6 +1480,21 @@ config NUMABALANCING_MEM_SAMPLING if unsure, say N to disable the NUMABALANCING_MEM_SAMPLING. +config GMEM + bool "gmem subsystem for multi-MMU cooperative management" + depends on (ARM64 || X86_64) && MMU && TRANSPARENT_HUGEPAGE + select ARCH_USES_HIGH_VMA_FLAGS + default y + help + This provides a high-level interface that decouples MMU-specific functions. + Device drivers can thus attach themselves to a process’s address space and + let the OS take charge of their memory management. This eliminates + the need for device drivers to reinvent the wheel and allows them to + benefit from general memory optimizations integrated by GMEM. 
+ + say Y here to enable gmem subsystem + + source "mm/damon/Kconfig" config THP_CONTROL diff --git a/mm/Makefile b/mm/Makefile index 08fcaca0d8cd1b8743e5781df785c72e47fa6b45..db7c51e1f563181c982fa027dc8d27ec801eb471 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -41,7 +41,7 @@ mmu-$(CONFIG_MMU) := highmem.o memory.o mincore.o \ mlock.o mmap.o mmu_gather.o mprotect.o mremap.o \ msync.o page_vma_mapped.o pagewalk.o \ pgtable-generic.o rmap.o vmalloc.o - +mmu-$(CONFIG_GMEM) += gmem.o gmem_phys.o gmem_stat.o vm_object.o ifdef CONFIG_CROSS_MEMORY_ATTACH mmu-$(CONFIG_MMU) += process_vm_access.o diff --git a/mm/gmem.c b/mm/gmem.c new file mode 100644 index 0000000000000000000000000000000000000000..227717b2408e70440a57b9288611e9ae70865c68 --- /dev/null +++ b/mm/gmem.c @@ -0,0 +1,1109 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Generalized Memory Management. + * + * Copyright (C) 2023- Huawei, Inc. + * Author: Weixi Zhu + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +DEFINE_STATIC_KEY_FALSE(gmem_status); +EXPORT_SYMBOL_GPL(gmem_status); + +static struct kmem_cache *gm_as_cache; +static struct kmem_cache *gm_dev_cache; +static struct kmem_cache *gm_ctx_cache; +static struct kmem_cache *gm_region_cache; +static DEFINE_XARRAY_ALLOC(gm_dev_id_pool); + +static bool enable_gmem; + +static inline unsigned long pe_mask(unsigned int order) +{ + if (order == 0) + return PAGE_MASK; + if (order == PMD_ORDER) + return HPAGE_PMD_MASK; + if (order == PUD_ORDER) + return HPAGE_PUD_MASK; + return 0; +} + +static struct percpu_counter g_gmem_stats[NR_GMEM_STAT_ITEMS]; + +void gmem_stats_counter(enum gmem_stats_item item, int val) +{ + if (!gmem_is_enabled()) + return; + + if (WARN_ON_ONCE(unlikely(item >= NR_GMEM_STAT_ITEMS))) + return; + + percpu_counter_add(&g_gmem_stats[item], val); +} + +static int gmem_stats_init(void) +{ + int i, rc; + + for (i = 0; i < NR_GMEM_STAT_ITEMS; i++) { + rc = percpu_counter_init(&g_gmem_stats[i], 0, GFP_KERNEL); + if (rc) { + int j; + + for (j = i-1; j >= 0; j--) + percpu_counter_destroy(&g_gmem_stats[j]); + + break; /* break the initialization process */ + } + } + + return rc; +} + +#ifdef CONFIG_PROC_FS +static int gmem_stats_show(struct seq_file *m, void *arg) +{ + if (!gmem_is_enabled()) + return 0; + + seq_printf( + m, "migrating H2D : %lld\n", + percpu_counter_read_positive(&g_gmem_stats[NR_PAGE_MIGRATING_H2D])); + seq_printf( + m, "migrating D2H : %lld\n", + percpu_counter_read_positive(&g_gmem_stats[NR_PAGE_MIGRATING_D2H])); + + return 0; +} +#endif /* CONFIG_PROC_FS */ + +static struct workqueue_struct *prefetch_wq; + +#define GM_WORK_CONCURRENCY 4 + +static int __init gmem_init(void) +{ + int err = -ENOMEM; + + if (!enable_gmem) + return 0; + + gm_as_cache = KMEM_CACHE(gm_as, 0); + if (!gm_as_cache) + goto out; + + gm_dev_cache = KMEM_CACHE(gm_dev, 0); + if (!gm_dev_cache) + goto free_as; + + gm_ctx_cache = KMEM_CACHE(gm_context, 0); + if (!gm_ctx_cache) + goto free_dev; + + gm_region_cache = KMEM_CACHE(gm_region, 0); + if (!gm_region_cache) + goto free_ctx; + + err = gm_page_cachep_init(); + if (err) + goto free_region; + + err = gm_init_sysfs(); + if (err) + goto free_gm_page; + + err = vm_object_init(); + if (err) + goto free_gm_sysfs; + + err = 
gmem_stats_init(); + if (err) + goto free_region; + + prefetch_wq = alloc_workqueue("prefetch", + __WQ_LEGACY | WQ_UNBOUND | WQ_HIGHPRI | WQ_CPU_INTENSIVE, GM_WORK_CONCURRENCY); + if (!prefetch_wq) { + gmem_err("fail to alloc workqueue prefetch_wq\n"); + err = -EFAULT; + goto free_region; + } + +#ifdef CONFIG_PROC_FS + proc_create_single("gmemstats", 0444, NULL, gmem_stats_show); +#endif + + static_branch_enable(&gmem_status); + + return 0; + +free_gm_sysfs: + gm_deinit_sysfs(); +free_gm_page: + gm_page_cachep_destroy(); +free_region: + kmem_cache_destroy(gm_region_cache); +free_ctx: + kmem_cache_destroy(gm_ctx_cache); +free_dev: + kmem_cache_destroy(gm_dev_cache); +free_as: + kmem_cache_destroy(gm_as_cache); +out: + return -ENOMEM; +} +subsys_initcall(gmem_init); + +static int __init setup_gmem(char *str) +{ + strtobool(str, &enable_gmem); + + return 1; +} +__setup("gmem=", setup_gmem); + +/* + * Create a GMEM device, register its MMU function and the page table. + * The returned device pointer will be passed by new_dev. + * A unique id will be assigned to the GMEM device, using Linux's xarray. + */ +enum gm_ret gm_dev_create(struct gm_mmu *mmu, void *dev_data, unsigned long cap, + struct gm_dev **new_dev) +{ + struct gm_dev *dev; + + if (!gmem_is_enabled()) + return GM_RET_FAILURE_UNKNOWN; + + dev = kmem_cache_alloc(gm_dev_cache, GFP_KERNEL); + if (!dev) + return GM_RET_NOMEM; + + if (xa_alloc(&gm_dev_id_pool, &dev->id, dev, xa_limit_32b, + GFP_KERNEL)) { + kmem_cache_free(gm_dev_cache, dev); + return GM_RET_NOMEM; + } + + dev->capability = cap; + dev->mmu = mmu; + dev->dev_data = dev_data; + dev->current_ctx = NULL; + INIT_LIST_HEAD(&dev->gm_ctx_list); + *new_dev = dev; + nodes_clear(dev->registered_hnodes); + return GM_RET_SUCCESS; +} +EXPORT_SYMBOL_GPL(gm_dev_create); + +/* Handle the page fault triggered by a given device with mmap lock*/ +enum gm_ret gm_dev_fault_locked(struct mm_struct *mm, unsigned long addr, struct gm_dev *dev, + int behavior) +{ + enum gm_ret ret = GM_RET_SUCCESS; + struct gm_mmu *mmu = dev->mmu; + struct hnode *hnode; + struct device *dma_dev = dev->dma_dev; + struct vm_area_struct *vma; + struct vm_object *obj; + struct gm_mapping *gm_mapping; + struct gm_page *gm_page; + unsigned long size = HPAGE_SIZE; + struct gm_fault_t gmf = { + .mm = mm, + .va = addr, + .dev = dev, + .size = size, + .copy = false, + .behavior = behavior + }; + struct page *page = NULL; + + hnode = get_hnode(get_hnuma_id(dev)); + if (!hnode) { + gmem_err("gmem device should correspond to a hnuma node"); + ret = -EINVAL; + goto out; + } + + vma = find_vma(mm, addr); + if (!vma || vma->vm_start > addr) { + gmem_err("%s failed to find vma", __func__); + ret = GM_RET_FAILURE_UNKNOWN; + goto out; + } + obj = vma->vm_obj; + if (!obj) { + gmem_err("%s no vm_obj", __func__); + ret = GM_RET_FAILURE_UNKNOWN; + goto out; + } + + xa_lock(obj->logical_page_table); + gm_mapping = vm_object_lookup(obj, addr); + if (!gm_mapping) { + vm_object_mapping_create(obj, addr); + gm_mapping = vm_object_lookup(obj, addr); + } + xa_unlock(obj->logical_page_table); + + if (unlikely(!gm_mapping)) { + gmem_err("OOM when creating vm_obj!"); + ret = GM_RET_NOMEM; + goto out; + } + mutex_lock(&gm_mapping->lock); + if (gm_mapping_nomap(gm_mapping)) { + goto peer_map; + } else if (gm_mapping_device(gm_mapping)) { + switch (behavior) { + case MADV_PINNED: + mark_gm_page_pinned(gm_mapping->gm_page); + fallthrough; + case MADV_WILLNEED: + mark_gm_page_active(gm_mapping->gm_page); + goto unlock; + case MADV_PINNED_REMOVE: + 
mark_gm_page_unpinned(gm_mapping->gm_page); + goto unlock; + default: + ret = 0; + goto unlock; + } + } else if (gm_mapping_cpu(gm_mapping)) { + page = gm_mapping->page; + if (!page) { + gmem_err("host gm_mapping page is NULL. Set nomap"); + gm_mapping_flags_set(gm_mapping, GM_MAPPING_NOMAP); + goto unlock; + } + get_page(page); + /* zap_page_range_single can be used in Linux 6.4 and later versions. */ + zap_page_range_single(vma, addr, size, NULL); + gmf.dma_addr = + dma_map_page(dma_dev, page, 0, size, DMA_BIDIRECTIONAL); + if (dma_mapping_error(dma_dev, gmf.dma_addr)) + gmem_err("dma map failed"); + + gmf.copy = true; + } + +peer_map: + gm_page = gm_alloc_page(mm, hnode); + if (!gm_page) { + gmem_err("Alloc gm_page for device fault failed."); + ret = -ENOMEM; + goto unlock; + } + + gmf.pfn = gm_page->dev_pfn; + + ret = mmu->peer_map(&gmf); + if (ret != GM_RET_SUCCESS) { + gmem_err("peer map failed"); + if (page) + gm_mapping_flags_set(gm_mapping, GM_MAPPING_CPU); + put_gm_page(gm_page); + goto unlock; + } + + if (page) { + dma_unmap_page(dma_dev, gmf.dma_addr, size, DMA_BIDIRECTIONAL); + folio_put(page_folio(page)); + } + + gm_mapping_flags_set(gm_mapping, GM_MAPPING_DEVICE); + gm_mapping->dev = dev; + gm_page_add_rmap(gm_page, mm, addr); + gm_mapping->gm_page = gm_page; + if (behavior == MADV_PINNED) { + mark_gm_page_pinned(gm_page); + } else if (behavior == MADV_PINNED_REMOVE) { + mark_gm_page_unpinned(gm_page); + } + hnode_activelist_add(hnode, gm_page); + hnode_active_pages_inc(hnode); +unlock: + mutex_unlock(&gm_mapping->lock); +out: + return ret; +} +EXPORT_SYMBOL_GPL(gm_dev_fault_locked); + +vm_fault_t gm_host_fault_locked(struct vm_fault *vmf, + unsigned int order) +{ + vm_fault_t ret = 0; + struct vm_area_struct *vma = vmf->vma; + unsigned long addr = vmf->address & pe_mask(order); + struct vm_object *obj = vma->vm_obj; + struct gm_mapping *gm_mapping; + unsigned long size = HPAGE_SIZE; + struct gm_dev *dev; + struct hnode *hnode; + struct device *dma_dev; + struct gm_fault_t gmf = { + .mm = vma->vm_mm, + .va = addr, + .size = size, + .copy = true, + }; + + gm_mapping = vm_object_lookup(obj, addr); + if (!gm_mapping) { + gmem_err("host fault gm_mapping should not be NULL\n"); + return VM_FAULT_SIGBUS; + } + + dev = gm_mapping->dev; + gmf.dev = dev; + gmf.pfn = gm_mapping->gm_page->dev_pfn; + dma_dev = dev->dma_dev; + gmf.dma_addr = + dma_map_page(dma_dev, vmf->page, 0, size, DMA_BIDIRECTIONAL); + if (dma_mapping_error(dma_dev, gmf.dma_addr)) { + gmem_err("host fault dma mapping error\n"); + return VM_FAULT_SIGBUS; + } + if (dev->mmu->peer_unmap(&gmf) != GM_RET_SUCCESS) { + gmem_err("peer unmap failed\n"); + dma_unmap_page(dma_dev, gmf.dma_addr, size, DMA_BIDIRECTIONAL); + return VM_FAULT_SIGBUS; + } + + dma_unmap_page(dma_dev, gmf.dma_addr, size, DMA_BIDIRECTIONAL); + hnode = get_hnode(gm_mapping->gm_page->hnid); + gm_page_remove_rmap(gm_mapping->gm_page); + hnode_activelist_del(hnode, gm_mapping->gm_page); + hnode_active_pages_dec(hnode); + put_gm_page(gm_mapping->gm_page); + return ret; +} + +/* GMEM Virtual Address Space API */ +enum gm_ret gm_as_create(unsigned long begin, unsigned long end, enum gm_as_alloc policy, + unsigned long cache_quantum, struct gm_as **new_as) +{ + struct gm_as *as; + + if (!new_as) + return -EINVAL; + + as = kmem_cache_alloc(gm_as_cache, GFP_ATOMIC); + if (!as) + return -ENOMEM; + + spin_lock_init(&as->rbtree_lock); + as->rbroot = RB_ROOT; + as->start_va = begin; + as->end_va = end; + as->policy = policy; + + INIT_LIST_HEAD(&as->gm_ctx_list); + + 
*new_as = as; + return GM_RET_SUCCESS; +} +EXPORT_SYMBOL_GPL(gm_as_create); + +enum gm_ret gm_as_destroy(struct gm_as *as) +{ + struct gm_context *ctx, *tmp_ctx; + + list_for_each_entry_safe(ctx, tmp_ctx, &as->gm_ctx_list, gm_as_link) + kfree(ctx); + + kmem_cache_free(gm_as_cache, as); + + return GM_RET_SUCCESS; +} +EXPORT_SYMBOL_GPL(gm_as_destroy); + +enum gm_ret gm_as_attach(struct gm_as *as, struct gm_dev *dev, enum gm_mmu_mode mode, + bool activate, struct gm_context **out_ctx) +{ + struct gm_context *ctx; + int nid; + int ret; + + ctx = kmem_cache_alloc(gm_ctx_cache, GFP_KERNEL); + if (!ctx) + return GM_RET_NOMEM; + + ctx->as = as; + ctx->dev = dev; + ctx->pmap = NULL; + ret = dev->mmu->pmap_create(dev, &ctx->pmap); + if (ret) { + kmem_cache_free(gm_ctx_cache, ctx); + return ret; + } + + INIT_LIST_HEAD(&ctx->gm_dev_link); + INIT_LIST_HEAD(&ctx->gm_as_link); + list_add_tail(&dev->gm_ctx_list, &ctx->gm_dev_link); + list_add_tail(&ctx->gm_as_link, &as->gm_ctx_list); + + if (activate) { + /* + * Here we should really have a callback function to perform the context switch + * for the hardware. E.g. in x86 this function is effectively + * flushing the CR3 value. Currently we do not care time-sliced context switch, + * unless someone wants to support it. + */ + dev->current_ctx = ctx; + } + *out_ctx = ctx; + + /* + * gm_as_attach will be used to attach device to process address space. + * Handle this case and add hnodes registered by device to process mems_allowed. + */ + for_each_node_mask(nid, dev->registered_hnodes) + node_set(nid, current->mems_allowed); + return GM_RET_SUCCESS; +} +EXPORT_SYMBOL_GPL(gm_as_attach); + +struct prefetch_data { + struct mm_struct *mm; + struct gm_dev *dev; + unsigned long addr; + size_t size; + struct work_struct work; + int behavior; + int *res; +}; + +static void prefetch_work_cb(struct work_struct *work) +{ + struct prefetch_data *d = + container_of(work, struct prefetch_data, work); + unsigned long addr = d->addr, end = d->addr + d->size; + int page_size = HPAGE_SIZE; + int ret; + + do { + /* MADV_WILLNEED: dev will soon access this addr. */ + mmap_read_lock(d->mm); + ret = gm_dev_fault_locked(d->mm, addr, d->dev, d->behavior); + mmap_read_unlock(d->mm); + if (ret == GM_RET_PAGE_EXIST) { + gmem_err("%s: device has done page fault, ignore prefetch\n", + __func__); + } else if (ret != GM_RET_SUCCESS) { + *d->res = -EFAULT; + gmem_err("%s: call dev fault error %d\n", __func__, ret); + } + } while (addr += page_size, addr != end); + + kfree(d); +} + +static int hmadvise_do_prefetch(struct gm_dev *dev, unsigned long addr, size_t size, int behavior) +{ + unsigned long start, end, per_size; + int page_size = HPAGE_SIZE; + struct prefetch_data *data; + struct vm_area_struct *vma; + int res = GM_RET_SUCCESS; + unsigned long old_start; + + /* overflow */ + if (check_add_overflow(addr, size, &end)) { + gmem_err("addr plus size will cause overflow!\n"); + return -EINVAL; + } + + old_start = end; + + /* Align addr by rounding outward to make page cover addr. 
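 * For example, with 2MB huge pages an addr of 0x200100 and size of 0x1000
 * round to start = 0x200000 and end = 0x400000, so the single huge page
 * covering the requested bytes is prefetched.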
*/ + end = round_up(end, page_size); + start = round_down(addr, page_size); + size = end - start; + + if (!end && old_start) { + gmem_err("end addr align up 2M causes invalid addr\n"); + return -EINVAL; + } + + if (size == 0) + return 0; + + mmap_read_lock(current->mm); + vma = find_vma(current->mm, start); + if (!vma || start < vma->vm_start || end > vma->vm_end) { + mmap_read_unlock(current->mm); + gmem_err("failed to find vma by invalid start or size.\n"); + return GM_RET_FAILURE_UNKNOWN; + } else if (!vma_is_peer_shared(vma)) { + mmap_read_unlock(current->mm); + gmem_err("%s the vma does not use VM_PEER_SHARED\n", __func__); + return GM_RET_FAILURE_UNKNOWN; + } + mmap_read_unlock(current->mm); + + per_size = (size / GM_WORK_CONCURRENCY) & ~(page_size - 1); + + while (start < end) { + data = kzalloc(sizeof(struct prefetch_data), GFP_KERNEL); + if (!data) { + flush_workqueue(prefetch_wq); + return GM_RET_NOMEM; + } + + INIT_WORK(&data->work, prefetch_work_cb); + data->mm = current->mm; + data->dev = dev; + data->addr = start; + data->behavior = behavior; + data->res = &res; + if (per_size == 0) + data->size = size; + else + /* Process (1.x * per_size) for the last time */ + data->size = (end - start < 2 * per_size) ? + (end - start) : + per_size; + queue_work(prefetch_wq, &data->work); + start += data->size; + } + + flush_workqueue(prefetch_wq); + return res; +} + +static int gmem_unmap_vma_pages(struct vm_area_struct *vma, unsigned long start, + unsigned long end, int page_size) +{ + struct gm_fault_t gmf = { + .mm = current->mm, + .size = page_size, + .copy = false, + }; + struct gm_mapping *gm_mapping; + struct vm_object *obj; + struct hnode *hnode; + int ret; + + obj = vma->vm_obj; + if (!obj) { + gmem_err("peer-shared vma should have vm_object\n"); + return -EINVAL; + } + + for (; start < end; start += page_size) { + xa_lock(obj->logical_page_table); + gm_mapping = vm_object_lookup(obj, start); + if (!gm_mapping) { + xa_unlock(obj->logical_page_table); + continue; + } + xa_unlock(obj->logical_page_table); + mutex_lock(&gm_mapping->lock); + if (gm_mapping_nomap(gm_mapping)) { + mutex_unlock(&gm_mapping->lock); + continue; + } else if (gm_mapping_cpu(gm_mapping)) { + zap_page_range_single(vma, start, page_size, NULL); + } else { + gmf.va = start; + gmf.dev = gm_mapping->dev; + ret = gm_mapping->dev->mmu->peer_unmap(&gmf); + if (ret) { + gmem_err("peer_unmap failed. ret %d\n", ret); + mutex_unlock(&gm_mapping->lock); + continue; + } + hnode = get_hnode(gm_mapping->gm_page->hnid); + gm_page_remove_rmap(gm_mapping->gm_page); + hnode_activelist_del(hnode, gm_mapping->gm_page); + hnode_active_pages_dec(hnode); + put_gm_page(gm_mapping->gm_page); + } + gm_mapping_flags_set(gm_mapping, GM_MAPPING_NOMAP); + mutex_unlock(&gm_mapping->lock); + } + + return 0; +} + +static int hmadvise_do_eagerfree(unsigned long addr, size_t size) +{ + unsigned long start, end, i_start, i_end; + int page_size = HPAGE_SIZE; + struct vm_area_struct *vma; + int ret = GM_RET_SUCCESS; + unsigned long old_start; + + /* overflow */ + if (check_add_overflow(addr, size, &end)) { + gmem_err("addr plus size will cause overflow!\n"); + return -EINVAL; + } + + old_start = addr; + + /* Align addr by rounding inward to avoid excessive page release. 
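 * For example, with 2MB huge pages an addr of 0x200100 and size of 0x400000
 * round to start = 0x400000 and end = 0x600000, so only the huge page that
 * lies entirely inside the requested range is released.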
*/ + end = round_down(end, page_size); + start = round_up(addr, page_size); + if (start >= end) { + pr_debug("gmem:start align up 2M >= end align down 2M.\n"); + return ret; + } + + /* Check to see whether len was rounded up from small -ve to zero */ + if (old_start && !start) { + gmem_err("start addr align up 2M causes invalid addr"); + return -EINVAL; + } + + mmap_read_lock(current->mm); + do { + vma = find_vma_intersection(current->mm, start, end); + if (!vma) { + gmem_err("gmem: there is no valid vma\n"); + break; + } + + if (!vma_is_peer_shared(vma)) { + pr_debug("gmem:not peer-shared vma, skip dontneed\n"); + start = vma->vm_end; + continue; + } + + i_start = start > vma->vm_start ? start : vma->vm_start; + i_end = end < vma->vm_end ? end : vma->vm_end; + ret = gmem_unmap_vma_pages(vma, i_start, i_end, page_size); + if (ret) + break; + + start = vma->vm_end; + } while (start < end); + + mmap_read_unlock(current->mm); + return ret; +} + +static bool check_hmadvise_behavior(int behavior) +{ + return behavior == MADV_DONTNEED; +} + +int hmadvise_inner(int hnid, unsigned long start, size_t len_in, int behavior) +{ + int error = -EINVAL; + struct gm_dev *dev = NULL; + + if (hnid == -1) { + if (check_hmadvise_behavior(behavior)) { + goto no_hnid; + } else { + gmem_err("hmadvise: behavior %d need hnid or is invalid\n", + behavior); + return error; + } + } + + if (hnid < 0) { + gmem_err("hmadvise: invalid hnid %d < 0\n", hnid); + return error; + } + + if (!is_hnode(hnid)) { + gmem_err("hmadvise: can't find hnode by hnid:%d or hnode is not allowed\n", hnid); + return error; + } + + dev = get_gm_dev(hnid); + if (!dev) { + gmem_err("hmadvise: hnode id %d is invalid\n", hnid); + return error; + } + +no_hnid: + switch (behavior) { + case MADV_PREFETCH: + behavior = MADV_WILLNEED; + fallthrough; + case MADV_PINNED_REMOVE: + fallthrough; + case MADV_PINNED: + return hmadvise_do_prefetch(dev, start, len_in, behavior); + case MADV_DONTNEED: + return hmadvise_do_eagerfree(start, len_in); + default: + gmem_err("hmadvise: unsupported behavior %d\n", behavior); + } + + return error; +} +EXPORT_SYMBOL_GPL(hmadvise_inner); + +static bool hnid_match_dest(int hnid, struct gm_mapping *dest) +{ + return (hnid < 0) ? 
gm_mapping_cpu(dest) : gm_mapping_device(dest); +} + +static void cpu_page_copy(struct page *dst_page, unsigned long dst_offset, + struct page *src_page, unsigned long src_offset, size_t size) +{ + unsigned long src, dst; + + src = (unsigned long)page_address(src_page) + src_offset; + dst = (unsigned long)page_address(dst_page) + dst_offset; + if (!src || !dst) { + gmem_err("%s: src (%lx) or dst (%lx) is invalid!", src, dst); + return; + } + memcpy((void *)dst, (void *)src, size); +} + +static enum gmem_copy_dir { + COPY_GMEM_TO_NORM, + COPY_NORM_TO_GMEM, + COPY_GMEM_TO_GMEM, +}; + +static void do_hmemcpy(struct mm_struct *mm, int hnid, unsigned long dest, + unsigned long src, size_t size) +{ + enum gm_ret ret; + int page_size = HPAGE_SIZE; + struct vm_area_struct *vma_dest, *vma_src; + struct gm_mapping *gm_mapping_dest, *gm_mapping_src; + struct gm_dev *dev = NULL; + struct gm_memcpy_t gmc = {0}; + enum gmem_copy_dir dir; + struct page *trans_hpage; + void *trans_addr; + + if (size == 0) + return; + + mmap_read_lock(mm); + vma_dest = find_vma(mm, dest); + vma_src = find_vma(mm, src); + + if (!vma_src || vma_src->vm_start > src || !vma_dest || vma_dest->vm_start > dest) { + gmem_err("hmemcpy: the vma find by src/dest is NULL!"); + goto unlock_mm; + } + + if (vma_is_peer_shared(vma_src) && vma_is_peer_shared(vma_dest)) { + dir = COPY_GMEM_TO_GMEM; + gm_mapping_dest = vm_object_lookup(vma_dest->vm_obj, dest & ~(page_size - 1)); + gm_mapping_src = vm_object_lookup(vma_src->vm_obj, src & ~(page_size - 1)); + } else if (vma_is_peer_shared(vma_src)) { + dir = COPY_GMEM_TO_NORM; + gm_mapping_src = vm_object_lookup(vma_src->vm_obj, src & ~(page_size - 1)); + gm_mapping_dest = NULL; + } else if (vma_is_peer_shared(vma_dest)) { + dir = COPY_NORM_TO_GMEM; + gm_mapping_dest = vm_object_lookup(vma_dest->vm_obj, dest & ~(page_size - 1)); + gm_mapping_src = NULL; + } else { + gmem_err("%s: src %lx and dest %lx both not gmem addr!", __func__, src, dest); + goto unlock_mm; + } + + trans_hpage = alloc_pages(GFP_TRANSHUGE, HPAGE_PMD_ORDER); + if (!trans_hpage) { + gmem_err("%s: alloc trans_hpage failed!", __func__); + goto unlock_mm; + } + trans_addr = page_to_virt(trans_hpage); + + if (dir != COPY_NORM_TO_GMEM && (!gm_mapping_src || gm_mapping_nomap(gm_mapping_src))) { + gmem_err("%s: gm_mapping_src is NULL or still not mapped! 
addr is %lx", __func__, src); + } + + if (hnid != -1) { + dev = get_gm_dev(hnid); + if (!dev) { + gmem_err("hmemcpy: hnode's dev is NULL"); + goto free_trans_page; + } + } + + // Trigger dest page fault on host or device + if (!gm_mapping_dest || gm_mapping_nomap(gm_mapping_dest) + || !hnid_match_dest(hnid, gm_mapping_dest)) { + if (hnid == -1) { + if (gm_mapping_dest && gm_mapping_device(gm_mapping_dest) && gm_page_pinned(gm_mapping_dest->gm_page)) { + gmem_err("%s: dest %lx is pinned on device, skip handle_mm_fault", __func__, dest); + } else { + ret = handle_mm_fault(vma_dest, dest & ~(page_size - 1), FAULT_FLAG_USER | + FAULT_FLAG_INSTRUCTION | FAULT_FLAG_WRITE, NULL); + if (ret) { + gmem_err("%s: failed to execute host page fault, ret:%d", + __func__, ret); + goto free_trans_page; + } + } + } else { + ret = gm_dev_fault_locked(mm, dest & ~(page_size - 1), dev, MADV_WILLNEED); + if (ret != GM_RET_SUCCESS) { + gmem_err("%s: failed to excecute dev page fault.", __func__); + goto free_trans_page; + } + } + } + if (!gm_mapping_dest && dir != COPY_GMEM_TO_NORM) + gm_mapping_dest = vm_object_lookup(vma_dest->vm_obj, round_down(dest, page_size)); + + if (gm_mapping_dest && gm_mapping_dest != gm_mapping_src) + mutex_lock(&gm_mapping_dest->lock); + if (gm_mapping_src) + mutex_lock(&gm_mapping_src->lock); + // Use memcpy when there is no device address, otherwise use peer_memcpy + if (dir == COPY_GMEM_TO_NORM) { + if (!gm_mapping_src) { + gmem_err("%s: do COPY_GMEM_TO_NORM but gm_mapping_src is NULL!", __func__); + goto unlock_gm_mapping; + } + if (gm_mapping_cpu(gm_mapping_src)) { // host to host + cpu_page_copy(trans_hpage, (unsigned long)trans_addr & (page_size - 1), + gm_mapping_src->page, src & (page_size - 1), size); + goto copy_to_norm_dest; + } else if (gm_mapping_device(gm_mapping_src)) { // device to host + dev = gm_mapping_src->dev; + gmc.dest = phys_to_dma(dev->dma_dev, page_to_phys(trans_hpage) + + ((unsigned long)trans_addr & (page_size - 1))); + gmc.src = gm_mapping_src->gm_page->dev_dma_addr + (src & (page_size - 1)); + gmc.kind = GM_MEMCPY_D2H; + } else { + gmem_err("gm_mapping_src bad status, dir is COPY_GMEM_TO_NORM"); + goto unlock_gm_mapping; + } + } else if (dir == COPY_NORM_TO_GMEM) { + if (!gm_mapping_dest) { + gmem_err("%s: do COPY_NORM_TO_GMEM but gm_mapping_dest is NULL!", __func__); + goto unlock_gm_mapping; + } + if (copy_from_user(trans_addr, (void __user *)src, size) > 0) + gmem_err("copy normal src %lx to trans failed", src); + if (gm_mapping_cpu(gm_mapping_dest)) { // host to host + cpu_page_copy(gm_mapping_dest->page, dest & (page_size - 1), + trans_hpage, (unsigned long)trans_addr & (page_size - 1), size); + goto unlock_gm_mapping; + } else if (gm_mapping_device(gm_mapping_dest)) { + if (!dev) { + gmem_err("%s: do COPY_NORM_TO_GMEM but dev is NULL, hnid is %d", __func__, hnid); + goto unlock_gm_mapping; + } + gmc.dest = gm_mapping_dest->gm_page->dev_dma_addr + + (dest & (page_size - 1)); + gmc.src = phys_to_dma(dev->dma_dev, page_to_phys(trans_hapge) + + ((unsigned long)trans_addr & (page_size - 1))); + gmc.kind = GM_MEMCPY_H2D; + } else { // device to device + gmem_err("gm_mapping_dest bad status, dir is COPY_NORM_TO_GMEM\n"); + goto unlock_gm_mmaping; + } + } else if (dir == COPY_GMEM_TO_GMEM) { + if (gm_mapping_cpu(gm_mapping_src)) { + if (gm_mapping_cpu(gm_mapping_dest)) { + cpu_page_copy(gm_mapping_dest->page, dest & (page_size - 1), + gm_mapping_src->page, src & (page_size - 1), size); + goto unlock_gm_mapping; + } else if 
(gm_mapping_device(gm_mapping_dest)) { + dev = gm_mapping_dest->dev; + gmc.dest = gm_mapping_dest->gm_page->dev_dma_addr + (dest & (page_size - 1)); + gmc.src = phys_to_dma(dev->dma_dev, page_to_phys(gm_mapping_src->page) + + (src & (page_size - 1))); + gmc.kind = GM_MEMCPY_H2D; + } else { + gmem_err("gm_mapping_dest bad status, src is on host!"); + goto unlock_gm_mapping; + } + } else if (gm_mapping_device(gm_mapping_src)) { + if (gm_mapping_cpu(gm_mapping_dest)) { + dev = gm_mapping_src->dev; + gmc.dest = phys_to_dma(dev->dma_dev, page_to_phys(gm_mapping_dest->page) + + (dest & (page_size - 1))); + gmc.src = gm_mapping_src->gm_page->dev_dma_addr + (src & (page_size - 1)); + gmc.kind = GM_MEMCPY_D2H; + } else if (gm_mapping_device(gm_mapping_dest)) { + dev = gm_mapping_src->dev; + gmc.dest = phys_to_dma(dev->dma_dev, page_to_phys(trans_hpage) + + ((unsigned long)trans_addr & (page_size - 1))); + gmc.src = gm_mapping_src->gm_page->dev_dma_addr + (src & (page_size - 1)); + gmc.kind = GM_MEMCPY_D2H; + gmc.mm = mm; + gmc.dev = dev; + gmc.size = size; + dev->mmu->peer_hmemcpy(&gmc); + + dev = gm_mapping_dest->dev; + gmc.dest = gm_mapping_dest->gm_page->dev_dma_addr + (dest & (page_size - 1)); + gmc.src = phys_to_dma(dev->dma_dev, page_to_phys(trans_hpage) + + ((unsigned long)trans_addr& (page_size - 1))); + gmc.kind = GM_MEMCPY_H2D; + gmc.mm = mm; + gmc.dev = dev; + gmc.size = size; + dev->mmu->peer_hmemcpy(&gmc); + + goto unlock_gm_mapping; + } else { + gmem_err("gm_mapping_dest bad status, src is on device!"); + goto unlock_gm_mapping; + } + } else { + gmem_err("gm_mapping_src bad status, dir is COPY_GMEM_TO_GMEM"); + goto unlock_gm_mapping; + } + } + gmc.mm = mm; + gmc.dev = dev; + gmc.size = size; + dev->mmu->peer_hmemcpy(&gmc); + +copy_to_norm_dest: + if (dir == COPY_GMEM_TO_NORM) { + if (copy_to_user((void __user *)dest, trans_addr, size) > 0) + gmem_err("copy trans to normal dest %lx failed!", dest); + } + +unlock_gm_mmaping: + if (gm_mapping_src) + mutex_unlock(&gm_mapping_src->lock); + if (gm_mapping_dest && gm_mapping_dest != gm_mapping_src) + mutex_unlock(&gm_mapping_dest->lock); +free_trans_page: + __free_pages(trans_hpage, HPAGE_PMD_ORDER); +unlock_mm: + mmap_read_unlock(mm); +} + +/* + * Each page needs to be copied in three parts when the address is not aligned. + * | ml <--0-->|<1><--2-> | + * | -------|--------- | + * | / /| / / | + * | / / | / / | + * | / / |/ / | + * | ----------|------ | + * | | | + * |<----page x---->|<----page y---->| + */ + +static void __hmemcpy(int hnid, unsigned long dest, unsigned long src, size_t size) +{ + int i = 0; + // offsets within the huge page for the source and destination addresses + int src_offset = src & (HPAGE_SIZE - 1); + int dst_offset = dest & (HPAGE_SIZE - 1); + // Divide each page into three parts according to the align + int ml[3] = { + HPAGE_SIZE - (src_offset < dst_offset ? dst_offset : src_offset), + src_offset < dst_offset ? (dst_offset - src_offset) : (src_offset - dst_offset), + src_offset < dst_offset ? 
src_offset : dst_offset + }; + struct mm_struct *mm = current->mm; + + if (size == 0) + return; + + while (size >= ml[i]) { + if (ml[i] > 0) { + do_hmemcpy(mm, hnid, dest, src, ml[i]); + src += ml[i]; + dest += ml[i]; + size -= ml[i]; + } + i = (i + 1) % 3; + } + + if (size > 0) + do_hmemcpy(mm, hnid, dest, src, size); +} + +int hmemcpy(int hnid, unsigned long dest, unsigned long src, size_t size) +{ + struct vm_area_struct *vma_dest, *vma_src; + struct mm_struct *mm = current->mm; + + if (hnid < 0) { + if (hnid != -1) { + gmem_err("hmemcpy: invalid hnid %d < 0\n", hnid); + return -EINVAL; + } + } else if (!is_hnode(hnid)) { + gmem_err("hmemcpy: can't find hnode by hnid:%d or hnode is not allowed\n", hnid); + return -EINVAL; + } + + mmap_read_lock(mm); + vma_dest = find_vma(mm, dest); + vma_src = find_vma(mm, src); + + if ((ULONG_MAX - size < src) || !vma_src || vma_src->vm_start > src || + vma_src->vm_end < (src + size)) { + gmem_err("failed to find peer_shared vma by invalid src or size\n"); + goto unlock; + } + + if ((ULONG_MAX - size < dest) || !vma_dest || vma_dest->vm_start > dest || + vma_dest->vm_end < (dest + size)) { + gmem_err("failed to find peer_shared vma by invalid dest or size\n"); + goto unlock; + } + + if (!vma_is_peer_shared(vma_src) && !vma_is_peer_shared(vma_dest)) { + mmap_read_unlock(mm); + return -EAGAIN; + } + + if (!(vma_dest->vm_flags & VM_WRITE)) { + gmem_err("dest is not writable.\n"); + goto unlock; + } + mmap_read_unlock(mm); + + __hmemcpy(hnid, dest, src, size); + + return 0; + +unlock: + mmap_read_unlock(mm); + return -EINVAL; +} +EXPORT_SYMBOL_GPL(hmemcpy); + diff --git a/mm/gmem_phys.c b/mm/gmem_phys.c new file mode 100644 index 0000000000000000000000000000000000000000..10531edccfc357879de6e7df6cb8eefbe6a6a9f3 --- /dev/null +++ b/mm/gmem_phys.c @@ -0,0 +1,563 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * GMEM physical memory management. + * + * Copyright (C) 2025- Huawei, Inc. 
+ * Author: Bin Wang + * + */ + +#include +#include +#include +#include +#include + +#include +#include + +#define NUM_SWAP_PAGES 16 +#define MAX_SWAP_RETRY_TIMES 10 + +static struct kmem_cache *gm_page_cachep; + +DEFINE_SPINLOCK(hnode_lock); +struct hnode *hnodes[MAX_NUMNODES]; + +void __init hnuma_init(void) +{ + unsigned int node; + + spin_lock(&hnode_lock); + for_each_node(node) + node_set(node, hnode_map); + spin_unlock(&hnode_lock); +} + +unsigned int alloc_hnode_id(void) +{ + unsigned int node; + + node = first_unset_node(hnode_map); + node_set(node, hnode_map); + + return node; +} + +void free_hnode_id(unsigned int nid) +{ + node_clear(nid, hnode_map); +} + +void hnode_init(struct hnode *hnode, unsigned int hnid, struct gm_dev *dev) +{ + hnode->id = hnid; + hnode->dev = dev; + INIT_LIST_HEAD(&hnode->freelist); + INIT_LIST_HEAD(&hnode->activelist); + spin_lock_init(&hnode->freelist_lock); + spin_lock_init(&hnode->activelist_lock); + atomic_set(&hnode->nr_free_pages, 0); + atomic_set(&hnode->nr_active_pages, 0); + hnode->import_failed = false; + hnode->max_memsize = 0; + + node_set(hnid, dev->registered_hnodes); + hnodes[hnid] = hnode; +} + +void hnode_deinit(unsigned int hnid, struct gm_dev *dev) +{ + hnodes[hnid]->id = 0; + hnodes[hnid]->dev = NULL; + node_clear(hnid, dev->registered_hnodes); + hnodes[hnid] = NULL; +} + +struct hnode *get_hnode(unsigned int hnid) +{ + if (!hnodes[hnid]) + gmem_err("h-NUMA node for hnode id %u is NULL.", hnid); + return hnodes[hnid]; +} + +struct gm_dev *get_gm_dev(unsigned int nid) +{ + struct hnode *hnode; + struct gm_dev *dev = NULL; + + spin_lock(&hnode_lock); + hnode = get_hnode(nid); + if (hnode) + dev = hnode->dev; + spin_unlock(&hnode_lock); + return dev; +} + +static void init_swapd(struct hnode *hnode); + +int gm_dev_register_hnode(struct gm_dev *dev) +{ + unsigned int hnid; + struct hnode *hnode = kmalloc(sizeof(struct hnode), GFP_KERNEL); + int ret; + + if (!hnode) + return -ENOMEM; + + spin_lock(&hnode_lock); + hnid = alloc_hnode_id(); + spin_unlock(&hnode_lock); + + if (hnid == MAX_NUMNODES) + goto free_hnode; + + ret = hnode_init_sysfs(hnid); + if (ret) + goto free_hnode; + + hnode_init(hnode, hnid, dev); + init_swapd(hnode); + + return GM_RET_SUCCESS; + +free_hnode: + kfree(hnode); + return -EBUSY; +} +EXPORT_SYMBOL_GPL(gm_dev_register_hnode); + +int __init gm_page_cachep_init(void) +{ + gm_page_cachep = KMEM_CACHE(gm_page, 0); + if (!gm_page_cachep) + return -EINVAL; + return 0; +} + +void gm_page_cachep_destroy(void) +{ + kmem_cache_destroy(gm_page_cachep); +} + +struct gm_page *alloc_gm_page_struct(void) +{ + struct gm_page *gm_page = kmem_cache_zalloc(gm_page_cachep, GFP_KERNEL); + + if (!gm_page) + return NULL; + atomic_set(&gm_page->refcount, 0); + spin_lock_init(&gm_page->rmap_lock); + return gm_page; +} +EXPORT_SYMBOL(alloc_gm_page_struct); + +void hnode_freelist_add(struct hnode *hnode, struct gm_page *gm_page) +{ + spin_lock(&hnode->freelist_lock); + list_add(&gm_page->gm_page_list, &hnode->freelist); + spin_unlock(&hnode->freelist_lock); +} + +void hnode_activelist_add(struct hnode *hnode, struct gm_page *gm_page) +{ + spin_lock(&hnode->activelist_lock); + list_add_tail(&gm_page->gm_page_list, &hnode->activelist); + spin_unlock(&hnode->activelist_lock); +} + +void hnode_activelist_del(struct hnode *hnode, struct gm_page *gm_page) +{ + spin_lock(&hnode->activelist_lock); + /* If a gm_page is being evicted, it is currently located in the + * temporary linked list. 
*/ + if (!gm_page_evicting(gm_page)) + list_del_init(&gm_page->gm_page_list); + spin_unlock(&hnode->activelist_lock); +} + +void hnode_activelist_del_and_add(struct hnode *hnode, struct gm_page *gm_page) +{ + spin_lock(&hnode->activelist_lock); + list_move_tail(&gm_page->gm_page_list, &hnode->activelist); + spin_unlock(&hnode->activelist_lock); +} + +void mark_gm_page_active(struct gm_page *gm_page) +{ + struct hnode *hnode = get_hnode(gm_page->hnid); + + if (!hnode) + return; + + hnode_activelist_del_and_add(hnode, gm_page); +} + +void mark_gm_page_pinned(struct gm_page *gm_page) +{ + struct hnode *hnode = get_hnode(gm_page->hnid); + + if (!hnode) + return; + + spin_lock(&hnode->activelist_lock); + if (gm_page_evicting(gm_page)) { + gmem_err("%s: maybe page has been evicted!", __func__); + goto unlock; + } else if (gm_page_pinned(gm_page)) { + goto unlock; + } + gm_page_flags_set(gm_page, GM_PAGE_PINNED); + +unlock: + spin_unlock(&hnode->activelist_lock); + return; +} + +void mark_gm_page_unpinned(struct gm_page *gm_page) +{ + struct hnode *hnode = get_hnode(gm_page->hnid); + + if (!hnode) + return; + + spin_lock(&hnode->activelist_lock); + if (!gm_page_pinned(gm_page) || gm_page_evicting(gm_page)) { + goto unlock; + } + gm_page_flags_clear(gm_page, GM_PAGE_PINNED); + +unlock: + spin_unlock(&hnode->activelist_lock); + return; +} + +int gm_add_pages(unsigned int hnid, struct list_head *pages) +{ + struct hnode *hnode; + struct gm_page *gm_page, *n; + + hnode = get_hnode(hnid); + if (!hnode) + return -EINVAL; + + list_for_each_entry_safe(gm_page, n, pages, gm_page_list) { + list_del(&gm_page->gm_page_list); + hnode_freelist_add(hnode, gm_page); + hnode_free_pages_inc(hnode); + gm_page_flags_clear(gm_page, GM_PAGE_PINNED); + } + + return 0; +} +EXPORT_SYMBOL(gm_add_pages); + +void gm_free_page(struct gm_page *gm_page) +{ + struct hnode *hnode; + + hnode = get_hnode(gm_page->hnid); + if (!hnode) + return; + hnode_freelist_add(hnode, gm_page); + hnode_free_pages_inc(hnode); +} + +void gm_page_add_rmap(struct gm_page *gm_page, struct mm_struct *mm, unsigned long va) +{ + spin_lock(&gm_page->rmap_lock); + gm_page->mm = mm; + gm_page->va = va; + spin_unlock(&gm_page->rmap_lock); +} + +void gm_page_remove_rmap(struct gm_page *gm_page) +{ + spin_lock(&gm_page->rmap_lock); + gm_page->mm = NULL; + gm_page->va = 0; + spin_unlock(&gm_page->rmap_lock); +} + +enum gm_evict_ret { + GM_EVICT_SUCCESS = 0, + GM_EVICT_UNMAP, + GM_EVICT_FALLBACK, + GM_EVICT_DEVERR, +}; + +enum gm_evict_ret gm_evict_page_locked(struct gm_page *gm_page) +{ + struct gm_dev *gm_dev; + struct gm_mapping *gm_mapping; + struct vm_area_struct *vma; + struct mm_struct *mm; + struct page *page; + struct device *dma_dev; + unsigned long va; + struct folio *folio = NULL; + struct gm_fault_t gmf = { + .size = HPAGE_SIZE, + .copy = true + }; + enum gm_evict_ret ret = GM_EVICT_SUCCESS; + int err; + + gm_dev = get_gm_dev(gm_page->hnid); + if (!gm_dev) + return GM_EVICT_DEVERR; + + spin_lock(&gm_page->rmap_lock); + if (!gm_page->mm) { + /* Evicting gm_page conflicts with unmap.*/ + ret = GM_EVICT_UNMAP; + goto rmap_unlock; + } + + mm = gm_page->mm; + va = gm_page->va; + vma = find_vma(mm, va); + if (!vma || !vma->vm_obj) { + gmem_err("%s: cannot find vma or vma->vm_obj is null for va %lx", __func__, va); + ret = GM_EVICT_UNMAP; + goto rmap_unlock; + } + + gm_mapping = vm_object_lookup(vma->vm_obj, va); + if (!gm_mapping) { + gmem_err("%s: no gm_mapping for va %lx", __func__, va); + ret = GM_EVICT_UNMAP; + goto rmap_unlock; + } + + 
spin_unlock(&gm_page->rmap_lock); + + mutex_lock(&gm_mapping->lock); + if (!gm_mapping_device(gm_mapping)) { + /* Evicting gm_page conflicts with unmap.*/ + ret = GM_EVICT_UNMAP; + goto gm_mapping_unlock; + } + + if (gm_mapping->gm_page != gm_page) { + /* gm_mapping maps to another gm_page. */ + ret = GM_EVICT_UNMAP; + goto gm_mapping_unlock; + } + + folio = vma_alloc_folio(GFP_TRANSHUGE, HPAGE_PMD_ORDER, vma, va, true); + if (!folio) { + gmem_err("%s: allocate host page failed.", __func__); + ret = GM_EVICT_FALLBACK; + goto gm_mapping_unlock; + } + page = &folio->page; + + gmf.mm = mm; + gmf.va = va; + gmf.dev = gm_dev; + gmf.pfn = gm_page->dev_pfn; + dma_dev = gm_dev->dma_dev; + gmf.dma_addr = dma_map_page(dma_dev, page, 0, HPAGE_SIZE, DMA_BIDIRECTIONAL); + if (dma_mapping_error(dma_dev, gmf.dma_addr)) { + gmem_err("%s: dma map failed.", __func__); + ret = GM_EVICT_FALLBACK; + goto gm_mapping_unlock; + } + + err = gm_dev->mmu->peer_unmap(&gmf); + if (err) { + gmem_err("%s: peer_unmap failed.", __func__); + ret = GM_EVICT_DEVERR; + goto dma_unmap; + } + + gm_mapping_flags_set(gm_mapping, GM_MAPPING_CPU); + gm_page_remove_rmap(gm_page); + gm_mapping->page = page; + put_gm_page(gm_page); +dma_unmap: + dma_unmap_page(dma_dev, gmf.dma_addr, HPAGE_SIZE, DMA_BIDIRECTIONAL); +gm_mapping_unlock: + mutex_unlock(&gm_mapping->lock); + return ret; +rmap_unlock: + spin_unlock(&gm_page->rmap_lock); + return ret; +} + +enum gm_evict_ret gm_evict_page(struct gm_page *gm_page) +{ + struct mm_struct *mm = gm_page->mm; + enum gm_evict_ret ret; + + mmap_read_lock(mm); + ret = gm_evict_page_locked(gm_page); + mmap_read_unlock(mm); + return ret; +} + +static void gm_do_swap(struct hnode *hnode) +{ + struct list_head swap_list; + struct gm_page *gm_page, *n; + unsigned int nr_swap_pages = 0; + int ret; + + INIT_LIST_HEAD(&swap_list); + + spin_lock(&hnode->activelist_lock); + list_for_each_entry_safe(gm_page, n, &hnode->activelist, gm_page_list) { + if (gm_page_pinned(gm_page)) { + gmem_err("%s: va %lx is pinned!", __func__, gm_page->va); + continue; + } + /* Move gm_page to temporary list. */ + get_gm_page(gm_page); + gm_page_flags_set(gm_page, GM_PAGE_EVICTING); + list_move(&gm_page->gm_page_list, &swap_list); + nr_swap_pages++; + if (nr_swap_pages >= NUM_SWAP_PAGES) + break; + } + spin_unlock(&hnode->activelist_lock); + + list_for_each_entry_safe(gm_page, n, &swap_list, gm_page_list) { + list_del_init(&gm_page->gm_page_list); + ret = gm_evict_page_locked(gm_page); + gm_page_flags_clear(gm_page, GM_PAGE_EVICTING); + if (ret == GM_EVICT_UNMAP) { + /* Evicting gm_page conflicts with unmap.*/ + put_gm_page(gm_page); + } else if (ret == GM_EVICT_FALLBACK) { + /* An error occurred with the host, and gm_page needs + * to be added back to the activelist. */ + hnode_activelist_add(hnode, gm_page); + put_gm_page(gm_page); + } else if (ret == GM_EVICT_DEVERR) { + /* It generally occurs when the process has already + * exited, at which point gm_page needs to be returned + * to the freelist. 
*/ + put_gm_page(gm_page); + } else { + hnode_active_pages_dec(hnode); + put_gm_page(gm_page); + } + } +}; + +static inline bool need_wake_up_swapd(struct hnode *hnode) +{ + return false; +} + +static int swapd_func(void *data) +{ + struct hnode *hnode = (struct hnode *)data; + + while (!kthread_should_stop()) { + if (!need_wake_up_swapd(hnode)) { + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + } + + gm_do_swap(hnode); + } + + return 0; +}; + +static void init_swapd(struct hnode *hnode) +{ + hnode->swapd_task = kthread_run(swapd_func, NULL, "gm_swapd/%u", hnode->id); + if (IS_ERR(hnode->swapd_task)) { + gmem_err("%s: create swapd task failed", __func__); + hnode->swapd_task = NULL; + } +} + +static void wake_up_swapd(struct hnode *hnode) +{ + if (likely(hnode->swapd_task)) + wake_up_process(hnode->swapd_task); +} + +static bool can_import(struct hnode *hnode) +{ + unsigned long nr_pages; + unsigned long used_mem; + + nr_pages = atomic_read(&hnode->nr_free_pages) + atomic_read(&hnode->nr_active_pages); + used_mem = nr_pages * HPAGE_SIZE; + + /* GMEM usable memory is unlimited if max_memsize is zero. */ + if (!hnode->max_memsize) + return true; + return used_mem < hnode->max_memsize; +} + +static struct gm_page *get_gm_page_from_freelist(struct hnode *hnode) +{ + struct gm_page *gm_page; + + spin_lock(&hnode->freelist_lock); + gm_page = list_first_entry_or_null(&hnode->freelist, struct gm_page, gm_page_list); + /* Delete from freelist. */ + if (gm_page) { + if (gm_page_pinned(gm_page)) { + gmem_err("%s: gm_page %lx from freelist has pinned flag, clear it!", __func__, (unsigned long)gm_page); + gm_page_flags_clear(gm_page, GM_PAGE_PINNED); + } + list_del_init(&gm_page->gm_page_list); + hnode_free_pages_dec(hnode); + get_gm_page(gm_page); + /* TODO: wakeup swapd if needed. */ + if (need_wake_up_swapd(hnode)) + wake_up_swapd(hnode); + } + spin_unlock(&hnode->freelist_lock); + + return gm_page; +} + +/* + * gm_alloc_page - Allocate a gm_page. + * + * Allocate a gm_page from hnode freelist. If failed to allocate gm_page, try + * to import memory from device. And if failed to import memory, try to swap + * several gm_pages to host and allocate gm_page again. + */ +struct gm_page *gm_alloc_page(struct mm_struct *mm, struct hnode *hnode) +{ + struct gm_page *gm_page; + struct gm_dev *gm_dev; + int retry_times = 0; + int ret = 0; + + if (hnode->dev) + gm_dev = hnode->dev; + else + return NULL; + +retry: + gm_page = get_gm_page_from_freelist(hnode); + if (!gm_page && can_import(hnode) && !hnode->import_failed) { + /* Import pages from device. */ + ret = gm_dev->mmu->import_phys_mem(mm, hnode->id, NUM_IMPORT_PAGES); + if (!ret) + goto retry; + hnode->import_failed = true; + } + + /* Try to swap pages. */ + if (!gm_page) { + if (retry_times > MAX_SWAP_RETRY_TIMES) + return NULL; + gm_do_swap(hnode); + retry_times++; + goto retry; + } + + return gm_page; +} + diff --git a/mm/gmem_stat.c b/mm/gmem_stat.c new file mode 100644 index 0000000000000000000000000000000000000000..dbf4de2151cb4c5bb6827590490fce6f4b87f59d --- /dev/null +++ b/mm/gmem_stat.c @@ -0,0 +1,208 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * GMEM statistics. + * + * Copyright (C) 2025- Huawei, Inc. 
+ * Author: Bin Wang + * + */ + +#include +#include +#include + +static struct kobject *gm_kobj; + +struct hnode_kobject { + struct kobject kobj; + unsigned int hnid; +}; + +#define HNODE_NAME_LEN 32 + +static struct hnode *get_hnode_kobj(struct kobject *kobj) +{ + struct hnode *hnode; + struct hnode_kobject *hnode_kobj; + + hnode_kobj = container_of(kobj, struct hnode_kobject, kobj); + hnode = get_hnode(hnode_kobj->hnid); + if (!hnode) { + gmem_err("%s: failed to get hnode from kobject", __func__); + return NULL; + } + + return hnode; +} + + +static ssize_t max_memsize_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct hnode *hnode = get_hnode_kobj(kobj); + + if (!hnode) + return -EINVAL; + + return sprintf(buf, "%lu\n", hnode->max_memsize); +} + +static ssize_t max_memsize_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct hnode *hnode = get_hnode_kobj(kobj); + + if (!hnode) + return -EINVAL; + + hnode->max_memsize = memparse(buf, NULL) & (~(HPAGE_SIZE - 1)); + return count; +} + +static struct kobj_attribute max_memsize_attr = + __ATTR(max_memsize, 0640, max_memsize_show, max_memsize_store); + +static ssize_t nr_freepages_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct hnode *hnode = get_hnode_kobj(kobj); + + if (!hnode) + return -EINVAL; + + return sprintf(buf, "%u\n", atomic_read(&hnode->nr_free_pages)); +} + +static struct kobj_attribute nr_freepages_attr = + __ATTR(nr_freepages, 0440, nr_freepages_show, NULL); + +static ssize_t nr_activepages_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct hnode *hnode = get_hnode_kobj(kobj); + + if (!hnode) + return -EINVAL; + + return sprintf(buf, "%u\n", atomic_read(&hnode->nr_active_pages)); +} + +static struct kobj_attribute nr_activepages_attr = + __ATTR(nr_activepages, 0444, nr_activepages_show, NULL); + +static ssize_t nr_freelist_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + unsigned int nr_freelist = 0; + struct gm_page *gm_page; + struct hnode *hnode = get_hnode_kobj(kobj); + if (!hnode) + return -EINVAL; + + spin_lock(&hnode->freelist_lock); + list_for_each_entry(gm_page, &hnode->freelist, gm_page_list) { + nr_freelist++; + } + spin_unlock(&hnode->freelist_lock); + return sprintf(buf, "%u\n", nr_freelist); +} + +static struct kobj_attribute nr_freelist_attr = + __ATTR(nr_freelist, 0440, nr_freelist_show, NULL); + +static ssize_t nr_activelist_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + unsigned int nr_activelist = 0; + struct gm_page *gm_page; + struct hnode *hnode = get_hnode_kobj(kobj); + if (!hnode) + return -EINVAL; + + spin_lock(&hnode->activelist_lock); + list_for_each_entry(gm_page, &hnode->activelist, gm_page_list) { + nr_activelist++; + } + spin_unlock(&hnode->activelist_lock); + return sprintf(buf, "%u\n", nr_activelist); +} + +static struct kobj_attribute nr_activelist_attr = + __ATTR(nr_activelist, 0440, nr_activelist_show, NULL); + +static struct attribute *hnode_attrs[] = { + &max_memsize_attr.attr, + &nr_freepages_attr.attr, + &nr_activepages_attr.attr, + &nr_freelist_attr.attr, + &nr_activelist_attr.attr, + NULL, +}; + +static struct attribute_group hnode_attr_group = { + .attrs = hnode_attrs, +}; + +static void hnode_kobj_release(struct kobject *kobj) +{ + struct hnode_kobject *hnode_kobj = + container_of(kobj, struct hnode_kobject, kobj); + kfree(hnode_kobj); +} + +static const struct kobj_type hnode_kobj_ktype = { + 
.release = hnode_kobj_release, + .sysfs_ops = &kobj_sysfs_ops, +}; + +int hnode_init_sysfs(unsigned int hnid) +{ + int ret; + struct hnode_kobject *hnode_kobj; + + hnode_kobj = kzalloc(sizeof(struct hnode_kobject), GFP_KERNEL); + if (!hnode_kobj) + return -ENOMEM; + + ret = kobject_init_and_add(&hnode_kobj->kobj, &hnode_kobj_ktype, + gm_kobj, "hnode%u", hnid); + if (ret) { + gmem_err("%s: failed to init hnode object", __func__); + goto free_hnode_kobj; + } + + ret = sysfs_create_group(&hnode_kobj->kobj, &hnode_attr_group); + if (ret) { + gmem_err("%s: failed to register hnode group", __func__); + goto delete_hnode_kobj; + } + + hnode_kobj->hnid = hnid; + return 0; + +delete_hnode_kobj: + kobject_put(&hnode_kobj->kobj); +free_hnode_kobj: + kfree(hnode_kobj); + return ret; +} +EXPORT_SYMBOL(hnode_init_sysfs); + +int __init gm_init_sysfs(void) +{ + gm_kobj = kobject_create_and_add("gmem", mm_kobj); + if (!gm_kobj) { + gmem_err("%s: failed to create gmem object", __func__); + return -ENOMEM; + } + + return 0; + +} +EXPORT_SYMBOL(gm_init_sysfs); + +void gm_deinit_sysfs(void) +{ + kobject_put(gm_kobj); +} diff --git a/mm/huge_memory.c b/mm/huge_memory.c index a28dda79997820f95a241fed211ce489cd80dc19..59f546540fd0260a0ab714298e3fcf1079ccf130 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -38,6 +38,10 @@ #include #include #include +#ifdef CONFIG_GMEM +#include +#endif + #include #include @@ -1337,6 +1341,46 @@ unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr, } EXPORT_SYMBOL_GPL(thp_get_unmapped_area); +static struct folio *vma_alloc_peer_shared_folio_pmd(struct vm_area_struct *vma, + unsigned long haddr, gm_mapping_t *gm_mapping) +{ + struct folio *folio; + gfp_t gfp = GFP_TRANSHUGE; + + folio = vma_alloc_folio(gfp, HPAGE_PMD_ORDER, vma, haddr, true); + if (unlikely(!folio)) { + count_vm_event(THP_FAULT_FALLBACK); + count_mthp_stat(order, MTHP_STAT_ANON_FAULT_FALLBACK); + return NULL; + } + + VM_BUG_ON_FOLIO(!folio_test_large(folio), folio); + if (mem_cgroup_charge(folio, vma->vm_mm, gfp)) { + folio_put(folio); + count_vm_event(THP_FAULT_FALLBACK); + count_vm_event(THP_FAULT_FALLBACK_CHARGE); + count_mthp_stat(order, MTHP_STAT_ANON_FAULT_FALLBACK); + count_mthp_stat(order, MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE); + return NULL; + } + folio_throttle_swaprate(folio, gfp); + + /* + * gmem device overcommit needs to reload the swapped page, + * so skip it to avoid clearing device data. + */ + if (!gm_mapping_cpu(gm_mapping)) + clear_huge_page(page, vmf->address, HPAGE_PMD_NR); + + /* + * The memory barrier inside __folio_mark_uptodate makes sure that + * clear_huge_page writes become visible before the set_pmd_at() + * write. 
+ */ + __folio_mark_uptodate(folio); + return folio; +} + static struct folio *vma_alloc_anon_folio_pmd(struct vm_area_struct *vma, unsigned long addr) { @@ -1344,6 +1388,12 @@ static struct folio *vma_alloc_anon_folio_pmd(struct vm_area_struct *vma, const int order = HPAGE_PMD_ORDER; struct folio *folio; +#ifdef CONFIG_GMEM + /* always try to compact hugepage for peer shared vma */ + if (vma_is_peer_shared(vma)) + gfp = GFP_TRANSHUGE; +#endif + folio = vma_alloc_folio(gfp, order, vma, addr & HPAGE_PMD_MASK, true); if (unlikely(!folio)) { @@ -1391,6 +1441,101 @@ static void map_anon_folio_pmd(struct folio *folio, pmd_t *pmd, count_memcg_event_mm(vma->vm_mm, THP_FAULT_ALLOC); } +struct gm_mapping *vma_prepare_gm_mapping(struct vm_area_struct *vma, unsigned long haddr) +{ + struct gm_mapping *gm_mapping; + + xa_lock(vma->vm_obj->logical_page_table); + gm_mapping = vm_object_lookup(vma->vm_obj, haddr); + if (!gm_mapping) { + vm_object_mapping_create(vma->vm_obj, haddr); + gm_mapping = vm_object_lookup(vma->vm_obj, haddr); + } + xa_unlock(vma->vm_obj->logical_page_table); + + return gm_mapping; +} + +static vm_fault_t __do_peer_shared_anonymous_page(struct vm_fault *vmf) +{ + unsigned long haddr = vmf->address & HPAGE_PMD_MASK; + struct vm_area_struct *vma = vmf->vma; + struct folio *folio = NULL; + bool is_new_folio = false; + pgtable_t pgtable = NULL; + struct gm_mapping *gm_mapping; + vm_fault_t ret = 0; + + gm_mapping = vma_prepare_gm_mapping(vma, haddr); + if (!gm_mapping) + return VM_FAULT_OOM; + + mutex_lock(&gm_mapping->lock); + + if (gm_mapping_device(gm_mapping) && gm_page_pinned(gm_mapping->gm_page)) { + pr_err("page is pinned! addr is %lx\n", gm_mapping->gm_page->va); + ret = VM_FAULT_SIGBUS; + goto release; + } + + if (gm_mapping_cpu(gm_mapping)) + folio = page_folio(gm_mapping->page); + if (!folio) { + folio = vma_alloc_anon_folio_pmd(vma, haddr); + is_new_folio = true; + } + + if (unlikely(!folio)) { + ret = VM_FAULT_FALLBACK; + goto release; + } + + pgtable = pte_alloc_one(vma->vm_mm); + if (unlikely(!pgtable)) { + ret = VM_FAULT_OOM; + goto release; + } + + /** + * if page is mapped in device, release device mapping and + * deliver the page content to host. 
+ */ + if (gm_mapping_device(gm_mapping)) { + vmf->page = &folio->page; + ret = gm_host_fault_locked(vmf, PMD_ORDER); + if (ret) + goto release; + } + + /* map page in pgtable */ + vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); + + BUG_ON(!pmd_none(*vmf->pmd)); + ret = check_stable_address_space(vma->vm_mm); + if (ret) + goto unlock_release; + pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable); + map_anon_folio_pmd(folio, vmf->pmd, vma, haddr); + mm_inc_nr_ptes(vma->vm_mm); + spin_unlock(vmf->ptl); + + /* finally setup cpu mapping */ + gm_mapping_flags_set(gm_mapping, GM_MAPPING_CPU); + gm_mapping->page = &folio->page; + mutex_unlock(&gm_mapping->lock); + + return 0; +unlock_release: + spin_unlock(vmf->ptl); +release: + if (pgtable) + pte_free(vma->vm_mm, pgtable); + if (is_new_folio) + folio_put(folio); + mutex_unlock(&gm_mapping->lock); + return ret; +} + static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf) { unsigned long haddr = vmf->address & HPAGE_PMD_MASK; @@ -1440,7 +1585,6 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf) pte_free(vma->vm_mm, pgtable); folio_put(folio); return ret; - } /* @@ -1506,6 +1650,9 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf) return ret; khugepaged_enter_vma(vma, vma->vm_flags); + if (vma_is_peer_shared(vma)) + return __do_peer_shared_anonymous_page(vmf); + if (!(vmf->flags & FAULT_FLAG_WRITE) && !mm_forbids_zeropage(vma->vm_mm) && transparent_hugepage_use_zero_page()) { @@ -1545,7 +1692,6 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf) } return ret; } - return __do_huge_pmd_anonymous_page(vmf); } diff --git a/mm/memory.c b/mm/memory.c index 4bb3acfc3dd9c885956ec02bac7545b5f5e9947f..8891831579e45b9cc8e32282a196305c744b1b79 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -77,6 +77,10 @@ #include #include #include +#ifdef CONFIG_GMEM +#include +#endif + #include #include @@ -1710,6 +1714,50 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, return addr; } +#ifdef CONFIG_GMEM +static inline void zap_logic_pmd_range(struct vm_area_struct *vma, + unsigned long addr, + unsigned long end) +{ + struct gm_mapping *gm_mapping = NULL; + struct page *page = NULL; + + if (!vma->vm_obj) + return; + + xa_lock(vma->vm_obj->logical_page_table); + gm_mapping = vm_object_lookup(vma->vm_obj, addr); + + if (gm_mapping && gm_mapping_cpu(gm_mapping)) { + page = gm_mapping->page; + if (page && (page_ref_count(page) != 0)) { + put_page(page); + gm_mapping->page = NULL; + } + } + xa_unlock(vma->vm_obj->logical_page_table); +} + +static inline void zap_logic_pud_range(struct vm_area_struct *vma, + unsigned long addr, + unsigned long end) +{ + unsigned long next; + + do { + next = pmd_addr_end(addr, end); + zap_logic_pmd_range(vma, addr, next); + } while (addr = next, addr != end); +} +#else +static inline void zap_logic_pmd_range(struct vm_area_struct *vma, + unsigned long addr, + unsigned long end) {} +static inline void zap_logic_pud_range(struct vm_area_struct *vma, + unsigned long addr, + unsigned long end) {} +#endif + static inline unsigned long zap_pmd_range(struct mmu_gather *tlb, struct vm_area_struct *vma, pud_t *pud, unsigned long addr, unsigned long end, @@ -1740,6 +1788,19 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb, */ spin_unlock(ptl); } +#ifdef CONFIG_GMEM + /* + * Here there can be other concurrent MADV_DONTNEED or + * trans huge page faults running, and if the pmd is + * none or trans huge it can change under us. 
This is + * because MADV_DONTNEED holds the mmap_lock in read + * mode. + */ + if (vma_is_peer_shared(vma)) { + if (pmd_none_or_clear_bad(pmd) || pmd_trans_huge(*pmd)) + zap_logic_pmd_range(vma, addr, next); + } +#endif if (pmd_none(*pmd)) { addr = next; continue; @@ -1771,8 +1832,11 @@ static inline unsigned long zap_pud_range(struct mmu_gather *tlb, goto next; /* fall through */ } - if (pud_none_or_clear_bad(pud)) + if (pud_none_or_clear_bad(pud)) { + if (vma_is_peer_shared(vma)) + zap_logic_pud_range(vma, addr, next); continue; + } next = zap_pmd_range(tlb, vma, pud, addr, next, details); next: cond_resched(); @@ -1792,8 +1856,11 @@ static inline unsigned long zap_p4d_range(struct mmu_gather *tlb, p4d = p4d_offset(pgd, addr); do { next = p4d_addr_end(addr, end); - if (p4d_none_or_clear_bad(p4d)) + if (p4d_none_or_clear_bad(p4d)) { + if (vma_is_peer_shared(vma)) + zap_logic_pud_range(vma, addr, next); continue; + } next = zap_pud_range(tlb, vma, p4d, addr, next, details); } while (p4d++, addr = next, addr != end); @@ -1813,8 +1880,13 @@ void unmap_page_range(struct mmu_gather *tlb, pgd = pgd_offset(vma->vm_mm, addr); do { next = pgd_addr_end(addr, end); - if (pgd_none_or_clear_bad(pgd)) + if (pgd_none_or_clear_bad(pgd)) { +#ifdef CONFIG_GMEM + if (vma_is_peer_shared(vma)) + zap_logic_pud_range(vma, addr, next); +#endif continue; + } next = zap_p4d_range(tlb, vma, pgd, addr, next, details); } while (pgd++, addr = next, addr != end); tlb_end_vma(tlb, vma); @@ -1865,6 +1937,77 @@ static void unmap_single_vma(struct mmu_gather *tlb, } } +static void unmap_single_peer_shared_vma(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long start_addr, unsigned long end_addr) +{ + unsigned long start, end, addr; + struct vm_object *obj = vma->vm_obj; + enum gm_ret ret; + struct gm_mapping *gm_mapping; + struct hnode *hnode; + struct gm_fault_t gmf = { + .mm = mm, + .copy = false, + }; + + start = max(vma->vm_start, start_addr); + if (start >= vma->vm_end) + return; + addr = start; + end = min(vma->vm_end, end_addr); + if (end <= vma->vm_start) + return; + + if (!obj) + return; + + if (!mm->gm_as) + return; + + do { + xa_lock(obj->logical_page_table); + gm_mapping = vm_object_lookup(obj, addr); + if (!gm_mapping) { + xa_unlock(obj->logical_page_table); + continue; + } + xa_unlock(obj->logical_page_table); + + mutex_lock(&gm_mapping->lock); + if (!gm_mapping_device(gm_mapping)) { + mutex_unlock(&gm_mapping->lock); + continue; + } + + /* In fact, during the exit_mmap process of the host, we do not + * need to call peer_unmap to release the memory within the NPU + * card, as the NPU card has an independent process that will + * handle the unmap operation. */ + //gmf.va = addr; + //gmf.size = HPAGE_SIZE; + //gmf.pfn = gm_mapping->gm_page->dev_pfn; + //gmf.dev = gm_mapping->dev; + //ret = gm_mapping->dev->mmu->peer_unmap(&gmf); + //if (ret != GM_RET_SUCCESS) + // gmem_err("%s: call dev peer_unmap error %d", __func__, ret); + + /* + * Regardless of whether the gm_page is unmapped, we should release it. 
+ */ + hnode = get_hnode(gm_mapping->gm_page->hnid); + if (!hnode) { + mutex_unlock(&gm_mapping->lock); + continue; + } + gm_page_remove_rmap(gm_mapping->gm_page); + hnode_activelist_del(hnode, gm_mapping->gm_page); + hnode_active_pages_dec(hnode); + put_gm_page(gm_mapping->gm_page); + gm_mapping->gm_page = NULL; + mutex_unlock(&gm_mapping->lock); + } while (addr += HPAGE_SIZE, addr != end); +} + /** * unmap_vmas - unmap a range of memory covered by a list of vma's * @tlb: address of the caller's struct mmu_gather @@ -1908,6 +2051,9 @@ void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas, unmap_single_vma(tlb, vma, start, end, &details, mm_wr_locked); hugetlb_zap_end(vma, &details); +#ifdef CONFIG_GMEM + unmap_single_peer_shared_vma(vma->vm_mm, vma, start, end); +#endif vma = mas_find(mas, tree_end - 1); } while (vma && likely(!xa_is_zero(vma))); mmu_notifier_invalidate_range_end(&range); @@ -5802,7 +5948,9 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma, pgd_t *pgd; p4d_t *p4d; vm_fault_t ret; - +#ifdef CONFIG_GMEM + char *thp_enable_path = "/sys/kernel/mm/transparent_hugepage/enabled"; +#endif pgd = pgd_offset(mm, address); p4d = p4d_alloc(mm, pgd, address); if (!p4d) @@ -5855,9 +6003,21 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma, ret = create_huge_pmd(&vmf); if (!(ret & VM_FAULT_FALLBACK)) return ret; + if (vma_is_peer_shared(vma)) + return VM_FAULT_OOM; } else { vmf.orig_pmd = pmdp_get_lockless(vmf.pmd); +#ifdef CONFIG_GMEM + if (vma_is_peer_shared(vma) && pmd_none(*vmf.pmd) && + (thp_disabled_by_hw() || vma_thp_disabled(vma, vma->vm_flags))) { + /* if transparent hugepage is not enabled, return pagefault failed */ + gmem_err("transparent hugepage is not enabled. check %s\n", + thp_enable_path); + return VM_FAULT_SIGBUS; + } +#endif + if (unlikely(is_swap_pmd(vmf.orig_pmd))) { VM_BUG_ON(thp_migration_supported() && !is_pmd_migration_entry(vmf.orig_pmd)); diff --git a/mm/mempolicy.c b/mm/mempolicy.c index a82aab7ab47a5ea444b2cc01c77520dea817b683..bb35f9fafcf61083b4b040d7f98ad9787175c98f 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1902,8 +1902,13 @@ SYSCALL_DEFINE5(get_mempolicy, int __user *, policy, bool vma_migratable(struct vm_area_struct *vma) { +#ifdef CONFIG_GMEM + if (vma->vm_flags & (VM_IO | VM_PFNMAP | VM_PEER_SHARED)) + return false; +#else if (vma->vm_flags & (VM_IO | VM_PFNMAP)) return false; +#endif /* * DAX device mappings require predictable access latency, so avoid diff --git a/mm/mm_init.c b/mm/mm_init.c index 6677aaa5972d4e97fe5604d64d73dab3903fe7c6..1a3d3b6e52c9c20d73f7b557663b67eb86d71960 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -30,6 +30,9 @@ #include "internal.h" #include "slab.h" #include "shuffle.h" +#ifdef CONFIG_GMEM +#include +#endif #include @@ -2797,6 +2800,9 @@ static void __init mem_init_print_info(void) */ void __init mm_core_init(void) { +#ifdef CONFIG_GMEM + hnuma_init(); +#endif /* Initializations relying on SMP setup */ build_all_zonelists(NULL); page_alloc_init_cpuhp(); diff --git a/mm/mmap.c b/mm/mmap.c index fb54df419ea2c360f1e0c23f921f54627a08c5b5..2e777ad31323629354257169f8f11557fb1695d1 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -47,6 +47,10 @@ #include #include #include +#ifdef CONFIG_GMEM +#include +#endif + #include #include @@ -645,7 +649,9 @@ static inline int dup_anon_vma(struct vm_area_struct *dst, */ if (src->anon_vma && !dst->anon_vma) { int ret; - +#ifdef CONFIG_GMEM + dup_vm_object(dst, src, true); +#endif vma_assert_write_locked(dst); dst->anon_vma = src->anon_vma; ret = 
anon_vma_clone(dst, src); @@ -701,6 +707,13 @@ int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma, /* Only handles expanding */ VM_WARN_ON(vma->vm_start < start || vma->vm_end > end); + if (vma_is_peer_shared(vma)) { + if (!remove_next) + vm_object_adjust(vma, start, end); + else + vm_object_merge(vma, next->vm_end); + } + /* Note: vma iterator must be pointing to 'start' */ vma_iter_config(vmi, start, end); if (vma_iter_prealloc(vmi, vma)) @@ -752,6 +765,9 @@ int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma, vma_prepare(&vp); vma_adjust_trans_huge(vma, start, end, 0); + if (vma_is_peer_shared(vma)) + vm_object_adjust(vma, start, end); + vma_iter_clear(vmi); vma->vm_start = start; vma->vm_end = end; @@ -1002,6 +1018,9 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm, if (!next->anon_vma) err = dup_anon_vma(prev, curr, &anon_dup); } + if (vma_is_peer_shared(prev)) { + vm_object_merge(prev, next->vm_end); + } } else if (merge_prev) { /* case 2 */ if (curr) { vma_start_write(curr); @@ -1020,6 +1039,9 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm, } if (!err) err = dup_anon_vma(prev, curr, &anon_dup); + if (vma_is_peer_shared(prev)) { + vm_object_merge(prev, end); + } } } else { /* merge_next */ vma_start_write(next); @@ -1030,6 +1052,9 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm, adjust = next; adj_start = -(prev->vm_end - addr); err = dup_anon_vma(next, prev, &anon_dup); + if (vma_is_peer_shared(prev)) { + vm_object_merge(prev, addr); + } } else { /* * Note that cases 3 and 8 are the ONLY ones where prev @@ -1045,6 +1070,8 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm, remove = curr; err = dup_anon_vma(next, curr, &anon_dup); } + if (vma_is_peer_shared(curr)) + vm_object_merge(vma, next->vm_end); } } @@ -1316,11 +1343,21 @@ unsigned long __do_mmap_mm(struct mm_struct *mm, struct file *file, unsigned lon /* Obtain the address to map to. we verify (or select) it and ensure * that it represents a valid section of the address space. 
*/ +#ifdef CONFIG_GMEM + if (gmem_is_enabled() && (flags & MAP_PEER_SHARED)) { + len = round_up(len, PMD_SIZE); + addr = get_unmapped_area_aligned(file, addr, len, pgoff, flags, + PMD_SIZE); + } else { + addr = get_unmapped_area(file, addr, len, pgoff, flags); + } +#else addr = get_unmapped_area(file, addr, len, pgoff, flags); +#endif if (IS_ERR_VALUE(addr)) return addr; - if (flags & MAP_FIXED_NOREPLACE) { + if ((flags & MAP_FIXED_NOREPLACE) || (gmem_is_enabled() && (flags & MAP_PEER_SHARED))) { if (find_vma_intersection(mm, addr, addr + len)) return -EEXIST; } @@ -1439,6 +1476,14 @@ unsigned long __do_mmap_mm(struct mm_struct *mm, struct file *file, unsigned lon if (file && is_file_hugepages(file)) vm_flags |= VM_NORESERVE; } +#ifdef CONFIG_GMEM + if (flags & MAP_PEER_SHARED) { + if (gmem_is_enabled()) + vm_flags |= VM_PEER_SHARED; + else + return -EINVAL; + } +#endif addr = __mmap_region_ext(mm, file, addr, len, vm_flags, pgoff, uf); if (!IS_ERR_VALUE(addr) && @@ -1447,6 +1492,7 @@ unsigned long __do_mmap_mm(struct mm_struct *mm, struct file *file, unsigned lon *populate = len; return addr; } +EXPORT_SYMBOL(__do_mmap_mm); unsigned long do_mmap(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, @@ -1465,7 +1511,26 @@ unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len, struct file *file = NULL; unsigned long retval; +#ifdef CONFIG_GMEM + if (gmem_is_enabled() && (flags & MAP_SHARED) && (flags & MAP_PEER_SHARED)) { + retval = -EINVAL; + gmem_err(" MAP_PEER_SHARED and MAP_SHARE cannot be used together.\n"); + goto out_fput; + } + if (gmem_is_enabled() && (flags & MAP_HUGETLB) && (flags & MAP_PEER_SHARED)) { + retval = -EINVAL; + gmem_err(" MAP_PEER_SHARED and MAP_HUGETLB cannot be used together.\n"); + goto out_fput; + } +#endif if (!(flags & MAP_ANONYMOUS)) { +#ifdef CONFIG_GMEM + if (gmem_is_enabled() && (flags & MAP_PEER_SHARED)) { + retval = -EINVAL; + gmem_err(" MAP_PEER_SHARED cannot map file page.\n"); + goto out_fput; + } +#endif audit_mmap_fd(fd, flags); file = fget(fd); if (!file) @@ -1933,6 +1998,29 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, EXPORT_SYMBOL(get_unmapped_area); +#ifdef CONFIG_GMEM +unsigned long +get_unmapped_area_aligned(struct file *file, unsigned long addr, unsigned long len, + unsigned long pgoff, unsigned long flags, unsigned long align) +{ + if (len > TASK_SIZE) + return -ENOMEM; + + addr = current->mm->get_unmapped_area(file, addr, len + align, pgoff, flags); + if (IS_ERR_VALUE(addr)) + return addr; + + addr = round_up(addr, align); + if (addr > TASK_SIZE - len) + return -ENOMEM; + if (!IS_ALIGNED(addr, PMD_SIZE)) + return -EINVAL; + + return addr; +} +EXPORT_SYMBOL(get_unmapped_area_aligned); +#endif + /** * find_vma_intersection() - Look up the first VMA which intersects the interval * @mm: The process address space. 
@@ -2471,7 +2559,9 @@ int __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma, err = anon_vma_clone(new, vma); if (err) goto out_free_mpol; - +#ifdef COFNIG_GMEM + dup_vm_object(new, vma, false); +#endif if (new->vm_file) get_file(new->vm_file); @@ -2486,6 +2576,9 @@ int __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma, vma_prepare(&vp); vma_adjust_trans_huge(vma, vma->vm_start, addr, 0); + if (vma_is_peer_shared(vma)) + vm_object_split(vma, new); + if (new_below) { vma->vm_start = addr; vma->vm_pgoff += (addr - new->vm_start) >> PAGE_SHIFT; @@ -2523,6 +2616,135 @@ int split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma, return __split_vma(vmi, vma, addr, new_below); } +#ifdef CONFIG_GMEM +static void munmap_single_vma_in_peer_devices(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long start_addr, unsigned long end_addr) +{ + unsigned long start, end, addr; + struct vm_object *obj = vma->vm_obj; + enum gm_ret ret; + struct gm_context *ctx, *tmp; + struct gm_mapping *gm_mapping; + struct hnode *hnode; + struct gm_fault_t gmf = { + .mm = mm, + .copy = false, + }; + + start = max(vma->vm_start, start_addr); + if (start >= vma->vm_end) + return; + addr = start; + end = min(vma->vm_end, end_addr); + if (end <= vma->vm_start) + return; + + if (!obj) + return; + + if (!mm->gm_as) + return; + + do { + xa_lock(obj->logical_page_table); + gm_mapping = vm_object_lookup(obj, addr); + if (!gm_mapping) { + xa_unlock(obj->logical_page_table); + continue; + } + xa_unlock(obj->logical_page_table); + + mutex_lock(&gm_mapping->lock); + if (!gm_mapping_device(gm_mapping)) { + mutex_unlock(&gm_mapping->lock); + continue; + } + + gmf.va = addr; + gmf.size = HPAGE_SIZE; + gmf.pfn = gm_mapping->gm_page->dev_pfn; + gmf.dev = gm_mapping->dev; + ret = gm_mapping->dev->mmu->peer_unmap(&gmf); + if (ret != GM_RET_SUCCESS) + gmem_err("%s: call dev peer_unmap error %d", __func__, ret); + + /* + * Regardless of whether the gm_page is unmapped, we should release it. 
+ */ + hnode = get_hnode(gm_mapping->gm_page->hnid); + if (!hnode) { + mutex_unlock(&gm_mapping->lock); + continue; + } + gm_page_remove_rmap(gm_mapping->gm_page); + hnode_activelist_del(hnode, gm_mapping->gm_page); + hnode_active_pages_dec(hnode); + put_gm_page(gm_mapping->gm_page); + gm_mapping_flags_set(gm_mapping, GM_MAPPING_NOMAP); + gm_mapping->gm_page = NULL; + mutex_unlock(&gm_mapping->lock); + } while (addr += HPAGE_SIZE, addr != end); + + list_for_each_entry_safe(ctx, tmp, &mm->gm_as->gm_ctx_list, gm_as_link) { + if (!gm_dev_is_peer(ctx->dev)) + continue; + if (!ctx->dev->mmu->peer_va_free) + continue; + + gmf.va = start; + gmf.size = end - start; + gmf.dev = ctx->dev; + + ret = ctx->dev->mmu->peer_va_free(&gmf); + if (ret != GM_RET_SUCCESS) + pr_debug("gmem: free_vma failed, ret %d\n", ret); + } +} + +static void munmap_in_peer_devices(struct mm_struct *mm, unsigned long start, unsigned long end) +{ + struct vm_area_struct *vma; + + VMA_ITERATOR(vmi, mm, start); + for_each_vma_range(vmi, vma, end) { + if (vma_is_peer_shared(vma)) + munmap_single_vma_in_peer_devices(mm, vma, start, end); + } +} + +static unsigned long gmem_unmap_align(struct mm_struct *mm, unsigned long start, size_t len) +{ + struct vm_area_struct *vma, *vma_end; + + vma = find_vma_intersection(mm, start, start + len); + vma_end = find_vma(mm, start + len); + if (!vma || !vma_is_peer_shared(vma)) + return 0; + if (vma_is_peer_shared(vma)) { + if (!IS_ALIGNED(start, PMD_SIZE)) + return -EINVAL; + } + + /* Prevents partial release of the peer_share page. */ + if (vma_end && vma_end->vm_start < (start + len) && vma_is_peer_shared(vma_end)) + len = round_up(len, SZ_2M); + return len; +} + +static void gmem_unmap_region(struct mm_struct *mm, unsigned long start, size_t len) +{ + unsigned long end, ret; + + ret = gmem_unmap_align(mm, start, len); + + if (!ret || IS_ERR_VALUE(ret)) + return; + + end = start + ret; + munmap_in_peer_devices(mm, start, end); +} +#endif + /* * do_vmi_align_munmap() - munmap the aligned region from @start to @end. 
* @vmi: The vma iterator @@ -2653,6 +2875,7 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, prev = vma_iter_prev_range(vmi); next = vma_next(vmi); + if (next) vma_iter_prev_range(vmi); @@ -2711,6 +2934,17 @@ int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm, unsigned long end; struct vm_area_struct *vma; +#ifdef CONFIG_GMEM + if (gmem_is_enabled()) { + unsigned long ret = gmem_unmap_align(mm, start, len); + + if (IS_ERR_VALUE(ret)) + return ret; + else if (ret) + len = ret; + } +#endif + if ((offset_in_page(start)) || start > TASK_SIZE || len > TASK_SIZE-start) return -EINVAL; @@ -2745,6 +2979,10 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, { VMA_ITERATOR(vmi, mm, start); +#ifdef CONFIG_GMEM + if (gmem_is_enabled()) + gmem_unmap_region(mm, start, len); +#endif return do_vmi_munmap(&vmi, mm, start, len, uf, false); } @@ -2774,21 +3012,24 @@ static unsigned long __mmap_region(struct mm_struct *mm, struct file *file, nr_pages = count_vma_pages_range(mm, addr, end); if (!may_expand_vm(mm, vm_flags, - (len >> PAGE_SHIFT) - nr_pages)) + (len >> PAGE_SHIFT) - nr_pages)) { return -ENOMEM; + } } /* Unmap any existing mapping in the area */ - if (do_vmi_munmap(&vmi, mm, addr, len, uf, false)) + if (do_vmi_munmap(&vmi, mm, addr, len, uf, false)) { return -ENOMEM; + } /* * Private writable mapping: check memory availability */ if (accountable_mapping(file, vm_flags)) { charged = len >> PAGE_SHIFT; - if (security_vm_enough_memory_mm(mm, charged)) + if (security_vm_enough_memory_mm(mm, charged)) { return -ENOMEM; + } vm_flags |= VM_ACCOUNT; } @@ -3029,6 +3270,11 @@ static int __vm_munmap(unsigned long start, size_t len, bool unlock) if (sp_check_addr(start)) return -EINVAL; +#ifdef CONFIG_GMEM + if (gmem_is_enabled()) + gmem_unmap_region(mm, start, len); +#endif + if (mmap_write_lock_killable(mm)) return -EINTR; @@ -3410,6 +3656,10 @@ void exit_mmap(struct mm_struct *mm) __mt_destroy(&mm->mm_mt); mmap_write_unlock(mm); vm_unacct_memory(nr_accounted); +#ifdef CONFIG_GMEM + if (gmem_is_enabled() && mm->gm_as) + gm_as_destroy(mm->gm_as); +#endif } /* Insert vm structure into process list sorted by address diff --git a/mm/mprotect.c b/mm/mprotect.c index e65363eb603e64c46fd742cf01087f88c73491e5..4eac8ad8a7181e0979dfb8addf56174b3ce8dd41 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -693,7 +693,11 @@ static int do_mprotect_pkey(unsigned long start, size_t len, unsigned long prot, int pkey) { unsigned long nstart, end, tmp, reqprot; +#ifdef CONFIG_GMEM + struct vm_area_struct *vma, *prev, *vma_end; +#else struct vm_area_struct *vma, *prev; +#endif int error; const int grows = prot & (PROT_GROWSDOWN|PROT_GROWSUP); const bool rier = (current->personality & READ_IMPLIES_EXEC) && @@ -736,7 +740,19 @@ static int do_mprotect_pkey(unsigned long start, size_t len, error = -ENOMEM; if (!vma) goto out; - +#ifdef CONFIG_GMEM + if (vma_is_peer_shared(vma)) { + start = ALIGN_DOWN(start, HPAGE_SIZE); + vma_end = find_vma(current->mm, end); + if (vma_end && vma_end->vm_start < end && vma_is_peer_shared(vma_end)) + end = ALIGN(end, HPAGE_SIZE); + if (end <= start) { + error = -ENOMEM; + goto out; + } + len = end - start; + } +#endif if (unlikely(grows & PROT_GROWSDOWN)) { if (vma->vm_start >= end) goto out; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c233d61d0d06df9a48b779ad600d094ddd95510a..80b29d946a0d92ff57891a903cd4f252bd0434ce 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -194,6 +194,9 @@ EXPORT_SYMBOL(latent_entropy); nodemask_t 
node_states[NR_NODE_STATES] __read_mostly = { [N_POSSIBLE] = NODE_MASK_ALL, [N_ONLINE] = { { [0] = 1UL } }, +#ifdef CONFIG_GMEM + [N_HETEROGENEOUS] = NODE_MASK_NONE, +#endif #ifndef CONFIG_NUMA [N_NORMAL_MEMORY] = { { [0] = 1UL } }, #ifdef CONFIG_HIGHMEM diff --git a/mm/util.c b/mm/util.c index 7a5eed15c98fd9ce86fc16b7c27706209a6f8627..65392c97b1e993028dc29508bf134243dba4cf85 100644 --- a/mm/util.c +++ b/mm/util.c @@ -27,6 +27,9 @@ #include #include +#ifdef CONFIG_GMEM +#include +#endif #include "internal.h" #include "swap.h" @@ -540,6 +543,114 @@ int account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc) } EXPORT_SYMBOL_GPL(account_locked_vm); +#ifdef CONFIG_GMEM +static unsigned long alloc_va_in_peer_devices(unsigned long addr, unsigned long len, + unsigned long flag) +{ + struct vm_area_struct *vma; + struct mm_struct *mm = current->mm; + struct gm_context *ctx, *tmp; + unsigned long prot = VM_NONE; + enum gm_ret ret; + char *thp_enable_path = "/sys/kernel/mm/transparent_hugepage/enabled"; + + vma = find_vma(mm, addr); + if (!vma) { + gmem_err("vma for addr %lx is NULL, should not happen\n", addr); + return -EINVAL; + } + + if (thp_disabled_by_hw() || vma_thp_disabled(vma, vma->vm_flags)) { + gmem_err("transparent hugepage is not enabled. check %s\n", + thp_enable_path); + return -EINVAL; + } + + prot |= vma->vm_flags; + + if (!mm->gm_as) { + ret = gm_as_create(0, ULONG_MAX, GM_AS_ALLOC_DEFAULT, HPAGE_SIZE, &mm->gm_as); + if (ret) { + gmem_err("gm_as_create failed\n"); + return ret; + } + } + + ret = -ENODEV; + // TODO: consider the concurrency problem of device attaching/detaching from the gm_as. + list_for_each_entry_safe(ctx, tmp, &mm->gm_as->gm_ctx_list, gm_as_link) { + struct gm_fault_t gmf = { + .mm = mm, + .dev = ctx->dev, + .va = addr, + .size = len, + .prot = prot, + }; + + if (!gm_dev_is_peer(ctx->dev)) + continue; + + if (!ctx->dev->mmu->peer_va_alloc_fixed) { + pr_debug("gmem: mmu ops has no alloc_vma\n"); + continue; + } + + ret = ctx->dev->mmu->peer_va_alloc_fixed(&gmf); + if (ret != GM_RET_SUCCESS) { + gmem_err("device mmap failed\n"); + return ret; + } + } + + if (!vma->vm_obj) + vma->vm_obj = vm_object_create(vma); + if (!vma->vm_obj) + return -ENOMEM; + + return ret; +} + +struct gmem_vma_list { + unsigned long start; + size_t len; + struct list_head list; +}; + +static void gmem_reserve_vma(struct mm_struct *mm, unsigned long start, + size_t len, struct list_head *head) +{ + struct vm_area_struct *vma; + struct gmem_vma_list *node = kmalloc(sizeof(struct gmem_vma_list), GFP_KERNEL); + + vma = find_vma(mm, start); + if (!vma || vma->vm_start >= start + len) { + kfree(node); + return; + } + vm_flags_set(vma, ~VM_PEER_SHARED); + + node->start = start; + node->len = round_up(len, SZ_2M); + list_add_tail(&node->list, head); +} + +static void gmem_release_vma(struct mm_struct *mm, struct list_head *head) +{ + struct gmem_vma_list *node, *next; + + list_for_each_entry_safe(node, next, head, list) { + unsigned long start = node->start; + size_t len = node->len; + + if (len) + vm_munmap(start, len); + + list_del(&node->list); + kfree(node); + } +} +#endif + unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flag, unsigned long pgoff) @@ -548,7 +659,11 @@ unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr, struct mm_struct *mm = current->mm; unsigned long populate; LIST_HEAD(uf); - +#ifdef CONFIG_GMEM + unsigned int retry_times = 0; + LIST_HEAD(reserve_list); +retry: +#endif ret 
= security_mmap_file(file, prot, flag); if (!ret) { if (mmap_write_lock_killable(mm)) @@ -559,6 +674,27 @@ unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr, userfaultfd_unmap_complete(mm, &uf); if (populate) mm_populate(ret, populate); +#ifdef CONFIG_GMEM + if (gmem_is_enabled() && !IS_ERR_VALUE(ret) && flag & MAP_PEER_SHARED) { + enum gm_ret gm_ret = 0; + + gm_ret = alloc_va_in_peer_devices(ret, len, flag); + /* + * if alloc_va_in_peer_devices failed + * add vma to reserve_list and release after find a proper vma + */ + if (gm_ret == GM_RET_NOMEM && retry_times < GMEM_MMAP_RETRY_TIMES) { + retry_times++; + gmem_reserve_vma(mm, ret, len, &reserve_list); + goto retry; + } else if (gm_ret != GM_RET_SUCCESS) { + gmem_err("alloc vma ret %lu\n", ret); + gmem_reserve_vma(mm, ret, len, &reserve_list); + ret = -ENOMEM; + } + gmem_release_vma(mm, &reserve_list); + } +#endif } return ret; } diff --git a/mm/vm_object.c b/mm/vm_object.c new file mode 100644 index 0000000000000000000000000000000000000000..42219e8ff42b77827c5bff56381bd1b289e09251 --- /dev/null +++ b/mm/vm_object.c @@ -0,0 +1,379 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Logical Mapping Management + * + * Copyright (C) 2023- Huawei, Inc. + * Author: Weixi zhu, chao Liu + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Sine VM_OBJECT maintains the logical page table under each VMA, and each VMA + * points to a VM_OBJECT. Ultimately VM_OBJECTs must be maintained as long as VMA + * gets changed: merge, split, adjust + */ +static struct kmem_cache *vm_object_cachep; +static struct kmem_cache *gm_mapping_cachep; + +/* gm_mapping will not be release dynamically */ +struct gm_mapping *alloc_gm_mapping(void) +{ + struct gm_mapping *gm_mapping = kmem_cache_zalloc(gm_mapping_cachep, GFP_KERNEL); + + if (!gm_mapping) + return NULL; + + gm_mapping_flags_set(gm_mapping, GM_MAPPING_NOMAP); + mutex_init(&gm_mapping->lock); + + return gm_mapping; +} +EXPORT_SYMBOL(alloc_gm_mapping); + +static inline void release_gm_mapping(struct gm_mapping *mapping) +{ + kmem_cache_free(gm_mapping_cachep, mapping); +} + +static inline struct gm_mapping *lookup_gm_mapping(struct vm_object *obj, unsigned long pindex) +{ + return xa_load(obj->logical_page_table, pindex); +} + +int __init vm_object_init(void) +{ + vm_object_cachep = KMEM_CACHE(vm_object, 0); + if (!vm_object_cachep) + goto out; + + gm_mapping_cachep = KMEM_CACHE(gm_mapping, 0); + if (!gm_mapping_cachep) + goto free_vm_object; + + return 0; +free_vm_object: + kmem_cache_destroy(vm_object_cachep); +out: + return -ENOMEM; +} + +/* + * Create a VM_OBJECT and attach it to a VMA + * This should be called when a VMA is created. + */ +struct vm_object *vm_object_create(struct vm_area_struct *vma) +{ + struct vm_object *obj = kmem_cache_alloc(vm_object_cachep, GFP_KERNEL); + + if (!obj) + return NULL; + + spin_lock_init(&obj->lock); + obj->vma = vma; + + /* + * The logical page table maps linear_page_index(obj->vma, va) + * to pointers of struct gm_mapping. 
+ */ + obj->logical_page_table = kmalloc(sizeof(struct xarray), GFP_KERNEL); + if (!obj->logical_page_table) { + kmem_cache_free(vm_object_cachep, obj); + return NULL; + } + + xa_init(obj->logical_page_table); + atomic_set(&obj->nr_pages, 0); + atomic_set(&obj->ref_count, 1); + + return obj; +} + +/* This should be called when a VMA no longer refers to a VM_OBJECT */ +void vm_object_drop_locked(struct vm_area_struct *vma) +{ + struct vm_object *obj = vma->vm_obj; + + if (!obj) { + pr_err("vm_object: vm_obj of the vma is NULL\n"); + return; + } + + /* + * We must enter this with VMA write-locked, which is unfortunately a giant lock. + * Note that Linux 6.0 has per-VMA lock: + * https://lwn.net/Articles/906852/ + * https://lwn.net/Articles/906833/ + */ + free_gm_mappings(vma); + mmap_assert_write_locked(vma->vm_mm); + vma->vm_obj = NULL; + + if (atomic_dec_and_test(&obj->ref_count)) { + xa_destroy(obj->logical_page_table); + kfree(obj->logical_page_table); + kmem_cache_free(vm_object_cachep, obj); + } +} + +void dup_vm_object(struct vm_area_struct *dst, struct vm_area_struct *src, bool dst_peer_shared) +{ + unsigned long index; + struct gm_mapping *mapping; + unsigned long moved_pages = 0; + + if (dst_peer_shared) { + if (!vma_is_peer_shared(dst)) + return; + } else { + if (!vma_is_peer_shared(src)) + return; + } + + XA_STATE(xas, src->vm_obj->logical_page_table, linear_page_index(src, src->vm_start)); + + xa_lock(dst->vm_obj->logical_page_table); + rcu_read_lock(); + xas_for_each(&xas, mapping, linear_page_index(src, src->vm_end)) { + index = xas.xa_index - src->vm_pgoff + dst->vm_pgoff + + ((src->vm_start - dst->vm_start) >> PAGE_SHIFT); + __xa_store(dst->vm_obj->logical_page_table, index, mapping, GFP_KERNEL); + moved_pages++; + } + rcu_read_unlock(); + atomic_add(moved_pages, &dst->vm_obj->nr_pages); + xa_unlock(dst->vm_obj->logical_page_table); +} + +void dup_peer_shared_vma(struct vm_area_struct *vma) +{ + if (vma_is_peer_shared(vma)) { + pr_debug("gmem: peer-shared vma should not be dup\n"); + vma->vm_obj = vm_object_create(vma); + } +} + +/** + * new_vma is part of old_vma, so old_vma->vm_start <= new_vma->vm_start + * and new_vma->vm_end < old_vma->vm_end + */ +void vm_object_split(struct vm_area_struct *old_vma, struct vm_area_struct *new_vma) +{ + unsigned long index; + struct gm_mapping *page; + unsigned long transferred_pages = 0; + pgoff_t pgoff = linear_page_index(old_vma, new_vma->vm_start); + + XA_STATE(xas, old_vma->vm_obj->logical_page_table, pgoff); + + xa_lock(old_vma->vm_obj->logical_page_table); + xa_lock(new_vma->vm_obj->logical_page_table); + xas_for_each(&xas, page, linear_page_index(old_vma, new_vma->vm_end - SZ_2M)) { + index = xas.xa_index - old_vma->vm_pgoff + new_vma->vm_pgoff + - ((new_vma->vm_start - old_vma->vm_start) >> PAGE_SHIFT); + __xa_store(new_vma->vm_obj->logical_page_table, index, page, GFP_KERNEL); + xas_store(&xas, NULL); + transferred_pages++; + } + + atomic_sub(transferred_pages, &old_vma->vm_obj->nr_pages); + atomic_add(transferred_pages, &new_vma->vm_obj->nr_pages); + xa_unlock(new_vma->vm_obj->logical_page_table); + xa_unlock(old_vma->vm_obj->logical_page_table); +} + +void vm_object_merge(struct vm_area_struct *vma, unsigned long addr) +{ + unsigned long index; + struct gm_mapping *page; + struct vm_area_struct *next, *n_next; + unsigned long moved_pages = 0; + pgoff_t pgoff; + + VMA_ITERATOR(vmi, vma->vm_mm, vma->vm_start); + next = vma_next(&vmi); + next = vma_next(&vmi); + if (!next) + return; + + if (addr < vma->vm_end) { + /* case 4: 
move logical mapping in [end, vma->vm_end) from vma to next */ + pgoff = linear_page_index(vma, addr); + XA_STATE(xas, vma->vm_obj->logical_page_table, pgoff); + + xa_lock(vma->vm_obj->logical_page_table); + xa_lock(next->vm_obj->logical_page_table); + xas_for_each(&xas, page, linear_page_index(vma, vma->vm_end - SZ_2M)) { + index = xas.xa_index - vma->vm_pgoff + next->vm_pgoff + - ((next->vm_start - vma->vm_start) >> PAGE_SHIFT); + __xa_store(next->vm_obj->logical_page_table, index, page, GFP_KERNEL); + xas_store(&xas, NULL); + moved_pages++; + } + atomic_sub(moved_pages, &vma->vm_obj->nr_pages); + atomic_add(moved_pages, &next->vm_obj->nr_pages); + xa_unlock(next->vm_obj->logical_page_table); + xa_unlock(vma->vm_obj->logical_page_table); + } else { + n_next = vma_next(&vmi); + + if (addr == next->vm_end) { + /* case 1, 7, 8: copy all logical mappings from next to vma */ + pgoff = linear_page_index(next, next->vm_start); + XA_STATE(xas, next->vm_obj->logical_page_table, pgoff); + + xa_lock(vma->vm_obj->logical_page_table); + rcu_read_lock(); + xas_for_each(&xas, page, linear_page_index(next, next->vm_end - SZ_2M)) { + index = xas.xa_index - next->vm_pgoff + vma->vm_pgoff + + ((next->vm_start - vma->vm_start) >> PAGE_SHIFT); + __xa_store(vma->vm_obj->logical_page_table, index, page, GFP_KERNEL); + xas_store(&xas, NULL); + moved_pages++; + } + rcu_read_unlock(); + atomic_add(moved_pages, &vma->vm_obj->nr_pages); + xa_unlock(vma->vm_obj->logical_page_table); + } else if (next->vm_start < addr && addr < next->vm_end) { + /* case 5: move logical mapping in [next->vm_start, end) from next to vma */ + pgoff = linear_page_index(next, next->vm_start); + XA_STATE(xas, next->vm_obj->logical_page_table, pgoff); + + xa_lock(vma->vm_obj->logical_page_table); + xa_lock(next->vm_obj->logical_page_table); + xas_for_each(&xas, page, linear_page_index(next, addr - SZ_2M)) { + index = xas.xa_index - next->vm_pgoff + vma->vm_pgoff + + ((next->vm_start - vma->vm_start) >> PAGE_SHIFT); + __xa_store(vma->vm_obj->logical_page_table, index, page, GFP_KERNEL); + xas_store(&xas, NULL); + moved_pages++; + } + atomic_add(moved_pages, &vma->vm_obj->nr_pages); + atomic_sub(moved_pages, &next->vm_obj->nr_pages); + xa_unlock(next->vm_obj->logical_page_table); + xa_unlock(vma->vm_obj->logical_page_table); + } else if (n_next && addr == n_next->vm_end) { + /* case 6: copy all logical mappings from next and n_next to vma */ + pgoff = linear_page_index(next, next->vm_start); + XA_STATE(xas_next, next->vm_obj->logical_page_table, pgoff); + pgoff = linear_page_index(n_next, n_next->vm_start); + XA_STATE(xas_n_next, n_next->vm_obj->logical_page_table, pgoff); + + xa_lock(vma->vm_obj->logical_page_table); + rcu_read_lock(); + + xas_for_each(&xas_next, page, linear_page_index(next, next->vm_end - SZ_2M)) { + index = xas_next.xa_index - next->vm_pgoff + vma->vm_pgoff + + ((next->vm_start - vma->vm_start) >> PAGE_SHIFT); + __xa_store(vma->vm_obj->logical_page_table, index, page, GFP_KERNEL); + xas_store(&xas_next, NULL); + moved_pages++; + } + + xas_for_each(&xas_n_next, page, linear_page_index(n_next, n_next->vm_end - SZ_2M)) { + index = xas_n_next.xa_index - n_next->vm_pgoff + vma->vm_pgoff + + ((n_next->vm_start - vma->vm_start) >> PAGE_SHIFT); + __xa_store(vma->vm_obj->logical_page_table, index, page, GFP_KERNEL); + xas_store(&xas_n_next, NULL); + moved_pages++; + } + + rcu_read_unlock(); + atomic_add(moved_pages, &vma->vm_obj->nr_pages); + xa_unlock(vma->vm_obj->logical_page_table); + } + } + /* case 2, 3: do nothing */ +} + 
+void vm_object_adjust(struct vm_area_struct *vma, unsigned long start, unsigned long end)
+{
+	/* remove logical mapping in [vma->vm_start, start) and [end, vma->vm_end) */
+	unsigned long removed_pages = 0;
+	struct gm_mapping *mapping;
+	pgoff_t pgoff = linear_page_index(vma, vma->vm_start);
+
+	XA_STATE(xas, vma->vm_obj->logical_page_table, pgoff);
+
+	xas_lock(&xas);
+	if (vma->vm_start < start) {
+		xas_for_each(&xas, mapping, linear_page_index(vma, start)) {
+			xas_store(&xas, NULL);
+			removed_pages++;
+		}
+	}
+
+	if (vma->vm_end > end) {
+		xas_set(&xas, linear_page_index(vma, end));
+
+		xas_for_each(&xas, mapping, linear_page_index(vma, vma->vm_end)) {
+			xas_store(&xas, NULL);
+			removed_pages++;
+		}
+	}
+	atomic_sub(removed_pages, &vma->vm_obj->nr_pages);
+	xas_unlock(&xas);
+}
+
+/*
+ * Given a VA, the page_index is computed by
+ * page_index = linear_page_index(struct vm_area_struct *vma, unsigned long address)
+ */
+struct gm_mapping *vm_object_lookup(struct vm_object *obj, unsigned long va)
+{
+	return lookup_gm_mapping(obj, linear_page_index(obj->vma, va));
+}
+EXPORT_SYMBOL_GPL(vm_object_lookup);
+
+void vm_object_mapping_create(struct vm_object *obj, unsigned long start)
+{
+	pgoff_t index = linear_page_index(obj->vma, start);
+	struct gm_mapping *gm_mapping;
+
+	gm_mapping = alloc_gm_mapping();
+	if (!gm_mapping)
+		return;
+
+	__xa_store(obj->logical_page_table, index, gm_mapping, GFP_KERNEL);
+}
+
+void free_gm_mappings(struct vm_area_struct *vma)
+{
+	struct gm_mapping *gm_mapping;
+	pgoff_t pgoff = linear_page_index(vma, vma->vm_start);
+	XA_STATE(xas, vma->vm_obj->logical_page_table, pgoff);
+
+	xa_lock(vma->vm_obj->logical_page_table);
+	xas_for_each(&xas, gm_mapping, linear_page_index(vma, vma->vm_end - SZ_2M)) {
+		release_gm_mapping(gm_mapping);
+		xas_store(&xas, NULL);
+	}
+	xa_unlock(vma->vm_obj->logical_page_table);
+}
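
The three-way split in __hmemcpy() in mm/gmem.c is easiest to see with concrete numbers. The standalone program below is not part of the patch: the addresses are made up and a 2 MiB HPAGE_SIZE is assumed; only the ml[] formula and the copy loop are taken from __hmemcpy(). It prints the per-chunk do_hmemcpy() calls and shows that every chunk ends exactly at a huge-page boundary of either the source or the destination, so no single copy crosses a 2 MiB page on either side.

/* Standalone userspace illustration of the chunking done by __hmemcpy(). */
#include <stdio.h>

#define HPAGE_SIZE	(2UL << 20)	/* 2 MiB huge pages assumed */

int main(void)
{
	unsigned long src  = 0x7f0000001000UL;	/* 4 KiB into its 2 MiB page  */
	unsigned long dest = 0x7f1000003000UL;	/* 12 KiB into its 2 MiB page */
	unsigned long size = 5 * HPAGE_SIZE;

	unsigned long src_offset = src & (HPAGE_SIZE - 1);
	unsigned long dst_offset = dest & (HPAGE_SIZE - 1);
	/* same three segment lengths as __hmemcpy(): they always sum to HPAGE_SIZE */
	unsigned long ml[3] = {
		HPAGE_SIZE - (src_offset < dst_offset ? dst_offset : src_offset),
		src_offset < dst_offset ? (dst_offset - src_offset) : (src_offset - dst_offset),
		src_offset < dst_offset ? src_offset : dst_offset
	};
	int i = 0;

	/* same walk as __hmemcpy(), with printf standing in for do_hmemcpy() */
	while (size >= ml[i]) {
		if (ml[i] > 0) {
			printf("copy %#9lx bytes  src %#lx  dest %#lx\n", ml[i], src, dest);
			src += ml[i];
			dest += ml[i];
			size -= ml[i];
		}
		i = (i + 1) % 3;
	}
	if (size > 0)
		printf("copy %#9lx bytes  src %#lx  dest %#lx (tail)\n", size, src, dest);

	return 0;
}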
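
gm_alloc_page() in mm/gmem_phys.c refills an exhausted hnode by calling the device's import_phys_mem() callback and then retrying the freelist. A peripheral driver would typically implement that callback by allocating gm_page descriptors with alloc_gm_page_struct(), filling in the device-side fields, and handing the list to gm_add_pages(). The sketch below is one plausible shape for such a callback, not a reference implementation: the prototype is inferred from the call site in gm_alloc_page(), and my_dev_alloc_chunk()/my_dev_free_chunk() are hypothetical helpers standing in for the driver's own device-memory allocator.

/* Sketch of a driver-side import_phys_mem() implementation (assumptions noted above). */
#include <linux/errno.h>
#include <linux/gmem.h>
#include <linux/list.h>
#include <linux/mm_types.h>
#include <linux/types.h>

/* Hypothetical driver helpers: reserve/release one HPAGE_SIZE chunk of device memory. */
static int my_dev_alloc_chunk(unsigned long *dev_pfn, dma_addr_t *dev_dma_addr);
static void my_dev_free_chunk(unsigned long dev_pfn);

/* Returns 0 on success, after which gm_alloc_page() retries the hnode freelist. */
static int my_import_phys_mem(struct mm_struct *mm, unsigned int hnid, int nr_pages)
{
	LIST_HEAD(pages);
	struct gm_page *gm_page;
	int i, imported = 0;

	for (i = 0; i < nr_pages; i++) {
		unsigned long dev_pfn;
		dma_addr_t dev_dma;

		/* Device-specific reservation of one huge page of device memory. */
		if (my_dev_alloc_chunk(&dev_pfn, &dev_dma))
			break;

		gm_page = alloc_gm_page_struct();
		if (!gm_page) {
			my_dev_free_chunk(dev_pfn);
			break;
		}

		gm_page->hnid = hnid;
		gm_page->dev_pfn = dev_pfn;
		gm_page->dev_dma_addr = dev_dma;
		list_add_tail(&gm_page->gm_page_list, &pages);
		imported++;
	}

	if (!imported)
		return -ENOMEM;

	/* Moves every descriptor onto the hnode freelist and updates nr_free_pages. */
	return gm_add_pages(hnid, &pages);
}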
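
When dup_vm_object() and vm_object_split() in mm/vm_object.c move gm_mapping pointers between two logical page tables, the xa_index rewrite is just a change of base: the destination index equals linear_page_index() of the same virtual address computed against the destination VMA. The standalone program below uses made-up VMA layouts; only PAGE_SHIFT and the re-indexing formula from dup_vm_object() are taken from the patch. It shows that the translated index matches a direct linear_page_index() computation on the destination VMA, so a VA keeps resolving to the same entry after the move.

/* Standalone userspace illustration of the logical-page-table re-indexing. */
#include <stdio.h>

#define PAGE_SHIFT	12

struct demo_vma {
	unsigned long vm_start;
	unsigned long vm_pgoff;
};

/* linear_page_index() as used by the logical page table */
static unsigned long demo_linear_page_index(const struct demo_vma *vma, unsigned long va)
{
	return ((va - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
}

/* The re-indexing step from dup_vm_object(): translate an index that is valid
 * in src's logical page table into the index the same VA gets in dst's table. */
static unsigned long demo_translate_index(unsigned long src_index,
					  const struct demo_vma *src,
					  const struct demo_vma *dst)
{
	return src_index - src->vm_pgoff + dst->vm_pgoff +
	       ((src->vm_start - dst->vm_start) >> PAGE_SHIFT);
}

int main(void)
{
	/* hypothetical anonymous VMAs, both 2 MiB aligned, with vm_pgoff set the
	 * way anonymous mappings usually get it (vm_start >> PAGE_SHIFT) */
	struct demo_vma dst = { .vm_start = 0x200000, .vm_pgoff = 0x200000 >> PAGE_SHIFT };
	struct demo_vma src = { .vm_start = 0x600000, .vm_pgoff = 0x600000 >> PAGE_SHIFT };
	unsigned long va = 0x800000;

	unsigned long src_index = demo_linear_page_index(&src, va);
	unsigned long dst_index = demo_translate_index(src_index, &src, &dst);

	/* dst_index and the direct computation agree, so the gm_mapping entry for
	 * this VA lands at the right slot in the destination xarray. */
	printf("src index %lu -> dst index %lu (expected %lu)\n",
	       src_index, dst_index, demo_linear_page_index(&dst, va));
	return 0;
}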