From b3c0ecfb4c80342218ed0fc74ae2a374024da7a5 Mon Sep 17 00:00:00 2001 From: jiang_jianyu Date: Tue, 25 Aug 2020 21:17:24 +0800 Subject: [PATCH 1/6] support background workers from PG --- src/common/backend/port/unix_latch.cpp | 3 +- src/common/backend/utils/init/globals.cpp | 1 + src/common/backend/utils/init/miscinit.cpp | 45 +- src/common/backend/utils/init/postinit.cpp | 24 +- src/common/backend/utils/misc/guc.cpp | 34 + src/gausskernel/optimizer/commands/async.cpp | 2 - src/gausskernel/process/postmaster/Makefile | 2 +- .../process/postmaster/bgworker.cpp | 1309 +++++++++++++++++ .../process/postmaster/postmaster.cpp | 442 +++++- .../process/threadpool/knl_instance.cpp | 8 + .../process/threadpool/knl_thread.cpp | 6 + src/gausskernel/storage/ipc/ipci.cpp | 3 + src/gausskernel/storage/ipc/procsignal.cpp | 1 + src/gausskernel/storage/lmgr/lwlocknames.txt | 1 + src/gausskernel/storage/lmgr/proc.cpp | 38 +- src/include/gs_thread.h | 1 + .../knl/knl_guc/knl_instance_attr_storage.h | 1 + src/include/knl/knl_instance.h | 8 + src/include/knl/knl_thread.h | 7 + src/include/miscadmin.h | 5 +- src/include/postmaster/bgworker.h | 157 ++ src/include/postmaster/bgworker_internals.h | 64 + src/include/postmaster/postmaster.h | 1 + src/include/storage/pmsignal.h | 1 + src/include/storage/proc.h | 2 + src/include/threadpool/threadpool_worker.h | 1 + src/include/utils/postinit.h | 4 +- 27 files changed, 2130 insertions(+), 41 deletions(-) create mode 100644 src/gausskernel/process/postmaster/bgworker.cpp create mode 100644 src/include/postmaster/bgworker.h create mode 100644 src/include/postmaster/bgworker_internals.h diff --git a/src/common/backend/port/unix_latch.cpp b/src/common/backend/port/unix_latch.cpp index 2a02a0daff..44fd96d5c7 100644 --- a/src/common/backend/port/unix_latch.cpp +++ b/src/common/backend/port/unix_latch.cpp @@ -526,8 +526,9 @@ void ResetLatch(volatile Latch* latch) */ void latch_sigusr1_handler(void) { - if (waiting) + if (waiting) { 
sendSelfPipeByte(); + } } /* Send one byte to the self-pipe, to wake up WaitLatch */ diff --git a/src/common/backend/utils/init/globals.cpp b/src/common/backend/utils/init/globals.cpp index e0818b9a76..e0601f5665 100755 --- a/src/common/backend/utils/init/globals.cpp +++ b/src/common/backend/utils/init/globals.cpp @@ -51,6 +51,7 @@ THR_LOCAL object_access_hook_type object_access_hook = NULL; * These are initialized for the bootstrap/standalone case. */ THR_LOCAL bool IsUnderPostmaster = false; +THR_LOCAL bool IsBackgroundWorker = false; volatile ThreadId PostmasterPid = 0; bool IsPostmasterEnvironment = false; diff --git a/src/common/backend/utils/init/miscinit.cpp b/src/common/backend/utils/init/miscinit.cpp index 1aab5afed5..8eb2842083 100755 --- a/src/common/backend/utils/init/miscinit.cpp +++ b/src/common/backend/utils/init/miscinit.cpp @@ -724,11 +724,11 @@ bool has_rolvcadmin(Oid role_id) /* * Initialize user identity during normal backend startup */ -void InitializeSessionUserId(const char* role_name) +void InitializeSessionUserId(const char* role_name, Oid role_id) { HeapTuple role_tup; Form_pg_authid rform; - Oid role_id; + char* rname = NULL; /* Audit user login */ char details[PGAUDIT_MAXLENGTH]; @@ -744,23 +744,33 @@ void InitializeSessionUserId(const char* role_name) AssertState(!OidIsValid(u_sess->misc_cxt.AuthenticatedUserId)); } - role_tup = SearchSysCache1(AUTHNAME, PointerGetDatum(role_name)); - if (!HeapTupleIsValid(role_tup)) { - /* Audit user login */ - int rcs = snprintf_truncated_s(details, - sizeof(details), - "login db(%s) failed-the role(%s)does not exist", - u_sess->proc_cxt.MyProcPort->database_name, - role_name); - securec_check_ss(rcs, "", ""); - pgaudit_user_login(FALSE, u_sess->proc_cxt.MyProcPort->database_name, details); - - ereport(FATAL, (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION), - errmsg("Invalid username/password,login denied."))); + if (role_name != NULL) { + role_tup = SearchSysCache1(AUTHNAME, 
PointerGetDatum(role_name)); + if (!HeapTupleIsValid(role_tup)) { + /* Audit user login */ + int rcs = snprintf_truncated_s(details, + sizeof(details), + "login db(%s) failed-the role(%s)does not exist", + u_sess->proc_cxt.MyProcPort->database_name, + role_name); + securec_check_ss(rcs, "", ""); + pgaudit_user_login(FALSE, u_sess->proc_cxt.MyProcPort->database_name, details); + + ereport(FATAL, (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION), + errmsg("Invalid username/password,login denied."))); + } + } else { + role_tup = SearchSysCache1(AUTHOID, ObjectIdGetDatum(role_id)); + if (!HeapTupleIsValid(role_tup)) { + ereport(FATAL, + (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION), + errmsg("role with OID %u does not exist", role_id))); + } } rform = (Form_pg_authid)GETSTRUCT(role_tup); role_id = HeapTupleGetOid(role_tup); + rname = NameStr(rform->rolname); u_sess->misc_cxt.AuthenticatedUserId = role_id; u_sess->misc_cxt.AuthenticatedUserIsSuperuser = rform->rolsuper; @@ -832,10 +842,11 @@ void InitializeSessionUserIdStandalone(void) { /* * This function should only be called in single-user mode and in - * autovacuum workers. + * autovacuum workers, and in background workers. 
*/ AssertState(!IsUnderPostmaster || IsAutoVacuumWorkerProcess() || - IsJobSchedulerProcess() || IsJobWorkerProcess() || AM_WAL_SENDER); + IsJobSchedulerProcess() || IsJobWorkerProcess() || AM_WAL_SENDER || + IsBackgroundWorker); /* In pooler stateless reuse mode, to reset session userid */ if (!g_instance.attr.attr_network.PoolerStatelessReuse) { diff --git a/src/common/backend/utils/init/postinit.cpp b/src/common/backend/utils/init/postinit.cpp index b8b4a5fd59..2775810b90 100644 --- a/src/common/backend/utils/init/postinit.cpp +++ b/src/common/backend/utils/init/postinit.cpp @@ -683,7 +683,7 @@ void PostgresResetUsernamePgoption(const char* username) u_sess->proc_cxt.MyProcPort->user_name = (char*)GetSuperUserName((char*)username); } - InitializeSessionUserId(username); + InitializeSessionUserId(username, InvalidOid); am_superuser = superuser(); u_sess->misc_cxt.CurrentUserName = u_sess->proc_cxt.MyProcPort->user_name; } @@ -1059,6 +1059,7 @@ PostgresInitializer::PostgresInitializer() m_indbname = NULL; m_dboid = InvalidOid; m_username = NULL; + m_useroid = InvalidOid; m_isSuperUser = false; m_fullpath = NULL; memset_s(m_dbname, NAMEDATALEN, 0, NAMEDATALEN); @@ -1074,11 +1075,13 @@ PostgresInitializer::~PostgresInitializer() m_username = NULL; } -void PostgresInitializer::SetDatabaseAndUser(const char* in_dbname, Oid dboid, const char* username) +void PostgresInitializer::SetDatabaseAndUser( + const char* in_dbname, Oid dboid, const char* username, Oid useroid) { m_indbname = in_dbname; m_dboid = dboid; m_username = username; + m_useroid = useroid; } void PostgresInitializer::InitBootstrap() @@ -1489,12 +1492,19 @@ void PostgresInitializer::InitSession() StartXact(); - if (IsUnderPostmaster) { - CheckAuthentication(); - InitUser(); - } else { + if (!IsUnderPostmaster) { CheckAtLeastOneRoles(); SetSuperUserStandalone(); + } else if (IsBackgroundWorker) { + if (m_username == NULL && !OidIsValid(m_useroid)) { + InitializeSessionUserIdStandalone(); + m_isSuperUser = 
true; + } else { + InitUser(); + } + } else { + CheckAuthentication(); + InitUser(); } CheckConnPermission(); @@ -1626,7 +1636,7 @@ void PostgresInitializer::SetSuperUserAndDatabase() void PostgresInitializer::InitUser() { - InitializeSessionUserId(m_username); + InitializeSessionUserId(m_username, m_useroid); m_isSuperUser = superuser(); u_sess->misc_cxt.CurrentUserName = u_sess->proc_cxt.MyProcPort->user_name; } diff --git a/src/common/backend/utils/misc/guc.cpp b/src/common/backend/utils/misc/guc.cpp index e0d216deae..a7977f8784 100644 --- a/src/common/backend/utils/misc/guc.cpp +++ b/src/common/backend/utils/misc/guc.cpp @@ -459,6 +459,7 @@ static void assign_statistics_memory(int newval, void* extra); static void assign_history_memory(int newval, void* extra); static bool check_history_memory_limit(int* newval, void** extra, GucSource source); static bool check_autovacuum_max_workers(int* newval, void** extra, GucSource source); +static bool check_max_worker_processes(int* newval, void** extra, GucSource source); static bool check_job_max_workers(int* newval, void** extra, GucSource source); static bool check_effective_io_concurrency(int* newval, void** extra, GucSource source); static void assign_effective_io_concurrency(int newval, void* extra); @@ -7070,6 +7071,23 @@ static void init_configure_names_int() NULL, NULL }, + { + /* see max_connections */ + { + "max_background_workers", + PGC_POSTMASTER, + RESOURCES_ASYNCHRONOUS, + gettext_noop("Maximum number of concurrent background worker processes."), + NULL + }, + &g_instance.attr.attr_storage.max_background_workers, + 8, + 0, + MAX_BACKENDS, + check_max_worker_processes, + NULL, + NULL + }, { { "job_queue_processes", @@ -18748,6 +18766,7 @@ static bool check_maxconnections(int* newval, void** extra, GucSource source) } #endif if (*newval + g_instance.attr.attr_storage.autovacuum_max_workers + g_instance.attr.attr_sql.job_queue_processes + + g_instance.attr.attr_storage.max_background_workers + 
AUXILIARY_BACKENDS + AV_LAUNCHER_PROCS + g_instance.attr.attr_network.maxInnerToolConnections > MAX_BACKENDS) { return false; @@ -18758,6 +18777,7 @@ static bool check_maxconnections(int* newval, void** extra, GucSource source) static bool CheckMaxInnerToolConnections(int* newval, void** extra, GucSource source) { if (*newval + g_instance.attr.attr_storage.autovacuum_max_workers + g_instance.attr.attr_sql.job_queue_processes + + g_instance.attr.attr_storage.max_background_workers + g_instance.attr.attr_network.MaxConnections + AUXILIARY_BACKENDS + AV_LAUNCHER_PROCS > MAX_BACKENDS) { return false; } @@ -18767,6 +18787,7 @@ static bool CheckMaxInnerToolConnections(int* newval, void** extra, GucSource so static bool check_autovacuum_max_workers(int* newval, void** extra, GucSource source) { if (g_instance.attr.attr_network.MaxConnections + *newval + g_instance.attr.attr_sql.job_queue_processes + + g_instance.attr.attr_storage.max_background_workers + AUXILIARY_BACKENDS + AV_LAUNCHER_PROCS + g_instance.attr.attr_network.maxInnerToolConnections > MAX_BACKENDS) { return false; @@ -18774,6 +18795,18 @@ static bool check_autovacuum_max_workers(int* newval, void** extra, GucSource so return true; } +static bool check_max_worker_processes(int* newval, void** extra, GucSource source) +{ + if (g_instance.attr.attr_network.MaxConnections + g_instance.attr.attr_storage.autovacuum_max_workers + + g_instance.attr.attr_sql.job_queue_processes + *newval + + AUXILIARY_BACKENDS + AV_LAUNCHER_PROCS + g_instance.attr.attr_network.maxInnerToolConnections > + MAX_BACKENDS) { + return false; + } + return true; +} + + /* * Description: Check wheth out of max backends after max job worker threads. 
* @@ -18784,6 +18817,7 @@ static bool check_autovacuum_max_workers(int* newval, void** extra, GucSource so static bool check_job_max_workers(int* newval, void** extra, GucSource source) { if (g_instance.attr.attr_network.MaxConnections + g_instance.attr.attr_storage.autovacuum_max_workers + *newval + + g_instance.attr.attr_storage.max_background_workers + AUXILIARY_BACKENDS + AV_LAUNCHER_PROCS + g_instance.attr.attr_network.maxInnerToolConnections > MAX_BACKENDS) { return false; diff --git a/src/gausskernel/optimizer/commands/async.cpp b/src/gausskernel/optimizer/commands/async.cpp index 4668be049c..e04af32a11 100755 --- a/src/gausskernel/optimizer/commands/async.cpp +++ b/src/gausskernel/optimizer/commands/async.cpp @@ -200,8 +200,6 @@ typedef struct QueueBackendStatus { QueuePosition pos; /* backend has read queue up to here */ } QueueBackendStatus; -#define InvalidPid ((ThreadId)(-1)) - /* * Shared memory state for LISTEN/NOTIFY (excluding its SLRU stuff) * diff --git a/src/gausskernel/process/postmaster/Makefile b/src/gausskernel/process/postmaster/Makefile index b2eb420f7f..a491ec9048 100755 --- a/src/gausskernel/process/postmaster/Makefile +++ b/src/gausskernel/process/postmaster/Makefile @@ -32,7 +32,7 @@ ifneq "$(MAKECMDGOALS)" "clean" endif endif OBJS = autovacuum.o bgwriter.o fork_process.o pgarch.o pgstat.o postmaster.o gaussdb_version.o\ - startup.o syslogger.o walwriter.o checkpointer.o pgaudit.o alarmchecker.o \ + startup.o syslogger.o walwriter.o checkpointer.o pgaudit.o alarmchecker.o bgworker.o\ twophasecleaner.o aiocompleter.o fencedudf.o lwlockmonitor.o cbmwriter.o remoteservice.o pagewriter.o\ $(top_builddir)/src/lib/config/libconfig.a diff --git a/src/gausskernel/process/postmaster/bgworker.cpp b/src/gausskernel/process/postmaster/bgworker.cpp new file mode 100644 index 0000000000..580bf35a5c --- /dev/null +++ b/src/gausskernel/process/postmaster/bgworker.cpp @@ -0,0 +1,1309 @@ +/* 
-------------------------------------------------------------------- + * bgworker.cpp + * POSTGRES pluggable background workers implementation + * + * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/gausskernel/process/postmaster/bgworker.cpp + * + * ------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include + +#include "libpq/pqsignal.h" +#include "miscadmin.h" +#include "pgstat.h" +#include "postmaster/bgworker_internals.h" +#include "postmaster/postmaster.h" +#include "storage/ipc.h" +#include "storage/latch.h" +#include "storage/lwlock.h" +#include "storage/pg_shmem.h" +#include "storage/pmsignal.h" +#include "storage/proc.h" +#include "storage/procsignal.h" +#include "storage/shmem.h" +#include "tcop/tcopprot.h" +#include "utils/ascii.h" +#include "utils/ps_status.h" +#include "utils/postinit.h" + +/* + * The postmaster's list of registered background workers, in private memory. + */ +THR_LOCAL slist_head BackgroundWorkerList = SLIST_STATIC_INIT(BackgroundWorkerList); + +/* + * BackgroundWorkerSlots exist in shared memory and can be accessed (via + * the BackgroundWorkerArray) by both the postmaster and by regular backends. + * However, the postmaster cannot take locks, even spinlocks, because this + * might allow it to crash or become wedged if shared memory gets corrupted. + * Such an outcome is intolerable. Therefore, we need a lockless protocol + * for coordinating access to this data. + * + * The 'in_use' flag is used to hand off responsibility for the slot between + * the postmaster and the rest of the system. When 'in_use' is false, + * the postmaster will ignore the slot entirely, except for the 'in_use' flag + * itself, which it may read. In this state, regular backends may modify the + * slot. Once a backend sets 'in_use' to true, the slot becomes the + * responsibility of the postmaster. 
Regular backends may no longer modify it, + * but the postmaster may examine it. Thus, a backend initializing a slot + * must fully initialize the slot - and insert a write memory barrier - before + * marking it as in use. + * + * As an exception, however, even when the slot is in use, regular backends + * may set the 'terminate' flag for a slot, telling the postmaster not + * to restart it. Once the background worker is no longer running, the slot + * will be released for reuse. + * + * In addition to coordinating with the postmaster, backends modifying this + * data structure must coordinate with each other. Since they can take locks, + * this is straightforward: any backend wishing to manipulate a slot must + * take BackgroundWorkerLock in exclusive mode. Backends wishing to read + * data that might get concurrently modified by other backends should take + * this lock in shared mode. No matter what, backends reading this data + * structure must be able to tolerate concurrent modifications by the + * postmaster. + */ +typedef struct BackgroundWorkerSlot { + bool in_use; + bool terminate; + ThreadId pid; /* InvalidPid = not started yet; 0 = dead */ + uint64 generation; /* incremented when slot is recycled */ + BackgroundWorker worker; +} BackgroundWorkerSlot; + +/* + * In order to limit the total number of parallel workers (according to + * max_parallel_workers GUC), we maintain the number of active parallel + * workers. Since the postmaster cannot take locks, two variables are used for + * this purpose: the number of registered parallel workers (modified by the + * backends, protected by BackgroundWorkerLock) and the number of terminated + * parallel workers (modified only by the postmaster, lockless). The active + * number of parallel workers is the number of registered workers minus the + * terminated ones. These counters can of course overflow, but it's not + * important here since the subtraction will still give the right number. 
+ */ +typedef struct BackgroundWorkerArray { + int total_slots; + uint32 parallel_register_count; // For extension only + uint32 parallel_terminate_count; // For extension only + BackgroundWorkerSlot slot[FLEXIBLE_ARRAY_MEMBER]; +} BackgroundWorkerArray; + +struct BackgroundWorkerHandle { + int slot; + uint64 generation; +}; + +/* + * List of internal background worker entry points. We need this for + * reasons explained in LookupBackgroundWorkerFunction(), below. + */ +static const struct { + const char *fn_name; + bgworker_main_type fn_addr; +} InternalBGWorkers[] = + +{ +}; + +/* Private functions. */ +static bgworker_main_type LookupBackgroundWorkerFunction(const char *libraryname, const char *funcname); + +/* + * Calculate shared memory needed. + */ +Size BackgroundWorkerShmemSize(void) +{ + Size size; + + /* Array of workers is variably sized. */ + size = offsetof(BackgroundWorkerArray, slot); + size = add_size(size, mul_size((Size)g_instance.attr.attr_storage.max_background_workers, + sizeof(BackgroundWorkerSlot))); + + return size; +} + +/* + * Initialize shared memory. + */ +void BackgroundWorkerShmemInit(void) +{ + bool found; + + t_thrd.bgworker_cxt.background_worker_data = (BackgroundWorkerArray*)ShmemInitStruct("Background Worker Data", + BackgroundWorkerShmemSize(), + &found); + if (!IsUnderPostmaster) { + slist_iter siter; + int slotno = 0; + + t_thrd.bgworker_cxt.background_worker_data->total_slots = g_instance.attr.attr_storage.max_background_workers; + t_thrd.bgworker_cxt.background_worker_data->parallel_register_count = 0; + t_thrd.bgworker_cxt.background_worker_data->parallel_terminate_count = 0; + + /* + * Copy contents of worker list into shared memory. Record the shared + * memory slot assigned to each worker. This ensures a 1-to-1 + * correspondence between the postmaster's private list and the array + * in shared memory. 
+ */ + slist_foreach(siter, &BackgroundWorkerList) { + BackgroundWorkerSlot *slot = &t_thrd.bgworker_cxt.background_worker_data->slot[slotno]; + RegisteredBgWorker *rw; + + rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur); + Assert(slotno < g_instance.attr.attr_storage.max_background_workers); + slot->in_use = true; + slot->terminate = false; + slot->pid = InvalidPid; + slot->generation = 0; + rw->rw_shmem_slot = slotno; + rw->rw_worker.bgw_notify_pid = 0; /* might be reinit after crash */ + int ss_rc = memcpy_s(&slot->worker, sizeof(BackgroundWorker), &rw->rw_worker, sizeof(BackgroundWorker)); + securec_check(ss_rc, "\0", "\0"); + ++slotno; + } + + /* + * Mark any remaining slots as not in use. + */ + while (slotno < g_instance.attr.attr_storage.max_background_workers) { + BackgroundWorkerSlot *slot = &t_thrd.bgworker_cxt.background_worker_data->slot[slotno]; + + slot->in_use = false; + ++slotno; + } + } else { + Assert(found); + } +} + +/* + * Search the postmaster's backend-private list of RegisteredBgWorker objects + * for the one that maps to the given slot number. + */ +static RegisteredBgWorker * FindRegisteredWorkerBySlotNumber(int slotno) +{ + slist_iter siter; + + slist_foreach(siter, &BackgroundWorkerList) { + RegisteredBgWorker *rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur); + if (rw->rw_shmem_slot == slotno) { + return rw; + } + } + + return NULL; +} + +/* + * Notice changes to shared memory made by other backends. This code + * runs in the postmaster, so we must be very careful not to assume that + * shared memory contents are sane. Otherwise, a rogue backend could take + * out the postmaster. + */ +void BackgroundWorkerStateChange(void) +{ + int slotno; + + /* + * The total number of slots stored in shared memory should match our + * notion of max_background_workers. If it does not, something is very + * wrong. 
Further down, we always refer to this value as + * max_background_workers, in case shared memory gets corrupted while we're + * looping. + */ + if (g_instance.attr.attr_storage.max_background_workers != t_thrd.bgworker_cxt.background_worker_data->total_slots) { + elog(LOG, + "inconsistent background worker state (max_background_workers=%d, total_slots=%d", + g_instance.attr.attr_storage.max_background_workers, + t_thrd.bgworker_cxt.background_worker_data->total_slots); + return; + } + + /* + * Iterate through slots, looking for newly-registered workers or workers + * who must die. + */ + for (slotno = 0; slotno < g_instance.attr.attr_storage.max_background_workers; ++slotno) { + BackgroundWorkerSlot *slot = &t_thrd.bgworker_cxt.background_worker_data->slot[slotno]; + RegisteredBgWorker *rw = NULL; + + if (!slot->in_use) { + continue; + } + + /* + * Make sure we don't see the in_use flag before the updated slot + * contents. + */ + pg_read_barrier(); + + /* See whether we already know about this worker. */ + rw = FindRegisteredWorkerBySlotNumber(slotno); + if (rw != NULL) { + /* + * In general, the worker data can't change after it's initially + * registered. However, someone can set the terminate flag. + */ + if (slot->terminate && !rw->rw_terminate) { + rw->rw_terminate = true; + if (rw->rw_pid != 0) { + if (gs_signal_send(rw->rw_pid, SIGTERM) != 0) { + ereport(WARNING, + (errmsg("sending SIGTERM to %lu failed", rw->rw_pid))); + } + } else { + /* Report never-started, now-terminated worker as dead. */ + ReportBackgroundWorkerPID(rw); + } + } + continue; + } + + /* + * If the worker is marked for termination, we don't need to add it to + * the registered workers list; we can just free the slot. However, if + * bgw_notify_pid is set, the process that registered the worker may + * need to know that we've processed the terminate request, so be sure + * to signal it. 
+ */ + if (slot->terminate) { + /* + * We need a memory barrier here to make sure that the load of + * bgw_notify_pid and the update of parallel_terminate_count + * complete before the store to in_use. + */ + ThreadId notify_pid = slot->worker.bgw_notify_pid; + if ((slot->worker.bgw_flags & BGWORKER_CLASS_PARALLEL) != 0) { + t_thrd.bgworker_cxt.background_worker_data->parallel_terminate_count++; + } + pg_memory_barrier(); + slot->pid = 0; + slot->in_use = false; + if (notify_pid != 0) { + if (gs_signal_send(notify_pid, SIGUSR1) != 0) { + ereport(WARNING, + (errmsg("sending SIGUSR1 to %lu failed", notify_pid))); + } + } + + continue; + } + + /* + * Copy the registration data into the registered workers list. + */ + rw = (RegisteredBgWorker*)malloc(sizeof(RegisteredBgWorker)); + if (rw == NULL) { + ereport(LOG, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); + return; + } + + /* + * Copy strings in a paranoid way. If shared memory is corrupted, the + * source data might not even be NUL-terminated. + */ + ascii_safe_strlcpy(rw->rw_worker.bgw_name, + slot->worker.bgw_name, BGW_MAXLEN); + ascii_safe_strlcpy(rw->rw_worker.bgw_type, + slot->worker.bgw_type, BGW_MAXLEN); + ascii_safe_strlcpy(rw->rw_worker.bgw_library_name, + slot->worker.bgw_library_name, BGW_MAXLEN); + ascii_safe_strlcpy(rw->rw_worker.bgw_function_name, + slot->worker.bgw_function_name, BGW_MAXLEN); + + /* + * Copy various fixed-size fields. + * + * flags, start_time, and restart_time are examined by the postmaster, + * but nothing too bad will happen if they are corrupted. The + * remaining fields will only be examined by the child process. It + * might crash, but we won't. 
+ */ + rw->rw_worker.bgw_flags = slot->worker.bgw_flags; + rw->rw_worker.bgw_start_time = slot->worker.bgw_start_time; + rw->rw_worker.bgw_restart_time = slot->worker.bgw_restart_time; + rw->rw_worker.bgw_main_arg = slot->worker.bgw_main_arg; + int ss_rc = memcpy_s(rw->rw_worker.bgw_extra, BGW_EXTRALEN, slot->worker.bgw_extra, BGW_EXTRALEN); + securec_check(ss_rc, "\0", "\0"); + + /* + * Copy the PID to be notified about state changes, but only if the + * postmaster knows about a backend with that PID. It isn't an error + * if the postmaster doesn't know about the PID, because the backend + * that requested the worker could have died (or been killed) just + * after doing so. Nonetheless, at least until we get some experience + * with how this plays out in the wild, log a message at a relative + * high debug level. + */ + rw->rw_worker.bgw_notify_pid = slot->worker.bgw_notify_pid; + if (!PostmasterMarkPIDForWorkerNotify(rw->rw_worker.bgw_notify_pid)) { + elog(DEBUG1, "worker notification PID %lu is not valid", + rw->rw_worker.bgw_notify_pid); + rw->rw_worker.bgw_notify_pid = 0; + } + + /* Initialize postmaster bookkeeping. */ + rw->rw_backend = NULL; + rw->rw_pid = 0; + rw->rw_child_slot = 0; + rw->rw_crashed_at = 0; + rw->rw_shmem_slot = slotno; + rw->rw_terminate = false; + + /* Log it! */ + ereport(DEBUG1, + (errmsg("registering background worker \"%s\"", + rw->rw_worker.bgw_name))); + + slist_push_head(&BackgroundWorkerList, &rw->rw_lnode); + } +} + +/* + * Forget about a background worker that's no longer needed. + * + * The worker must be identified by passing an slist_mutable_iter that + * points to it. This convention allows deletion of workers during + * searches of the worker list, and saves having to search the list again. + * + * This function must be invoked only in the postmaster. 
+ */ +void ForgetBackgroundWorker(slist_mutable_iter *cur) +{ + RegisteredBgWorker *rw = NULL; + BackgroundWorkerSlot *slot = NULL; + + rw = slist_container(RegisteredBgWorker, rw_lnode, cur->cur); + + Assert(rw->rw_shmem_slot < g_instance.attr.attr_storage.max_background_workers); + slot = &t_thrd.bgworker_cxt.background_worker_data->slot[rw->rw_shmem_slot]; + if ((rw->rw_worker.bgw_flags & BGWORKER_CLASS_PARALLEL) != 0) { + t_thrd.bgworker_cxt.background_worker_data->parallel_terminate_count++; + } + + slot->in_use = false; + + ereport(DEBUG1, + (errmsg("unregistering background worker \"%s\"", + rw->rw_worker.bgw_name))); + + slist_delete_current(cur); + free(rw); +} + +/* + * Report the PID of a newly-launched background worker in shared memory. + * + * This function should only be called from the postmaster. + */ +void ReportBackgroundWorkerPID(const RegisteredBgWorker *rw) +{ + BackgroundWorkerSlot *slot; + + Assert(rw->rw_shmem_slot < g_instance.attr.attr_storage.max_background_workers); + slot = &t_thrd.bgworker_cxt.background_worker_data->slot[rw->rw_shmem_slot]; + slot->pid = rw->rw_pid; + ereport(LOG, + (errmsg("ReportBackgroundWorkerPID slot: %d, pid: %lu, bgw_notify_pid: %lu", + rw->rw_shmem_slot, slot->pid, rw->rw_worker.bgw_notify_pid))); + + if (rw->rw_worker.bgw_notify_pid != 0) { + int ret = gs_signal_send(rw->rw_worker.bgw_notify_pid, SIGUSR1); + ereport(LOG, + (errmsg("ReportBackgroundWorkerPID send SIGUSR1 to bgw_notify_pid: %lu, ret: %d", + rw->rw_worker.bgw_notify_pid, ret))); + } +} + +/* + * Report that the PID of a background worker is now zero because a + * previously-running background worker has exited. + * + * This function should only be called from the postmaster. 
+ */ +void ReportBackgroundWorkerExit(slist_mutable_iter *cur) +{ + RegisteredBgWorker *rw = slist_container(RegisteredBgWorker, rw_lnode, cur->cur); + + Assert(rw->rw_shmem_slot < g_instance.attr.attr_storage.max_background_workers); + BackgroundWorkerSlot *slot = &t_thrd.bgworker_cxt.background_worker_data->slot[rw->rw_shmem_slot]; + slot->pid = rw->rw_pid; + ThreadId notify_pid = rw->rw_worker.bgw_notify_pid; + + /* + * If this worker is slated for deregistration, do that before notifying + * the process which started it. Otherwise, if that process tries to + * reuse the slot immediately, it might not be available yet. In theory + * that could happen anyway if the process checks slot->pid at just the + * wrong moment, but this makes the window narrower. + */ + if (rw->rw_terminate || + rw->rw_worker.bgw_restart_time == BGW_NEVER_RESTART) { + ForgetBackgroundWorker(cur); + } + + if (notify_pid != 0) { + int ret = gs_signal_send(notify_pid, SIGUSR1); + ereport(LOG, + (errmsg("ReportBackgroundWorkerExit send SIGUSR1 to bgw_notify_pid: %lu, ret: %d", + notify_pid, ret))); + } +} + +/* + * Cancel SIGUSR1 notifications for a PID belonging to an exiting backend. + * + * This function should only be called from the postmaster. + */ +void BackgroundWorkerStopNotifications(ThreadId pid) +{ + slist_iter siter; + + slist_foreach(siter, &BackgroundWorkerList) + { + RegisteredBgWorker *rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur); + if (rw->rw_worker.bgw_notify_pid == pid) { + rw->rw_worker.bgw_notify_pid = 0; + } + } +} + +/* + * Reset background worker crash state. + * + * We assume that, after a crash-and-restart cycle, background workers without + * the never-restart flag should be restarted immediately, instead of waiting + * for bgw_restart_time to elapse. 
+ */ +void ResetBackgroundWorkerCrashTimes(void) +{ + slist_mutable_iter iter; + + slist_foreach_modify(iter, &BackgroundWorkerList) + { + RegisteredBgWorker *rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur); + + if (rw->rw_worker.bgw_restart_time == BGW_NEVER_RESTART) { + /* + * Workers marked BGW_NEVER_RESTART shouldn't get relaunched after + * the crash, so forget about them. (If we wait until after the + * crash to forget about them, and they are parallel workers, + * parallel_terminate_count will get incremented after we've + * already zeroed parallel_register_count, which would be bad.) + */ + ForgetBackgroundWorker(&iter); + } else { + /* + * The accounting which we do via parallel_register_count and + * parallel_terminate_count would get messed up if a worker marked + * parallel could survive a crash and restart cycle. All such + * workers should be marked BGW_NEVER_RESTART, and thus control + * should never reach this branch. + */ + Assert((rw->rw_worker.bgw_flags & BGWORKER_CLASS_PARALLEL) == 0); + + /* + * Allow this worker to be restarted immediately after we finish + * resetting. + */ + rw->rw_crashed_at = 0; + } + } +} + +#ifdef EXEC_BACKEND +/* + * In EXEC_BACKEND mode, return address of the corresponding slot in + * shared memory. + */ +void* GetBackgroundWorkerShmAddr(int slotno) +{ + Assert(slotno < t_thrd.bgworker_cxt.background_worker_data->total_slots); + return (void*)&t_thrd.bgworker_cxt.background_worker_data->slot[slotno]; +} + +/* + * In EXEC_BACKEND mode, workers use this to retrieve their details from + * shared memory. 
+ */ +BackgroundWorker* BackgroundWorkerEntry(const BackgroundWorkerSlot* bgWorkerSlotShmAddr) +{ + static THR_LOCAL BackgroundWorker myEntry; + + Assert(bgWorkerSlotShmAddr != NULL); + Assert(bgWorkerSlotShmAddr->in_use); + + /* must copy this in case we don't intend to retain shmem access */ + int ss_rc = memcpy_s(&myEntry, sizeof(myEntry), &bgWorkerSlotShmAddr->worker, sizeof(myEntry)); + securec_check(ss_rc, "\0", "\0"); + return &myEntry; +} +#endif + +/* + * Complain about the BackgroundWorker definition using error level elevel. + * Return true if it looks ok, false if not (unless elevel >= ERROR, in + * which case we won't return at all in the not-OK case). + */ +static bool SanityCheckBackgroundWorker(BackgroundWorker *worker, int elevel) +{ + /* sanity check for flags */ + if (worker->bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION) { + if (!(worker->bgw_flags & BGWORKER_SHMEM_ACCESS)) { + ereport(elevel, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("background worker \"%s\": must attach to shared memory in order to request a database connection", + worker->bgw_name))); + return false; + } + + if (worker->bgw_start_time == BgWorkerStart_PostmasterStart) { + ereport(elevel, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("background worker \"%s\": cannot request database access if starting at postmaster start", + worker->bgw_name))); + return false; + } + + /* XXX other checks? */ + } + + if ((worker->bgw_restart_time < 0 && + worker->bgw_restart_time != BGW_NEVER_RESTART) || + (worker->bgw_restart_time > USECS_PER_DAY / 1000)) { + ereport(elevel, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("background worker \"%s\": invalid restart interval", + worker->bgw_name))); + return false; + } + + /* + * Parallel workers may not be configured for restart, because the + * parallel_register_count/parallel_terminate_count accounting can't + * handle parallel workers lasting through a crash-and-restart cycle. 
+ */ + if (worker->bgw_restart_time != BGW_NEVER_RESTART && + (worker->bgw_flags & BGWORKER_CLASS_PARALLEL) != 0) { + ereport(elevel, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("background worker \"%s\": parallel workers may not be configured for restart", + worker->bgw_name))); + return false; + } + + /* + * If bgw_type is not filled in, use bgw_name. + */ + if (strcmp(worker->bgw_type, "") == 0) { + int rd = strncpy_s(worker->bgw_type, BGW_MAXLEN, worker->bgw_name, BGW_MAXLEN); + securec_check(rd, "\0", "\0"); + } + + return true; +} + +static void bgworker_quickdie(SIGNAL_ARGS) +{ + /* + * We DO NOT want to run proc_exit() or atexit() callbacks -- we're here + * because shared memory may be corrupted, so we don't want to try to + * clean up our transaction. Just nail the windows shut and get out of + * town. The callbacks wouldn't be safe to run from a signal handler, + * anyway. + * + * Note we do _exit(2) not _exit(0). This is to force the postmaster into + * a system reset cycle if someone sends a manual SIGQUIT to a random + * backend. This is necessary precisely because we don't clean up our + * shared memory state. (The "dead man switch" mechanism in pmsignal.c + * should ensure the postmaster sees this as a crash, too, but no harm in + * being doubly sure.) + */ + _exit(2); +} + +/* + * Standard SIGTERM handler for background workers + */ +static void bgworker_die(SIGNAL_ARGS) +{ + (void)PG_SETMASK(&t_thrd.libpq_cxt.BlockSig); + + ereport(FATAL, + (errcode(ERRCODE_ADMIN_SHUTDOWN), + errmsg("terminating background worker \"%s\" due to administrator command", + t_thrd.bgworker_cxt.my_bgworker_entry->bgw_type))); +} + +/* + * Standard SIGUSR1 handler for unconnected workers + * + * Here, we want to make sure an unconnected worker will at least heed + * latch activity. 
+ */
+static void bgworker_sigusr1_handler(SIGNAL_ARGS)
+{
+    int save_errno = errno;
+
+    latch_sigusr1_handler();
+
+    errno = save_errno;
+}
+
+/*
+ * Start a new background worker
+ *
+ * This is the main entry point for background worker, to be called from
+ * postmaster.
+ */
+void StartBackgroundWorker(void* bgWorkerSlotShmAddr)
+{
+    sigjmp_buf local_sigjmp_buf;
+    t_thrd.bgworker_cxt.my_bgworker_entry = BackgroundWorkerEntry((BackgroundWorkerSlot *)bgWorkerSlotShmAddr);
+    BackgroundWorker *worker = t_thrd.bgworker_cxt.my_bgworker_entry;
+    bgworker_main_type entrypt;
+
+    /*
+     * Create memory context and buffer used for RowDescription messages. As
+     * SendRowDescriptionMessage(), via exec_describe_statement_message(), is
+     * frequently executed for every single statement, we don't want to
+     * allocate a separate buffer every time.
+     */
+    t_thrd.mem_cxt.row_desc_mem_cxt = AllocSetContextCreate(t_thrd.top_mem_cxt,
+        "RowDescriptionContext",
+        ALLOCSET_DEFAULT_MINSIZE,
+        ALLOCSET_DEFAULT_INITSIZE,
+        ALLOCSET_DEFAULT_MAXSIZE);
+    MemoryContext old_mc = MemoryContextSwitchTo(t_thrd.mem_cxt.row_desc_mem_cxt);
+    initStringInfo(&(*t_thrd.postgres_cxt.row_description_buf));
+    (void)MemoryContextSwitchTo(old_mc);
+
+    t_thrd.mem_cxt.mask_password_mem_cxt = AllocSetContextCreate(t_thrd.top_mem_cxt,
+        "MaskPasswordCtx",
+        ALLOCSET_DEFAULT_MINSIZE,
+        ALLOCSET_DEFAULT_INITSIZE,
+        ALLOCSET_DEFAULT_MAXSIZE);
+
+    if (worker == NULL) {
+        ereport(FATAL,
+            (errmsg("unable to find bgworker entry")));
+    }
+
+    IsBackgroundWorker = true;
+
+    /* Identify myself via ps */
+    init_ps_display(worker->bgw_name, "", "", "");
+
+    SetProcessingMode(InitProcessing);
+
+    /*
+     * Set up signal handlers.
+ */ + if (worker->bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION) { + /* + * SIGINT is used to signal canceling the current action + */ + (void)gspqsignal(SIGINT, StatementCancelHandler); + (void)gspqsignal(SIGUSR1, procsignal_sigusr1_handler); + (void)gspqsignal(SIGFPE, FloatExceptionHandler); + + /* XXX Any other handlers needed here? */ + } else { + (void)gspqsignal(SIGINT, SIG_IGN); + (void)gspqsignal(SIGUSR1, bgworker_sigusr1_handler); + (void)gspqsignal(SIGFPE, SIG_IGN); + } + (void)gspqsignal(SIGTERM, bgworker_die); + (void)gspqsignal(SIGHUP, SIG_IGN); + + (void)gspqsignal(SIGQUIT, bgworker_quickdie); + (void)gspqsignal(SIGALRM, handle_sig_alarm); + + (void)gspqsignal(SIGPIPE, SIG_IGN); + (void)gspqsignal(SIGUSR2, SIG_IGN); + (void)gspqsignal(SIGCHLD, SIG_DFL); + + /* + * If an exception is encountered, processing resumes here. + * + * See notes in postgres.c about the design of this coding. + */ + if (sigsetjmp(local_sigjmp_buf, 1) != 0) { + /* Since not using PG_TRY, must reset error stack by hand */ + t_thrd.log_cxt.error_context_stack = NULL; + + /* Prevent interrupts while cleaning up */ + HOLD_INTERRUPTS(); + + /* Report the error to the server log */ + EmitErrorReport(); + + /* + * Do we need more cleanup here? For shmem-connected bgworkers, we + * will call InitProcess below, which will install ProcKill as exit + * callback. That will take care of releasing locks, etc. + */ + + /* and go away */ + proc_exit(1); + } + + /* We can now handle ereport(ERROR) */ + t_thrd.log_cxt.PG_exception_stack = &local_sigjmp_buf; + + /* + * If the background worker request shared memory access, set that up now; + * else, detach all shared memory segments. + */ + if (worker->bgw_flags & BGWORKER_SHMEM_ACCESS) { + /* + * Early initialization. Some of this could be useful even for + * background workers that aren't using shared memory, but they can + * call the individual startup routines for those subsystems if + * needed. 
+ */ + BaseInit(); + + /* + * Create a per-backend PGPROC struct in shared memory, except in the + * EXEC_BACKEND case where this was done in SubPostmasterMain. We must + * do this before we can use LWLocks (and in the EXEC_BACKEND case we + * already had to do some stuff with LWLocks). + */ +#ifndef EXEC_BACKEND + InitProcess(); +#endif + } + + /* + * Look up the entry point function, loading its library if necessary. + */ + entrypt = LookupBackgroundWorkerFunction(worker->bgw_library_name, + worker->bgw_function_name); + + /* + * Note that in normal processes, we would call InitPostgres here. For a + * worker, however, we don't know what database to connect to, yet; so we + * need to wait until the user code does it via + * BackgroundWorkerInitializeConnection(). + */ + + /* + * Now invoke the user-defined worker code + */ + entrypt(worker->bgw_main_arg); + + /* ... and if it returns, we're done */ + proc_exit(0); +} + +/* + * Register a new static background worker. + * + * This can only be called directly from postmaster or in the _PG_init + * function of a module library that's loaded by shared_preload_libraries; + * otherwise it will have no effect. 
+ */ +void RegisterBackgroundWorker(BackgroundWorker *worker) +{ + RegisteredBgWorker *rw; + static THR_LOCAL int numworkers = 0; + + if (!IsUnderPostmaster) { + ereport(DEBUG1, + (errmsg("registering background worker \"%s\"", worker->bgw_name))); + } + + if (!u_sess->misc_cxt.process_shared_preload_libraries_in_progress && + strcmp(worker->bgw_library_name, "postgres") != 0) { + if (!IsUnderPostmaster) { + ereport(LOG, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("background worker \"%s\": must be registered in shared_preload_libraries", + worker->bgw_name))); + } + return; + } + + if (!SanityCheckBackgroundWorker(worker, LOG)) { + return; + } + + if (worker->bgw_notify_pid != 0) { + ereport(LOG, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("background worker \"%s\": only dynamic background workers can request notification", + worker->bgw_name))); + return; + } + + /* + * Enforce maximum number of workers. Note this is overly restrictive: we + * could allow more non-shmem-connected workers, because these don't count + * towards the MAX_BACKENDS limit elsewhere. For now, it doesn't seem + * important to relax this restriction. + */ + if (++numworkers > g_instance.attr.attr_storage.max_background_workers) { + ereport(LOG, + (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED), + errmsg("too many background workers"), + errdetail_plural("Up to %d background worker can be registered with the current settings.", + "Up to %d background workers can be registered with the current settings.", + g_instance.attr.attr_storage.max_background_workers, + g_instance.attr.attr_storage.max_background_workers), + errhint("Consider increasing the configuration parameter \"max_background_workers\"."))); + return; + } + + /* + * Copy the registration data into the registered workers list. 
+ */ + rw = (RegisteredBgWorker*)malloc(sizeof(RegisteredBgWorker)); + if (rw == NULL) { + ereport(LOG, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); + return; + } + + rw->rw_worker = *worker; + rw->rw_backend = NULL; + rw->rw_pid = 0; + rw->rw_child_slot = 0; + rw->rw_crashed_at = 0; + rw->rw_terminate = false; + + slist_push_head(&BackgroundWorkerList, &rw->rw_lnode); +} + +/* + * Register a new background worker from a regular backend. + * + * Returns true on success and false on failure. Failure typically indicates + * that no background worker slots are currently available. + * + * If handle != NULL, we'll set *handle to a pointer that can subsequently + * be used as an argument to GetBackgroundWorkerPid(). The caller can + * free this pointer using pfree(), if desired. + */ +bool RegisterDynamicBackgroundWorker(BackgroundWorker *worker, + BackgroundWorkerHandle **handle) +{ + int slotno; + bool success = false; + bool parallel; + uint64 generation = 0; + + /* + * We can't register dynamic background workers from the postmaster. If + * this is a standalone backend, we're the only process and can't start + * any more. In a multi-process environment, it might be theoretically + * possible, but we don't currently support it due to locking + * considerations; see comments on the BackgroundWorkerSlot data + * structure. + */ + if (!IsUnderPostmaster) { + return false; + } + + if (!SanityCheckBackgroundWorker(worker, ERROR)) { + return false; + } + + parallel = (worker->bgw_flags & BGWORKER_CLASS_PARALLEL) != 0; + + (void)LWLockAcquire(BackgroundWorkerLock, LW_EXCLUSIVE); + + /* + * If this is a parallel worker, check whether there are already too many + * parallel workers; if so, don't register another one. Our view of + * parallel_terminate_count may be slightly stale, but that doesn't really + * matter: we would have gotten the same result if we'd arrived here + * slightly earlier anyway. 
There's no help for it, either, since the + * postmaster must not take locks; a memory barrier wouldn't guarantee + * anything useful. + */ + if (parallel && (int)(t_thrd.bgworker_cxt.background_worker_data->parallel_register_count - + t_thrd.bgworker_cxt.background_worker_data->parallel_terminate_count) >= + g_instance.shmem_cxt.max_parallel_workers) { + Assert(t_thrd.bgworker_cxt.background_worker_data->parallel_register_count - + t_thrd.bgworker_cxt.background_worker_data->parallel_terminate_count <= + MAX_PARALLEL_WORKER_LIMIT); + LWLockRelease(BackgroundWorkerLock); + return false; + } + + /* + * Look for an unused slot. If we find one, grab it. + */ + for (slotno = 0; slotno < t_thrd.bgworker_cxt.background_worker_data->total_slots; ++slotno) { + BackgroundWorkerSlot *slot = &t_thrd.bgworker_cxt.background_worker_data->slot[slotno]; + + if (!slot->in_use) { + int ss_rc = memcpy_s(&slot->worker, sizeof(BackgroundWorker), worker, sizeof(BackgroundWorker)); + securec_check(ss_rc, "\0", "\0"); + slot->pid = InvalidPid; /* indicates not started yet */ + slot->generation++; + slot->terminate = false; + generation = slot->generation; + if (parallel) + t_thrd.bgworker_cxt.background_worker_data->parallel_register_count++; + + /* + * Make sure postmaster doesn't see the slot as in use before it + * sees the new contents. + */ + pg_write_barrier(); + + slot->in_use = true; + success = true; + break; + } + } + + LWLockRelease(BackgroundWorkerLock); + + /* If we found a slot, tell the postmaster to notice the change. */ + if (success) { + SendPostmasterSignal(PMSIGNAL_BACKGROUND_WORKER_CHANGE); + } + + /* + * If we found a slot and the user has provided a handle, initialize it. + */ + if (success && handle) { + *handle = (BackgroundWorkerHandle*)palloc(sizeof(BackgroundWorkerHandle)); + (*handle)->slot = slotno; + (*handle)->generation = generation; + } + + return success; +} + +/* + * Get the PID of a dynamically-registered background worker. 
+ * + * If the worker is determined to be running, the return value will be + * BGWH_STARTED and *pidp will get the PID of the worker process. If the + * postmaster has not yet attempted to start the worker, the return value will + * be BGWH_NOT_YET_STARTED. Otherwise, the return value is BGWH_STOPPED. + * + * BGWH_STOPPED can indicate either that the worker is temporarily stopped + * (because it is configured for automatic restart and exited non-zero), + * or that the worker is permanently stopped (because it exited with exit + * code 0, or was not configured for automatic restart), or even that the + * worker was unregistered without ever starting (either because startup + * failed and the worker is not configured for automatic restart, or because + * TerminateBackgroundWorker was used before the worker was successfully + * started). + */ +BgwHandleStatus GetBackgroundWorkerPid(const BackgroundWorkerHandle *handle, ThreadId *pidp) +{ + ThreadId pid = InvalidPid; + + Assert(handle->slot < g_instance.attr.attr_storage.max_background_workers); + BackgroundWorkerSlot* slot = &t_thrd.bgworker_cxt.background_worker_data->slot[handle->slot]; + + /* + * We could probably arrange to synchronize access to data using memory + * barriers only, but for now, let's just keep it simple and grab the + * lock. It seems unlikely that there will be enough traffic here to + * result in meaningful contention. + */ + (void)LWLockAcquire(BackgroundWorkerLock, LW_SHARED); + + /* + * The generation number can't be concurrently changed while we hold the + * lock. The pid, which is updated by the postmaster, can change at any + * time, but we assume such changes are atomic. So the value we read + * won't be garbage, but it might be out of date by the time the caller + * examines it (but that's unavoidable anyway). + * + * The in_use flag could be in the process of changing from true to false, + * but if it is already false then it can't change further. 
+ */
+    if (handle->generation != slot->generation || !slot->in_use) {
+        pid = 0;
+    } else {
+        pid = slot->pid;
+    }
+
+    /* All done. */
+    LWLockRelease(BackgroundWorkerLock);
+
+    ereport(LOG,
+        (errmsg("GetBackgroundWorkerPid slot: %d, pid: %lu",
+            handle->slot, pid)));
+    if (pid == 0) {
+        return BGWH_STOPPED;
+    } else if (pid == InvalidPid) {
+        return BGWH_NOT_YET_STARTED;
+    }
+    *pidp = pid;
+    return BGWH_STARTED;
+}
+
+/*
+ * Wait for a background worker to start up.
+ *
+ * This is like GetBackgroundWorkerPid(), except that if the worker has not
+ * yet started, we wait for it to do so; thus, BGWH_NOT_YET_STARTED is never
+ * returned. However, if the postmaster has died, we give up and return
+ * BGWH_POSTMASTER_DIED, since in that case we know that startup will not
+ * take place.
+ */
+BgwHandleStatus WaitForBackgroundWorkerStartup(const BackgroundWorkerHandle *handle, ThreadId *pidp)
+{
+    BgwHandleStatus status;
+    int rc;
+    volatile knl_thrd_context* localThrd = &t_thrd;
+
+    for (;;) {
+        ThreadId pid = 0;
+
+        CHECK_FOR_INTERRUPTS();
+
+        status = GetBackgroundWorkerPid(handle, &pid);
+        ereport(LOG,
+            (errmsg("WaitForBackgroundWorkerStartup slot: %d, pid: %lu, status: %u, mypid: %lu",
+                handle->slot, pid, status, t_thrd.proc_cxt.MyProcPid)));
+        ereport(LOG,
+            (errmsg("WaitForBackgroundWorkerStartup addr: %p", localThrd)));
+        if (status == BGWH_STARTED) {
+            *pidp = pid;
+        }
+        if (status != BGWH_NOT_YET_STARTED) {
+            break;
+        }
+
+        rc = WaitLatch(&t_thrd.proc->procLatch,
+            WL_LATCH_SET | WL_POSTMASTER_DEATH, 0);
+
+        if (rc & WL_POSTMASTER_DEATH) {
+            status = BGWH_POSTMASTER_DIED;
+            break;
+        }
+
+        ResetLatch(&t_thrd.proc->procLatch);
+    }
+
+    return status;
+}
+
+/*
+ * Wait for a background worker to stop.
+ *
+ * If the worker hasn't yet started, or is running, we wait for it to stop
+ * and then return BGWH_STOPPED.
However, if the postmaster has died, we give
+ * up and return BGWH_POSTMASTER_DIED, because it's the postmaster that
+ * notifies us when a worker's state changes.
+ */
+BgwHandleStatus WaitForBackgroundWorkerShutdown(const BackgroundWorkerHandle *handle)
+{
+    BgwHandleStatus status;
+    int rc;
+
+    for (;;) {
+        ThreadId pid = InvalidPid;
+
+        CHECK_FOR_INTERRUPTS();
+
+        status = GetBackgroundWorkerPid(handle, &pid);
+        if (status == BGWH_STOPPED) {
+            break;
+        }
+
+        rc = WaitLatch(&t_thrd.proc->procLatch,
+            WL_LATCH_SET | WL_POSTMASTER_DEATH, 0);
+
+        if (rc & WL_POSTMASTER_DEATH) {
+            status = BGWH_POSTMASTER_DIED;
+            break;
+        }
+
+        ResetLatch(&t_thrd.proc->procLatch);
+    }
+
+    return status;
+}
+
+/*
+ * Instruct the postmaster to terminate a background worker.
+ *
+ * Note that it's safe to do this without regard to whether the worker is
+ * still running, or even if the worker may already have exited and been
+ * unregistered.
+ */
+void TerminateBackgroundWorker(const BackgroundWorkerHandle *handle)
+{
+    bool signal_postmaster = false;
+
+    Assert(handle->slot < g_instance.attr.attr_storage.max_background_workers);
+    BackgroundWorkerSlot* slot = &t_thrd.bgworker_cxt.background_worker_data->slot[handle->slot];
+
+    /* Set terminate flag in shared memory, unless slot has been reused. */
+    (void)LWLockAcquire(BackgroundWorkerLock, LW_EXCLUSIVE);
+    if (handle->generation == slot->generation) {
+        slot->terminate = true;
+        signal_postmaster = true;
+    }
+    LWLockRelease(BackgroundWorkerLock);
+
+    /* Make sure the postmaster notices the change to shared memory. */
+    if (signal_postmaster) {
+        SendPostmasterSignal(PMSIGNAL_BACKGROUND_WORKER_CHANGE);
+    }
+}
+
+/*
+ * Look up (and possibly load) a bgworker entry point function.
+ *
+ * For functions contained in the core code, we use library name "postgres"
+ * and consult the InternalBGWorkers array.  External functions are
+ * looked up, and loaded if necessary, using load_external_function().
+ * + * The point of this is to pass function names as strings across process + * boundaries. We can't pass actual function addresses because of the + * possibility that the function has been loaded at a different address + * in a different process. This is obviously a hazard for functions in + * loadable libraries, but it can happen even for functions in the core code + * on platforms using EXEC_BACKEND (e.g., Windows). + * + * At some point it might be worthwhile to get rid of InternalBGWorkers[] + * in favor of applying load_external_function() for core functions too; + * but that raises portability issues that are not worth addressing now. + */ +static bgworker_main_type LookupBackgroundWorkerFunction(const char *libraryname, const char *funcname) +{ + /* + * If the function is to be loaded from postgres itself, search the + * InternalBGWorkers array. + */ + if (strcmp(libraryname, "postgres") == 0) { + size_t i; + for (i = 0; i < lengthof(InternalBGWorkers); i++) { + if (strcmp(InternalBGWorkers[i].fn_name, funcname) == 0) { + return InternalBGWorkers[i].fn_addr; + } + } + + /* We can only reach this by programming error. */ + elog(ERROR, "internal function \"%s\" not found", funcname); + } + + /* Otherwise load from external library. */ + return (bgworker_main_type) + load_external_function(libraryname, (char*)funcname, true, true).user_fn; +} + +/* + * Given a PID, get the bgw_type of the background worker. Returns NULL if + * not a valid background worker. + * + * The return value is in static memory belonging to this function, so it has + * to be used before calling this function again. This is so that the caller + * doesn't have to worry about the background worker locking protocol. 
+ */ +const char * GetBackgroundWorkerTypeByPid(ThreadId pid) +{ + int slotno; + bool found = false; + static THR_LOCAL char result[BGW_MAXLEN]; + + (void)LWLockAcquire(BackgroundWorkerLock, LW_SHARED); + + for (slotno = 0; slotno < t_thrd.bgworker_cxt.background_worker_data->total_slots; slotno++) { + BackgroundWorkerSlot *slot = &t_thrd.bgworker_cxt.background_worker_data->slot[slotno]; + + if (slot->pid > 0 && slot->pid == pid) { + int rd = strncpy_s(result, BGW_MAXLEN, slot->worker.bgw_type, BGW_MAXLEN); + securec_check(rd, "\0", "\0"); + found = true; + break; + } + } + + LWLockRelease(BackgroundWorkerLock); + + if (!found) { + return NULL; + } + + return result; +} + +/* + * Connect background worker to a database. + */ +void BackgroundWorkerInitializeConnection(const char *dbname, const char *username, uint32 flags) +{ + BackgroundWorker *worker = t_thrd.bgworker_cxt.my_bgworker_entry; + + /* XXX is this the right errcode? */ + if (!(worker->bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION)) { + ereport(FATAL, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("database connection requirement not indicated during registration"))); + } + + t_thrd.proc_cxt.PostInit->SetDatabaseAndUser(dbname, InvalidOid, username, InvalidOid); + t_thrd.proc_cxt.PostInit->InitBackendWorker(); + + /* it had better not gotten out of "init" mode yet */ + if (!IsInitProcessingMode()) { + ereport(ERROR, + (errmsg("invalid processing mode in background worker"))); + } + SetProcessingMode(NormalProcessing); +} + +/* + * Connect background worker to a database using OIDs. + */ +void BackgroundWorkerInitializeConnectionByOid(Oid dboid, Oid useroid, uint32 flags) +{ + BackgroundWorker *worker = t_thrd.bgworker_cxt.my_bgworker_entry; + + /* XXX is this the right errcode? 
*/ + if (!(worker->bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION)) { + ereport(FATAL, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("database connection requirement not indicated during registration"))); + } + + t_thrd.proc_cxt.PostInit->SetDatabaseAndUser(NULL, dboid, NULL, useroid); + t_thrd.proc_cxt.PostInit->InitBackendWorker(); + + /* it had better not gotten out of "init" mode yet */ + if (!IsInitProcessingMode()) { + ereport(ERROR, + (errmsg("invalid processing mode in background worker"))); + } + SetProcessingMode(NormalProcessing); +} + +/* + * Block/unblock signals in a background worker + */ +void BackgroundWorkerBlockSignals(void) +{ + (void)PG_SETMASK(&t_thrd.libpq_cxt.BlockSig); +} + +void BackgroundWorkerUnblockSignals(void) +{ + (void)PG_SETMASK(&t_thrd.libpq_cxt.UnBlockSig); +} + + diff --git a/src/gausskernel/process/postmaster/postmaster.cpp b/src/gausskernel/process/postmaster/postmaster.cpp index d01e8fcd51..8a0da6b553 100755 --- a/src/gausskernel/process/postmaster/postmaster.cpp +++ b/src/gausskernel/process/postmaster/postmaster.cpp @@ -110,6 +110,7 @@ #include "job/job_scheduler.h" #include "job/job_worker.h" #include "postmaster/autovacuum.h" +#include "postmaster/bgworker_internals.h" #include "postmaster/pagewriter.h" #include "postmaster/fork_process.h" #include "postmaster/pgarch.h" @@ -311,6 +312,7 @@ static void reaper(SIGNAL_ARGS); static void sigusr1_handler(SIGNAL_ARGS); static void dummy_handler(SIGNAL_ARGS); static void CleanupBackend(ThreadId pid, int exitstatus); +static bool CleanupBackgroundWorker(ThreadId pid, int exitstatus); static const char* GetProcName(ThreadId pid); static void LogChildExit(int lev, const char* procname, ThreadId pid, int exitstatus); static void PostmasterStateMachine(void); @@ -366,6 +368,8 @@ static void check_and_reset_ha_listen_port(void); static void* cJSON_internal_malloc(size_t size); static bool NeedHeartbeat(); static ServerMode GetHaShmemMode(void); +static bool 
assign_backendlist_entry(RegisteredBgWorker *rw); +static void maybe_start_bgworkers(void); bool PMstateIsRun(void); @@ -380,6 +384,7 @@ bool PMstateIsRun(void); #define BACKEND_TYPE_TEMPBACKEND \ 0x0010 /* temp thread processing cancel signal \ or stream connection */ + #define BACKEND_TYPE_ALL 0x001F /* OR of all the above */ static int CountChildren(int target); @@ -1019,6 +1024,7 @@ void SetShmemCxt(void) g_instance.shmem_cxt.MaxBackends = g_instance.shmem_cxt.MaxConnections + g_instance.attr.attr_sql.job_queue_processes + g_instance.attr.attr_storage.autovacuum_max_workers + + g_instance.attr.attr_storage.max_background_workers + AUXILIARY_BACKENDS + AV_LAUNCHER_PROCS; g_instance.shmem_cxt.MaxReserveBackendId = g_instance.attr.attr_sql.job_queue_processes + @@ -5464,6 +5470,14 @@ static void reaper(SIGNAL_ARGS) continue; } + /* Was it one of our background workers? */ + if (CleanupBackgroundWorker(pid, (int)exitstatus)) + { + /* have it be restarted */ + g_instance.bgworker_cxt.have_crashed_worker = true; + continue; + } + /* * Else do standard backend child cleanup. */ @@ -5566,6 +5580,101 @@ static const char* GetProcName(ThreadId pid) } } +/* + * Scan the bgworkers list and see if the given PID (which has just stopped + * or crashed) is in it. Handle its shutdown if so, and return true. If not a + * bgworker, return false. + * + * This is heavily based on CleanupBackend. One important difference is that + * we don't know yet that the dying process is a bgworker, so we must be silent + * until we're sure it is. 
+ */ +static bool CleanupBackgroundWorker(ThreadId pid, + int exitstatus) /* child's exit status */ +{ + char namebuf[MAXPGPATH]; + slist_mutable_iter iter; + + slist_foreach_modify(iter, &BackgroundWorkerList) { + RegisteredBgWorker *rw; + + rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur); + + if (rw->rw_pid != pid) { + continue; + } + +#ifdef WIN32 + /* see CleanupBackend */ + if (exitstatus == ERROR_WAIT_NO_CHILDREN) { + exitstatus = 0; + } +#endif + + int rc = snprintf_s(namebuf, MAXPGPATH, MAXPGPATH - 1, _("background worker \"%s\""), rw->rw_worker.bgw_type); + securec_check_ss_c(rc, "\0", "\0"); + + if (!EXIT_STATUS_0(exitstatus)) { + /* Record timestamp, so we know when to restart the worker. */ + rw->rw_crashed_at = GetCurrentTimestamp(); + } else { + /* Zero exit status means terminate */ + rw->rw_crashed_at = 0; + rw->rw_terminate = true; + } + + /* + * Additionally, for shared-memory-connected workers, just like a + * backend, any exit status other than 0 or 1 is considered a crash + * and causes a system-wide restart. + */ + if ((rw->rw_worker.bgw_flags & BGWORKER_SHMEM_ACCESS) != 0) { + if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus)) { + HandleChildCrash(pid, exitstatus, namebuf); + return true; + } + } + + /* + * We must release the postmaster child slot whether this worker is + * connected to shared memory or not, but we only treat it as a crash + * if it is in fact connected. + */ + if (!ReleasePostmasterChildSlot(rw->rw_child_slot) && + (rw->rw_worker.bgw_flags & BGWORKER_SHMEM_ACCESS) != 0) { + HandleChildCrash(pid, exitstatus, namebuf); + return true; + } + + /* Get it out of the BackendList and clear out remaining data */ + DLRemove(&rw->rw_backend->elem); + + /* + * It's possible that this background worker started some OTHER + * background worker and asked to be notified when that worker started + * or stopped. If so, cancel any notifications destined for the + * now-dead backend. 
+ */ + if (rw->rw_backend->bgworker_notify) { + BackgroundWorkerStopNotifications(rw->rw_pid); + } + + BackendArrayRemove(rw->rw_backend); + + rw->rw_backend = NULL; + rw->rw_pid = 0; + rw->rw_child_slot = 0; + ReportBackgroundWorkerExit(&iter); /* report child death */ + + LogChildExit(EXIT_STATUS_0(exitstatus) ? DEBUG1 : LOG, + namebuf, pid, exitstatus); + + return true; + } + + return false; +} + /* * CleanupBackend -- cleanup after terminated backend. * @@ -5629,6 +5738,18 @@ static void CleanupBackend(ThreadId pid, int exitstatus) /* child's exit status. BackendArrayRemove(bp); } + if (bp->bgworker_notify) + { + /* + * This backend may have been slated to receive SIGUSR1 when + * some background worker started or stopped. Cancel those + * notifications, as we don't want to signal PIDs that are not + * PostgreSQL backends. This gets skipped in the (probably + * very common) case where the backend has never requested any + * such notifications. + */ + BackgroundWorkerStopNotifications(bp->pid); + } DLRemove(curr); break; } @@ -6881,6 +7002,16 @@ static void sigusr1_handler(SIGNAL_ARGS) gs_signal_setmask(&t_thrd.libpq_cxt.BlockSig, NULL); + /* Process background worker state change. */ + if (CheckPostmasterSignal(PMSIGNAL_BACKGROUND_WORKER_CHANGE)) + { + BackgroundWorkerStateChange(); + g_instance.bgworker_cxt.start_worker_needed = true; + } + if (g_instance.bgworker_cxt.start_worker_needed || g_instance.bgworker_cxt.have_crashed_worker) { + maybe_start_bgworkers(); + } + /* * RECOVERY_STARTED and BEGIN_HOT_STANDBY signals are ignored in * unexpected states. If the startup process quickly starts up, completes @@ -7699,6 +7830,300 @@ int MaxLivePostmasterChildren(void) return 6 * g_instance.shmem_cxt.MaxBackends; } +/* + * Start a new bgworker. + * Starting time conditions must have been checked already. + * + * Returns true on success, false on failure. + * In either case, update the RegisteredBgWorker's state appropriately. 
+ * + * This code is heavily based on autovacuum.c, q.v. + */ +static bool do_start_bgworker(RegisteredBgWorker *rw) +{ + ThreadId worker_pid = InvalidPid; + + Assert(rw->rw_pid == 0); + + /* + * Allocate and assign the Backend element. Note we must do this before + * forking, so that we can handle failures (out of memory or child-process + * slots) cleanly. + * + * Treat failure as though the worker had crashed. That way, the + * postmaster will wait a bit before attempting to start it again; if we + * tried again right away, most likely we'd find ourselves hitting the + * same resource-exhaustion condition. + */ + if (!assign_backendlist_entry(rw)) { + rw->rw_crashed_at = GetCurrentTimestamp(); + return false; + } + + ereport(DEBUG1, + (errmsg("starting background worker process \"%s\"", + rw->rw_worker.bgw_name))); + + Backend* bn = rw->rw_backend; + void* bgWorkerShmAddr = GetBackgroundWorkerShmAddr(rw->rw_shmem_slot); + switch ((worker_pid = initialize_util_thread(BACKGROUND_WORKER, bgWorkerShmAddr))) { + case (ThreadId)-1: + /* in postmaster, fork failed ... */ + ereport(LOG, + (errmsg("could not fork worker process: %m"))); + /* undo what assign_backendlist_entry did */ + (void)ReleasePostmasterChildSlot(rw->rw_child_slot); + bn->pid = 0; + rw->rw_child_slot = 0; + rw->rw_backend = NULL; + /* mark entry as crashed, so we'll try again later */ + rw->rw_crashed_at = GetCurrentTimestamp(); + break; + + default: + /* in postmaster, fork successful ... */ + rw->rw_pid = worker_pid; + bn->pid = rw->rw_pid; + ReportBackgroundWorkerPID(rw); + /* add new worker to lists of backends */ + DLInitElem(&bn->elem, bn); + DLAddHead(g_instance.backend_list, &bn->elem); + + return true; + } + + return false; +} + +/* + * Does the current postmaster state require starting a worker with the + * specified start_time? 
+ */ +static bool +bgworker_should_start_now(BgWorkerStartTime start_time) +{ + switch (pmState) { + case PM_NO_CHILDREN: + case PM_WAIT_DEAD_END: + case PM_SHUTDOWN_2: + case PM_SHUTDOWN: + case PM_WAIT_BACKENDS: + case PM_WAIT_READONLY: + case PM_WAIT_BACKUP: + break; + + case PM_RUN: + if (start_time == BgWorkerStart_RecoveryFinished) { + return true; + } + /* fall through */ + case PM_HOT_STANDBY: + if (start_time == BgWorkerStart_ConsistentState) { + return true; + } + /* fall through */ + case PM_RECOVERY: + case PM_STARTUP: + case PM_INIT: + if (start_time == BgWorkerStart_PostmasterStart) { + return true; + } + /* fall through */ + } + + return false; +} + +/* + * Allocate the Backend struct for a connected background worker, but don't + * add it to the list of backends just yet. + * + * On failure, return false without changing any worker state. + * + * Some info from the Backend is copied into the passed rw. + */ +static bool +assign_backendlist_entry(RegisteredBgWorker *rw) +{ + Backend* bn = NULL; + + /* + * Check that database state allows another connection. Currently the + * only possible failure is CAC_TOOMANY, so we just log an error message + * based on that rather than checking the error code precisely. + */ + if (canAcceptConnections(false) != CAC_OK) + { + ereport(LOG, + (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED), + errmsg("no slot available for new worker process"))); + return false; + } + + int slot = AssignPostmasterChildSlot(); + + bn = AssignFreeBackEnd(slot); + + if (bn == NULL) { + ereport(LOG, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory"))); + return false; + } + + /* + * Compute the cancel key that will be assigned to this session. We + * probably don't need cancel keys for background workers, but we'd better + * have something random in the field to prevent unfriendly people from + * sending cancels to them. 
+ */ + GenerateCancelKey(false); + bn->cancel_key = t_thrd.proc_cxt.MyCancelKey; + bn->child_slot = t_thrd.proc_cxt.MyPMChildSlot = slot; + bn->is_autovacuum = false; + bn->dead_end = false; + bn->bgworker_notify = false; + rw->rw_backend = bn; + rw->rw_child_slot = bn->child_slot; + + return true; +} + +/* + * If the time is right, start background worker(s). + * + * As a side effect, the bgworker control variables are set or reset + * depending on whether more workers may need to be started. + * + * We limit the number of workers started per call, to avoid consuming the + * postmaster's attention for too long when many such requests are pending. + * As long as start_worker_needed is true, ServerLoop will not block and will + * call this function again after dealing with any other issues. + */ +static void maybe_start_bgworkers(void) +{ +#define MAX_BGWORKERS_TO_LAUNCH 100 + int num_launched = 0; + TimestampTz now = 0; + slist_mutable_iter iter; + + /* + * During crash recovery, we have no need to be called until the state + * transition out of recovery. + */ + if (g_instance.fatal_error) { + g_instance.bgworker_cxt.start_worker_needed = false; + g_instance.bgworker_cxt.have_crashed_worker = false; + return; + } + + /* Don't need to be called again unless we find a reason for it below */ + g_instance.bgworker_cxt.start_worker_needed = false; + g_instance.bgworker_cxt.have_crashed_worker = false; + + slist_foreach_modify(iter, &BackgroundWorkerList) { + RegisteredBgWorker *rw; + + rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur); + + /* ignore if already running */ + if (rw->rw_pid != 0) { + continue; + } + + /* if marked for death, clean up and remove from list */ + if (rw->rw_terminate) { + ForgetBackgroundWorker(&iter); + continue; + } + + /* + * If this worker has crashed previously, maybe it needs to be + * restarted (unless on registration it specified it doesn't want to + * be restarted at all). Check how long ago did a crash last happen. 
+ * If the last crash is too recent, don't start it right away; let it + * be restarted once enough time has passed. + */ + if (rw->rw_crashed_at != 0) { + if (rw->rw_worker.bgw_restart_time == BGW_NEVER_RESTART) { + ThreadId notify_pid = rw->rw_worker.bgw_notify_pid; + + ForgetBackgroundWorker(&iter); + + /* Report worker is gone now. */ + if (notify_pid != 0) { + (void)gs_signal_send(notify_pid, SIGUSR1); + } + + continue; + } + + /* read system time only when needed */ + if (now == 0) { + now = GetCurrentTimestamp(); + } + + if (!TimestampDifferenceExceeds(rw->rw_crashed_at, now, + rw->rw_worker.bgw_restart_time * 1000)) { + /* Set flag to remember that we have workers to start later */ + g_instance.bgworker_cxt.have_crashed_worker = true; + continue; + } + } + + if (bgworker_should_start_now(rw->rw_worker.bgw_start_time)) { + /* reset crash time before trying to start worker */ + rw->rw_crashed_at = 0; + + /* + * Try to start the worker. + * + * On failure, give up processing workers for now, but set + * start_worker_needed so we'll come back here on the next iteration + * of ServerLoop to try again. (We don't want to wait, because + * there might be additional ready-to-run workers.) We could set + * have_crashed_worker as well, since this worker is now marked + * crashed, but there's no need because the next run of this + * function will do that. + */ + if (!do_start_bgworker(rw)) { + g_instance.bgworker_cxt.start_worker_needed = true; + return; + } + + /* + * If we've launched as many workers as allowed, quit, but have + * ServerLoop call us again to look for additional ready-to-run + * workers. There might not be any, but we'll find out the next + * time we run. + */ + if (++num_launched >= MAX_BGWORKERS_TO_LAUNCH) { + g_instance.bgworker_cxt.start_worker_needed = true; + return; + } + } + } +} + +/* + * When a backend asks to be notified about worker state changes, we + * set a flag in its backend entry. 
The background worker machinery needs + * to know when such backends exit. + */ +bool +PostmasterMarkPIDForWorkerNotify(ThreadId pid) +{ + int count = MaxLivePostmasterChildren(); + for (int i = 0; i < count; ++i) { + Backend* bp = &g_instance.backend_array[i]; + if (bp->pid == pid) + { + bp->bgworker_notify = true; + return true; + } + } + return false; +} + + #ifdef EXEC_BACKEND #ifndef WIN32 #define write_inheritable_socket(dest, src) ((*(dest) = (src))) @@ -7965,6 +8390,7 @@ Backend* AssignFreeBackEnd(int slot) bn->pid = 0; bn->cancel_key = 0; bn->dead_end = false; + bn->bgworker_notify = false; return bn; } @@ -9952,7 +10378,7 @@ int GaussDbThreadMain(knl_thread_arg* arg) commAuxiliaryMain(); proc_exit(0); } break; - + #ifdef ENABLE_MULTIPLE_NODES case COMM_POOLER_CLEAN: { InitProcessAndShareMemory(); @@ -9960,6 +10386,14 @@ int GaussDbThreadMain(knl_thread_arg* arg) proc_exit(0); } break; #endif + + case BACKGROUND_WORKER: { + IsBackgroundWorker = true; + InitProcessAndShareMemory(); + StartBackgroundWorker(arg->payload); + proc_exit(0); + } break; + default: ereport(PANIC, (errmsg("unsupport thread role type %d", arg->role))); break; @@ -10011,7 +10445,8 @@ static GaussdbThreadEntry GaussdbThreadEntryGate[] = {GaussDbThreadMain, GaussDbThreadMain, GaussDbThreadMain, GaussDbThreadMain, - GaussDbThreadMain}; + GaussDbThreadMain, + GaussDbThreadMain}; const char* GaussdbThreadName[] = {"main", "worker", @@ -10055,7 +10490,8 @@ const char* GaussdbThreadName[] = {"main", "communicator receiver flower", "communicator receiver loop", "communicator auxiliary", - "communicator pooler auto cleaner"}; + "communicator pooler auto cleaner", + "background worker"}; GaussdbThreadEntry GetThreadEntry(knl_thread_role role) { diff --git a/src/gausskernel/process/threadpool/knl_instance.cpp b/src/gausskernel/process/threadpool/knl_instance.cpp index 38794aac49..dad5820202 100755 --- a/src/gausskernel/process/threadpool/knl_instance.cpp +++ 
b/src/gausskernel/process/threadpool/knl_instance.cpp @@ -290,6 +290,7 @@ static void knl_g_wlm_init(knl_g_wlm_context* wlm_cxt) static void knl_g_shmem_init(knl_g_shmem_context* shmem_cxt) { + shmem_cxt->max_parallel_workers = 8; shmem_cxt->MaxBackends = 100; shmem_cxt->MaxReserveBackendId = (AUXILIARY_BACKENDS + AV_LAUNCHER_PROCS); shmem_cxt->ThreadPoolGroupNum = 0; @@ -316,6 +317,12 @@ static void knl_g_numa_init(knl_g_numa_context* numa_cxt) numa_cxt->allocIndex = 0; } +static void knl_g_bgworker_init(knl_g_bgworker_context* bgworker_cxt) +{ + bgworker_cxt->start_worker_needed = true; + bgworker_cxt->have_crashed_worker = false; +} + void knl_instance_init() { g_instance.binaryupgrade = false; @@ -363,6 +370,7 @@ void knl_instance_init() knl_g_dw_init(&g_instance.dw_cxt); knl_g_xlog_init(&g_instance.xlog_cxt); knl_g_numa_init(&g_instance.numa_cxt); + knl_g_bgworker_init(&g_instance.bgworker_cxt); MemoryContextSwitchTo(old_cxt); diff --git a/src/gausskernel/process/threadpool/knl_thread.cpp b/src/gausskernel/process/threadpool/knl_thread.cpp index 3822f053bb..1d0ca27a90 100755 --- a/src/gausskernel/process/threadpool/knl_thread.cpp +++ b/src/gausskernel/process/threadpool/knl_thread.cpp @@ -1405,6 +1405,12 @@ void knl_thread_mot_init() knl_t_mot_init(&t_thrd.mot_cxt); } +void knl_t_bgworker_init(knl_t_bgworker_context* bgworker_cxt) +{ + bgworker_cxt->background_worker_data = NULL; + bgworker_cxt->my_bgworker_entry = NULL; +} + void knl_thread_init(knl_thread_role role) { t_thrd.role = role; diff --git a/src/gausskernel/storage/ipc/ipci.cpp b/src/gausskernel/storage/ipc/ipci.cpp index 508a9abcbd..fa8a0df839 100755 --- a/src/gausskernel/storage/ipc/ipci.cpp +++ b/src/gausskernel/storage/ipc/ipci.cpp @@ -37,6 +37,7 @@ #include "pgxc/nodemgr.h" #endif #include "postmaster/autovacuum.h" +#include "postmaster/bgworker_internals.h" #include "postmaster/bgwriter.h" #include "postmaster/postmaster.h" #include "replication/slot.h" @@ -136,6 +137,7 @@ void 
CreateSharedMemoryAndSemaphores(bool makePrivate, int port) size = add_size(size, CLOGShmemSize()); size = add_size(size, CSNLOGShmemSize()); size = add_size(size, TwoPhaseShmemSize()); + size = add_size(size, BackgroundWorkerShmemSize()); size = add_size(size, MultiXactShmemSize()); size = add_size(size, LWLockShmemSize()); size = add_size(size, ProcArrayShmemSize()); @@ -275,6 +277,7 @@ void CreateSharedMemoryAndSemaphores(bool makePrivate, int port) { TwoPhaseShmemInit(); } + BackgroundWorkerShmemInit(); /* * Set up shared-inval messaging diff --git a/src/gausskernel/storage/ipc/procsignal.cpp b/src/gausskernel/storage/ipc/procsignal.cpp index d7d92a9893..3308fe14e4 100644 --- a/src/gausskernel/storage/ipc/procsignal.cpp +++ b/src/gausskernel/storage/ipc/procsignal.cpp @@ -306,6 +306,7 @@ void procsignal_sigusr1_handler(SIGNAL_ARGS) if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN)) RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN); + SetLatch(&t_thrd.proc->procLatch); latch_sigusr1_handler(); errno = save_errno; diff --git a/src/gausskernel/storage/lmgr/lwlocknames.txt b/src/gausskernel/storage/lmgr/lwlocknames.txt index 602047f4d6..1489b44f6d 100644 --- a/src/gausskernel/storage/lmgr/lwlocknames.txt +++ b/src/gausskernel/storage/lmgr/lwlocknames.txt @@ -96,3 +96,4 @@ GPCCommitLock 88 GPCClearLock 89 GPCTimelineLock 90 TsTagsCacheLock 91 +BackgroundWorkerLock 92 \ No newline at end of file diff --git a/src/gausskernel/storage/lmgr/proc.cpp b/src/gausskernel/storage/lmgr/proc.cpp index ce2eb19b01..aba442a0cc 100755 --- a/src/gausskernel/storage/lmgr/proc.cpp +++ b/src/gausskernel/storage/lmgr/proc.cpp @@ -212,7 +212,9 @@ static void FiniNuma(int code, Datum arg) * So, now we grab enough semaphores to support the desired max number * of backends immediately at initialization --- if the sysadmin has set * MaxConnections or autovacuum_max_workers higher than his kernel will - * support, he'll find out sooner rather than later. 
+ * support, he'll find out sooner rather than later. (The number of + * background worker processes registered by loadable modules is also taken + * into consideration.) * * Another reason for creating semaphores here is that the semaphore * implementation typically requires us to create semaphores in the @@ -240,6 +242,7 @@ void InitProcGlobal(void) #endif g_instance.proc_base->freeProcs = NULL; g_instance.proc_base->autovacFreeProcs = NULL; + g_instance.proc_base->bgworkerFreeProcs = NULL; g_instance.proc_base->pgjobfreeProcs = NULL; g_instance.proc_base->startupProc = NULL; g_instance.proc_base->startupProcPid = 0; @@ -252,10 +255,10 @@ void InitProcGlobal(void) /* * Create and initialize all the PGPROC structures we'll need. There are - * four separate consumers: (1) normal backends, (2) autovacuum workers - * and the autovacuum launcher, (3) auxiliary processes, and (4) prepared - * transactions. Each PGPROC structure is dedicated to exactly one of - * these purposes, and they do not move between groups. + * five separate consumers: (1) normal backends, (2) autovacuum workers + * and the autovacuum launcher, (3) background workers, (4) auxiliary processes, + * and (5) prepared transactions. Each PGPROC structure is dedicated to exactly + * one of these purposes, and they do not move between groups. */ PGPROC *initProcs[MAX_NUMA_NODE] = {0}; @@ -331,7 +334,7 @@ void InitProcGlobal(void) procs[i]->nodeno = i % nNumaNodes; /* - * Newly created PGPROCs for normal backends or for autovacuum must be + * Newly created PGPROCs for normal backends, autovacuum and bgworkers must be * queued up on the appropriate free list. 
Because there can only * ever be a small, fixed number of auxiliary processes, no free list * is used in that case; InitAuxiliaryProcess() instead uses a linear @@ -347,13 +350,23 @@ void InitProcGlobal(void) /* PGPROC for pg_job backend, add to pgjobfreeProcs list, 1 for Job Schedule Lancher */ procs[i]->links.next = (SHM_QUEUE *)g_instance.proc_base->pgjobfreeProcs; g_instance.proc_base->pgjobfreeProcs = procs[i]; - } else if (i < g_instance.shmem_cxt.MaxBackends) { + } else if (i < g_instance.shmem_cxt.MaxConnections + AUXILIARY_BACKENDS + + g_instance.attr.attr_sql.job_queue_processes + 1 + + g_instance.attr.attr_storage.autovacuum_max_workers + + AV_LAUNCHER_PROCS) { /* * PGPROC for AV launcher/worker, add to autovacFreeProcs list - * list size is autovacuum_max_workers + AUTOVACUUM_LAUNCHERS + * list size is autovacuum_max_workers + AV_LAUNCHER_PROCS */ procs[i]->links.next = (SHM_QUEUE *)g_instance.proc_base->autovacFreeProcs; g_instance.proc_base->autovacFreeProcs = procs[i]; + } else if (i < g_instance.shmem_cxt.MaxBackends) { + /* + * PGPROC for bgworker, add to bgworkerFreeProcs list + * list size is max_background_workers + */ + procs[i]->links.next = (SHM_QUEUE *)g_instance.proc_base->bgworkerFreeProcs; + g_instance.proc_base->bgworkerFreeProcs = procs[i]; } /* Initialize myProcLocks[] shared memory queues. 
*/ @@ -463,6 +476,8 @@ void InitProcess(void) t_thrd.proc = g_instance.proc_base->autovacFreeProcs; else if (IsJobSchedulerProcess() || IsJobWorkerProcess()) t_thrd.proc = g_instance.proc_base->pgjobfreeProcs; + else if (IsBackgroundWorker) + t_thrd.proc = g_instance.proc_base->bgworkerFreeProcs; else { #ifndef __USE_NUMA t_thrd.proc = g_instance.proc_base->freeProcs; @@ -478,6 +493,8 @@ void InitProcess(void) g_instance.proc_base->autovacFreeProcs = (PGPROC *)t_thrd.proc->links.next; else if (IsJobSchedulerProcess() || IsJobWorkerProcess()) g_instance.proc_base->pgjobfreeProcs = (PGPROC *)t_thrd.proc->links.next; + else if (IsBackgroundWorker) + g_instance.proc_base->bgworkerFreeProcs = (PGPROC *)t_thrd.proc->links.next; else { #ifndef __USE_NUMA g_instance.proc_base->freeProcs = (PGPROC *)t_thrd.proc->links.next; @@ -1036,6 +1053,11 @@ static void ProcKill(int code, Datum arg) } else if (IsJobSchedulerProcess() || IsJobWorkerProcess()) { t_thrd.proc->links.next = (SHM_QUEUE *)g_instance.proc_base->pgjobfreeProcs; g_instance.proc_base->pgjobfreeProcs = t_thrd.proc; + } + else if (IsBackgroundWorker) + { + t_thrd.proc->links.next = (SHM_QUEUE *)g_instance.proc_base->bgworkerFreeProcs; + g_instance.proc_base->bgworkerFreeProcs = t_thrd.proc; } else { t_thrd.proc->links.next = (SHM_QUEUE *)g_instance.proc_base->freeProcs; g_instance.proc_base->freeProcs = t_thrd.proc; diff --git a/src/include/gs_thread.h b/src/include/gs_thread.h index e32e21efe8..dc0bfaf1bd 100755 --- a/src/include/gs_thread.h +++ b/src/include/gs_thread.h @@ -97,6 +97,7 @@ typedef enum knl_thread_role { COMM_RECEIVER, COMM_AUXILIARY, COMM_POOLER_CLEAN, + BACKGROUND_WORKER, // should be last valid thread. 
THREAD_ENTRY_BOUND, diff --git a/src/include/knl/knl_guc/knl_instance_attr_storage.h b/src/include/knl/knl_guc/knl_instance_attr_storage.h index fc9805f801..ff905a4b6d 100755 --- a/src/include/knl/knl_guc/knl_instance_attr_storage.h +++ b/src/include/knl/knl_guc/knl_instance_attr_storage.h @@ -68,6 +68,7 @@ typedef struct knl_instance_attr_storage { int max_replication_slots; int replication_type; int autovacuum_max_workers; + int max_background_workers; int64 autovacuum_freeze_max_age; int wal_level; /* User specified maximum number of recovery threads. */ diff --git a/src/include/knl/knl_instance.h b/src/include/knl/knl_instance.h index a7eba7c4c8..edac6dfd48 100644 --- a/src/include/knl/knl_instance.h +++ b/src/include/knl/knl_instance.h @@ -485,6 +485,7 @@ typedef struct knl_g_libpq_context { typedef struct knl_g_shmem_context { int MaxConnections; + int max_parallel_workers; int MaxBackends; int MaxReserveBackendId; int ThreadPoolGroupNum; @@ -511,6 +512,12 @@ typedef struct knl_g_numa_context { size_t allocIndex; } knl_g_numa_context; +typedef struct knl_g_bgworker_context { + /* set when there's a worker that needs to be started up */ + volatile bool start_worker_needed; + volatile bool have_crashed_worker; +} knl_g_bgworker_context; + typedef struct knl_instance_context { knl_virtual_role role; volatile int status; @@ -582,6 +589,7 @@ typedef struct knl_instance_context { knl_g_rto_context rto_cxt; knl_g_xlog_context xlog_cxt; knl_g_numa_context numa_cxt; + knl_g_bgworker_context bgworker_cxt; } knl_instance_context; extern void knl_instance_init(); diff --git a/src/include/knl/knl_thread.h b/src/include/knl/knl_thread.h index 4a2939477e..21a08dde19 100644 --- a/src/include/knl/knl_thread.h +++ b/src/include/knl/knl_thread.h @@ -69,6 +69,7 @@ #include "openssl/ossl_typ.h" #include "workload/qnode.h" #include "tcop/dest.h" +#include "postmaster/bgworker.h" #define MAX_PATH_LEN 1024 @@ -2702,6 +2703,11 @@ typedef struct knl_t_mot_context { unsigned int 
mbindFlags; } knl_t_mot_context; +typedef struct knl_t_bgworker_context { + BackgroundWorkerArray *background_worker_data; + BackgroundWorker *my_bgworker_entry; +} knl_t_bgworker_context; + /* thread context. */ typedef struct knl_thrd_context { knl_thread_role role; @@ -2799,6 +2805,7 @@ typedef struct knl_thrd_context { knl_t_heartbeat_context heartbeat_cxt; knl_t_poolcleaner_context poolcleaner_cxt; knl_t_mot_context mot_cxt; + knl_t_bgworker_context bgworker_cxt; } knl_thrd_context; extern void knl_thread_mot_init(); diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h index 3ca70ddc20..6d73c6960d 100755 --- a/src/include/miscadmin.h +++ b/src/include/miscadmin.h @@ -28,6 +28,8 @@ #include "pgtime.h" /* for pg_time_t */ #include "libpq/libpq-be.h" +#define InvalidPid ((ThreadId)(-1)) + #define PG_BACKEND_VERSIONSTR "gaussdb " DEF_GS_VERSION "\n" /***************************************************************************** @@ -129,6 +131,7 @@ extern bool InplaceUpgradePrecommit; extern THR_LOCAL PGDLLIMPORT bool IsUnderPostmaster; extern THR_LOCAL PGDLLIMPORT char my_exec_path[]; +extern THR_LOCAL PGDLLIMPORT bool IsBackgroundWorker; #define MAX_QUERY_DOP (64) #define MIN_QUERY_DOP -(MAX_QUERY_DOP) @@ -232,7 +235,7 @@ extern bool InLocalUserIdChange(void); extern bool InSecurityRestrictedOperation(void); extern void GetUserIdAndContext(Oid* userid, bool* sec_def_context); extern void SetUserIdAndContext(Oid userid, bool sec_def_context); -extern void InitializeSessionUserId(const char* rolename); +extern void InitializeSessionUserId(const char* rolename, Oid role_id); extern void InitializeSessionUserIdStandalone(void); extern void SetSessionAuthorization(Oid userid, bool is_superuser); extern Oid GetCurrentRoleId(void); diff --git a/src/include/postmaster/bgworker.h b/src/include/postmaster/bgworker.h new file mode 100644 index 0000000000..2c3d5c6e9d --- /dev/null +++ b/src/include/postmaster/bgworker.h @@ -0,0 +1,157 @@ +/* 
-------------------------------------------------------------------- + * bgworker.h + * POSTGRES pluggable background workers interface + * + * A background worker is a process able to run arbitrary, user-supplied code, + * including normal transactions. + * + * Any external module loaded via shared_preload_libraries can register a + * worker. Workers can also be registered dynamically at runtime. In either + * case, the worker process is forked from the postmaster and runs the + * user-supplied "main" function. This code may connect to a database and + * run transactions. Workers can remain active indefinitely, but will be + * terminated if a shutdown or crash occurs. + * + * If the fork() call fails in the postmaster, it will try again later. Note + * that the failure can only be transient (fork failure due to high load, + * memory pressure, too many processes, etc); more permanent problems, like + * failure to connect to a database, are detected later in the worker and dealt + * with just by having the worker exit normally. A worker which exits with + * a return code of 0 will never be restarted and will be removed from worker + * list. A worker which exits with a return code of 1 will be restarted after + * the configured restart interval (unless that interval is BGW_NEVER_RESTART). + * The TerminateBackgroundWorker() function can be used to terminate a + * dynamically registered background worker; the worker will be sent a SIGTERM + * and will not be restarted after it exits. Whenever the postmaster knows + * that a worker will not be restarted, it unregisters the worker, freeing up + * that worker's slot for use by a new worker. + * + * Note that there might be more than one worker in a database concurrently, + * and the same module may request more than one worker running the same (or + * different) code. 
+ * + * + * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/include/postmaster/bgworker.h + * -------------------------------------------------------------------- + */ +#include "gs_thread.h" +#ifndef BGWORKER_H +#define BGWORKER_H + +/*--------------------------------------------------------------------- + * External module API. + *--------------------------------------------------------------------- + */ + +/* + * Pass this flag to have your worker be able to connect to shared memory. + */ +#define BGWORKER_SHMEM_ACCESS 0x0001 + +/* + * This flag means the bgworker requires a database connection. The connection + * is not established automatically; the worker must establish it later. + * It requires that BGWORKER_SHMEM_ACCESS was passed too. + */ +#define BGWORKER_BACKEND_DATABASE_CONNECTION 0x0002 + +/* + * This class is used internally for parallel queries, to keep track of the + * number of active parallel workers and make sure we never launch more than + * max_parallel_workers parallel workers at the same time. Third party + * background workers should not use this class. 
+ */ +#define BGWORKER_CLASS_PARALLEL 0x0010 + +/* add additional bgworker classes here */ + +typedef void (*bgworker_main_type) (Datum main_arg); + +/* + * Points in time at which a bgworker can request to be started + */ +typedef enum { + BgWorkerStart_PostmasterStart, + BgWorkerStart_ConsistentState, + BgWorkerStart_RecoveryFinished +} BgWorkerStartTime; + +#define BGW_DEFAULT_RESTART_INTERVAL 60 +#define BGW_NEVER_RESTART -1 +#define BGW_MAXLEN 96 +#define BGW_EXTRALEN 128 + +typedef struct BackgroundWorker { + char bgw_name[BGW_MAXLEN]; + char bgw_type[BGW_MAXLEN]; + int bgw_flags; + BgWorkerStartTime bgw_start_time; + int bgw_restart_time; /* in seconds, or BGW_NEVER_RESTART */ + char bgw_library_name[BGW_MAXLEN]; + char bgw_function_name[BGW_MAXLEN]; + Datum bgw_main_arg; + char bgw_extra[BGW_EXTRALEN]; + ThreadId bgw_notify_pid; /* SIGUSR1 this backend on start/stop */ +} BackgroundWorker; + +typedef enum BgwHandleStatus { + BGWH_STARTED, /* worker is running */ + BGWH_NOT_YET_STARTED, /* worker hasn't been started yet */ + BGWH_STOPPED, /* worker has exited */ + BGWH_POSTMASTER_DIED /* postmaster died; worker status unclear */ +} BgwHandleStatus; + +struct BackgroundWorkerHandle; +typedef struct BackgroundWorkerHandle BackgroundWorkerHandle; +struct BackgroundWorkerArray; + +/* Register a new bgworker during shared_preload_libraries */ +extern void RegisterBackgroundWorker(BackgroundWorker *worker); + +/* Register a new bgworker from a regular backend */ +extern bool RegisterDynamicBackgroundWorker(BackgroundWorker *worker, + BackgroundWorkerHandle **handle); + +/* Query the status of a bgworker */ +extern BgwHandleStatus GetBackgroundWorkerPid(const BackgroundWorkerHandle *handle, + ThreadId *pidp); +extern BgwHandleStatus WaitForBackgroundWorkerStartup(const BackgroundWorkerHandle *handle, ThreadId *pid); +extern BgwHandleStatus WaitForBackgroundWorkerShutdown(const BackgroundWorkerHandle *handle); +extern const char 
*GetBackgroundWorkerTypeByPid(ThreadId pid); + +/* Terminate a bgworker */ +extern void TerminateBackgroundWorker(const BackgroundWorkerHandle *handle); + +/* + * Connect to the specified database, as the specified user. Only a worker + * that passed BGWORKER_BACKEND_DATABASE_CONNECTION during registration may + * call this. + * + * If username is NULL, bootstrapping superuser is used. + * If dbname is NULL, connection is made to no specific database; + * only shared catalogs can be accessed. + */ +extern void BackgroundWorkerInitializeConnection(const char *dbname, const char *username, uint32 flags = 1); + +/* Just like the above, but specifying database and user by OID. */ +extern void BackgroundWorkerInitializeConnectionByOid(Oid dboid, Oid useroid, uint32 flags = 1); + +/* + * Flags to BackgroundWorkerInitializeConnection et al + * + * + * Allow bypassing datallowconn restrictions when connecting to database + */ +#define BGWORKER_BYPASS_ALLOWCONN 1 + + +/* Block/unblock signals in a background worker process */ +extern void BackgroundWorkerBlockSignals(void); +extern void BackgroundWorkerUnblockSignals(void); + +#endif /* BGWORKER_H */ + diff --git a/src/include/postmaster/bgworker_internals.h b/src/include/postmaster/bgworker_internals.h new file mode 100644 index 0000000000..fa57bdc4da --- /dev/null +++ b/src/include/postmaster/bgworker_internals.h @@ -0,0 +1,64 @@ +/* -------------------------------------------------------------------- + * bgworker_internals.h + * POSTGRES pluggable background workers internals + * + * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/include/postmaster/bgworker_internals.h + * -------------------------------------------------------------------- + */ +#ifndef BGWORKER_INTERNALS_H +#define BGWORKER_INTERNALS_H + +#include "datatype/timestamp.h" +#include "lib/ilist.h" +#include "postmaster/bgworker.h" 
+ +/* + * Maximum possible value of parallel workers. + */ +#define MAX_PARALLEL_WORKER_LIMIT 1024 + +struct BackgroundWorkerSlot; + +/* + * List of background workers, private to postmaster. + * + * A worker that requests a database connection during registration will have + * rw_backend set, and will be present in BackendList. Note: do not rely on + * rw_backend being non-NULL for shmem-connected workers! + */ +typedef struct RegisteredBgWorker { + BackgroundWorker rw_worker; /* its registry entry */ + struct Backend *rw_backend; /* its BackendList entry, or NULL */ + ThreadId rw_pid; /* 0 if not running */ + int rw_child_slot; + TimestampTz rw_crashed_at; /* if not 0, time it last crashed */ + int rw_shmem_slot; + bool rw_terminate; + slist_node rw_lnode; /* list link */ +} RegisteredBgWorker; + +extern THR_LOCAL slist_head BackgroundWorkerList; + +extern Size BackgroundWorkerShmemSize(void); +extern void BackgroundWorkerShmemInit(void); +extern void BackgroundWorkerStateChange(void); +extern void ForgetBackgroundWorker(slist_mutable_iter *cur); +extern void ReportBackgroundWorkerPID(const RegisteredBgWorker *); +extern void ReportBackgroundWorkerExit(slist_mutable_iter *cur); +extern void BackgroundWorkerStopNotifications(ThreadId pid); +extern void ResetBackgroundWorkerCrashTimes(void); + +/* Function to start a background worker, called from postmaster.c */ +extern void StartBackgroundWorker(void* bgWorkerSlotShmAddr) ; + +#ifdef EXEC_BACKEND +extern void* GetBackgroundWorkerShmAddr(int slotno); +extern BackgroundWorker *BackgroundWorkerEntry(const BackgroundWorkerSlot* bgWorkerSlotShmAddr); +#endif + +#endif /* BGWORKER_INTERNALS_H */ + diff --git a/src/include/postmaster/postmaster.h b/src/include/postmaster/postmaster.h index 6c2ec7c0f1..5070a79961 100644 --- a/src/include/postmaster/postmaster.h +++ b/src/include/postmaster/postmaster.h @@ -249,4 +249,5 @@ extern void GenerateCancelKey(bool isThreadPoolSession); extern bool SignalCancelAllBackEnd(); 
extern bool IsLocalAddr(Port* port); extern uint64_t mc_timers_us(void); +extern bool PostmasterMarkPIDForWorkerNotify(ThreadId); #endif /* _POSTMASTER_H */ diff --git a/src/include/storage/pmsignal.h b/src/include/storage/pmsignal.h index 07e213fac6..e78bebb203 100755 --- a/src/include/storage/pmsignal.h +++ b/src/include/storage/pmsignal.h @@ -43,6 +43,7 @@ typedef enum { PMSIGNAL_ROLLBACK_STANDBY_PROMOTE, /* roll back standby promoting */ PMSIGNAL_START_PAGE_WRITER, /* start a new page writer thread */ PMSIGNAL_START_THREADPOOL_WORKER, /* start thread pool woker */ + PMSIGNAL_BACKGROUND_WORKER_CHANGE, /* background worker state change */ NUM_PMSIGNALS /* Must be last value of enum! */ } PMSignalReason; diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index b0a14ae3e5..e76d80d863 100644 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -304,6 +304,8 @@ typedef struct PROC_HDR { PGPROC* freeProcs; /* Head of list of autovacuum's free PGPROC structures */ PGPROC* autovacFreeProcs; + /* Head of list of bgworker free PGPROC structures */ + PGPROC* bgworkerFreeProcs; /* Head of list of pg_job's free PGPROC structures */ PGPROC* pgjobfreeProcs; /* First pgproc waiting for group XID clear */ diff --git a/src/include/threadpool/threadpool_worker.h b/src/include/threadpool/threadpool_worker.h index 7ec2cf6479..2c22e5ce08 100755 --- a/src/include/threadpool/threadpool_worker.h +++ b/src/include/threadpool/threadpool_worker.h @@ -54,6 +54,7 @@ typedef struct Backend { bool is_autovacuum; /* is it an autovacuum process? */ volatile bool dead_end; /* is it going to send an quit? 
*/ volatile int flag; + bool bgworker_notify; /* gets bgworker start/stop notifications */ Dlelem elem; /* list link in BackendList */ } Backend; diff --git a/src/include/utils/postinit.h b/src/include/utils/postinit.h index 2525a8fecc..dccc83079d 100755 --- a/src/include/utils/postinit.h +++ b/src/include/utils/postinit.h @@ -54,7 +54,7 @@ public: ~PostgresInitializer(); - void SetDatabaseAndUser(const char* in_dbname, Oid dboid, const char* username); + void SetDatabaseAndUser(const char* in_dbname, Oid dboid, const char* username, Oid useroid = InvalidOid); void GetDatabaseName(char* out_dbname); @@ -91,6 +91,8 @@ public: const char* m_username; + Oid m_useroid; + private: void InitThread(); -- Gitee From 4ba086877c61fa7e069f42e0365f4d2200e5352f Mon Sep 17 00:00:00 2001 From: jiang_jianyu Date: Tue, 25 Aug 2020 21:27:33 +0800 Subject: [PATCH 2/6] add message queue from PG --- src/common/backend/libpq/Makefile | 2 +- src/common/backend/libpq/pqcomm.cpp | 38 +- src/common/backend/libpq/pqformat.cpp | 28 + src/common/backend/libpq/pqmq.cpp | 270 ++++++ src/gausskernel/storage/ipc/Makefile | 2 +- src/gausskernel/storage/ipc/shm_mq.cpp | 1158 ++++++++++++++++++++++++ src/include/libpq/libpq.h | 40 +- src/include/libpq/pqformat.h | 1 + src/include/libpq/pqmq.h | 25 + src/include/storage/procsignal.h | 1 + src/include/storage/shm_mq.h | 82 ++ 11 files changed, 1625 insertions(+), 22 deletions(-) create mode 100644 src/common/backend/libpq/pqmq.cpp create mode 100644 src/gausskernel/storage/ipc/shm_mq.cpp create mode 100644 src/include/libpq/pqmq.h create mode 100644 src/include/storage/shm_mq.h diff --git a/src/common/backend/libpq/Makefile b/src/common/backend/libpq/Makefile index cfab9c855a..3969dd94a8 100644 --- a/src/common/backend/libpq/Makefile +++ b/src/common/backend/libpq/Makefile @@ -23,6 +23,6 @@ ifneq "$(MAKECMDGOALS)" "clean" endif endif OBJS = be-fsstubs.o be-secure.o auth.o crypt.o hba.o ip.o md5.o sha2.o pqcomm.o \ - pqformat.o pqsignal.o + pqformat.o 
pqsignal.o pqmq.o include $(top_srcdir)/src/gausskernel/common.mk diff --git a/src/common/backend/libpq/pqcomm.cpp b/src/common/backend/libpq/pqcomm.cpp index 45e28cf246..b6190904df 100644 --- a/src/common/backend/libpq/pqcomm.cpp +++ b/src/common/backend/libpq/pqcomm.cpp @@ -145,6 +145,28 @@ static int Lock_AF_UNIX(unsigned short portNumber, const char* unixSocketName, b static int Setup_AF_UNIX(bool is_create_psql_sock); #endif /* HAVE_UNIX_SOCKETS */ +static void socket_comm_reset(void); +static int socket_flush(void); +static int socket_flush_if_writable(void); +static bool socket_is_send_pending(void); +static int socket_putmessage(char msgtype, const char *s, size_t len); +static int socket_putmessage_noblock(char msgtype, const char *s, size_t len); +static void socket_startcopyout(void); +static void socket_endcopyout(bool errorAbort); + +static PQcommMethods PqCommSocketMethods = { + socket_comm_reset, + socket_flush, + socket_flush_if_writable, + socket_is_send_pending, + socket_putmessage, + socket_putmessage_noblock, + socket_startcopyout, + socket_endcopyout +}; + +THR_LOCAL PQcommMethods *PqCommMethods = &PqCommSocketMethods; + extern bool FencedUDFMasterMode; /* -------------------------------- @@ -458,7 +480,7 @@ void pq_init(void) * inside a pqcomm.c routine (which ideally will never happen, but...) * -------------------------------- */ -void pq_comm_reset(void) +static void socket_comm_reset(void) { /* Do not throw away pending data, but do reset the busy flag */ t_thrd.libpq_cxt.PqCommBusy = false; @@ -1612,7 +1634,7 @@ static int internal_putbytes(const char* s, size_t len) * returns 0 if OK, EOF if trouble * -------------------------------- */ -int pq_flush(void) +static int socket_flush(void) { int res = 0; @@ -1769,7 +1791,7 @@ static int internal_flush(void) * Returns 0 if OK, or EOF if trouble. 
* -------------------------------- */ -int pq_flush_if_writable(void) +static int socket_flush_if_writable(void) { int res; @@ -1868,7 +1890,7 @@ void pq_flush_timedwait(int timeout) * pq_is_send_pending - is there any pending data in the output buffer? * -------------------------------- */ -bool pq_is_send_pending(void) +static bool socket_is_send_pending(void) { return (t_thrd.libpq_cxt.PqSendStart < t_thrd.libpq_cxt.PqSendPointer); } @@ -1905,7 +1927,7 @@ bool pq_is_send_pending(void) * returns 0 if OK, EOF if trouble * -------------------------------- */ -int pq_putmessage(char msgtype, const char* s, size_t len) +static int socket_putmessage(char msgtype, const char* s, size_t len) { if (t_thrd.libpq_cxt.DoingCopyOut || t_thrd.libpq_cxt.PqCommBusy) { return 0; @@ -1941,7 +1963,7 @@ fail: * If the output buffer is too small to hold the message, the buffer * is enlarged. */ -int pq_putmessage_noblock(char msgtype, const char* s, size_t len) +static int socket_putmessage_noblock(char msgtype, const char* s, size_t len) { int res; int required; @@ -1967,7 +1989,7 @@ int pq_putmessage_noblock(char msgtype, const char* s, size_t len) * is beginning * -------------------------------- */ -void pq_startcopyout(void) +static void socket_startcopyout(void) { t_thrd.libpq_cxt.DoingCopyOut = true; } @@ -1982,7 +2004,7 @@ void pq_startcopyout(void) * not allow binary transfers, so a textual terminator is always correct. 
* -------------------------------- */ -void pq_endcopyout(bool errorAbort) +static void socket_endcopyout(bool errorAbort) { if (!t_thrd.libpq_cxt.DoingCopyOut) { return; diff --git a/src/common/backend/libpq/pqformat.cpp b/src/common/backend/libpq/pqformat.cpp index 0828753959..3509790d09 100644 --- a/src/common/backend/libpq/pqformat.cpp +++ b/src/common/backend/libpq/pqformat.cpp @@ -610,6 +610,34 @@ const char* pq_getmsgstring(StringInfo msg) return pg_client_to_server(str, slen); } +/* -------------------------------- + * pq_getmsgrawstring - get a null-terminated text string - NO conversion + * + * Returns a pointer directly into the message buffer. + * -------------------------------- + */ +const char *pq_getmsgrawstring(StringInfo msg) +{ + char *str; + int slen; + + str = &msg->data[msg->cursor]; + + /* + * It's safe to use strlen() here because a StringInfo is guaranteed to + * have a trailing null byte. But check we found a null inside the + * message. + */ + slen = strlen(str); + if (msg->cursor + slen >= msg->len) + ereport(ERROR, + (errcode(ERRCODE_PROTOCOL_VIOLATION), + errmsg("invalid string in message"))); + msg->cursor += slen + 1; + + return str; +} + /* -------------------------------- * pq_getmsgend - verify message fully consumed * -------------------------------- diff --git a/src/common/backend/libpq/pqmq.cpp b/src/common/backend/libpq/pqmq.cpp new file mode 100644 index 0000000000..274c285a0c --- /dev/null +++ b/src/common/backend/libpq/pqmq.cpp @@ -0,0 +1,270 @@ +/*------------------------------------------------------------------------- + * + * pqmq.cpp + * Use the frontend/backend protocol for communication over a shm_mq + * + * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/common/backend/libpq/pqmq.cpp + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include 
"libpq/libpq.h" +#include "libpq/pqformat.h" +#include "libpq/pqmq.h" +#include "miscadmin.h" +#include "pgstat.h" +#include "tcop/tcopprot.h" +#include "utils/builtins.h" + +static THR_LOCAL shm_mq *pq_mq; +static THR_LOCAL shm_mq_handle *pq_mq_handle; +static THR_LOCAL bool pq_mq_busy = false; +static THR_LOCAL ThreadId pq_mq_parallel_master_pid = 0; +static THR_LOCAL BackendId pq_mq_parallel_master_backend_id = InvalidBackendId; + +static void mq_comm_reset(void); +static int mq_flush(void); +static int mq_flush_if_writable(void); +static bool mq_is_send_pending(void); +static int mq_putmessage(char msgtype, const char *s, size_t len); +static int mq_putmessage_noblock(char msgtype, const char *s, size_t len); +static void mq_startcopyout(void); +static void mq_endcopyout(bool errorAbort); + +static THR_LOCAL PQcommMethods PqCommMqMethods = { + mq_comm_reset, + mq_flush, + mq_flush_if_writable, + mq_is_send_pending, + mq_putmessage, + mq_putmessage_noblock, + mq_startcopyout, + mq_endcopyout +}; + +static THR_LOCAL PQcommMethods *save_PqCommMethods; +static THR_LOCAL CommandDest save_whereToSendOutput; +static THR_LOCAL ProtocolVersion save_FrontendProtocol; + +/* + * Arrange to redirect frontend/backend protocol messages to a message queue. 
+ */ +void pq_redirect_to_shm_mq(shm_mq_handle *mqh) +{ + save_PqCommMethods = PqCommMethods; + save_whereToSendOutput = CommandDest(t_thrd.postgres_cxt.whereToSendOutput); + save_FrontendProtocol = FrontendProtocol; + + PqCommMethods = &PqCommMqMethods; + pq_mq_handle = mqh; + t_thrd.postgres_cxt.whereToSendOutput = static_cast(DestRemote); + FrontendProtocol = PG_PROTOCOL_LATEST; +} + +void pq_stop_redirect_to_shm_mq(void) +{ + PqCommMethods = save_PqCommMethods; + t_thrd.postgres_cxt.whereToSendOutput = static_cast(save_whereToSendOutput); + FrontendProtocol = save_FrontendProtocol; + pq_mq = NULL; + pq_mq_handle = NULL; +} + +/* + * Arrange to SendProcSignal() to the parallel master each time we transmit + * message data via the shm_mq. + */ +void pq_set_parallel_master(ThreadId pid, BackendId backend_id) +{ + Assert(PqCommMethods == &PqCommMqMethods); + pq_mq_parallel_master_pid = pid; + pq_mq_parallel_master_backend_id = backend_id; +} + +static void mq_comm_reset(void) +{ + /* Nothing to do. */ +} + +static int mq_flush(void) +{ + /* Nothing to do. */ + return 0; +} + +static int mq_flush_if_writable(void) +{ + /* Nothing to do. */ + return 0; +} + +static bool mq_is_send_pending(void) +{ + /* There's never anything pending. */ + return false; +} + +/* + * Transmit a libpq protocol message to the shared memory message queue + * selected via pq_mq_handle. We don't include a length word, because the + * receiver will know the length of the message from shm_mq_receive(). + */ +static int mq_putmessage(char msgtype, const char *s, size_t len) +{ + shm_mq_iovec iov[2]; + shm_mq_result result; + + /* + * If we're sending a message, and we have to wait because the queue is + * full, and then we get interrupted, and that interrupt results in trying + * to send another message, we respond by detaching the queue. 
There's no + * way to return to the original context, but even if there were, just + * queueing the message would amount to indefinitely postponing the + * response to the interrupt. So we do this instead. + */ + if (pq_mq_busy) { + if (pq_mq_handle != NULL) + shm_mq_detach(pq_mq_handle); + pq_mq_handle = NULL; + return EOF; + } + + /* + * If the message queue is already gone, just ignore the message. This + * doesn't necessarily indicate a problem; for example, DEBUG messages can + * be generated late in the shutdown sequence, after all DSMs have already + * been detached. + */ + if (pq_mq_handle == NULL) + return 0; + + pq_mq_busy = true; + + iov[0].data = &msgtype; + iov[0].len = 1; + iov[1].data = s; + iov[1].len = len; + + Assert(pq_mq_handle != NULL); + + for (;;) { + result = shm_mq_sendv(pq_mq_handle, iov, 2, true); + + if (pq_mq_parallel_master_pid != 0) + (void)SendProcSignal(pq_mq_parallel_master_pid,PROCSIG_PARALLEL_MESSAGE, + pq_mq_parallel_master_backend_id); + + if (result != SHM_MQ_WOULD_BLOCK) + break; + + (void)WaitLatch(&t_thrd.proc->procLatch, WL_LATCH_SET, 0); + ResetLatch(&t_thrd.proc->procLatch); + CHECK_FOR_INTERRUPTS(); + } + + pq_mq_busy = false; + + Assert(result == SHM_MQ_SUCCESS || result == SHM_MQ_DETACHED); + if (result != SHM_MQ_SUCCESS) + return EOF; + return 0; +} + +static int mq_putmessage_noblock(char msgtype, const char *s, size_t len) +{ + /* + * While the shm_mq machinery does support sending a message in + * non-blocking mode, there's currently no way to try sending beginning to + * send the message that doesn't also commit us to completing the + * transmission. This could be improved in the future, but for now we + * don't need it. + */ + elog(ERROR, "not currently supported"); + return 0; +} + +static void mq_startcopyout(void) +{ + /* Nothing to do. */ +} + +static void mq_endcopyout(bool errorAbort) +{ + /* Nothing to do. 
*/ +} + +/* + * Parse an ErrorResponse or NoticeResponse payload and populate an ErrorData + * structure with the results. + */ +void pq_parse_errornotice(StringInfo msg, ErrorData *edata) +{ + /* Initialize edata with reasonable defaults. */ + errno_t rc = memset_s(edata, sizeof(ErrorData), 0, sizeof(ErrorData)); + securec_check(rc, "\0", "\0"); + edata->elevel = ERROR; + + /* Loop over fields and extract each one. */ + for (;;) { + char code = pq_getmsgbyte(msg); + const char *value = NULL; + + if (code == '\0') { + pq_getmsgend(msg); + break; + } + value = pq_getmsgrawstring(msg); + + switch (code) { + case PG_DIAG_SEVERITY: + /* ignore, trusting we'll get a nonlocalized version */ + break; + case PG_DIAG_SQLSTATE: + if (strlen(value) != 5) { + elog(ERROR, "invalid SQLSTATE: \"%s\"", value); + } + edata->sqlerrcode = MAKE_SQLSTATE(value[0], value[1], value[2], + value[3], value[4]); + break; + case PG_DIAG_MESSAGE_PRIMARY: + edata->message = pstrdup(value); + break; + case PG_DIAG_MESSAGE_DETAIL: + edata->detail = pstrdup(value); + break; + case PG_DIAG_MESSAGE_HINT: + edata->hint = pstrdup(value); + break; + case PG_DIAG_STATEMENT_POSITION: + edata->cursorpos = pg_atoi(const_cast(value), sizeof(int), '\0'); + break; + case PG_DIAG_INTERNAL_POSITION: + edata->internalpos = pg_atoi(const_cast(value), sizeof(int), '\0'); + break; + case PG_DIAG_INTERNAL_QUERY: + edata->internalquery = pstrdup(value); + break; + case PG_DIAG_CONTEXT: + edata->context = pstrdup(value); + break; + case PG_DIAG_SOURCE_FILE: + edata->filename = pstrdup(value); + break; + case PG_DIAG_SOURCE_LINE: + edata->lineno = pg_atoi(const_cast(value), sizeof(int), '\0'); + break; + case PG_DIAG_SOURCE_FUNCTION: + edata->funcname = pstrdup(value); + break; + default: + elog(ERROR, "unrecognized error field code: %d", (int) code); + break; + } + } +} + diff --git a/src/gausskernel/storage/ipc/Makefile b/src/gausskernel/storage/ipc/Makefile index 5ce67f7673..010ea8de9a 100644 --- 
a/src/gausskernel/storage/ipc/Makefile +++ b/src/gausskernel/storage/ipc/Makefile @@ -17,6 +17,6 @@ ifneq "$(MAKECMDGOALS)" "clean" endif endif OBJS = ipc.o ipci.o pmsignal.o procarray.o procsignal.o shmem.o shmqueue.o \ - sinval.o sinvaladt.o standby.o + sinval.o sinvaladt.o standby.o shm_mq.o include $(top_srcdir)/src/gausskernel/common.mk diff --git a/src/gausskernel/storage/ipc/shm_mq.cpp b/src/gausskernel/storage/ipc/shm_mq.cpp new file mode 100644 index 0000000000..7c731336b3 --- /dev/null +++ b/src/gausskernel/storage/ipc/shm_mq.cpp @@ -0,0 +1,1158 @@ +/*------------------------------------------------------------------------- + * + * shm_mq.cpp + * single-reader, single-writer message queue + * + * Both the sender and the receiver must have a PGPROC; their respective + * process latches are used for synchronization. Only the sender may send, + * and only the receiver may receive. This is intended to allow a user + * backend to communicate with worker backends that it has registered. + * + * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/gausskernel/storage/ipc/shm_mq.cpp + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "miscadmin.h" +#include "pgstat.h" +#include "postmaster/bgworker.h" +#include "storage/procsignal.h" +#include "storage/shm_mq.h" +#include "storage/spin.h" +#include "gs_threadlocal.h" +#include "gs_thread.h" + +/* + * This structure represents the actual queue. + * + * Some notes on synchronization: + * + * mq_receiver and mq_bytes_read can only be changed by the receiver; and + * mq_sender and mq_bytes_written can only be changed by the sender. + * mq_receiver and mq_sender are protected by mq_mutex, although, importantly, + * they cannot change once set, and thus may be read without a lock once this + * is known to be the case. 
+ * + * mq_bytes_read and mq_bytes_written are not protected by the mutex. Instead, + * they are written atomically using 8 byte loads and stores. Memory barriers + * must be carefully used to synchronize reads and writes of these values with + * reads and writes of the actual data in mq_ring. + * + * mq_detached needs no locking. It can be set by either the sender or the + * receiver, but only ever from false to true, so redundant writes don't + * matter. It is important that if we set mq_detached and then set the + * counterparty's latch, the counterparty must be certain to see the change + * after waking up. Since SetLatch begins with a memory barrier and ResetLatch + * ends with one, this should be OK. + * + * mq_ring_size and mq_ring_offset never change after initialization, and + * can therefore be read without the lock. + * + * Importantly, mq_ring can be safely read and written without a lock. + * At any given time, the difference between mq_bytes_read and + * mq_bytes_written defines the number of bytes within mq_ring that contain + * unread data, and mq_bytes_read defines the position where those bytes + * begin. The sender can increase the number of unread bytes at any time, + * but only the receiver can give license to overwrite those bytes, by + * incrementing mq_bytes_read. Therefore, it's safe for the receiver to read + * the unread bytes it knows to be present without the lock. Conversely, + * the sender can write to the unused portion of the ring buffer without + * the lock, because nobody else can be reading or writing those bytes. The + * receiver could be making more bytes unused by incrementing mq_bytes_read, + * but that's OK. Note that it would be unsafe for the receiver to read any + * data it's already marked as read, or to write any data; and it would be + * unsafe for the sender to reread any data after incrementing + * mq_bytes_written, but fortunately there's no need for any of that. 
+ */ +struct shm_mq { + slock_t mq_mutex; + PGPROC *mq_receiver; + PGPROC *mq_sender; + pg_atomic_uint64 mq_bytes_read; + pg_atomic_uint64 mq_bytes_written; + Size mq_ring_size; + bool mq_detached; + uint8 mq_ring_offset; + char mq_ring[FLEXIBLE_ARRAY_MEMBER]; +}; + +/* + * This structure is a backend-private handle for access to a queue. + * + * mqh_queue is a pointer to the queue we've attached. + * + * If this queue is intended to connect the current process with a background + * worker that started it, the user can pass a pointer to the worker handle + * to shm_mq_attach(), and we'll store it in mqh_handle. The point of this + * is to allow us to begin sending to or receiving from that queue before the + * process we'll be communicating with has even been started. If it fails + * to start, the handle will allow us to notice that and fail cleanly, rather + * than waiting forever; see shm_mq_wait_internal. This is mostly useful in + * simple cases - e.g. where there are just 2 processes communicating; in + * more complex scenarios, every process may not have a BackgroundWorkerHandle + * available, or may need to watch for the failure of more than one other + * process at a time. + * + * When a message exists as a contiguous chunk of bytes in the queue - that is, + * it is smaller than the size of the ring buffer and does not wrap around + * the end - we return the message to the caller as a pointer into the buffer. + * For messages that are larger or happen to wrap, we reassemble the message + * locally by copying the chunks into a backend-local buffer. mqh_buffer is + * the buffer, and mqh_buflen is the number of bytes allocated for it. + * + * mqh_partial_bytes, mqh_expected_bytes, and mqh_length_word_complete + * are used to track the state of non-blocking operations. 
When the caller + * attempts a non-blocking operation that returns SHM_MQ_WOULD_BLOCK, they + * are expected to retry the call at a later time with the same argument; + * we need to retain enough state to pick up where we left off. + * mqh_length_word_complete tracks whether we are done sending or receiving + * (whichever we're doing) the entire length word. mqh_partial_bytes tracks + * the number of bytes read or written for either the length word or the + * message itself, and mqh_expected_bytes - which is used only for reads - + * tracks the expected total size of the payload. + * + * mqh_counterparty_attached tracks whether we know the counterparty to have + * attached to the queue at some previous point. This lets us avoid some + * mutex acquisitions. + * + * mqh_context is the memory context in effect at the time we attached to + * the shm_mq. The shm_mq_handle itself is allocated in this context, and + * we make sure any other allocations we do happen in this context as well, + * to avoid nasty surprises. 
+ */ +struct shm_mq_handle { + shm_mq *mqh_queue; + char *mqh_segment; + BackgroundWorkerHandle *mqh_handle; + char *mqh_buffer; + Size mqh_buflen; + Size mqh_consume_pending; + Size mqh_partial_bytes; + Size mqh_expected_bytes; + bool mqh_length_word_complete; + bool mqh_counterparty_attached; + MemoryContext mqh_context; +}; + +static void shm_mq_detach_internal(shm_mq *mq); +static shm_mq_result shm_mq_send_bytes(shm_mq_handle *mqh, Size nbytes, + const void *data, bool nowait, Size *bytes_written); +static shm_mq_result shm_mq_receive_bytes(shm_mq_handle *mqh,Size bytes_needed, bool nowait, + Size *nbytesp, void **datap); +static bool shm_mq_counterparty_gone(shm_mq *mq, + BackgroundWorkerHandle *handle); +static bool shm_mq_wait_internal(shm_mq *mq, PGPROC **ptr, + BackgroundWorkerHandle *handle); +static void shm_mq_inc_bytes_read(shm_mq *mq, Size n); +static void shm_mq_inc_bytes_written(shm_mq *mq, Size n); + +/* Minimum queue size is enough for header and at least one chunk of data. */ +const Size shm_mq_minimum_size = MAXALIGN(offsetof(shm_mq, mq_ring)) + MAXIMUM_ALIGNOF; + +#define MQH_INITIAL_BUFSIZE 8192 + +/* + * Initialize a new shared message queue. + */ +shm_mq *shm_mq_create(void *address, Size size) +{ + shm_mq *mq = (shm_mq*)address; + Size data_offset = MAXALIGN(offsetof(shm_mq, mq_ring)); + + /* If the size isn't MAXALIGN'd, just discard the odd bytes. */ + size = MAXALIGN_DOWN(size); + + /* Queue size must be large enough to hold some data. */ + Assert(size > data_offset); + + /* Initialize queue header. */ + SpinLockInit(&mq->mq_mutex); + mq->mq_receiver = NULL; + mq->mq_sender = NULL; + pg_atomic_init_u64(&mq->mq_bytes_read, 0); + pg_atomic_init_u64(&mq->mq_bytes_written, 0); + mq->mq_ring_size = size - data_offset; + mq->mq_detached = false; + mq->mq_ring_offset = data_offset - offsetof(shm_mq, mq_ring); + + return mq; +} + +/* + * Set the identity of the process that will receive from a shared message + * queue. 
+ */ +void shm_mq_set_receiver(shm_mq *mq, PGPROC *proc) +{ + PGPROC *sender = NULL; + + SpinLockAcquire(&mq->mq_mutex); + Assert(mq->mq_receiver == NULL); + mq->mq_receiver = proc; + sender = mq->mq_sender; + SpinLockRelease(&mq->mq_mutex); + + if (sender != NULL) + SetLatch(&sender->procLatch); +} + +/* + * Set the identity of the process that will send to a shared message queue. + */ +void shm_mq_set_sender(shm_mq *mq, PGPROC *proc) +{ + PGPROC *receiver = NULL; + + SpinLockAcquire(&mq->mq_mutex); + Assert(mq->mq_sender == NULL); + mq->mq_sender = proc; + receiver = mq->mq_receiver; + SpinLockRelease(&mq->mq_mutex); + + if (receiver != NULL) + SetLatch(&receiver->procLatch); +} + +/* + * Get the configured receiver. + */ +PGPROC *shm_mq_get_receiver(shm_mq *mq) +{ + PGPROC *receiver = NULL; + + SpinLockAcquire(&mq->mq_mutex); + receiver = mq->mq_receiver; + SpinLockRelease(&mq->mq_mutex); + + return receiver; +} + +/* + * Get the configured sender. + */ +PGPROC *shm_mq_get_sender(shm_mq *mq) +{ + PGPROC *sender = NULL; + + SpinLockAcquire(&mq->mq_mutex); + sender = mq->mq_sender; + SpinLockRelease(&mq->mq_mutex); + + return sender; +} + +/* + * Attach to a shared message queue so we can send or receive messages. + * + * The memory context in effect at the time this function is called should + * be one which will last for at least as long as the message queue itself. + * We'll allocate the handle in that context, and future allocations that + * are needed to buffer incoming data will happen in that context as well. + * + * + * If handle != NULL, the queue can be read or written even before the + * other process has attached. We'll wait for it to do so if needed. The + * handle must be for a background worker initialized with bgw_notify_pid + * equal to our PID. + * + * shm_mq_detach() should be called when done. 
This will free the + * shm_mq_handle and mark the queue itself as detached, so that our + * counterpart won't get stuck waiting for us to fill or drain the queue + * after we've already lost interest. + */ +shm_mq_handle *shm_mq_attach(shm_mq *mq, char *seg, BackgroundWorkerHandle *handle) +{ + shm_mq_handle *mqh = (shm_mq_handle*)palloc(sizeof(shm_mq_handle)); + + Assert(mq->mq_receiver == t_thrd.proc || mq->mq_sender == t_thrd.proc); + mqh->mqh_queue = mq; + mqh->mqh_segment = seg; + mqh->mqh_handle = handle; + mqh->mqh_buffer = NULL; + mqh->mqh_buflen = 0; + mqh->mqh_consume_pending = 0; + mqh->mqh_partial_bytes = 0; + mqh->mqh_expected_bytes = 0; + mqh->mqh_length_word_complete = false; + mqh->mqh_counterparty_attached = false; + mqh->mqh_context = CurrentMemoryContext; + + return mqh; +} + +/* + * Associate a BackgroundWorkerHandle with a shm_mq_handle just as if it had + * been passed to shm_mq_attach. + */ +void shm_mq_set_handle(shm_mq_handle *mqh, BackgroundWorkerHandle *handle) +{ + Assert(mqh->mqh_handle == NULL); + mqh->mqh_handle = handle; +} + +/* + * Write a message into a shared message queue. + */ +shm_mq_result shm_mq_send(shm_mq_handle *mqh, Size nbytes, const void *data, bool nowait) +{ + shm_mq_iovec iov; + + iov.data = (const char*)data; + iov.len = nbytes; + + return shm_mq_sendv(mqh, &iov, 1, nowait); +} + +/* + * Write a message into a shared message queue, gathered from multiple + * addresses. + * + * When nowait = false, we'll wait on our process latch when the ring buffer + * fills up, and then continue writing once the receiver has drained some data. + * The process latch is reset after each wait. + * + * When nowait = true, we do not manipulate the state of the process latch; + * instead, if the buffer becomes full, we return SHM_MQ_WOULD_BLOCK. In + * this case, the caller should call this function again, with the same + * arguments, each time the process latch is set. 
(Once begun, the sending + * of a message cannot be aborted except by detaching from the queue; changing + * the length or payload will corrupt the queue.) + */ +shm_mq_result shm_mq_sendv(shm_mq_handle *mqh, shm_mq_iovec *iov, int iovcnt, bool nowait) +{ + shm_mq_result res; + shm_mq *mq = mqh->mqh_queue; + PGPROC *receiver = NULL; + Size nbytes = 0; + Size bytes_written; + int i; + int which_iov = 0; + Size offset; + + Assert(mq->mq_sender == t_thrd.proc); + + /* Compute total size of write. */ + for (i = 0; i < iovcnt; ++i) + nbytes += iov[i].len; + + /* Try to write, or finish writing, the length word into the buffer. */ + while (!mqh->mqh_length_word_complete) { + Assert(mqh->mqh_partial_bytes < sizeof(Size)); + res = shm_mq_send_bytes(mqh, sizeof(Size) - mqh->mqh_partial_bytes, + ((char *)&nbytes) + mqh->mqh_partial_bytes, + nowait, &bytes_written); + if (res == SHM_MQ_DETACHED) { + /* Reset state in case caller tries to send another message. */ + mqh->mqh_partial_bytes = 0; + mqh->mqh_length_word_complete = false; + return res; + } + mqh->mqh_partial_bytes += bytes_written; + + if (mqh->mqh_partial_bytes >= sizeof(Size)) { + Assert(mqh->mqh_partial_bytes == sizeof(Size)); + + mqh->mqh_partial_bytes = 0; + mqh->mqh_length_word_complete = true; + } + + if (res != SHM_MQ_SUCCESS) + return res; + + /* Length word can't be split unless bigger than required alignment. */ + Assert(mqh->mqh_length_word_complete || sizeof(Size) > MAXIMUM_ALIGNOF); + } + + /* Write the actual data bytes into the buffer. */ + Assert(mqh->mqh_partial_bytes <= nbytes); + offset = mqh->mqh_partial_bytes; + do { + Size chunksize; + + /* Figure out which bytes need to be sent next. */ + if (offset >= iov[which_iov].len) { + offset -= iov[which_iov].len; + ++which_iov; + if (which_iov >= iovcnt) + break; + continue; + } + + /* + * We want to avoid copying the data if at all possible, but every + * chunk of bytes we write into the queue has to be MAXALIGN'd, except + * the last. 
Thus, if a chunk other than the last one ends on a + * non-MAXALIGN'd boundary, we have to combine the tail end of its + * data with data from one or more following chunks until we either + * reach the last chunk or accumulate a number of bytes which is + * MAXALIGN'd. + */ + if (which_iov + 1 < iovcnt && + offset + MAXIMUM_ALIGNOF > iov[which_iov].len) { + char tmpbuf[MAXIMUM_ALIGNOF]; + Size j = 0; + + for (;;) { + if (offset < iov[which_iov].len) { + tmpbuf[j] = iov[which_iov].data[offset]; + j++; + offset++; + if (j == MAXIMUM_ALIGNOF) + break; + } else { + offset -= iov[which_iov].len; + which_iov++; + if (which_iov >= iovcnt) + break; + } + } + + res = shm_mq_send_bytes(mqh, j, tmpbuf, nowait, &bytes_written); + if (res == SHM_MQ_DETACHED) { + /* Reset state in case caller tries to send another message. */ + mqh->mqh_partial_bytes = 0; + mqh->mqh_length_word_complete = false; + return res; + } + + mqh->mqh_partial_bytes += bytes_written; + if (res != SHM_MQ_SUCCESS) + return res; + continue; + } + + /* + * If this is the last chunk, we can write all the data, even if it + * isn't a multiple of MAXIMUM_ALIGNOF. Otherwise, we need to + * MAXALIGN_DOWN the write size. + */ + chunksize = iov[which_iov].len - offset; + if (which_iov + 1 < iovcnt) + chunksize = MAXALIGN_DOWN(chunksize); + res = shm_mq_send_bytes(mqh, chunksize, &iov[which_iov].data[offset], + nowait, &bytes_written); + if (res == SHM_MQ_DETACHED) { + /* Reset state in case caller tries to send another message. */ + mqh->mqh_length_word_complete = false; + mqh->mqh_partial_bytes = 0; + return res; + } + + mqh->mqh_partial_bytes += bytes_written; + offset += bytes_written; + if (res != SHM_MQ_SUCCESS) + return res; + } while (mqh->mqh_partial_bytes < nbytes); + + /* Reset for next message. */ + mqh->mqh_partial_bytes = 0; + mqh->mqh_length_word_complete = false; + + /* If queue has been detached, let caller know. 
*/ + if (mq->mq_detached) + return SHM_MQ_DETACHED; + + /* + * If the counterparty is known to have attached, we can read mq_receiver + * without acquiring the spinlock and assume it isn't NULL. Otherwise, + * more caution is needed. + */ + if (mqh->mqh_counterparty_attached) { + receiver = mq->mq_receiver; + } else { + SpinLockAcquire(&mq->mq_mutex); + receiver = mq->mq_receiver; + SpinLockRelease(&mq->mq_mutex); + if (receiver == NULL) + return SHM_MQ_SUCCESS; + mqh->mqh_counterparty_attached = true; + } + + /* Notify receiver of the newly-written data, and return. */ + SetLatch(&receiver->procLatch); + return SHM_MQ_SUCCESS; +} + +/* + * Receive a message from a shared message queue. + * + * We set *nbytes to the message length and *data to point to the message + * payload. If the entire message exists in the queue as a single, + * contiguous chunk, *data will point directly into shared memory; otherwise, + * it will point to a temporary buffer. This mostly avoids data copying in + * the hoped-for case where messages are short compared to the buffer size, + * while still allowing longer messages. In either case, the return value + * remains valid until the next receive operation is performed on the queue. + * + * When nowait = false, we'll wait on our process latch when the ring buffer + * is empty and we have not yet received a full message. The sender will + * set our process latch after more data has been written, and we'll resume + * processing. Each call will therefore return a complete message + * (unless the sender detaches the queue). + * + * When nowait = true, we do not manipulate the state of the process latch; + * instead, whenever the buffer is empty and we need to read from it, we + * return SHM_MQ_WOULD_BLOCK. In this case, the caller should call this + * function again after the process latch has been set. 
+ */ +shm_mq_result shm_mq_receive(shm_mq_handle *mqh, Size *nbytesp, void **datap, bool nowait) +{ + shm_mq *mq = mqh->mqh_queue; + shm_mq_result res; + Size rb = 0; + Size nbytes; + void *rawdata = NULL; + + Assert(mq->mq_receiver == t_thrd.proc); + + /* We can't receive data until the sender has attached. */ + if (!mqh->mqh_counterparty_attached) { + if (nowait) { + int counterparty_gone; + + /* + * We shouldn't return at this point at all unless the sender + * hasn't attached yet. However, the correct return value depends + * on whether the sender is still attached. If we first test + * whether the sender has ever attached and then test whether the + * sender has detached, there's a race condition: a sender that + * attaches and detaches very quickly might fool us into thinking + * the sender never attached at all. So, test whether our + * counterparty is definitively gone first, and only afterwards + * check whether the sender ever attached in the first place. + */ + counterparty_gone = (int)shm_mq_counterparty_gone(mq, mqh->mqh_handle); + if (shm_mq_get_sender(mq) == NULL) { + if (counterparty_gone) + return SHM_MQ_DETACHED; + else + return SHM_MQ_WOULD_BLOCK; + } + } else if (!shm_mq_wait_internal(mq, &mq->mq_sender, mqh->mqh_handle) + && shm_mq_get_sender(mq) == NULL) { + mq->mq_detached = true; + return SHM_MQ_DETACHED; + } + mqh->mqh_counterparty_attached = true; + } + + /* + * If we've consumed an amount of data greater than 1/4th of the ring + * size, mark it consumed in shared memory. We try to avoid doing this + * unnecessarily when only a small amount of data has been consumed, + * because SetLatch() is fairly expensive and we don't want to do it too + * often. + */ + if (mqh->mqh_consume_pending > mq->mq_ring_size / 4) { + shm_mq_inc_bytes_read(mq, mqh->mqh_consume_pending); + mqh->mqh_consume_pending = 0; + } + + /* Try to read, or finish reading, the length word from the buffer. 
*/ + while (!mqh->mqh_length_word_complete) { + /* Try to receive the message length word. */ + Assert(mqh->mqh_partial_bytes < sizeof(Size)); + res = shm_mq_receive_bytes(mqh, sizeof(Size) - mqh->mqh_partial_bytes, + nowait, &rb, &rawdata); + if (res != SHM_MQ_SUCCESS) + return res; + + /* + * Hopefully, we'll receive the entire message length word at once. + * But if sizeof(Size) > MAXIMUM_ALIGNOF, then it might be split over + * multiple reads. + */ + if (mqh->mqh_partial_bytes == 0 && rb >= sizeof(Size)) { + Size needed; + + nbytes = *(Size *)rawdata; + + /* If we've already got the whole message, we're done. */ + needed = MAXALIGN(sizeof(Size)) + MAXALIGN(nbytes); + if (rb >= needed) { + mqh->mqh_consume_pending += needed; + *nbytesp = nbytes; + *datap = ((char *)rawdata) + MAXALIGN(sizeof(Size)); + return SHM_MQ_SUCCESS; + } + + /* + * We don't have the whole message, but we at least have the whole + * length word. + */ + mqh->mqh_expected_bytes = nbytes; + mqh->mqh_length_word_complete = true; + mqh->mqh_consume_pending += MAXALIGN(sizeof(Size)); + rb -= MAXALIGN(sizeof(Size)); + } else { + Size lengthbytes; + + /* Can't be split unless bigger than required alignment. */ + Assert(sizeof(Size) > MAXIMUM_ALIGNOF); + + /* Message word is split; need buffer to reassemble. */ + if (mqh->mqh_buffer == NULL) { + mqh->mqh_buffer = (char*)MemoryContextAlloc(mqh->mqh_context, + MQH_INITIAL_BUFSIZE); + mqh->mqh_buflen = MQH_INITIAL_BUFSIZE; + } + Assert(mqh->mqh_buflen >= sizeof(Size)); + + /* Copy partial length word; remember to consume it. 
*/ + if (mqh->mqh_partial_bytes + rb > sizeof(Size)) + lengthbytes = sizeof(Size) - mqh->mqh_partial_bytes; + else + lengthbytes = rb; + errno_t rc = memcpy_s(&mqh->mqh_buffer[mqh->mqh_partial_bytes], lengthbytes, + rawdata, lengthbytes); + securec_check(rc, "\0", "\0"); + mqh->mqh_partial_bytes += lengthbytes; + mqh->mqh_consume_pending += MAXALIGN(lengthbytes); + rb -= lengthbytes; + + /* If we now have the whole word, we're ready to read payload. */ + if (mqh->mqh_partial_bytes >= sizeof(Size)) { + Assert(mqh->mqh_partial_bytes == sizeof(Size)); + mqh->mqh_expected_bytes = *(Size *)mqh->mqh_buffer; + mqh->mqh_length_word_complete = true; + mqh->mqh_partial_bytes = 0; + } + } + } + nbytes = mqh->mqh_expected_bytes; + + if (mqh->mqh_partial_bytes == 0) { + /* + * Try to obtain the whole message in a single chunk. If this works, + * we need not copy the data and can return a pointer directly into + * shared memory. + */ + res = shm_mq_receive_bytes(mqh, nbytes, nowait, &rb, &rawdata); + if (res != SHM_MQ_SUCCESS) + return res; + if (rb >= nbytes) { + mqh->mqh_length_word_complete = false; + mqh->mqh_consume_pending += MAXALIGN(nbytes); + *nbytesp = nbytes; + *datap = rawdata; + return SHM_MQ_SUCCESS; + } + + /* + * The message has wrapped the buffer. We'll need to copy it in order + * to return it to the client in one chunk. First, make sure we have + * a large enough buffer available. + */ + if (mqh->mqh_buflen < nbytes) { + Size newbuflen = Max(mqh->mqh_buflen, MQH_INITIAL_BUFSIZE); + + while (newbuflen < nbytes) + newbuflen *= 2; + + if (mqh->mqh_buffer != NULL) { + pfree(mqh->mqh_buffer); + mqh->mqh_buffer = NULL; + mqh->mqh_buflen = 0; + } + mqh->mqh_buffer = (char*)MemoryContextAlloc(mqh->mqh_context, newbuflen); + mqh->mqh_buflen = newbuflen; + } + } + + /* Loop until we've copied the entire message. */ + for (;;) { + Size still_needed; + + /* Copy as much as we can. 
*/ + Assert(mqh->mqh_partial_bytes + rb <= nbytes); + errno_t rc = memcpy_s(&mqh->mqh_buffer[mqh->mqh_partial_bytes], rb, rawdata, rb); + securec_check(rc, "\0", "\0"); + mqh->mqh_partial_bytes += rb; + + /* + * Update count of bytes that can be consumed, accounting for + * alignment padding. Note that this will never actually insert any + * padding except at the end of a message, because the buffer size is + * a multiple of MAXIMUM_ALIGNOF, and each read and write is as well. + */ + Assert(mqh->mqh_partial_bytes == nbytes || rb == MAXALIGN(rb)); + mqh->mqh_consume_pending += MAXALIGN(rb); + + /* If we got all the data, exit the loop. */ + if (mqh->mqh_partial_bytes >= nbytes) + break; + + /* Wait for some more data. */ + still_needed = nbytes - mqh->mqh_partial_bytes; + res = shm_mq_receive_bytes(mqh, still_needed, nowait, &rb, &rawdata); + if (res != SHM_MQ_SUCCESS) + return res; + if (rb > still_needed) + rb = still_needed; + } + + /* Return the complete message, and reset for next message. */ + *nbytesp = nbytes; + *datap = mqh->mqh_buffer; + mqh->mqh_length_word_complete = false; + mqh->mqh_partial_bytes = 0; + return SHM_MQ_SUCCESS; +} + +/* + * Wait for the other process that's supposed to use this queue to attach + * to it. + * + * The return value is SHM_MQ_DETACHED if the worker has already detached or + * if it dies; it is SHM_MQ_SUCCESS if we detect that the worker has attached. + * Note that we will only be able to detect that the worker has died before + * attaching if a background worker handle was passed to shm_mq_attach(). 
+ */ +shm_mq_result shm_mq_wait_for_attach(shm_mq_handle *mqh) +{ + shm_mq *mq = mqh->mqh_queue; + PGPROC **victim; + + if (shm_mq_get_receiver(mq) == t_thrd.proc) { + victim = &mq->mq_sender; + } else { + Assert(shm_mq_get_sender(mq) == t_thrd.proc); + victim = &mq->mq_receiver; + } + + if (shm_mq_wait_internal(mq, victim, mqh->mqh_handle)) + return SHM_MQ_SUCCESS; + else + return SHM_MQ_DETACHED; +} + +/* + * Detach from a shared message queue, and destroy the shm_mq_handle. + */ +void shm_mq_detach(shm_mq_handle *mqh) +{ + /* Notify counterparty that we're outta here. */ + shm_mq_detach_internal(mqh->mqh_queue); + + /* Release local memory associated with handle. */ + if (mqh->mqh_buffer != NULL) + pfree(mqh->mqh_buffer); + pfree(mqh); +} + +/* + * Notify counterparty that we're detaching from shared message queue. + * + * The purpose of this function is to make sure that the process + * with which we're communicating doesn't block forever waiting for us to + * fill or drain the queue once we've lost interest. When the sender + * detaches, the receiver can read any messages remaining in the queue; + * further reads will return SHM_MQ_DETACHED. If the receiver detaches, + * further attempts to send messages will likewise return SHM_MQ_DETACHED. + * + * This is separated out from shm_mq_detach() because if the on_dsm_detach + * callback fires, we only want to do this much. We do not try to touch + * the local shm_mq_handle, as it may have been pfree'd already. + */ +static void shm_mq_detach_internal(shm_mq *mq) +{ + PGPROC *victim = NULL; + + SpinLockAcquire(&mq->mq_mutex); + if (mq->mq_sender == t_thrd.proc) { + victim = mq->mq_receiver; + } else { + Assert(mq->mq_receiver == t_thrd.proc); + victim = mq->mq_sender; + } + mq->mq_detached = true; + SpinLockRelease(&mq->mq_mutex); + + if (victim != NULL) { + SetLatch(&victim->procLatch); + } +} + +/* + * Get the shm_mq from handle. 
+ */ +shm_mq *shm_mq_get_queue(shm_mq_handle *mqh) +{ + return mqh->mqh_queue; +} + +/* + * Write bytes into a shared message queue. + */ +static shm_mq_result shm_mq_send_bytes(shm_mq_handle *mqh, Size nbytes, const void *data, + bool nowait, Size *bytes_written) +{ + shm_mq *mq = mqh->mqh_queue; + Size sent = 0; + uint64 used; + Size ringsize = mq->mq_ring_size; + Size available; + + while (sent < nbytes) { + uint64 rb; + uint64 wb; + + /* Compute number of ring buffer bytes used and available. */ + rb = pg_atomic_read_u64(&mq->mq_bytes_read); + wb = pg_atomic_read_u64(&mq->mq_bytes_written); + Assert(wb >= rb); + used = wb - rb; + Assert(used <= ringsize); + available = Min(ringsize - used, nbytes - sent); + + /* + * Bail out if the queue has been detached. Note that we would be in + * trouble if the compiler decided to cache the value of + * mq->mq_detached in a register or on the stack across loop + * iterations. It probably shouldn't do that anyway since we'll + * always return, call an external function that performs a system + * call, or reach a memory barrier at some point later in the loop, + * but just to be sure, insert a compiler barrier here. + */ + pg_compiler_barrier(); + if (mq->mq_detached) { + *bytes_written = sent; + return SHM_MQ_DETACHED; + } + + if (available == 0 && !mqh->mqh_counterparty_attached) { + /* + * The queue is full, so if the receiver isn't yet known to be + * attached, we must wait for that to happen. 
+ */ + if (nowait) { + if (shm_mq_counterparty_gone(mq, mqh->mqh_handle)) { + *bytes_written = sent; + return SHM_MQ_DETACHED; + } + if (shm_mq_get_receiver(mq) == NULL) { + *bytes_written = sent; + return SHM_MQ_WOULD_BLOCK; + } + } else if (!shm_mq_wait_internal(mq, &mq->mq_receiver, mqh->mqh_handle)) { + mq->mq_detached = true; + *bytes_written = sent; + return SHM_MQ_DETACHED; + } + mqh->mqh_counterparty_attached = true; + + /* + * The receiver may have read some data after attaching, so we + * must not wait without rechecking the queue state. + */ + } else if (available == 0) { + /* + * Since mq->mqh_counterparty_attached is known to be true at this + * point, mq_receiver has been set, and it can't change once set. + * Therefore, we can read it without acquiring the spinlock. + */ + Assert(mqh->mqh_counterparty_attached); + SetLatch(&mq->mq_receiver->procLatch); + + /* Skip manipulation of our latch if nowait = true. */ + if (nowait) { + *bytes_written = sent; + return SHM_MQ_WOULD_BLOCK; + } + + /* + * Wait for our latch to be set. It might already be set for some + * unrelated reason, but that'll just result in one extra trip + * through the loop. It's worth it to avoid resetting the latch + * at top of loop, because setting an already-set latch is much + * cheaper than setting one that has been reset. + */ + (void)WaitLatch(&t_thrd.proc->procLatch, WL_LATCH_SET, 0); + + /* Reset the latch so we don't spin. */ + ResetLatch(&t_thrd.proc->procLatch); + + /* An interrupt may have occurred while we were waiting. */ + CHECK_FOR_INTERRUPTS(); + } else { + Size offset; + Size sendnow; + + offset = wb % (uint64)ringsize; + sendnow = Min(available, ringsize - offset); + + /* + * Write as much data as we can via a single memcpy(). Make sure + * these writes happen after the read of mq_bytes_read, above. + * This barrier pairs with the one in shm_mq_inc_bytes_read. 
+ * (Since we're separating the read of mq_bytes_read from a + * subsequent write to mq_ring, we need a full barrier here.) + */ + pg_memory_barrier(); + errno_t rc = memcpy_s(&mq->mq_ring[mq->mq_ring_offset + offset], sendnow, + (char*)data + sent, sendnow); + securec_check(rc, "\0", "\0"); + sent += sendnow; + + /* + * Update count of bytes written, with alignment padding. Note + * that this will never actually insert any padding except at the + * end of a run of bytes, because the buffer size is a multiple of + * MAXIMUM_ALIGNOF, and each read is as well. + */ + Assert(sent == nbytes || sendnow == MAXALIGN(sendnow)); + shm_mq_inc_bytes_written(mq, MAXALIGN(sendnow)); + + /* + * For efficiency, we don't set the reader's latch here. We'll do + * that only when the buffer fills up or after writing an entire + * message. + */ + } + } + + *bytes_written = sent; + return SHM_MQ_SUCCESS; +} + +/* + * Wait until at least *nbytesp bytes are available to be read from the + * shared message queue, or until the buffer wraps around. If the queue is + * detached, returns SHM_MQ_DETACHED. If nowait is specified and a wait + * would be required, returns SHM_MQ_WOULD_BLOCK. Otherwise, *datap is set + * to the location at which data bytes can be read, *nbytesp is set to the + * number of bytes which can be read at that address, and the return value + * is SHM_MQ_SUCCESS. + */ +static shm_mq_result shm_mq_receive_bytes(shm_mq_handle *mqh, Size bytes_needed, bool nowait, + Size *nbytesp, void **datap) +{ + shm_mq *mq = mqh->mqh_queue; + Size ringsize = mq->mq_ring_size; + uint64 used; + uint64 written; + + for (;;) { + Size offset; + uint64 read; + + /* Get bytes written, so we can compute what's available to read. */ + written = pg_atomic_read_u64(&mq->mq_bytes_written); + + /* + * Get bytes read. Include bytes we could consume but have not yet + * consumed. 
+ */ + read = pg_atomic_read_u64(&mq->mq_bytes_read) + + mqh->mqh_consume_pending; + used = written - read; + Assert(used <= ringsize); + offset = read % (uint64)ringsize; + + /* If we have enough data or buffer has wrapped, we're done. */ + if (used >= bytes_needed || offset + used >= ringsize) { + *nbytesp = Min(used, ringsize - offset); + *datap = &mq->mq_ring[mq->mq_ring_offset + offset]; + + /* + * Separate the read of mq_bytes_written, above, from caller's + * attempt to read the data itself. Pairs with the barrier in + * shm_mq_inc_bytes_written. + */ + pg_read_barrier(); + return SHM_MQ_SUCCESS; + } + + /* + * Fall out before waiting if the queue has been detached. + * + * Note that we don't check for this until *after* considering whether + * the data already available is enough, since the receiver can finish + * receiving a message stored in the buffer even after the sender has + * detached. + */ + if (mq->mq_detached) { + /* + * If the writer advanced mq_bytes_written and then set + * mq_detached, we might not have read the final value of + * mq_bytes_written above. Insert a read barrier and then check + * again if mq_bytes_written has advanced. + */ + pg_read_barrier(); + if (written != pg_atomic_read_u64(&mq->mq_bytes_written)) + continue; + + return SHM_MQ_DETACHED; + } + + /* + * We didn't get enough data to satisfy the request, so mark any data + * previously-consumed as read to make more buffer space. + */ + if (mqh->mqh_consume_pending > 0) { + shm_mq_inc_bytes_read(mq, mqh->mqh_consume_pending); + mqh->mqh_consume_pending = 0; + } + + /* Skip manipulation of our latch if nowait = true. */ + if (nowait) + return SHM_MQ_WOULD_BLOCK; + + /* + * Wait for our latch to be set. It might already be set for some + * unrelated reason, but that'll just result in one extra trip through + * the loop. It's worth it to avoid resetting the latch at top of + * loop, because setting an already-set latch is much cheaper than + * setting one that has been reset. 
+ */ + (void)WaitLatch(&t_thrd.proc->procLatch, WL_LATCH_SET, 0); + + /* Reset the latch so we don't spin. */ + ResetLatch(&t_thrd.proc->procLatch); + + /* An interrupt may have occurred while we were waiting. */ + CHECK_FOR_INTERRUPTS(); + } +} + +/* + * Test whether a counterparty who may not even be alive yet is definitely gone. + */ +static bool shm_mq_counterparty_gone(shm_mq *mq, BackgroundWorkerHandle *handle) +{ + ThreadId pid; + + /* If the queue has been detached, counterparty is definitely gone. */ + if (mq->mq_detached) { + return true; + } + + /* If there's a handle, check worker status. */ + if (handle != NULL) { + BgwHandleStatus status; + + /* Check for unexpected worker death. */ + status = GetBackgroundWorkerPid(handle, &pid); + if (status != BGWH_STARTED && status != BGWH_NOT_YET_STARTED) { + /* Mark it detached, just to make it official. */ + mq->mq_detached = true; + return true; + } + } + + /* Counterparty is not definitively gone. */ + return false; +} + +/* + * This is used when a process is waiting for its counterpart to attach to the + * queue. We exit when the other process attaches as expected, or, if + * handle != NULL, when the referenced background process or the postmaster + * dies. Note that if handle == NULL, and the process fails to attach, we'll + * potentially get stuck here forever waiting for a process that may never + * start. We do check for interrupts, though. + * + * ptr is a pointer to the memory address that we're expecting to become + * non-NULL when our counterpart attaches to the queue. + */ +static bool shm_mq_wait_internal(shm_mq *mq, PGPROC **ptr, BackgroundWorkerHandle *handle) +{ + bool result = false; + + for (;;) { + BgwHandleStatus status; + ThreadId pid; + + /* Acquire the lock just long enough to check the pointer. */ + SpinLockAcquire(&mq->mq_mutex); + result = (*ptr != NULL); + SpinLockRelease(&mq->mq_mutex); + + /* Fail if detached; else succeed if initialized. 
*/ + if (mq->mq_detached) { + result = false; + break; + } + if (result) { + break; + } + if (handle != NULL) { + /* Check for unexpected worker death. */ + status = GetBackgroundWorkerPid(handle, &pid); + if (status != BGWH_STARTED && status != BGWH_NOT_YET_STARTED) { + result = false; + break; + } + } + + /* Wait to be signalled. */ + (void)WaitLatch(&t_thrd.proc->procLatch, WL_LATCH_SET, 0); + + /* Reset the latch so we don't spin. */ + ResetLatch(&t_thrd.proc->procLatch); + + /* An interrupt may have occurred while we were waiting. */ + CHECK_FOR_INTERRUPTS(); + } + + return result; +} + +/* + * Increment the number of bytes read. + */ +static void shm_mq_inc_bytes_read(shm_mq *mq, Size n) +{ + PGPROC *sender = NULL; + + /* + * Separate prior reads of mq_ring from the increment of mq_bytes_read + * which follows. This pairs with the full barrier in + * shm_mq_send_bytes(). We only need a read barrier here because the + * increment of mq_bytes_read is actually a read followed by a dependent + * write. + */ + pg_read_barrier(); + + /* + * There's no need to use pg_atomic_fetch_add_u64 here, because nobody + * else can be changing this value. This method should be cheaper. + */ + pg_atomic_write_u64(&mq->mq_bytes_read, + pg_atomic_read_u64(&mq->mq_bytes_read) + n); + + /* + * We shouldn't have any bytes to read without a sender, so we can read + * mq_sender here without a lock. Once it's initialized, it can't change. + */ + sender = mq->mq_sender; + Assert(sender != NULL); + SetLatch(&sender->procLatch); +} + +/* + * Increment the number of bytes written. + */ +static void shm_mq_inc_bytes_written(shm_mq *mq, Size n) +{ + /* + * Separate prior reads of mq_ring from the write of mq_bytes_written + * which we're about to do. Pairs with the read barrier found in + * shm_mq_get_receive_bytes. + */ + pg_write_barrier(); + + /* + * There's no need to use pg_atomic_fetch_add_u64 here, because nobody + * else can be changing this value. 
This method avoids taking the bus + * lock unnecessarily. + */ + pg_atomic_write_u64(&mq->mq_bytes_written, + pg_atomic_read_u64(&mq->mq_bytes_written) + n); +} + diff --git a/src/include/libpq/libpq.h b/src/include/libpq/libpq.h index e04ea17ae9..3988224f46 100755 --- a/src/include/libpq/libpq.h +++ b/src/include/libpq/libpq.h @@ -1,7 +1,7 @@ /* ------------------------------------------------------------------------- * * libpq.h - * POSTGRES LIBPQ buffer structure definitions. + * POSTGRES LIBPQ buffer structure definitions. * * * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group @@ -22,19 +22,43 @@ /* ---------------- * PQArgBlock - * Information (pointer to array of this structure) required - * for the PQfn() call. (This probably ought to go somewhere else...) + * Information (pointer to array of this structure) required + * for the PQfn() call. (This probably ought to go somewhere else...) * ---------------- */ typedef struct { int len; int isint; union { - int* ptr; /* can't use void (dec compiler barfs) */ + int *ptr; /* can't use void (dec compiler barfs) */ int integer; } u; } PQArgBlock; +typedef struct { + void (*comm_reset) (void); + int (*flush) (void); + int (*flush_if_writable) (void); + bool (*is_send_pending) (void); + int (*putmessage) (char msgtype, const char* s, size_t len); + int (*putmessage_noblock) (char msgtype, const char* s, size_t len); + void (*startcopyout) (void); + void (*endcopyout) (bool errorAbort); +} PQcommMethods; + +extern PGDLLIMPORT THR_LOCAL PQcommMethods *PqCommMethods; + +#define pq_comm_reset() (PqCommMethods->comm_reset()) +#define pq_flush() (PqCommMethods->flush()) +#define pq_flush_if_writable() (PqCommMethods->flush_if_writable()) +#define pq_is_send_pending() (PqCommMethods->is_send_pending()) +#define pq_putmessage(msgtype, s, len) \ + (PqCommMethods->putmessage(msgtype, s, len)) +#define pq_putmessage_noblock(msgtype, s, len) \ + (PqCommMethods->putmessage_noblock(msgtype, s, len)) +#define 
pq_startcopyout() (PqCommMethods->startcopyout()) +#define pq_endcopyout(errorAbort) (PqCommMethods->endcopyout(errorAbort)) + /* * External functions. */ @@ -49,7 +73,6 @@ extern int StreamConnection(pgsocket server_fd, Port* port); extern void StreamClose(pgsocket sock); extern void TouchSocketFile(void); extern void pq_init(void); -extern void pq_comm_reset(void); extern int pq_getbytes(char* s, size_t len); extern int pq_getstring(StringInfo s); extern int pq_getmessage(StringInfo s, int maxlen); @@ -57,14 +80,7 @@ extern int pq_getbyte(void); extern int pq_peekbyte(void); extern int pq_getbyte_if_available(unsigned char* c); extern int pq_putbytes(const char* s, size_t len); -extern int pq_flush(void); -extern int pq_flush_if_writable(void); extern void pq_flush_timedwait(int timeout); -extern bool pq_is_send_pending(void); -extern int pq_putmessage(char msgtype, const char* s, size_t len); -extern int pq_putmessage_noblock(char msgtype, const char* s, size_t len); -extern void pq_startcopyout(void); -extern void pq_endcopyout(bool errorAbort); extern bool pq_select(int timeout_ms); extern void pq_abandon_sendbuffer(void); extern void pq_abandon_recvbuffer(void); diff --git a/src/include/libpq/pqformat.h b/src/include/libpq/pqformat.h index 28f9eca5f2..0a14b3b39e 100755 --- a/src/include/libpq/pqformat.h +++ b/src/include/libpq/pqformat.h @@ -46,6 +46,7 @@ extern const char* pq_getmsgbytes(StringInfo msg, int datalen); extern void pq_copymsgbytes(StringInfo msg, char* buf, int datalen); extern char* pq_getmsgtext(StringInfo msg, int rawbytes, int* nbytes); extern const char* pq_getmsgstring(StringInfo msg); +extern const char* pq_getmsgrawstring(StringInfo msg); extern void pq_getmsgend(StringInfo msg); /* diff --git a/src/include/libpq/pqmq.h b/src/include/libpq/pqmq.h new file mode 100644 index 0000000000..2a749790ee --- /dev/null +++ b/src/include/libpq/pqmq.h @@ -0,0 +1,25 @@ +/*------------------------------------------------------------------------- + * 
+ * pqmq.h + * Use the frontend/backend protocol for communication over a shm_mq + * + * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/libpq/pqmq.h + * + *------------------------------------------------------------------------- + */ +#ifndef PQMQ_H +#define PQMQ_H + +#include "lib/stringinfo.h" +#include "storage/shm_mq.h" + +extern void pq_redirect_to_shm_mq(shm_mq_handle* mqh); +extern void pq_stop_redirect_to_shm_mq(void); +extern void pq_set_parallel_master(pid_t pid, BackendId backend_id); + +extern void pq_parse_errornotice(StringInfo str, ErrorData* edata); + +#endif /* PQMQ_H */ diff --git a/src/include/storage/procsignal.h b/src/include/storage/procsignal.h index 0a3efda844..1c2df987d4 100644 --- a/src/include/storage/procsignal.h +++ b/src/include/storage/procsignal.h @@ -55,6 +55,7 @@ typedef enum { PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK, PROCSIG_EXECUTOR_FLAG, + PROCSIG_PARALLEL_MESSAGE, /* message from cooperating parallel backend */ NUM_PROCSIGNALS /* Must be last! */ } ProcSignalReason; diff --git a/src/include/storage/shm_mq.h b/src/include/storage/shm_mq.h new file mode 100644 index 0000000000..27f98af449 --- /dev/null +++ b/src/include/storage/shm_mq.h @@ -0,0 +1,82 @@ +/*------------------------------------------------------------------------- + * + * shm_mq.h + * single-reader, single-writer shared memory message queue + * + * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/shm_mq.h + * + *------------------------------------------------------------------------- + */ +#ifndef SHM_MQ_H +#define SHM_MQ_H + +#include "postmaster/bgworker.h" +#include "storage/proc.h" + +/* The queue itself, in shared memory. */ +struct shm_mq; +typedef struct shm_mq shm_mq; + +/* Backend-private state. 
*/ +struct shm_mq_handle; +typedef struct shm_mq_handle shm_mq_handle; + +/* Descriptors for a single write spanning multiple locations. */ +typedef struct { + const char *data; + Size len; +} shm_mq_iovec; + +/* Possible results of a send or receive operation. */ +typedef enum { + SHM_MQ_SUCCESS, /* Sent or received a message. */ + SHM_MQ_WOULD_BLOCK, /* Not completed; retry later. */ + SHM_MQ_DETACHED /* Other process has detached queue. */ +} shm_mq_result; + +/* + * Primitives to create a queue and set the sender and receiver. + * + * Both the sender and the receiver must be set before any messages are read + * or written, but they need not be set by the same process. Each must be + * set exactly once. + */ +extern shm_mq *shm_mq_create(void *address, Size size); +extern void shm_mq_set_receiver(shm_mq *mq, PGPROC *); +extern void shm_mq_set_sender(shm_mq *mq, PGPROC *); + +/* Accessor methods for sender and receiver. */ +extern PGPROC *shm_mq_get_receiver(shm_mq *); +extern PGPROC *shm_mq_get_sender(shm_mq *); + +/* Set up backend-local queue state. */ +extern shm_mq_handle *shm_mq_attach(shm_mq *mq, char *seg, + BackgroundWorkerHandle *handle); + +/* Associate worker handle with shm_mq. */ +extern void shm_mq_set_handle(shm_mq_handle *, BackgroundWorkerHandle *); + +/* Break connection, release handle resources. */ +extern void shm_mq_detach(shm_mq_handle *mqh); + +/* Get the shm_mq from handle. */ +extern shm_mq *shm_mq_get_queue(shm_mq_handle *mqh); + +/* Send or receive messages. */ +extern shm_mq_result shm_mq_send(shm_mq_handle *mqh, + Size nbytes, const void *data, bool nowait); +extern shm_mq_result shm_mq_sendv(shm_mq_handle *mqh, + shm_mq_iovec *iov, int iovcnt, bool nowait); +extern shm_mq_result shm_mq_receive(shm_mq_handle *mqh, + Size *nbytesp, void **datap, bool nowait); + +/* Wait for our counterparty to attach to the queue. */ +extern shm_mq_result shm_mq_wait_for_attach(shm_mq_handle *mqh); + +/* Smallest possible queue. 
*/ +extern PGDLLIMPORT const Size shm_mq_minimum_size; + +#endif /* SHM_MQ_H */ -- Gitee From ecbb5313784d6c7103c16df6ce173ce782e0375c Mon Sep 17 00:00:00 2001 From: jiang_jianyu Date: Tue, 25 Aug 2020 21:36:32 +0800 Subject: [PATCH 3/6] GUC serialization for Autonomous Transaction --- src/common/backend/utils/misc/guc.cpp | 548 ++++++++++++++++++++++++++ src/include/utils/guc.h | 4 + 2 files changed, 552 insertions(+) diff --git a/src/common/backend/utils/misc/guc.cpp b/src/common/backend/utils/misc/guc.cpp index a7977f8784..000be09fb8 100644 --- a/src/common/backend/utils/misc/guc.cpp +++ b/src/common/backend/utils/misc/guc.cpp @@ -198,6 +198,18 @@ #define MAX_PASSWORD_ASSIGNED_CHARACTER 999 /* max length of password */ #define MAX_PASSWORD_LENGTH 999 +/* + * Precision with which REAL type guc values are to be printed for GUC + * serialization. + */ +static const int REALTYPE_PRECISION = 17; + +static const int TYPICAL_LEN_RANGE_OF_VALUE = 1000; +static const int MAX_DISPLAY_LEN_OF_BOOL = 5; +static const int TYPICAL_DISPLAY_LEN_OF_INT = 4; +static const int MAX_DISPLAY_LEN_OF_INT = 11; +static const int MAX_DISPLAY_LEN_OF_INT64 = 20; +static const int LEN_OF_REAL_EXCEPT_PRECISION = 8; extern volatile int synchronous_commit; extern volatile bool most_available_sync; @@ -17767,6 +17779,542 @@ ArrayType* GUCArrayReset(ArrayType* array) return newarray; } +/* GUC serialization */ +static bool CanSkipGucvar(const struct config_generic* gconf); +static Size EstimateVariableSize(const struct config_generic* gconf); +static void DoSerialize(char** destptr, Size& maxbytes, const char* fmt, ...); +static void DoSerializeBinary(char** destptr, Size& maxbytes, const char* val, Size valsize); +static void SerializeVariable(char** destptr, Size& maxbytes, const struct config_generic* gconf); +static void InitializeOneGUCOption(struct config_generic& gconf); +static char* ReadGucstate(char** srcptr, const char* srcend); +static void ReadGucstateBinary(char** srcptr, const 
char* srcend, char* dest, Size size); + +/* + * CanSkipGucvar: + * When serializing, determine whether to skip this GUC. When restoring, the + * negation of this test determines whether to restore the compiled-in default + * value before processing serialized values. + * + * A PGC_S_DEFAULT setting on the serialize side will typically match new + * postmaster children, but that can be false when got_SIGHUP == true and the + * pending configuration change modifies this setting. Nonetheless, we omit + * PGC_S_DEFAULT settings from serialization and make up for that by restoring + * defaults before applying serialized values. + * + * PGC_POSTMASTER variables always have the same value in every child of a + * particular postmaster. Most PGC_INTERNAL variables are compile-time + * constants; a few, like server_encoding and lc_ctype, are handled specially + * outside the serialize/restore procedure. Therefore, SerializeGUCState() + * never sends these, and RestoreGUCState() never changes them. + */ +static bool CanSkipGucvar(const struct config_generic* gconf) +{ + return gconf->context == PGC_POSTMASTER || + gconf->context == PGC_INTERNAL || gconf->source == PGC_S_DEFAULT || + strcmp(gconf->name, "role") == 0; +} + + +/* + * EstimateVariableSize: + * Estimate max size for dumping the given GUC variable. + */ +static Size EstimateVariableSize(const struct config_generic* gconf) +{ + Size size; + Size valsize = 0; + + if (CanSkipGucvar(gconf)) { + return 0; + } + + size = strlen(gconf->name) + 1; + + /* Get the maximum display length of the GUC value. */ + switch (gconf->vartype) { + case PGC_BOOL: { + valsize = MAX_DISPLAY_LEN_OF_BOOL; + break; + } + + case PGC_INT: { + const struct config_int* conf = (const struct config_int*)gconf; + + /* + * Instead of getting the exact display length, use max + * length. Also reduce the max length for typical ranges of + * small values. Maximum value is 2147483647, i.e. 10 chars. + * Include one byte for sign. 
+             */
+            if (Abs(*conf->variable) < TYPICAL_LEN_RANGE_OF_VALUE) {
+                valsize = TYPICAL_DISPLAY_LEN_OF_INT;
+            } else {
+                valsize = MAX_DISPLAY_LEN_OF_INT;
+            }
+            break;
+        }
+
+        case PGC_INT64: {
+            const struct config_int64* conf = (const struct config_int64*)gconf;
+
+            if (Abs(*conf->variable) < TYPICAL_LEN_RANGE_OF_VALUE) {
+                valsize = TYPICAL_DISPLAY_LEN_OF_INT;
+            } else {
+                valsize = MAX_DISPLAY_LEN_OF_INT64; /* Maximum value is 9,223,372,036,854,775,807, i.e. 19 chars. */
+            }
+            break;
+        }
+
+        case PGC_REAL: {
+            /*
+             * We are going to print it with %.17g. Account for sign,
+             * decimal point, and e+nnn notation. E.g.
+             * -3.9932904234000002e+110
+             */
+            valsize = LEN_OF_REAL_EXCEPT_PRECISION + REALTYPE_PRECISION;
+            break;
+        }
+
+        case PGC_STRING: {
+            const struct config_string* conf = (const struct config_string*)gconf;
+            /*
+             * If the value is NULL, we transmit it as an empty string.
+             * Although this is not physically the same value, GUC
+             * generally treats a NULL the same as empty string.
+             */
+            if (*conf->variable) {
+                valsize = strlen(*conf->variable);
+            } else {
+                valsize = 0;
+            }
+            break;
+        }
+
+        case PGC_ENUM: {
+            struct config_enum* conf = (struct config_enum*) gconf;
+            valsize = strlen(config_enum_lookup_by_value(conf, *conf->variable));
+            break;
+        }
+        default:
+            break;
+    }
+
+    /* Allow space for terminating zero-byte */
+    size = add_size(size, valsize + 1);
+
+    if (gconf->sourcefile) {
+        size = add_size(size, strlen(gconf->sourcefile));
+    }
+
+    /* Allow space for terminating zero-byte */
+    size = add_size(size, 1);
+
+    /* Include line whenever we include file. 
*/
+    if (gconf->sourcefile && gconf->sourcefile[0]) {
+        size = add_size(size, sizeof(gconf->sourceline));
+    }
+
+    size = add_size(size, sizeof(gconf->source));
+    size = add_size(size, sizeof(gconf->scontext));
+
+    return size;
+}
+
+/*
+ * EstimateGUCStateSpace:
+ * Returns the size needed to store the GUC state for the current process
+ */
+Size EstimateGUCStateSpace(void)
+{
+    Size size;
+    int i;
+
+    /* Add space reqd for saving the data size of the guc state */
+    size = sizeof(Size);
+
+    /* Add up the space needed for each GUC variable */
+    for (i = 0; i < u_sess->num_guc_variables; i++) {
+        size = add_size(size, EstimateVariableSize(u_sess->guc_variables[i]));
+    }
+
+    return size;
+}
+
+/*
+ * DoSerialize:
+ * Copies the formatted string into the destination. Moves ahead the
+ * destination pointer, and decrements the maxbytes by that many bytes. If
+ * maxbytes is not sufficient to copy the string, error out.
+ */
+static void DoSerialize(char** destptr, Size& maxbytes, const char* fmt, ...)
+{
+    va_list vargs;
+    int nRet;
+
+    if (maxbytes == 0) {
+        elog(ERROR, "not enough space to serialize GUC state");
+    }
+
+    va_start(vargs, fmt);
+    nRet = vsnprintf_s(*destptr, maxbytes, maxbytes - 1, fmt, vargs);
+    securec_check_ss(nRet, "\0", "\0");
+    va_end(vargs);
+
+    /*
+     * Cater to portability hazards in the vsnprintf() return value just like
+     * appendPQExpBufferVA() does. Note that this requires an extra byte of
+     * slack at the end of the buffer. Since serialize_variable() ends with a
+     * do_serialize_binary() rather than a do_serialize(), we'll always have
+     * that slack; estimate_variable_size() need not add a byte for it.
+     */
+    if (nRet < 0) {
+        /* Shouldn't happen. Better show errno description. */
+        elog(ERROR, "vsnprintf failed: %s with format string \"%s\"", strerror(errno), fmt);
+    }
+    if (nRet >= static_cast<int>(maxbytes)) {
+        /* This shouldn't happen either, really. 
*/
+        elog(ERROR, "not enough space to serialize GUC state");
+    }
+
+    /* Shift the destptr ahead of the null terminator */
+    *destptr += nRet + 1;
+    maxbytes -= static_cast<Size>(nRet) + 1;
+}
+
+/* Binary copy version of DoSerialize() */
+static void DoSerializeBinary(char** destptr, Size& maxbytes, const char* val, Size valsize)
+{
+    if (valsize > maxbytes) {
+        elog(ERROR, "not enough space to serialize GUC state");
+    }
+
+    errno_t rc = memcpy_s(*destptr, maxbytes, val, valsize);
+    securec_check(rc, "\0", "\0");
+    *destptr += valsize;
+    maxbytes -= valsize;
+}
+
+/*
+ * SerializeVariable:
+ * Dumps name, value and other information of a GUC variable into destptr.
+ */
+static void SerializeVariable(char** destptr, Size& maxbytes, const struct config_generic* gconf)
+{
+    if (CanSkipGucvar(gconf)) {
+        return;
+    }
+
+    DoSerialize(destptr, maxbytes, "%s", gconf->name);
+
+    switch (gconf->vartype) {
+        case PGC_BOOL: {
+            const struct config_bool* conf = (const struct config_bool*)gconf;
+            DoSerialize(destptr, maxbytes, (*conf->variable ? "true" : "false"));
+            break;
+        }
+
+        case PGC_INT: {
+            const struct config_int* conf = (const struct config_int*)gconf;
+            DoSerialize(destptr, maxbytes, "%d", *conf->variable);
+            break;
+        }
+
+        case PGC_INT64: {
+            const struct config_int64* conf = (const struct config_int64*)gconf;
+            DoSerialize(destptr, maxbytes, "%ld", *conf->variable);
+            break;
+        }
+
+        case PGC_REAL: {
+            const struct config_real* conf = (const struct config_real*)gconf;
+            DoSerialize(destptr, maxbytes, "%.*e", REALTYPE_PRECISION, *conf->variable);
+            break;
+        }
+
+        case PGC_STRING:{
+            const struct config_string* conf = (const struct config_string*)gconf;
+            DoSerialize(destptr, maxbytes, "%s", *conf->variable ? 
*conf->variable : ""); + break; + } + + case PGC_ENUM:{ + struct config_enum* conf = (struct config_enum*)gconf; + DoSerialize(destptr, maxbytes, "%s", config_enum_lookup_by_value(conf, *conf->variable)); + break; + } + default: + break; + } + + DoSerialize(destptr, maxbytes, "%s", (gconf->sourcefile ? gconf->sourcefile : "")); + + if (gconf->sourcefile) { + DoSerializeBinary(destptr, maxbytes, reinterpret_cast<const char*>(&gconf->sourceline), + sizeof(gconf->sourceline)); + } + + DoSerializeBinary(destptr, maxbytes, reinterpret_cast<const char*>(&gconf->source), sizeof(gconf->source)); + DoSerializeBinary(destptr, maxbytes, reinterpret_cast<const char*>(&gconf->scontext), sizeof(gconf->scontext)); +} + +/* + * SerializeGUCState: + * Dumps the complete GUC state onto the memory location at startAddress. + */ +void SerializeGUCState(Size maxsize, char* startAddress) +{ + char *curptr; + Size actualSize; + Size bytesLeft; + int i; + + /* Reserve space for saving the actual size of the guc state */ + Assert(maxsize > sizeof(actualSize)); + curptr = startAddress + sizeof(actualSize); + bytesLeft = maxsize - sizeof(actualSize); + + for (i = 0; i < u_sess->num_guc_variables; i++) { + SerializeVariable(&curptr, bytesLeft, u_sess->guc_variables[i]); + } + + /* Store actual size without assuming alignment of startAddress. */ + actualSize = maxsize - bytesLeft - sizeof(actualSize); + errno_t rc = memcpy_s(startAddress, maxsize, &actualSize, sizeof(actualSize)); + securec_check(rc, "\0", "\0"); +} + +/* + * Initialize one GUC option variable to its compiled-in default. + * + * Note: the reason for calling check_hooks is not that we think the boot_val + * might fail, but that the hooks might wish to compute an "extra" struct.
+ */ +static void InitializeOneGUCOption(struct config_generic& gconf) +{ + gconf.status = 0; + gconf.source = PGC_S_DEFAULT; + gconf.reset_source = PGC_S_DEFAULT; + gconf.scontext = PGC_INTERNAL; + gconf.reset_scontext = PGC_INTERNAL; + gconf.stack = NULL; + gconf.extra = NULL; + gconf.sourcefile = NULL; + gconf.sourceline = 0; + + switch (gconf.vartype) { + case PGC_BOOL: { + struct config_bool *conf = (struct config_bool*)&gconf; + bool newval = conf->boot_val; + void* extra = NULL; + + if (!call_bool_check_hook(conf, &newval, &extra, PGC_S_DEFAULT, LOG)) { + elog(FATAL, "failed to initialize %s to %d", conf->gen.name, static_cast(newval)); + } + if (conf->assign_hook) { + (*conf->assign_hook) (newval, extra); + } + *conf->variable = conf->reset_val = newval; + conf->gen.extra = conf->reset_extra = extra; + break; + } + + case PGC_INT: { + struct config_int* conf = (struct config_int*)&gconf; + int newval = conf->boot_val; + void* extra = NULL; + + Assert(newval >= conf->min); + Assert(newval <= conf->max); + if (!call_int_check_hook(conf, &newval, &extra, PGC_S_DEFAULT, LOG)) { + elog(FATAL, "failed to initialize %s to %d", conf->gen.name, newval); + } + if (conf->assign_hook) { + (*conf->assign_hook) (newval, extra); + } + *conf->variable = conf->reset_val = newval; + conf->gen.extra = conf->reset_extra = extra; + break; + } + + case PGC_INT64: { + struct config_int64* conf = (struct config_int64*)&gconf; + int64 newval = conf->boot_val; + void* extra = NULL; + + Assert(newval >= conf->min); + Assert(newval <= conf->max); + if (!call_int64_check_hook(conf, &newval, &extra, PGC_S_DEFAULT, LOG)) { + elog(FATAL, "failed to initialize %s to %ld", conf->gen.name, newval); + } + if (conf->assign_hook) { + (*conf->assign_hook) (newval, extra); + } + *conf->variable = conf->reset_val = newval; + conf->gen.extra = conf->reset_extra = extra; + break; + } + + case PGC_REAL: { + struct config_real* conf = (struct config_real*)&gconf; + double newval = conf->boot_val; + 
void* extra = NULL; + + Assert(newval >= conf->min); + Assert(newval <= conf->max); + if (!call_real_check_hook(conf, &newval, &extra, PGC_S_DEFAULT, LOG)) { + elog(FATAL, "failed to initialize %s to %g", conf->gen.name, newval); + } + if (conf->assign_hook) { + (*conf->assign_hook) (newval, extra); + } + *conf->variable = conf->reset_val = newval; + conf->gen.extra = conf->reset_extra = extra; + break; + } + + case PGC_STRING: { + struct config_string* conf = (struct config_string*)&gconf; + char* newval; + void* extra = NULL; + + /* non-NULL boot_val must always get strdup'd */ + if (conf->boot_val != NULL) { + newval = guc_strdup(FATAL, conf->boot_val); + } else { + newval = NULL; + } + + if (!call_string_check_hook(conf, &newval, &extra, PGC_S_DEFAULT, LOG)) { + elog(FATAL, "failed to initialize %s to \"%s\"", conf->gen.name, newval ? newval : ""); + } + if (conf->assign_hook) { + (*conf->assign_hook) (newval, extra); + } + *conf->variable = conf->reset_val = newval; + conf->gen.extra = conf->reset_extra = extra; + break; + } + + case PGC_ENUM: { + struct config_enum *conf = (struct config_enum*)&gconf; + int newval = conf->boot_val; + void* extra = NULL; + + if (!call_enum_check_hook(conf, &newval, &extra, PGC_S_DEFAULT, LOG)) { + elog(FATAL, "failed to initialize %s to %d", conf->gen.name, newval); + } + if (conf->assign_hook) { + (*conf->assign_hook) (newval, extra); + } + *conf->variable = conf->reset_val = newval; + conf->gen.extra = conf->reset_extra = extra; + break; + } + default: + break; + } +} + +/* + * ReadGucstate: + * Actually it does not read anything, just returns the srcptr. But it does + * move the srcptr past the terminating zero byte, so that the caller is ready + * to read the next string. 
+ */ +static char* ReadGucstate(char** srcptr, const char* srcend) +{ + char* retptr = *srcptr; + char* ptr; + + if (*srcptr >= srcend) { + elog(ERROR, "incomplete GUC state"); + } + + /* The string variables are all null terminated */ + for (ptr = *srcptr; ptr < srcend && *ptr != '\0'; ptr++) {} + + if (ptr > srcend) { + elog(ERROR, "could not find null terminator in GUC state"); + } + + /* Set the new position to the byte following the terminating NUL */ + *srcptr = ptr + 1; + + return retptr; +} + +/* Binary read version of ReadGucstate(). Copies into dest */ +static void ReadGucstateBinary(char** srcptr, const char* srcend, char* dest, Size size) +{ + if (*srcptr + size > srcend) { + elog(ERROR, "incomplete GUC state"); + } + + errno_t rc = memcpy_s(dest, size, *srcptr, size); + securec_check(rc, "\0", "\0"); + *srcptr += size; +} + +/* + * RestoreGUCState: + * Reads the GUC state at the specified address and updates the GUCs with the + * values read from the GUC state. + */ +void RestoreGUCState(char* gucstate) +{ + char* varname; + char* varvalue; + char* varsourcefile; + int varsourceline; + GucSource varsource; + GucContext varscontext; + char* srcptr = gucstate; + char* srcend; + Size len; + int i; + + /* See comment at can_skip_gucvar(). 
*/ + for (i = 0; i < u_sess->num_guc_variables; i++) { + if (!CanSkipGucvar(u_sess->guc_variables[i])) { + InitializeOneGUCOption(*u_sess->guc_variables[i]); + } + } + /* First item is the length of the subsequent data */ + errno_t rc = memcpy_s(&len, sizeof(len), gucstate, sizeof(len)); + securec_check(rc, "\0", "\0"); + srcptr += sizeof(len); + srcend = srcptr + len; + + while (srcptr < srcend) { + int result; + varname = ReadGucstate(&srcptr, srcend); + varvalue = ReadGucstate(&srcptr, srcend); + varsourcefile = ReadGucstate(&srcptr, srcend); + + if (varsourcefile[0]) { + ReadGucstateBinary(&srcptr, srcend, + reinterpret_cast(&varsourceline), sizeof(varsourceline)); + } else { + varsourceline = 0; + } + ReadGucstateBinary(&srcptr, srcend, + reinterpret_cast(&varsource), sizeof(varsource)); + ReadGucstateBinary(&srcptr, srcend, + reinterpret_cast(&varscontext), sizeof(varscontext)); + + result = set_config_option(varname, varvalue, varscontext, varsource, + GUC_ACTION_SET, true, ERROR, true); + if (result <= 0) { + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("parameter \"%s\" could not be set", varname))); + } + if (varsourcefile[0]) { + set_config_sourcefile(varname, varsourcefile, varsourceline); + } + } +} + /* * Validate a proposed option setting for GUCArrayAdd/Delete/Reset. 
* diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h index 14640be878..d99e9e4f8d 100755 --- a/src/include/utils/guc.h +++ b/src/include/utils/guc.h @@ -266,6 +266,10 @@ extern ArrayType* GUCArrayAdd(ArrayType* array, const char* name, const char* va extern ArrayType* GUCArrayDelete(ArrayType* array, const char* name); extern ArrayType* GUCArrayReset(ArrayType* array); +extern Size EstimateGUCStateSpace(void); +extern void SerializeGUCState(Size maxsize, char *start_address); +extern void RestoreGUCState(char *gucstate); + #ifdef EXEC_BACKEND extern void write_nondefault_variables(GucContext context); extern void read_nondefault_variables(void); -- Gitee From e763a80cc5e77c8cb8c8c7308afade0625215074 Mon Sep 17 00:00:00 2001 From: jiang_jianyu Date: Tue, 25 Aug 2020 22:12:31 +0800 Subject: [PATCH 4/6] add autonomous transaction. The original patch is from: https://www.postgresql.org/message-id/attachment/45863/autonomous.patch --- src/common/backend/lib/stringinfo.cpp | 1 + src/common/backend/parser/analyze.cpp | 4 +- src/common/backend/parser/parse_param.cpp | 44 +- src/common/backend/utils/error/elog.cpp | 57 ++ src/common/backend/utils/init/miscinit.cpp | 7 +- src/common/backend/utils/misc/guc.cpp | 18 +- src/common/pl/plpgsql/src/gram.y | 16 + src/common/pl/plpgsql/src/pl_exec.cpp | 164 +++- src/common/pl/plpgsql/src/pl_scanner.cpp | 3 +- src/common/pl/plpgsql/src/plpgsql.h | 4 +- src/gausskernel/optimizer/commands/async.cpp | 3 +- .../optimizer/commands/prepare.cpp | 2 +- .../optimizer/commands/variable.cpp | 4 +- .../process/postmaster/bgworker.cpp | 5 + src/gausskernel/process/tcop/Makefile | 2 +- src/gausskernel/process/tcop/autonomous.cpp | 857 ++++++++++++++++++ src/gausskernel/process/tcop/postgres.cpp | 16 +- src/gausskernel/storage/ipc/Makefile | 4 +- src/gausskernel/storage/ipc/shm_toc.cpp | 242 +++++ src/include/commands/async.h | 1 + src/include/commands/variable.h | 1 + src/include/parser/analyze.h | 2 +- src/include/parser/parse_param.h 
| 2 +- src/include/postgres.h | 2 + src/include/storage/shm_toc.h | 59 ++ src/include/tcop/autonomous.h | 43 + src/include/tcop/tcopprot.h | 6 + src/include/utils/elog.h | 1 + src/include/utils/plpgsql.h | 4 +- .../expected/autonomous_transaction.out | 351 +++++++ src/test/regress/parallel_schedule | 3 + .../regress/sql/autonomous_transaction.sql | 189 ++++ 32 files changed, 2082 insertions(+), 35 deletions(-) create mode 100644 src/gausskernel/process/tcop/autonomous.cpp create mode 100644 src/gausskernel/storage/ipc/shm_toc.cpp create mode 100644 src/include/storage/shm_toc.h create mode 100644 src/include/tcop/autonomous.h create mode 100755 src/test/regress/expected/autonomous_transaction.out create mode 100755 src/test/regress/sql/autonomous_transaction.sql diff --git a/src/common/backend/lib/stringinfo.cpp b/src/common/backend/lib/stringinfo.cpp index 8bd086eda1..202c76fe77 100755 --- a/src/common/backend/lib/stringinfo.cpp +++ b/src/common/backend/lib/stringinfo.cpp @@ -59,6 +59,7 @@ void initStringInfo(StringInfo str) */ void resetStringInfo(StringInfo str) { + str->data[0] = '\0'; str->len = 0; str->cursor = 0; diff --git a/src/common/backend/parser/analyze.cpp b/src/common/backend/parser/analyze.cpp index 338693cfee..b10bf5744e 100644 --- a/src/common/backend/parser/analyze.cpp +++ b/src/common/backend/parser/analyze.cpp @@ -154,7 +154,7 @@ Query* parse_analyze( * symbol datatypes from context. The passed-in paramTypes[] array can * be modified or enlarged (via repalloc). 
*/ -Query* parse_analyze_varparams(Node* parseTree, const char* sourceText, Oid** paramTypes, int* numParams) +Query* parse_analyze_varparams(Node* parseTree, const char* sourceText, Oid** paramTypes, int* numParams, char** paramTypeNames) { ParseState* pstate = make_parsestate(NULL); Query* query = NULL; @@ -164,7 +164,7 @@ Query* parse_analyze_varparams(Node* parseTree, const char* sourceText, Oid** pa pstate->p_sourcetext = sourceText; - parse_variable_parameters(pstate, paramTypes, numParams); + parse_variable_parameters(pstate, paramTypes, numParams, paramTypeNames); query = transformTopLevelStmt(pstate, parseTree); diff --git a/src/common/backend/parser/parse_param.cpp b/src/common/backend/parser/parse_param.cpp index 3658ab2cce..6eeef12a06 100755 --- a/src/common/backend/parser/parse_param.cpp +++ b/src/common/backend/parser/parse_param.cpp @@ -47,6 +47,7 @@ typedef struct FixedParamState { typedef struct VarParamState { Oid** paramTypes; /* array of parameter type OIDs */ int* numParams; /* number of array entries */ + char **paramTypeNames; } VarParamState; static Node* fixed_paramref_hook(ParseState* pstate, ParamRef* pref); @@ -54,6 +55,7 @@ static Node* variable_paramref_hook(ParseState* pstate, ParamRef* pref); static Node* variable_coerce_param_hook( ParseState* pstate, Param* param, Oid targetTypeId, int32 targetTypeMod, int location); static bool check_parameter_resolution_walker(Node* node, ParseState* pstate); +static Node *variable_post_column_ref_hook(ParseState *pstate, ColumnRef *cref, Node *var); static bool query_contains_extern_params_walker(Node* node, void* context); /* @@ -73,17 +75,57 @@ void parse_fixed_parameters(ParseState* pstate, Oid* paramTypes, int numParams) /* * Set up to process a query containing references to variable parameters. 
*/ -void parse_variable_parameters(ParseState* pstate, Oid** paramTypes, int* numParams) +void parse_variable_parameters(ParseState* pstate, Oid** paramTypes, int* numParams, char** paramTypeNames) { VarParamState* parstate = (VarParamState*)palloc(sizeof(VarParamState)); parstate->paramTypes = paramTypes; parstate->numParams = numParams; + parstate->paramTypeNames = paramTypeNames; + pstate->p_post_columnref_hook = variable_post_column_ref_hook; pstate->p_ref_hook_state = (void*)parstate; pstate->p_paramref_hook = variable_paramref_hook; pstate->p_coerce_param_hook = variable_coerce_param_hook; } +static Node * variable_post_column_ref_hook(ParseState *pstate, ColumnRef *cref, Node *var) +{ + VarParamState *parstate = (VarParamState *) pstate->p_ref_hook_state; + + /* already resolved */ + if (var != NULL) + return NULL; + + /* did not supply parameter names */ + if (!parstate->paramTypeNames) + return NULL; + + if (list_length(cref->fields) == 1) + { + Node *field1 = (Node *) linitial(cref->fields); + char *name1; + int i; + Param *param; + + Assert(IsA(field1, String)); + name1 = strVal(field1); + for (i = 0; i < *parstate->numParams; i++) + if (strcmp(name1, parstate->paramTypeNames[i]) == 0) + { + param = makeNode(Param); + param->paramkind = PARAM_EXTERN; + param->paramid = i + 1; + param->paramtype = (*parstate->paramTypes)[i]; + param->paramtypmod = -1; + param->paramcollid = InvalidOid; + param->location = -1; + return (Node *) param; + } + } + + return NULL; +} + /* * Transform a ParamRef using fixed parameter types. 
*/ diff --git a/src/common/backend/utils/error/elog.cpp b/src/common/backend/utils/error/elog.cpp index 749b7ddce0..fb8b5ccf25 100644 --- a/src/common/backend/utils/error/elog.cpp +++ b/src/common/backend/utils/error/elog.cpp @@ -1638,6 +1638,63 @@ void FlushErrorStateWithoutDeleteChildrenContext(void) MemoryContextReset(ErrorContext); } +/* + * ThrowErrorData --- report an error described by an ErrorData structure + * + * This is somewhat like ReThrowError, but it allows elevels besides ERROR, + * and the boolean flags such as output_to_server are computed via the + * default rules rather than being copied from the given ErrorData. + * This is primarily used to re-report errors originally reported by + * background worker processes and then propagated (with or without + * modification) to the backend responsible for them. + */ +void +ThrowErrorData(ErrorData *edata) +{ + ErrorData *newedata; + MemoryContext oldcontext; + + if (!errstart(edata->elevel, edata->filename, edata->lineno, + edata->funcname, NULL)) + return; /* error is not to be reported at all */ + + newedata = &t_thrd.log_cxt.errordata[t_thrd.log_cxt.errordata_stack_depth]; + t_thrd.log_cxt.recursion_depth++; + oldcontext = MemoryContextSwitchTo(ErrorContext); + + /* Copy the supplied fields to the error stack entry. 
*/ + if (edata->sqlerrcode != 0) + newedata->sqlerrcode = edata->sqlerrcode; + if (edata->message) + newedata->message = pstrdup(edata->message); + if (edata->detail) + newedata->detail = pstrdup(edata->detail); + if (edata->detail_log) + newedata->detail_log = pstrdup(edata->detail_log); + if (edata->hint) + newedata->hint = pstrdup(edata->hint); + if (edata->context) + newedata->context = pstrdup(edata->context); + /* assume message_id is not available */ + if (newedata->filename) + newedata->filename = pstrdup(edata->filename); + if (newedata->funcname) + newedata->funcname = pstrdup(edata->funcname); + if (newedata->backtrace_log) + newedata->backtrace_log = pstrdup(edata->backtrace_log); + + newedata->cursorpos = edata->cursorpos; + newedata->internalpos = edata->internalpos; + if (edata->internalquery) + newedata->internalquery = pstrdup(edata->internalquery); + + MemoryContextSwitchTo(oldcontext); + t_thrd.log_cxt.recursion_depth--; + + /* Process the error. */ + errfinish(0); +} + /* * ReThrowError --- re-throw a previously copied error * diff --git a/src/common/backend/utils/init/miscinit.cpp b/src/common/backend/utils/init/miscinit.cpp index 8eb2842083..f19f3bd2ef 100755 --- a/src/common/backend/utils/init/miscinit.cpp +++ b/src/common/backend/utils/init/miscinit.cpp @@ -728,6 +728,7 @@ void InitializeSessionUserId(const char* role_name, Oid role_id) { HeapTuple role_tup; Form_pg_authid rform; + //Oid role_id; char* rname = NULL; /* Audit user login */ char details[PGAUDIT_MAXLENGTH]; @@ -763,14 +764,16 @@ void InitializeSessionUserId(const char* role_name, Oid role_id) role_tup = SearchSysCache1(AUTHOID, ObjectIdGetDatum(role_id)); if (!HeapTupleIsValid(role_tup)) { ereport(FATAL, - (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION), - errmsg("role with OID %u does not exist", role_id))); + (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION), + errmsg("role with OID %u does not exist", role_id))); } } rform = (Form_pg_authid)GETSTRUCT(role_tup); 
role_id = HeapTupleGetOid(role_tup); rname = NameStr(rform->rolname); + ereport(LOG, + (errmsg("InitializeSessionUserId role name: %s with OID %u", rname, role_id))); u_sess->misc_cxt.AuthenticatedUserId = role_id; u_sess->misc_cxt.AuthenticatedUserIsSuperuser = rform->rolsuper; diff --git a/src/common/backend/utils/misc/guc.cpp b/src/common/backend/utils/misc/guc.cpp index 000be09fb8..ed6d7c0956 100644 --- a/src/common/backend/utils/misc/guc.cpp +++ b/src/common/backend/utils/misc/guc.cpp @@ -17876,7 +17876,7 @@ static Size EstimateVariableSize(const struct config_generic* gconf) } case PGC_STRING: { - const struct config_string* conf = (const struct config_string*)gconf; + const struct config_string *conf = (const struct config_string*)gconf; /* * If the value is NULL, we transmit it as an empty string. * Although this is not physically the same value, GUC @@ -17927,7 +17927,7 @@ static Size EstimateVariableSize(const struct config_generic* gconf) Size EstimateGUCStateSpace(void) { Size size; - int i; + int i; /* Add space reqd for saving the data size of the guc state */ size = sizeof(Size); @@ -18008,25 +18008,25 @@ static void SerializeVariable(char** destptr, Size& maxbytes, const struct confi switch (gconf->vartype) { case PGC_BOOL: { - const struct config_bool* conf = (const struct config_bool*)gconf; + const struct config_bool *conf = (const struct config_bool*)gconf; DoSerialize(destptr, maxbytes, (*conf->variable ? 
"true" : "false")); break; } case PGC_INT: { - const struct config_int* conf = (const struct config_int*)gconf; + const struct config_int *conf = (const struct config_int*)gconf; DoSerialize(destptr, maxbytes, "%d", *conf->variable); break; } case PGC_INT64: { - const struct config_int64* conf = (const struct config_int64*)gconf; + const struct config_int64 *conf = (const struct config_int64*)gconf; DoSerialize(destptr, maxbytes, "%ld", *conf->variable); break; } case PGC_REAL: { - const struct config_real* conf = (const struct config_real*)gconf; + const struct config_real *conf = (const struct config_real*)gconf; DoSerialize(destptr, maxbytes, "%.*e", REALTYPE_PRECISION, *conf->variable); break; } @@ -18293,14 +18293,14 @@ void RestoreGUCState(char* gucstate) if (varsourcefile[0]) { ReadGucstateBinary(&srcptr, srcend, - reinterpret_cast(&varsourceline), sizeof(varsourceline)); + reinterpret_cast(&varsourceline), sizeof(varsourceline)); } else { varsourceline = 0; } ReadGucstateBinary(&srcptr, srcend, - reinterpret_cast(&varsource), sizeof(varsource)); + reinterpret_cast(&varsource), sizeof(varsource)); ReadGucstateBinary(&srcptr, srcend, - reinterpret_cast(&varscontext), sizeof(varscontext)); + reinterpret_cast(&varscontext), sizeof(varscontext)); result = set_config_option(varname, varvalue, varscontext, varsource, GUC_ACTION_SET, true, ERROR, true); diff --git a/src/common/pl/plpgsql/src/gram.y b/src/common/pl/plpgsql/src/gram.y index c8fb857bbb..e7a9331389 100755 --- a/src/common/pl/plpgsql/src/gram.y +++ b/src/common/pl/plpgsql/src/gram.y @@ -176,6 +176,7 @@ static void check_labels(const char *start_label, static PLpgSQL_expr *read_cursor_args(PLpgSQL_var *cursor, int until, const char *expected); static List *read_raise_options(void); +static bool last_pragma; %} @@ -213,6 +214,7 @@ static List *read_raise_options(void); char *label; int n_initvars; int *initvarnos; + bool autonomous; } declhdr; struct { @@ -399,6 +401,7 @@ static List 
*read_raise_options(void); %token K_PG_EXCEPTION_CONTEXT %token K_PG_EXCEPTION_DETAIL %token K_PG_EXCEPTION_HINT +%token K_PRAGMA %token K_PRIOR %token K_QUERY %token K_RAISE @@ -477,6 +480,7 @@ pl_block : decl_sect K_BEGIN proc_sect exception_sect K_END opt_label newp->cmd_type = PLPGSQL_STMT_BLOCK; newp->lineno = plpgsql_location_to_lineno(@2); newp->label = $1.label; + newp->autonomous = $1.autonomous; newp->n_initvars = $1.n_initvars; newp->initvarnos = $1.initvarnos; newp->body = $3; @@ -500,6 +504,7 @@ decl_sect : opt_block_label $$.label = $1; $$.n_initvars = 0; $$.initvarnos = NULL; + $$.autonomous = false; } | opt_block_label decl_start { @@ -507,6 +512,7 @@ decl_sect : opt_block_label $$.label = $1; $$.n_initvars = 0; $$.initvarnos = NULL; + $$.autonomous = false; } | opt_block_label decl_start decl_stmts { @@ -514,6 +520,8 @@ decl_sect : opt_block_label $$.label = $1; /* Remember variables declared in decl_stmts */ $$.n_initvars = plpgsql_add_initdatums(&($$.initvarnos)); + $$.autonomous = last_pragma; + last_pragma = false; } ; @@ -521,6 +529,7 @@ decl_start : K_DECLARE { /* Forget any variables created before block */ plpgsql_add_initdatums(NULL); + last_pragma = false; /* * Disable scanner lookup of identifiers while * we process the decl_stmts @@ -720,6 +729,13 @@ decl_statement : decl_varname decl_const decl_datatype decl_collate decl_notnull errmsg("build variable failed"))); pfree_ext($1.name); } + | K_PRAGMA any_identifier ';' + { + if (pg_strcasecmp($2, "autonomous_transaction") == 0) + last_pragma = true; + else + elog(ERROR, "invalid pragma"); + } ; record_attr_list : record_attr diff --git a/src/common/pl/plpgsql/src/pl_exec.cpp b/src/common/pl/plpgsql/src/pl_exec.cpp index a18b898282..15eca2aa7f 100755 --- a/src/common/pl/plpgsql/src/pl_exec.cpp +++ b/src/common/pl/plpgsql/src/pl_exec.cpp @@ -1,6 +1,6 @@ /* ------------------------------------------------------------------------- * - * pl_exec.c - Executor for the PL/pgSQL + * pl_exec.cpp - 
Executor for the PL/pgSQL * procedural language * * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * src/pl/plpgsql/src/pl_exec.c + * src/pl/plpgsql/src/pl_exec.cpp * * ------------------------------------------------------------------------- */ @@ -33,6 +33,7 @@ #include "pgstat.h" #include "optimizer/clauses.h" #include "storage/proc.h" +#include "tcop/autonomous.h" #include "tcop/tcopprot.h" #include "utils/array.h" #include "utils/builtins.h" @@ -197,6 +198,7 @@ static int check_line_validity_in_for_query(PLpgSQL_stmt_forq* stmt, int, int); static void bind_cursor_with_portal(Portal portal, PLpgSQL_execstate *estate, int varno); static char* transform_anonymous_block(char* query); static bool need_recompile_plan(SPIPlanPtr plan); +static THR_LOCAL PLpgSQL_expr* sqlstmt = NULL; /* ---------- * plpgsql_check_line_validity Called by the debugger plugin for @@ -1412,6 +1414,17 @@ static int exec_stmt_block(PLpgSQL_execstate* estate, PLpgSQL_stmt_block* block) bool savedIsStp = u_sess->SPI_cxt.is_stp; TransactionId oldTransactionId = InvalidTransactionId; + if (block->autonomous) { + if (estate->func->fn_is_trigger) { + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("Un-support feature"), + errdetail("Trigger doesnot support autonomous transaction"))); + } else { + estate->autonomous_session = AutonomousSessionStart(); + } + } + /* * First initialize all variables declared in this block */ @@ -1732,6 +1745,8 @@ static int exec_stmt_block(PLpgSQL_execstate* estate, PLpgSQL_stmt_block* block) } estate->err_text = NULL; + if (block->autonomous) + AutonomousSessionEnd(estate->autonomous_session); /* * Handle the return code. 
@@ -3664,6 +3679,7 @@ static void plpgsql_estate_setup(PLpgSQL_execstate* estate, PLpgSQL_function* fu estate->rettupdesc = NULL; estate->exitlabel = NULL; estate->cur_error = NULL; + estate->autonomous_session = NULL; estate->tuple_store = NULL; estate->cursor_return_data = NULL; @@ -3810,6 +3826,59 @@ static void exec_prepare_plan(PLpgSQL_execstate* estate, PLpgSQL_expr* expr, int exec_simple_check_plan(expr); } +static void build_symbol_table(PLpgSQL_execstate *estate, + PLpgSQL_nsitem *ns_start, + int *ret_nitems, + const char ***ret_names, + Oid **ret_types) +{ + PLpgSQL_nsitem *nsitem = NULL; + List *names = NIL; + List *types = NIL; + ListCell *lc1, *lc2; + int i, nitems; + const char **names_vector; + Oid *types_vector = NULL; + + for (nsitem = ns_start; nsitem; nsitem = nsitem->prev) { + if (nsitem->itemtype == PLPGSQL_NSTYPE_VAR) { + PLpgSQL_datum *datum; + PLpgSQL_var *var; + Oid typoid; + Value *name; + + if (strcmp(nsitem->name, "found") == 0) + continue; // XXX + elog(LOG, "namespace item variable itemno %d, name %s", + nsitem->itemno, nsitem->name); + datum = estate->datums[nsitem->itemno]; + Assert(datum->dtype == PLPGSQL_DTYPE_VAR); + var = (PLpgSQL_var *) datum; + name = makeString(nsitem->name); + typoid = var->datatype->typoid; + if (!list_member(names, name)) { + names = lappend(names, name); + types = lappend_oid(types, typoid); + } + } + } + + Assert(list_length(names) == list_length(types)); + nitems = list_length(names); + names_vector = (const char **)palloc(nitems * sizeof(char *)); + types_vector = (Oid *)palloc(nitems * sizeof(Oid)); + i = 0; + forboth(lc1, names, lc2, types) { + names_vector[i] = pstrdup(strVal(lfirst(lc1))); + types_vector[i] = lfirst_oid(lc2); + i++; + } + + *ret_nitems = nitems; + *ret_names = names_vector; + *ret_types = types_vector; +} + /* ---------- * exec_stmt_execsql Execute an SQL statement (possibly with INTO). 
* ---------- @@ -3827,6 +3896,29 @@ static int exec_stmt_execsql(PLpgSQL_execstate* estate, PLpgSQL_stmt_execsql* st oldTransactionId = GetTopTransactionId(); } + if (estate->autonomous_session) { + int nparams = 0; + int i; + const char **param_names = NULL; + Oid *param_types = NULL; + AutonomousPreparedStatement *astmt = NULL; + Datum *values = NULL; + bool *nulls = NULL; + AutonomousResult *aresult = NULL; + sqlstmt = stmt->sqlstmt; + build_symbol_table(estate, stmt->sqlstmt->ns, &nparams, &param_names, &param_types); + astmt = AutonomousSessionPrepare(estate->autonomous_session, stmt->sqlstmt->query, (int16)nparams, param_types, param_names); + + values = (Datum *)palloc(nparams * sizeof(*values)); + nulls = (bool *)palloc(nparams * sizeof(*nulls)); + for (i = 0; i < nparams; i++) { + nulls[i] = true; + } + aresult = AutonomousSessionExecutePrepared(astmt, (int16)nparams, values, nulls); + exec_set_found(estate, (list_length(aresult->tuples) != 0)); + return PLPGSQL_RC_OK; + } + /* * On the first call for this statement generate the plan, and detect * whether the statement is INSERT/UPDATE/DELETE/MERGE @@ -4240,6 +4332,12 @@ static int exec_stmt_dynexecute(PLpgSQL_execstate* estate, PLpgSQL_stmt_dynexecu exec_eval_cleanup(estate); + if (estate->autonomous_session) + { + (void *)AutonomousSessionExecute(estate->autonomous_session, querystr); + return PLPGSQL_RC_OK; + } + if (stmt->params != NULL) { stmt->ppd = (void*)exec_eval_using_params(estate, stmt->params); } @@ -4984,6 +5082,37 @@ static int exec_stmt_null(PLpgSQL_execstate* estate, PLpgSQL_stmt* stmt) */ static int exec_stmt_commit(PLpgSQL_execstate* estate, PLpgSQL_stmt_commit* stmt) { + if (estate->autonomous_session) { + if (sqlstmt) { + int nparams = 0; + int i; + const char **param_names = NULL; + Oid *param_types = NULL; + AutonomousPreparedStatement *astmt = NULL; + Datum *values = NULL; + bool *nulls = NULL; + AutonomousResult *aresult = NULL; + ereport(LOG, (errmsg("query COMMIT"))); + 
build_symbol_table(estate, sqlstmt->ns, &nparams, &param_names, &param_types); + astmt = AutonomousSessionPrepare(estate->autonomous_session, "COMMIT", (int16)nparams, param_types, param_names); + + values = (Datum *)palloc(nparams * sizeof(*values)); + nulls = (bool *)palloc(nparams * sizeof(*nulls)); + for (i = 0; i < nparams; i++) + { + nulls[i] = true; + } + aresult = AutonomousSessionExecutePrepared(astmt, (int16)nparams, values, nulls); + exec_set_found(estate, (list_length(aresult->tuples) != 0)); + sqlstmt = NULL; + return PLPGSQL_RC_OK; + } else { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("syntax error"), + errdetail("In autonomous transaction, commit/rollback must match start transaction"))); + } + } + const char* PORTAL = "Portal"; int subTransactionCount = u_sess->SPI_cxt.portal_stp_exception_counter; @@ -5046,6 +5175,37 @@ static int exec_stmt_commit(PLpgSQL_execstate* estate, PLpgSQL_stmt_commit* stmt */ static int exec_stmt_rollback(PLpgSQL_execstate* estate, PLpgSQL_stmt_rollback* stmt) { + if (estate->autonomous_session) { + if (sqlstmt) { + int nparams = 0; + int i; + const char **param_names = NULL; + Oid *param_types = NULL; + AutonomousPreparedStatement *astmt = NULL; + Datum *values = NULL; + bool *nulls = NULL; + AutonomousResult *aresult = NULL; + ereport(LOG, (errmsg("query ROLLBACK"))); + build_symbol_table(estate, sqlstmt->ns, &nparams, &param_names, &param_types); + astmt = AutonomousSessionPrepare(estate->autonomous_session, "ROLLBACK", (int16)nparams, param_types, param_names); + + values = (Datum *)palloc(nparams * sizeof(*values)); + nulls = (bool *)palloc(nparams * sizeof(*nulls)); + for (i = 0; i < nparams; i++) + { + nulls[i] = true; + } + aresult = AutonomousSessionExecutePrepared(astmt, (int16)nparams, values, nulls); + exec_set_found(estate, (list_length(aresult->tuples) != 0)); + sqlstmt = NULL; + return PLPGSQL_RC_OK; + } else { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("syntax error"), + 
errdetail("In autonomous transaction, commit/rollback must match start transaction"))); + } + } + const char* PORTAL = "Portal"; int subTransactionCount = u_sess->SPI_cxt.portal_stp_exception_counter; diff --git a/src/common/pl/plpgsql/src/pl_scanner.cpp b/src/common/pl/plpgsql/src/pl_scanner.cpp index 0e79839cab..37a10697ed 100755 --- a/src/common/pl/plpgsql/src/pl_scanner.cpp +++ b/src/common/pl/plpgsql/src/pl_scanner.cpp @@ -100,7 +100,8 @@ static const ScanKeyword unreserved_keywords[] = { UNRESERVED_KEYWORD) PG_KEYWORD("notice", K_NOTICE, UNRESERVED_KEYWORD) PG_KEYWORD("option", K_OPTION, UNRESERVED_KEYWORD) PG_KEYWORD("pg_exception_context", K_PG_EXCEPTION_CONTEXT, UNRESERVED_KEYWORD) PG_KEYWORD("pg_exception_detail", K_PG_EXCEPTION_DETAIL, UNRESERVED_KEYWORD) PG_KEYWORD("pg_exception_hint", - K_PG_EXCEPTION_HINT, UNRESERVED_KEYWORD) PG_KEYWORD("prior", K_PRIOR, UNRESERVED_KEYWORD) + K_PG_EXCEPTION_HINT, UNRESERVED_KEYWORD) PG_KEYWORD("pragma", K_PRAGMA, UNRESERVED_KEYWORD) + PG_KEYWORD("prior", K_PRIOR, UNRESERVED_KEYWORD) PG_KEYWORD("query", K_QUERY, UNRESERVED_KEYWORD) PG_KEYWORD("record", K_RECORD, UNRESERVED_KEYWORD) PG_KEYWORD("relative", K_RELATIVE, UNRESERVED_KEYWORD) PG_KEYWORD("result_oid", K_RESULT_OID, UNRESERVED_KEYWORD) PG_KEYWORD("returned_sqlstate", K_RETURNED_SQLSTATE, UNRESERVED_KEYWORD) diff --git a/src/common/pl/plpgsql/src/plpgsql.h b/src/common/pl/plpgsql/src/plpgsql.h index cc9d19f643..22c936e286 100755 --- a/src/common/pl/plpgsql/src/plpgsql.h +++ b/src/common/pl/plpgsql/src/plpgsql.h @@ -23,6 +23,7 @@ #include "catalog/namespace.h" #include "commands/trigger.h" #include "executor/spi.h" +#include "tcop/autonomous.h" /********************************************************************** * Definitions @@ -382,6 +383,7 @@ typedef struct PLpgSQL_stmt_block { /* Block of statements */ int cmd_type; int lineno; char* label; + bool autonomous; List* body; /* List of statements */ int n_initvars; int* initvarnos; @@ -775,7 +777,7 @@ typedef 
struct PLpgSQL_execstate { /* Runtime execution data */ MemoryContext tuple_store_cxt; ResourceOwner tuple_store_owner; ReturnSetInfo* rsi; - + AutonomousSession *autonomous_session; int found_varno; /* diff --git a/src/gausskernel/optimizer/commands/async.cpp b/src/gausskernel/optimizer/commands/async.cpp index e04af32a11..8dad9eb7e1 100755 --- a/src/gausskernel/optimizer/commands/async.cpp +++ b/src/gausskernel/optimizer/commands/async.cpp @@ -328,7 +328,6 @@ static void asyncQueueReadAllNotifications(void); static bool asyncQueueProcessPageEntries(QueuePosition* current, const QueuePosition &stop, char* page_buffer); static void asyncQueueAdvanceTail(void); static void ProcessIncomingNotify(void); -static void NotifyMyFrontEnd(const char* channel, const char* payload, int32 srcPid); static bool AsyncExistsPendingNotify(const char* channel, const char* payload); static void ClearPendingActionsAndNotifies(void); @@ -1835,7 +1834,7 @@ static void ProcessIncomingNotify(void) /* * Send NOTIFY message to my front end. */ -static void NotifyMyFrontEnd(const char* channel, const char* payload, int32 srcPid) +void NotifyMyFrontEnd(const char* channel, const char* payload, int32 srcPid) { if (t_thrd.postgres_cxt.whereToSendOutput == DestRemote) { StringInfoData buf; diff --git a/src/gausskernel/optimizer/commands/prepare.cpp b/src/gausskernel/optimizer/commands/prepare.cpp index 2c84b48673..d37f3c3d44 100755 --- a/src/gausskernel/optimizer/commands/prepare.cpp +++ b/src/gausskernel/optimizer/commands/prepare.cpp @@ -114,7 +114,7 @@ void PrepareQuery(PrepareStmt* stmt, const char* queryString) * Because parse analysis scribbles on the raw querytree, we must make a * copy to ensure we don't modify the passed-in tree. */ - query = parse_analyze_varparams((Node*)copyObject(stmt->query), queryString, &argtypes, &nargs); + query = parse_analyze_varparams((Node*)copyObject(stmt->query), queryString, &argtypes, &nargs, NULL); /* * Check that all parameter types were determined. 
diff --git a/src/gausskernel/optimizer/commands/variable.cpp b/src/gausskernel/optimizer/commands/variable.cpp index f07028fe04..4172da8487 100755 --- a/src/gausskernel/optimizer/commands/variable.cpp +++ b/src/gausskernel/optimizer/commands/variable.cpp @@ -686,11 +686,13 @@ bool check_mix_replication_param(bool* newval, void** extra, GucSource source) /* * SET CLIENT_ENCODING */ +void (*check_client_encoding_hook)(void); bool check_client_encoding(char** newval, void** extra, GucSource source) { int encoding; const char* canonical_name = NULL; - + if (check_client_encoding_hook) + check_client_encoding_hook(); /* Look up the encoding by name */ encoding = pg_valid_client_encoding(*newval); if (encoding < 0) { diff --git a/src/gausskernel/process/postmaster/bgworker.cpp b/src/gausskernel/process/postmaster/bgworker.cpp index 580bf35a5c..d38484b433 100644 --- a/src/gausskernel/process/postmaster/bgworker.cpp +++ b/src/gausskernel/process/postmaster/bgworker.cpp @@ -28,6 +28,7 @@ #include "storage/procsignal.h" #include "storage/shmem.h" #include "tcop/tcopprot.h" +#include "tcop/autonomous.h" #include "utils/ascii.h" #include "utils/ps_status.h" #include "utils/postinit.h" @@ -110,6 +111,10 @@ static const struct { } InternalBGWorkers[] = { + { + "autonomous_worker_main", + autonomous_worker_main + } }; /* Private functions. 
*/ diff --git a/src/gausskernel/process/tcop/Makefile b/src/gausskernel/process/tcop/Makefile index 8cd077bf23..8d68139eb3 100755 --- a/src/gausskernel/process/tcop/Makefile +++ b/src/gausskernel/process/tcop/Makefile @@ -29,7 +29,7 @@ ifneq "$(MAKECMDGOALS)" "clean" endif endif endif -OBJS= stmt_retry.o dest.o fastpath.o postgres.o pquery.o utility.o auditfuncs.o +OBJS= autonomous.o stmt_retry.o dest.o fastpath.o postgres.o pquery.o utility.o auditfuncs.o ifneq (,$(filter $(PORTNAME),cygwin win32)) override CPPFLAGS += -fPIC -DWIN32_STACK_RLIMIT=$(WIN32_STACK_RLIMIT) diff --git a/src/gausskernel/process/tcop/autonomous.cpp b/src/gausskernel/process/tcop/autonomous.cpp new file mode 100644 index 0000000000..44b25d1d5d --- /dev/null +++ b/src/gausskernel/process/tcop/autonomous.cpp @@ -0,0 +1,857 @@ +/*-------------------------------------------------------------------------- + * + * autonomous.cpp + * Run SQL commands using a background worker. + * + * Copyright (C) 2014, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/gausskernel/process/tcop/autonomous.cpp + * + * + * This implements a C API to open an autonomous session and run SQL queries + * in it. The session looks much like a normal database connection, but it is + * always to the same database, and there is no authentication needed. The + * "backend" for that connection is a background worker. The normal backend + * and the autonomous session worker communicate over the normal FE/BE + * protocol. 
+ * + * Types: + * + * AutonomousSession -- opaque connection handle + * AutonomousPreparedStatement -- opaque prepared statement handle + * AutonomousResult -- query result + * + * Functions: + * + * AutonomousSessionStart() -- start a session (launches background worker) + * and return a handle + * + * AutonomousSessionEnd() -- close session and free resources + * + * AutonomousSessionExecute() -- run SQL string and return result (rows or + * status) + * + * AutonomousSessionPrepare() -- prepare an SQL string for subsequent + * execution + * + * AutonomousSessionExecutePrepared() -- run prepared statement + * + * ------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "gs_thread.h" + +#include "access/htup.h" +#include "access/tupdesc.h" +#include "access/xact.h" +#include "commands/async.h" +#include "commands/variable.h" +#include "lib/stringinfo.h" +#include "libpq/libpq.h" +#include "libpq/pqformat.h" +#include "libpq/pqmq.h" +#include "libpq/pqsignal.h" +#include "mb/pg_wchar.h" +#include "miscadmin.h" +#include "nodes/pg_list.h" +#include "pgstat.h" +#include "postmaster/bgworker.h" +#include "storage/shm_mq.h" +#include "storage/shm_toc.h" +#include "tcop/autonomous.h" +#include "tcop/tcopprot.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/resowner.h" + +/* Table-of-contents constants for our dynamic shared memory segment. */ +#define AUTONOMOUS_MAGIC 0x50674267 + +#define AUTONOMOUS_KEY_FIXED_DATA 0 +#define AUTONOMOUS_KEY_GUC 1 +#define AUTONOMOUS_KEY_COMMAND_QUEUE 2 +#define AUTONOMOUS_KEY_RESPONSE_QUEUE 3 +#define AUTONOMOUS_NKEYS 4 + +#define AUTONOMOUS_QUEUE_SIZE 16384 + +/* Fixed-size data passed via our dynamic shared memory segment. 
*/ +struct autonomous_session_fixed_data { + Oid database_id; + Oid authenticated_user_id; + Oid current_user_id; + int sec_context; +}; + +struct AutonomousSession { + char *seg; + BackgroundWorkerHandle *worker_handle; + shm_mq_handle *command_qh; + shm_mq_handle *response_qh; + int transaction_status; +}; + +struct AutonomousPreparedStatement { + AutonomousSession *session; + Oid *argtypes; + TupleDesc tupdesc; +}; + +static void shm_mq_receive_stringinfo(shm_mq_handle *qh, StringInfoData *msg); +static void autonomous_check_client_encoding_hook(void); +static TupleDesc TupleDesc_from_RowDescription(StringInfo msg); +static HeapTuple HeapTuple_from_DataRow(TupleDesc tupdesc, StringInfo msg); +static void forward_NotifyResponse(StringInfo msg); +static void rethrow_errornotice(StringInfo msg); +static void invalid_protocol_message(char msgtype); + +AutonomousSession * AutonomousSessionStart(void) +{ + BackgroundWorker worker = {0}; + ThreadId pid; + AutonomousSession *session = NULL; + shm_toc_estimator e; + Size segsize; + Size guc_len; + char *gucstate = NULL; + char *seg = NULL; + shm_toc *toc = NULL; + autonomous_session_fixed_data *fdata = NULL; + shm_mq *command_mq = NULL; + shm_mq *response_mq = NULL; + BgwHandleStatus bgwstatus; + StringInfoData msg; + char msgtype; + errno_t rc; + + session = (AutonomousSession *)palloc(sizeof(*session)); + + shm_toc_initialize_estimator(&e); + shm_toc_estimate_chunk(&e, sizeof(autonomous_session_fixed_data)); + shm_toc_estimate_chunk(&e, AUTONOMOUS_QUEUE_SIZE); + shm_toc_estimate_chunk(&e, AUTONOMOUS_QUEUE_SIZE); + guc_len = EstimateGUCStateSpace(); + shm_toc_estimate_chunk(&e, guc_len); + shm_toc_estimate_keys(&e, AUTONOMOUS_NKEYS); + segsize = shm_toc_estimate(&e); + seg = (char *)palloc(sizeof(char) * segsize); + + session->seg = seg; + + toc = shm_toc_create(AUTONOMOUS_MAGIC, seg, segsize); + + /* Store fixed-size data in dynamic shared memory. 
*/ + fdata = (autonomous_session_fixed_data *)shm_toc_allocate(toc, sizeof(*fdata)); + fdata->database_id = u_sess->proc_cxt.MyDatabaseId; + fdata->authenticated_user_id = GetAuthenticatedUserId(); + GetUserIdAndSecContext(&fdata->current_user_id, &fdata->sec_context); + shm_toc_insert(toc, AUTONOMOUS_KEY_FIXED_DATA, fdata); + + /* Store GUC state in dynamic shared memory. */ + gucstate = (char *)shm_toc_allocate(toc, guc_len); + SerializeGUCState(guc_len, gucstate); + shm_toc_insert(toc, AUTONOMOUS_KEY_GUC, gucstate); + + command_mq = shm_mq_create(shm_toc_allocate(toc, AUTONOMOUS_QUEUE_SIZE), + AUTONOMOUS_QUEUE_SIZE); + shm_toc_insert(toc, AUTONOMOUS_KEY_COMMAND_QUEUE, command_mq); + shm_mq_set_sender(command_mq, t_thrd.proc); + + response_mq = shm_mq_create(shm_toc_allocate(toc, AUTONOMOUS_QUEUE_SIZE), + AUTONOMOUS_QUEUE_SIZE); + shm_toc_insert(toc, AUTONOMOUS_KEY_RESPONSE_QUEUE, response_mq); + shm_mq_set_receiver(response_mq, t_thrd.proc); + + session->command_qh = shm_mq_attach(command_mq, seg, NULL); + session->response_qh = shm_mq_attach(response_mq, seg, NULL); + + worker.bgw_flags = + BGWORKER_SHMEM_ACCESS | BGWORKER_BACKEND_DATABASE_CONNECTION; + worker.bgw_start_time = BgWorkerStart_ConsistentState; + worker.bgw_restart_time = BGW_NEVER_RESTART; + rc = snprintf_s(worker.bgw_library_name, BGW_MAXLEN, BGW_MAXLEN, "postgres"); + securec_check_ss(rc, "\0", "\0"); + rc = snprintf_s(worker.bgw_function_name, BGW_MAXLEN, BGW_MAXLEN, "autonomous_worker_main"); + securec_check_ss(rc, "\0", "\0"); + rc = snprintf_s(worker.bgw_name, BGW_MAXLEN, BGW_MAXLEN, "autonomous session by PID %lu", + t_thrd.proc_cxt.MyProcPid); + securec_check_ss(rc, "\0", "\0"); + worker.bgw_main_arg = PointerGetDatum(seg); + worker.bgw_notify_pid = t_thrd.proc_cxt.MyProcPid; + + if (!RegisterDynamicBackgroundWorker(&worker, &session->worker_handle)) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_RESOURCES), + errmsg("could not register background process"), + errhint("You might need to 
increase max_background_workers."))); + + shm_mq_set_handle(session->command_qh, session->worker_handle); + shm_mq_set_handle(session->response_qh, session->worker_handle); + + bgwstatus = WaitForBackgroundWorkerStartup(session->worker_handle, &pid); + if (bgwstatus != BGWH_STARTED) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_RESOURCES), + errmsg("could not start background worker"))); + + do { + ereport(LOG, (errmsg("front begin receive msg"))); + shm_mq_receive_stringinfo(session->response_qh, &msg); + ereport(LOG, (errmsg("front end receive msg"))); + ereport(LOG, (errmsg("front function AutonomousSessionStart receive msg %s", msg.data))); + msgtype = pq_getmsgbyte(&msg); + + switch (msgtype) { + case 'E': + case 'N': + rethrow_errornotice(&msg); + break; + case 'Z': + session->transaction_status = pq_getmsgbyte(&msg); + pq_getmsgend(&msg); + break; + default: + invalid_protocol_message(msgtype); + break; + } + } + while (msgtype != 'Z'); + + return session; +} + +void AutonomousSessionEnd(AutonomousSession *session) +{ + StringInfoData msg; + BgwHandleStatus bgwstatus; + if (session->transaction_status == 'T') + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("autonomous session ended with transaction block open"))); + + pq_redirect_to_shm_mq(session->command_qh); + pq_beginmessage(&msg, 'X'); + pq_endmessage(&msg); + pq_stop_redirect_to_shm_mq(); + bgwstatus = WaitForBackgroundWorkerShutdown(session->worker_handle); + if (bgwstatus != BGWH_STOPPED) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_RESOURCES), + errmsg("could not stop background worker"))); + pfree(session->worker_handle); + pfree(session->seg); + pfree(session); +} + +AutonomousResult *AutonomousSessionExecute(AutonomousSession *session, const char *sql) +{ + StringInfoData msg; + char msgtype; + AutonomousResult *result = NULL; + + pq_redirect_to_shm_mq(session->command_qh); + pq_beginmessage(&msg, 'Q'); + pq_sendstring(&msg, sql); + pq_endmessage(&msg); + 
pq_stop_redirect_to_shm_mq(); + + result = (AutonomousResult *)palloc0(sizeof(*result)); + + do { + shm_mq_receive_stringinfo(session->response_qh, &msg); + ereport(LOG, (errmsg("front function AutonomousSessionExecute receive msg %s", msg.data))); + msgtype = pq_getmsgbyte(&msg); + + switch (msgtype) { + case 'A': + forward_NotifyResponse(&msg); + break; + case 'C': + { + const char *tag = pq_getmsgstring(&msg); + result->command = pstrdup(tag); + pq_getmsgend(&msg); + break; + } + case 'D': + if (!result->tupdesc) + elog(ERROR, "no T before D"); + result->tuples = lappend(result->tuples, HeapTuple_from_DataRow(result->tupdesc, &msg)); + pq_getmsgend(&msg); + break; + case 'E': + case 'N': + rethrow_errornotice(&msg); + break; + case 'T': + if (result->tupdesc) + elog(ERROR, "already received a T message"); + result->tupdesc = TupleDesc_from_RowDescription(&msg); + pq_getmsgend(&msg); + break; + case 'Z': + session->transaction_status = pq_getmsgbyte(&msg); + pq_getmsgend(&msg); + break; + default: + invalid_protocol_message(msgtype); + break; + } + } + while (msgtype != 'Z'); + return result; +} + +AutonomousPreparedStatement *AutonomousSessionPrepare(AutonomousSession *session, const char *sql, int16 nargs, + Oid argtypes[], const char *argnames[]) +{ + AutonomousPreparedStatement *result = NULL; + StringInfoData msg; + int16 i; + char msgtype; + + pq_redirect_to_shm_mq(session->command_qh); + pq_beginmessage(&msg, 'P'); + pq_sendstring(&msg, ""); + pq_sendstring(&msg, sql); + pq_sendint16(&msg, (uint16)nargs); + for (i = 0; i < nargs; i++) + pq_sendint32(&msg, (uint32)argtypes[i]); + if (argnames) + for (i = 0; i < nargs; i++) + pq_sendstring(&msg, argnames[i]); + pq_endmessage(&msg); + pq_stop_redirect_to_shm_mq(); + + result = (AutonomousPreparedStatement *)palloc0(sizeof(*result)); + result->session = session; + result->argtypes = (Oid *)palloc(nargs * sizeof(*result->argtypes)); + errno_t rc; + rc = memcpy_s(result->argtypes, nargs * 
sizeof(*result->argtypes), argtypes, nargs * sizeof(*result->argtypes)); + securec_check(rc, "\0", "\0"); + + shm_mq_receive_stringinfo(session->response_qh, &msg); + ereport(LOG, (errmsg("front function AutonomousSessionPrepare receive msg %s", msg.data))); + msgtype = pq_getmsgbyte(&msg); + + switch (msgtype) { + case '1': + break; + case 'E': + rethrow_errornotice(&msg); + break; + default: + invalid_protocol_message(msgtype); + break; + } + + pq_redirect_to_shm_mq(session->command_qh); + pq_beginmessage(&msg, 'D'); + pq_sendbyte(&msg, 'S'); + pq_sendstring(&msg, ""); + pq_endmessage(&msg); + pq_stop_redirect_to_shm_mq(); + + do { + shm_mq_receive_stringinfo(session->response_qh, &msg); + ereport(LOG, (errmsg("front function AutonomousSessionPrepare receive msg %s", msg.data))); + msgtype = pq_getmsgbyte(&msg); + + switch (msgtype) { + case 'A': + forward_NotifyResponse(&msg); + break; + case 'E': + rethrow_errornotice(&msg); + break; + case 'n': + break; + case 't': + /* ignore for now */ + break; + case 'T': + if (result->tupdesc) + elog(ERROR, "already received a T message"); + result->tupdesc = TupleDesc_from_RowDescription(&msg); + pq_getmsgend(&msg); + break; + default: + invalid_protocol_message(msgtype); + break; + } + } + while (msgtype != 'n' && msgtype != 'T'); + + return result; +} + +AutonomousResult *AutonomousSessionExecutePrepared(AutonomousPreparedStatement *stmt, int16 nargs, + Datum *values, bool *nulls) +{ + AutonomousSession *session = NULL; + StringInfoData msg; + AutonomousResult *result = NULL; + char msgtype; + int16 i; + + session = stmt->session; + + pq_redirect_to_shm_mq(session->command_qh); + pq_beginmessage(&msg, 'B'); + pq_sendstring(&msg, ""); + pq_sendstring(&msg, ""); + pq_sendint16(&msg, 1); /* number of parameter format codes */ + pq_sendint16(&msg, 1); + pq_sendint16(&msg, (uint16)nargs); /* number of parameter values */ + for (i = 0; i < nargs; i++) { + if (nulls[i]) + pq_sendint32(&msg, -1); + else { + Oid typsend; + bool 
typisvarlena; + bytea *outputbytes = NULL; + + getTypeBinaryOutputInfo(stmt->argtypes[i], &typsend, &typisvarlena); + outputbytes = OidSendFunctionCall(typsend, values[i]); + pq_sendint32(&msg, VARSIZE(outputbytes) - VARHDRSZ); + pq_sendbytes(&msg, VARDATA(outputbytes), VARSIZE(outputbytes) - VARHDRSZ); + pfree(outputbytes); + } + } + pq_sendint16(&msg, 1); /* number of result column format codes */ + pq_sendint16(&msg, 1); + pq_endmessage(&msg); + pq_stop_redirect_to_shm_mq(); + + shm_mq_receive_stringinfo(session->response_qh, &msg); + ereport(LOG, (errmsg("front function AutonomousSessionExecutePrepared receive msg %s", msg.data))); + msgtype = pq_getmsgbyte(&msg); + + switch (msgtype) { + case '2': + break; + case 'E': + rethrow_errornotice(&msg); + break; + default: + invalid_protocol_message(msgtype); + break; + } + + pq_redirect_to_shm_mq(session->command_qh); + pq_beginmessage(&msg, 'E'); + pq_sendstring(&msg, ""); + pq_sendint32(&msg, 0); + pq_endmessage(&msg); + pq_stop_redirect_to_shm_mq(); + + result = (AutonomousResult *)palloc0(sizeof(*result)); + result->tupdesc = stmt->tupdesc; + + do { + shm_mq_receive_stringinfo(session->response_qh, &msg); + ereport(LOG, (errmsg("front function AutonomousSessionExecutePrepared receive msg %s", msg.data))); + msgtype = pq_getmsgbyte(&msg); + + switch (msgtype) { + case 'A': + forward_NotifyResponse(&msg); + break; + case 'C': + { + const char *tag = pq_getmsgstring(&msg); + result->command = pstrdup(tag); + pq_getmsgend(&msg); + break; + } + case 'D': + if (!stmt->tupdesc) + elog(ERROR, "did not expect any rows"); + result->tuples = lappend(result->tuples, HeapTuple_from_DataRow(stmt->tupdesc, &msg)); + pq_getmsgend(&msg); + break; + case 'E': + case 'N': + rethrow_errornotice(&msg); + break; + default: + invalid_protocol_message(msgtype); + break; + } + } + while (msgtype != 'C'); + + pq_redirect_to_shm_mq(session->command_qh); + pq_putemptymessage('S'); + pq_stop_redirect_to_shm_mq(); + + 
shm_mq_receive_stringinfo(session->response_qh, &msg); + ereport(LOG, (errmsg("front function AutonomousSessionExecutePrepared receive msg %s", msg.data))); + msgtype = pq_getmsgbyte(&msg); + + switch (msgtype) { + case 'A': + forward_NotifyResponse(&msg); + break; + case 'Z': + session->transaction_status = pq_getmsgbyte(&msg); + pq_getmsgend(&msg); + break; + default: + invalid_protocol_message(msgtype); + break; + } + + return result; +} + +void autonomous_worker_main(Datum main_arg) +{ + char *seg = NULL; + shm_toc *toc = NULL; + autonomous_session_fixed_data *fdata = NULL; + char *gucstate = NULL; + shm_mq *command_mq = NULL; + shm_mq *response_mq = NULL; + shm_mq_handle *command_qh = NULL; + shm_mq_handle *response_qh = NULL; + StringInfoData msg; + + char msgtype; + + (void)gspqsignal(SIGTERM, die); + BackgroundWorkerUnblockSignals(); + + /* Set up a memory context and resource owner. */ + Assert(t_thrd.utils_cxt.CurrentResourceOwner == NULL); + t_thrd.utils_cxt.CurrentResourceOwner = ResourceOwnerCreate(NULL, "autonomous"); + CurrentMemoryContext = AllocSetContextCreate(t_thrd.top_mem_cxt, + "autonomous session", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); + + initStringInfo(&(*t_thrd.postgres_cxt.row_description_buf)); + seg = (char *)DatumGetPointer(main_arg); + if (seg == NULL) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("could not map dynamic shared memory segment"))); + + toc = shm_toc_attach(AUTONOMOUS_MAGIC, seg); + if (toc == NULL) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("bad magic number in dynamic shared memory segment"))); + + /* Find data structures in dynamic shared memory. 
*/ + fdata = (autonomous_session_fixed_data *)shm_toc_lookup(toc, AUTONOMOUS_KEY_FIXED_DATA); + + gucstate = (char *)shm_toc_lookup(toc, AUTONOMOUS_KEY_GUC); + + command_mq = (shm_mq *)shm_toc_lookup(toc, AUTONOMOUS_KEY_COMMAND_QUEUE); + shm_mq_set_receiver(command_mq, t_thrd.proc); + command_qh = shm_mq_attach(command_mq, seg, NULL); + + response_mq = (shm_mq *)shm_toc_lookup(toc, AUTONOMOUS_KEY_RESPONSE_QUEUE); + shm_mq_set_sender(response_mq, t_thrd.proc); + response_qh = shm_mq_attach(response_mq, seg, NULL); + + pq_redirect_to_shm_mq(response_qh); + BackgroundWorkerInitializeConnectionByOid(fdata->database_id, + fdata->authenticated_user_id); + + (void)SetClientEncoding(GetDatabaseEncoding()); + + StartTransactionCommand(); + RestoreGUCState(gucstate); + CommitTransactionCommand(); + + process_local_preload_libraries(); + + SetUserIdAndSecContext(fdata->current_user_id, fdata->sec_context); + + t_thrd.postgres_cxt.whereToSendOutput = DestRemote; + ReadyForQuery((CommandDest)t_thrd.postgres_cxt.whereToSendOutput); + + t_thrd.mem_cxt.msg_mem_cxt = AllocSetContextCreate(t_thrd.top_mem_cxt, + "MessageContext", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); + + do { + (void)MemoryContextSwitchTo(t_thrd.mem_cxt.msg_mem_cxt); + MemoryContextResetAndDeleteChildren(t_thrd.mem_cxt.msg_mem_cxt); + + ProcessCompletedNotifies(); + pgstat_report_stat(false); + pgstat_report_activity(STATE_IDLE, NULL); + + shm_mq_receive_stringinfo(command_qh, &msg); + ereport(LOG, (errmsg("bgworker receive msg %s", msg.data))); + msgtype = pq_getmsgbyte(&msg); + + switch (msgtype) { + case 'B': + { + SetCurrentStatementStartTimestamp(); + exec_bind_message(&msg); + break; + } + case 'D': + { + int describe_type; + const char *describe_target; + + SetCurrentStatementStartTimestamp(); + + describe_type = pq_getmsgbyte(&msg); + describe_target = pq_getmsgstring(&msg); + pq_getmsgend(&msg); + + switch (describe_type) { + case 'S': + 
exec_describe_statement_message(describe_target); + break; +#ifdef TODO + case 'P': + exec_describe_portal_message(describe_target); + break; +#endif + default: + ereport(ERROR, + (errcode(ERRCODE_PROTOCOL_VIOLATION), + errmsg("invalid DESCRIBE message subtype %d", + describe_type))); + break; + } + } + break; + case 'E': + { + const char *portal_name; + int max_rows; + + SetCurrentStatementStartTimestamp(); + + portal_name = pq_getmsgstring(&msg); + max_rows = (int)pq_getmsgint(&msg, 4); + pq_getmsgend(&msg); + + exec_execute_message(portal_name, max_rows); + } + break; + + case 'P': + { + const char *stmt_name; + const char *query_string; + uint16 numParams; + Oid *paramTypes = NULL; + char **paramTypeNames = NULL; + + SetCurrentStatementStartTimestamp(); + + stmt_name = pq_getmsgstring(&msg); + query_string = pq_getmsgstring(&msg); + numParams = pq_getmsgint(&msg, 2); + if (numParams > 0) { + int i; + + paramTypes = (Oid *)palloc(numParams * sizeof(Oid)); + for (i = 0; i < numParams; i++) + paramTypes[i] = pq_getmsgint(&msg, 4); + } + /* If data left in message, read parameter names. 
*/ + if (msg.cursor != msg.len) { + int i; + + paramTypeNames = (char **)palloc(numParams * sizeof(char *)); + for (i = 0; i < numParams; i++) + paramTypeNames[i] = (char *)pq_getmsgstring(&msg); + } + pq_getmsgend(&msg); + + exec_parse_message(query_string, stmt_name, paramTypes, paramTypeNames, (int)numParams); + break; + } + case 'Q': + { + const char *sql; + int save_log_statement; + bool save_log_duration; + int save_log_min_duration_statement; + + sql = pq_getmsgstring(&msg); + pq_getmsgend(&msg); + + /* XXX room for improvement */ + save_log_statement = u_sess->attr.attr_common.log_statement; + save_log_duration = u_sess->attr.attr_sql.log_duration; + save_log_min_duration_statement = u_sess->attr.attr_storage.log_min_duration_statement; + + check_client_encoding_hook = autonomous_check_client_encoding_hook; + u_sess->attr.attr_common.log_statement = LOGSTMT_NONE; + u_sess->attr.attr_sql.log_duration = false; + u_sess->attr.attr_storage.log_min_duration_statement = -1; + + SetCurrentStatementStartTimestamp(); + exec_simple_query(sql, QUERY_MESSAGE); + + u_sess->attr.attr_common.log_statement = save_log_statement; + u_sess->attr.attr_sql.log_duration = save_log_duration; + u_sess->attr.attr_storage.log_min_duration_statement = save_log_min_duration_statement; + check_client_encoding_hook = NULL; + + ReadyForQuery((CommandDest)t_thrd.postgres_cxt.whereToSendOutput); + break; + } + case 'S': + { + pq_getmsgend(&msg); + finish_xact_command(); + ReadyForQuery((CommandDest)t_thrd.postgres_cxt.whereToSendOutput); + break; + } + case 'X': + break; + default: + ereport(ERROR, + (errcode(ERRCODE_PROTOCOL_VIOLATION), + errmsg("invalid protocol message type from autonomous session leader: %c", + msgtype))); + break; + } + } + while (msgtype != 'X'); +} + +static void shm_mq_receive_stringinfo(shm_mq_handle *qh, StringInfoData *msg) +{ + shm_mq_result res; + Size nbytes = 0; + void *data = NULL; + + res = shm_mq_receive(qh, &nbytes, &data, false); + if (res != 
SHM_MQ_SUCCESS) + elog(ERROR, "shm_mq_receive failed: %d", res); + initStringInfo(msg); + appendBinaryStringInfo(msg, (const char*)data, (int)nbytes); +} + +static void autonomous_check_client_encoding_hook(void) +{ + elog(ERROR, "cannot set client encoding in autonomous session"); +} + +static TupleDesc TupleDesc_from_RowDescription(StringInfo msg) +{ + TupleDesc tupdesc; + int16 natts = pq_getmsgint(msg, 2); + int16 i; + + tupdesc = CreateTemplateTupleDesc(natts, false); + for (i = 0; i < natts; i++) { + const char *colname; + Oid type_oid; + uint32 typmod; + uint16 format; + + colname = pq_getmsgstring(msg); + (void) pq_getmsgint(msg, 4); /* table OID */ + (void) pq_getmsgint(msg, 2); /* table attnum */ + type_oid = pq_getmsgint(msg, 4); + (void) pq_getmsgint(msg, 2); /* type length */ + typmod = pq_getmsgint(msg, 4); + format = pq_getmsgint(msg, 2); + (void) format; +#ifdef TODO + /* XXX The protocol sometimes sends 0 (text) if the format is not + * determined yet. We always use binary, so this check is probably + * not useful. 
*/ + if (format != 1) + elog(ERROR, "format must be binary"); +#endif + + TupleDescInitEntry(tupdesc, i + 1, colname, type_oid, typmod, 0); + } + return tupdesc; +} + +static HeapTuple HeapTuple_from_DataRow(TupleDesc tupdesc, StringInfo msg) +{ + int16 natts = pq_getmsgint(msg, 2); + int16 i; + Datum *values; + bool *nulls; + StringInfoData buf; + + Assert(tupdesc); + + if (natts != tupdesc->natts) + elog(ERROR, "malformed DataRow"); + + values = (Datum *)palloc(natts * sizeof(*values)); + nulls = (bool *)palloc(natts * sizeof(*nulls)); + initStringInfo(&buf); + + for (i = 0; i < natts; i++) { + int32 len = pq_getmsgint(msg, 4); + + if (len < 0) + nulls[i] = true; + else { + Oid recvid; + Oid typioparams; + + nulls[i] = false; + + getTypeBinaryInputInfo(tupdesc->attrs[i]->atttypid, + &recvid, + &typioparams); + resetStringInfo(&buf); + appendBinaryStringInfo(&buf, pq_getmsgbytes(msg, len), len); + values[i] = OidReceiveFunctionCall(recvid, &buf, typioparams, + tupdesc->attrs[i]->atttypmod); + } + } + + return heap_form_tuple(tupdesc, values, nulls); +} + +static void forward_NotifyResponse(StringInfo msg) +{ + int32 pid; + const char *channel; + const char *payload; + + pid = (int32)pq_getmsgint(msg, 4); + channel = pq_getmsgrawstring(msg); + payload = pq_getmsgrawstring(msg); + pq_endmessage(msg); + + NotifyMyFrontEnd(channel, payload, pid); +} + + +static void rethrow_errornotice(StringInfo msg) +{ + ErrorData edata; + + pq_parse_errornotice(msg, &edata); + edata.elevel = Min(edata.elevel, ERROR); + ThrowErrorData(&edata); +} + + +static void invalid_protocol_message(char msgtype) +{ + ereport(ERROR, + (errcode(ERRCODE_PROTOCOL_VIOLATION), + errmsg("invalid protocol message type from autonomous session: %c", + msgtype))); +} + diff --git a/src/gausskernel/process/tcop/postgres.cpp b/src/gausskernel/process/tcop/postgres.cpp index 3a3c8b119f..737217687d 100755 --- a/src/gausskernel/process/tcop/postgres.cpp +++ b/src/gausskernel/process/tcop/postgres.cpp @@ 
-203,7 +203,7 @@ static void get_query_result(TupleTableSlot* slot, DestReceiver* self); * @hdfs * Define different mesage type used for exec_simple_query */ -typedef enum { QUERY_MESSAGE = 0, HYBRID_MESSAGE } MessageType; +//typedef enum { QUERY_MESSAGE = 0, HYBRID_MESSAGE } MessageType; /* ---------------------------------------------------------------- * decls for routines only used in this file @@ -235,6 +235,8 @@ extern void CancelAutoAnalyze(); extern List* RevalidateCachedQuery(CachedPlanSource* plansource); static void InitRecursiveCTEGlobalVariables(const PlannedStmt* planstmt); +THR_LOCAL bool needEnd = true; + bool StreamThreadAmI() { return (t_thrd.role == STREAM_WORKER); @@ -1874,7 +1876,7 @@ void exec_init_poolhandles(void) * hybridmesage, this parameter will be set to 1 to tell us the normal query string * followed by information string. query_string = normal querystring + message. */ -static void exec_simple_query(const char* query_string, MessageType messageType, StringInfo msg = NULL) +void exec_simple_query(const char* query_string, MessageType messageType, StringInfo msg) { CommandDest dest = (CommandDest)t_thrd.postgres_cxt.whereToSendOutput; MemoryContext oldcontext; @@ -2883,7 +2885,7 @@ static void exec_plan_with_params(StringInfo input_message) * If paramTypeNames is specified, paraTypes is filled with corresponding OIDs. * The caller is expected to allocate space for the paramTypes. 
*/ -static void exec_parse_message(const char* query_string, /* string to execute */ +void exec_parse_message(const char* query_string, /* string to execute */ const char* stmt_name, /* name for prepared stmt */ Oid* paramTypes, /* parameter types */ char** paramTypeNames, /* parameter type names */ @@ -3104,7 +3106,7 @@ static void exec_parse_message(const char* query_string, /* string to execute */ if (u_sess->attr.attr_common.log_parser_stats) ResetUsage(); - query = parse_analyze_varparams(raw_parse_tree, query_string, ¶mTypes, &numParams); + query = parse_analyze_varparams(raw_parse_tree, query_string, ¶mTypes, &numParams, paramTypeNames); /* check cross engine queries */ StorageEngineType storageEngineType = SE_TYPE_UNSPECIFIED; @@ -3766,7 +3768,7 @@ static void exec_get_ddl_params(StringInfo input_message) * * Process a "Bind" message to create a portal from a prepared statement */ -static void exec_bind_message(StringInfo input_message) +void exec_bind_message(StringInfo input_message) { const char* portal_name = NULL; const char* stmt_name = NULL; @@ -4325,7 +4327,7 @@ static void exec_bind_message(StringInfo input_message) * * Process an "Execute" message for a portal */ -static void exec_execute_message(const char* portal_name, long max_rows) +void exec_execute_message(const char* portal_name, long max_rows) { CommandDest dest; DestReceiver* receiver = NULL; @@ -4790,7 +4792,7 @@ static int errdetail_recovery_conflict(void) * * Process a "Describe" message for a prepared statement */ -static void exec_describe_statement_message(const char* stmt_name) +void exec_describe_statement_message(const char* stmt_name) { CachedPlanSource* psrc = NULL; int i; diff --git a/src/gausskernel/storage/ipc/Makefile b/src/gausskernel/storage/ipc/Makefile index 010ea8de9a..4b09ca2ce3 100644 --- a/src/gausskernel/storage/ipc/Makefile +++ b/src/gausskernel/storage/ipc/Makefile @@ -17,6 +17,6 @@ ifneq "$(MAKECMDGOALS)" "clean" endif endif OBJS = ipc.o ipci.o pmsignal.o 
procarray.o procsignal.o shmem.o shmqueue.o \ - sinval.o sinvaladt.o standby.o shm_mq.o + sinval.o sinvaladt.o standby.o shm_mq.o shm_toc.o -include $(top_srcdir)/src/gausskernel/common.mk +include $(top_srcdir)/src/gausskernel/common.mk diff --git a/src/gausskernel/storage/ipc/shm_toc.cpp b/src/gausskernel/storage/ipc/shm_toc.cpp new file mode 100644 index 0000000000..dcf6cbad31 --- /dev/null +++ b/src/gausskernel/storage/ipc/shm_toc.cpp @@ -0,0 +1,242 @@ +/*------------------------------------------------------------------------- + * + * shm_toc.cpp + * shared memory segment table of contents + * + * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/gausskernel/storage/ipc/shm_toc.cpp + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "storage/barrier.h" +#include "storage/shm_toc.h" +#include "storage/spin.h" + +struct shm_toc_entry +{ + uint64 key; /* Arbitrary identifier */ + uint64 offset; /* Bytes offset */ +}; + +struct shm_toc +{ + uint64 toc_magic; /* Magic number for this TOC */ + slock_t toc_mutex; /* Spinlock for mutual exclusion */ + Size toc_total_bytes; /* Bytes managed by this TOC */ + Size toc_allocated_bytes; /* Bytes allocated of those managed */ + Size toc_nentry; /* Number of entries in TOC */ + shm_toc_entry toc_entry[FLEXIBLE_ARRAY_MEMBER]; +}; + +/* + * Initialize a region of shared memory with a table of contents. + */ +shm_toc *shm_toc_create(uint64 magic, void *address, Size nbytes) +{ + shm_toc *toc = (shm_toc *) address; + + Assert(nbytes > offsetof(shm_toc, toc_entry)); + toc->toc_magic = magic; + SpinLockInit(&toc->toc_mutex); + toc->toc_total_bytes = nbytes; + toc->toc_allocated_bytes = 0; + toc->toc_nentry = 0; + + return toc; +} + +/* + * Attach to an existing table of contents. 
If the magic number found at + * the target address doesn't match our expectations, returns NULL. + */ +extern shm_toc *shm_toc_attach(uint64 magic, void *address) +{ + shm_toc *toc = (shm_toc *) address; + + if (toc->toc_magic != magic) + return NULL; + + Assert(toc->toc_total_bytes >= toc->toc_allocated_bytes); + Assert(toc->toc_total_bytes >= offsetof(shm_toc, toc_entry)); + + return toc; +} + +/* + * Allocate shared memory from a segment managed by a table of contents. + * + * This is not a full-blown allocator; there's no way to free memory. It's + * just a way of dividing a single physical shared memory segment into logical + * chunks that may be used for different purposes. + * + * We allocated backwards from the end of the segment, so that the TOC entries + * can grow forward from the start of the segment. + */ +extern void *shm_toc_allocate(shm_toc *toc, Size nbytes) +{ + volatile shm_toc *vtoc = toc; + Size total_bytes; + Size allocated_bytes; + Size nentry; + Size toc_bytes; + + /* Make sure request is well-aligned. */ + nbytes = BUFFERALIGN(nbytes); + + SpinLockAcquire(&toc->toc_mutex); + + total_bytes = vtoc->toc_total_bytes; + allocated_bytes = vtoc->toc_allocated_bytes; + nentry = vtoc->toc_nentry; + toc_bytes = offsetof(shm_toc, toc_entry) +nentry * sizeof(shm_toc_entry) + + allocated_bytes; + + /* Check for memory exhaustion and overflow. */ + if (toc_bytes + nbytes > total_bytes || toc_bytes + nbytes < toc_bytes) + { + SpinLockRelease(&toc->toc_mutex); + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of shared memory"))); + } + vtoc->toc_allocated_bytes += nbytes; + + SpinLockRelease(&toc->toc_mutex); + + return ((char *) toc) + (total_bytes - allocated_bytes - nbytes); +} + +/* + * Return the number of bytes that can still be allocated. 
+ */ +extern Size shm_toc_freespace(shm_toc *toc) +{ + volatile shm_toc *vtoc = toc; + Size total_bytes; + Size allocated_bytes; + Size nentry; + Size toc_bytes; + + SpinLockAcquire(&toc->toc_mutex); + total_bytes = vtoc->toc_total_bytes; + allocated_bytes = vtoc->toc_allocated_bytes; + nentry = vtoc->toc_nentry; + SpinLockRelease(&toc->toc_mutex); + + toc_bytes = offsetof(shm_toc, toc_entry) +nentry * sizeof(shm_toc_entry); + Assert(allocated_bytes + BUFFERALIGN(toc_bytes) <= total_bytes); + return total_bytes - (allocated_bytes + BUFFERALIGN(toc_bytes)); +} + +/* + * Insert a TOC entry. + * + * The idea here is that process setting up the shared memory segment will + * register the addresses of data structures within the segment using this + * function. Each data structure will be identified using a 64-bit key, which + * is assumed to be a well-known or discoverable integer. Other processes + * accessing the shared memory segment can pass the same key to + * shm_toc_lookup() to discover the addresses of those data structures. + * + * Since the shared memory segment may be mapped at different addresses within + * different backends, we store relative rather than absolute pointers. + * + * This won't scale well to a large number of keys. Hopefully, that isn't + * necessary; if it proves to be, we might need to provide a more sophisticated + * data structure here. But the real idea here is just to give someone mapping + * a dynamic shared memory the ability to find the bare minimum number of + * pointers that they need to bootstrap. If you're storing a lot of stuff in + * here, you're doing it wrong. + */ +void +shm_toc_insert(shm_toc *toc, uint64 key, void *address) +{ + volatile shm_toc *vtoc = toc; + uint64 total_bytes; + uint64 allocated_bytes; + uint64 nentry; + uint64 toc_bytes; + uint64 offset; + + /* Relativize pointer. 
*/ + Assert(address > (void *) toc); + offset = ((char *) address) - (char *) toc; + + SpinLockAcquire(&toc->toc_mutex); + + total_bytes = vtoc->toc_total_bytes; + allocated_bytes = vtoc->toc_allocated_bytes; + nentry = vtoc->toc_nentry; + toc_bytes = offsetof(shm_toc, toc_entry) +nentry * sizeof(shm_toc_entry) + + allocated_bytes; + + /* Check for memory exhaustion and overflow. */ + if (toc_bytes + sizeof(shm_toc_entry) > total_bytes || + toc_bytes + sizeof(shm_toc_entry) < toc_bytes) + { + SpinLockRelease(&toc->toc_mutex); + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of shared memory"))); + } + + Assert(offset < total_bytes); + vtoc->toc_entry[nentry].key = key; + vtoc->toc_entry[nentry].offset = offset; + + /* + * By placing a write barrier after filling in the entry and before + * updating the number of entries, we make it safe to read the TOC + * unlocked. + */ + pg_write_barrier(); + + vtoc->toc_nentry++; + + SpinLockRelease(&toc->toc_mutex); +} + +/* + * Look up a TOC entry. + * + * Unlike the other functions in this file, this operation acquires no lock; + * it uses only barriers. It probably wouldn't hurt concurrency very much even + * if it did get a lock, but since it's reasonably likely that a group of + * worker processes could each read a series of entries from the same TOC + * right around the same time, there seems to be some value in avoiding it. + */ +void *shm_toc_lookup(shm_toc *toc, uint64 key) +{ + uint64 nentry; + uint64 i; + + /* Read the number of entries before we examine any entry. */ + nentry = toc->toc_nentry; + pg_read_barrier(); + + /* Now search for a matching entry. */ + for (i = 0; i < nentry; ++i) + if (toc->toc_entry[i].key == key) + return ((char *) toc) + toc->toc_entry[i].offset; + + /* No matching entry was found. */ + return NULL; +} + +/* + * Estimate how much shared memory will be required to store a TOC and its + * dependent data structures. 
+ */ +Size +shm_toc_estimate(shm_toc_estimator *e) +{ + return add_size(offsetof(shm_toc, toc_entry), + add_size(mul_size(e->number_of_keys, sizeof(shm_toc_entry)), + e->space_for_chunks)); +} + diff --git a/src/include/commands/async.h b/src/include/commands/async.h index a109973968..adb8e4b484 100755 --- a/src/include/commands/async.h +++ b/src/include/commands/async.h @@ -27,6 +27,7 @@ extern Size AsyncShmemSize(void); extern void AsyncShmemInit(void); /* notify-related SQL statements */ +extern void NotifyMyFrontEnd(const char* channel, const char* payload, int32 srcPid); extern void Async_Notify(const char* channel, const char* payload); extern void Async_Listen(const char* channel); extern void Async_Unlisten(const char* channel); diff --git a/src/include/commands/variable.h b/src/include/commands/variable.h index 06e93e73fb..8cb4b1a963 100755 --- a/src/include/commands/variable.h +++ b/src/include/commands/variable.h @@ -28,6 +28,7 @@ extern bool check_transaction_deferrable(bool* newval, void** extra, GucSource s extern bool check_random_seed(double* newval, void** extra, GucSource source); extern void assign_random_seed(double newval, void* extra); extern const char* show_random_seed(void); +extern void (*check_client_encoding_hook)(void); extern bool check_client_encoding(char** newval, void** extra, GucSource source); extern void assign_client_encoding(const char* newval, void* extra); extern bool check_mix_replication_param(bool* newval, void** extra, GucSource source); diff --git a/src/include/parser/analyze.h b/src/include/parser/analyze.h index acd6a56371..3c7ce32cb4 100755 --- a/src/include/parser/analyze.h +++ b/src/include/parser/analyze.h @@ -24,7 +24,7 @@ extern THR_LOCAL PGDLLIMPORT post_parse_analyze_hook_type post_parse_analyze_hoo extern Query* parse_analyze(Node* parseTree, const char* sourceText, Oid* paramTypes, int numParams, bool isFirstNode = true, bool isCreateView = false); -extern Query* parse_analyze_varparams(Node* parseTree, 
const char* sourceText, Oid** paramTypes, int* numParams); +extern Query* parse_analyze_varparams(Node* parseTree, const char* sourceText, Oid** paramTypes, int* numParams, char** paramTypeNames); extern Query* parse_sub_analyze(Node* parseTree, ParseState* parentParseState, CommonTableExpr* parentCTE, bool locked_from_parent, bool resolve_unknowns); diff --git a/src/include/parser/parse_param.h b/src/include/parser/parse_param.h index 2798a0ad91..5f5788d802 100644 --- a/src/include/parser/parse_param.h +++ b/src/include/parser/parse_param.h @@ -16,7 +16,7 @@ #include "parser/parse_node.h" extern void parse_fixed_parameters(ParseState* pstate, Oid* paramTypes, int numParams); -extern void parse_variable_parameters(ParseState* pstate, Oid** paramTypes, int* numParams); +extern void parse_variable_parameters(ParseState* pstate, Oid** paramTypes, int* numParams, char** paramTypeNames); extern void check_variable_parameters(ParseState* pstate, Query* query); extern bool query_contains_extern_params(Query* query); diff --git a/src/include/postgres.h b/src/include/postgres.h index f3cbff2a3c..9cf80535b7 100644 --- a/src/include/postgres.h +++ b/src/include/postgres.h @@ -241,6 +241,8 @@ typedef enum { SKEW_OPT_OFF, SKEW_OPT_NORMAL, SKEW_OPT_LAZY } SkewStrategy; typedef enum { RESOURCE_TRACK_NONE, RESOURCE_TRACK_QUERY, RESOURCE_TRACK_OPERATOR } ResourceTrackOption; +typedef enum { QUERY_MESSAGE = 0, HYBRID_MESSAGE } MessageType; + typedef enum { CODEGEN_PARTIAL, /* allow to call c-function in codegen */ CODEGEN_PURE /* do not allow to call c-function in codegen */ diff --git a/src/include/storage/shm_toc.h b/src/include/storage/shm_toc.h new file mode 100644 index 0000000000..eb7c4d7502 --- /dev/null +++ b/src/include/storage/shm_toc.h @@ -0,0 +1,59 @@ +/*------------------------------------------------------------------------- + * + * shm_toc.h + * shared memory segment table of contents + * + * This is intended to provide a simple way to divide a chunk of shared + * 
memory (probably dynamic shared memory allocated via dsm_create) into + * a number of regions and keep track of the addresses of those regions or + * key data structures within those regions. This is not intended to + * scale to a large number of keys and will perform poorly if used that + * way; if you need a large number of pointers, store them within some + * other data structure within the segment and only put the pointer to + * the data structure itself in the table of contents. + * + * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/shm_toc.h + * + *------------------------------------------------------------------------- + */ +#ifndef SHM_TOC_H +#define SHM_TOC_H + +#include "storage/shmem.h" + +struct shm_toc; +typedef struct shm_toc shm_toc; +struct shm_toc_entry; +typedef struct shm_toc_entry shm_toc_entry; + +extern shm_toc *shm_toc_create(uint64 magic, void *address, Size nbytes); +extern shm_toc *shm_toc_attach(uint64 magic, void *address); +extern void *shm_toc_allocate(shm_toc *toc, Size nbytes); +extern Size shm_toc_freespace(shm_toc *toc); +extern void shm_toc_insert(shm_toc *toc, uint64 key, void *address); +extern void *shm_toc_lookup(shm_toc *toc, uint64 key); + +/* + * Tools for estimating how large a chunk of shared memory will be needed + * to store a TOC and its dependent objects. 
+ */ +typedef struct +{ + Size space_for_chunks; + Size number_of_keys; +} shm_toc_estimator; + +#define shm_toc_initialize_estimator(e) \ + ((e)->space_for_chunks = 0, (e)->number_of_keys = 0) +#define shm_toc_estimate_chunk(e, sz) \ + ((e)->space_for_chunks = add_size((e)->space_for_chunks, \ + BUFFERALIGN((sz)))) +#define shm_toc_estimate_keys(e, cnt) \ + ((e)->number_of_keys = add_size((e)->number_of_keys, (cnt))) + +extern Size shm_toc_estimate(shm_toc_estimator *); + +#endif /* SHM_TOC_H */ diff --git a/src/include/tcop/autonomous.h b/src/include/tcop/autonomous.h new file mode 100644 index 0000000000..9a59cf416b --- /dev/null +++ b/src/include/tcop/autonomous.h @@ -0,0 +1,43 @@ +/*-------------------------------------------------------------------------- + * + * autonomous.h + * Run SQL commands using a background worker. + * + * Copyright (C) 2014, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/include/tcop/autonomous.h + * + * ------------------------------------------------------------------------- + */ +#ifndef AUTONOMOUS_H +#define AUTONOMOUS_H + +#include "access/tupdesc.h" +#include "nodes/pg_list.h" + +struct AutonomousSession; +typedef struct AutonomousSession AutonomousSession; + +struct AutonomousPreparedStatement; +typedef struct AutonomousPreparedStatement AutonomousPreparedStatement; + +struct autonomous_session_fixed_data; +typedef struct autonomous_session_fixed_data autonomous_session_fixed_data; + +typedef struct AutonomousResult +{ + TupleDesc tupdesc; + List *tuples; + char *command; +} AutonomousResult; + +AutonomousSession *AutonomousSessionStart(void); +void AutonomousSessionEnd(AutonomousSession *session); +AutonomousResult *AutonomousSessionExecute(AutonomousSession *session, const char *sql); +AutonomousPreparedStatement *AutonomousSessionPrepare(AutonomousSession *session, const char *sql, int16 nargs, + Oid argtypes[], const char *argnames[]); +AutonomousResult 
*AutonomousSessionExecutePrepared(AutonomousPreparedStatement *stmt, int16 nargs, Datum *values, bool *nulls); +extern void autonomous_worker_main(Datum main_arg); + +#endif /* AUTONOMOUS_H */ diff --git a/src/include/tcop/tcopprot.h b/src/include/tcop/tcopprot.h index e9cf57d6f0..871c6009ae 100755 --- a/src/include/tcop/tcopprot.h +++ b/src/include/tcop/tcopprot.h @@ -23,6 +23,7 @@ #include "nodes/parsenodes.h" #include "storage/procsignal.h" #include "utils/guc.h" +#include "postgres.h" /* Required daylight between max_stack_depth and the kernel limit, in bytes */ #define STACK_DEPTH_SLOP (640 * 1024L) @@ -67,5 +68,10 @@ extern int check_log_duration(char* msec_str, bool was_logged); extern void set_debug_options(int debug_flag, GucContext context, GucSource source); extern bool set_plan_disabling_options(const char* arg, GucContext context, GucSource source); extern const char* get_stats_option_name(const char* arg); +extern void exec_simple_query(const char* query_string, MessageType messageType, StringInfo msg = NULL); +extern void exec_parse_message(const char* query_string, const char* stmt_name, Oid* paramTypes, char** paramTypeNames, int numParams); +extern void exec_bind_message(StringInfo input_message); +extern void exec_execute_message(const char *portal_name, long max_rows); +extern void exec_describe_statement_message(const char *stmt_name); #endif /* TCOPPROT_H */ diff --git a/src/include/utils/elog.h b/src/include/utils/elog.h index 737c908b24..6532c407be 100755 --- a/src/include/utils/elog.h +++ b/src/include/utils/elog.h @@ -495,6 +495,7 @@ extern void UpdateErrorData(ErrorData* edata, ErrorData* newData); extern void FreeErrorData(ErrorData* edata); extern void FlushErrorState(void); extern void FlushErrorStateWithoutDeleteChildrenContext(void); +extern void ThrowErrorData(ErrorData *edata); extern void ReThrowError(ErrorData* edata) __attribute__((noreturn)); extern void pg_re_throw(void) __attribute__((noreturn)); diff --git 
a/src/include/utils/plpgsql.h b/src/include/utils/plpgsql.h index 898ad986d1..9a1b625394 100755 --- a/src/include/utils/plpgsql.h +++ b/src/include/utils/plpgsql.h @@ -21,6 +21,7 @@ #include "catalog/namespace.h" #include "commands/trigger.h" #include "executor/spi.h" +#include "tcop/autonomous.h" /********************************************************************** * Definitions @@ -380,6 +381,7 @@ typedef struct PLpgSQL_stmt_block { /* Block of statements */ int cmd_type; int lineno; char* label; + bool autonomous; List* body; /* List of statements */ int n_initvars; int* initvarnos; @@ -776,7 +778,7 @@ typedef struct PLpgSQL_execstate { /* Runtime execution data */ MemoryContext tuple_store_cxt; ResourceOwner tuple_store_owner; ReturnSetInfo* rsi; - + AutonomousSession *autonomous_session; int found_varno; /* diff --git a/src/test/regress/expected/autonomous_transaction.out b/src/test/regress/expected/autonomous_transaction.out new file mode 100755 index 0000000000..859fd3fab3 --- /dev/null +++ b/src/test/regress/expected/autonomous_transaction.out @@ -0,0 +1,351 @@ +create table at_tb2(id int, val varchar(20)); +create or replace function at_test2(i int) returns integer +LANGUAGE plpgsql +as $$ +declare +pragma autonomous_transaction; +begin +START TRANSACTION; +insert into at_tb2 values(1, 'before s1'); +if i > 10 then +rollback; +else +commit; +end if; +return i; +end; +$$; +select at_test2(15); + at_test2 +---------- + 15 +(1 row) + +select * from at_tb2; + id | val +----+----- +(0 rows) + +select at_test2(5); + at_test2 +---------- + 5 +(1 row) + +select * from at_tb2; + id | val +----+----------- + 1 | before s1 +(1 row) + +truncate table at_tb2; +create or replace procedure at_test3(i int) +AS +DECLARE + PRAGMA AUTONOMOUS_TRANSACTION; +BEGIN + START TRANSACTION; + insert into at_tb2 values(1, 'before s1'); + insert into at_tb2 values(2, 'after s1'); + if i > 10 then + rollback; + else + commit; + end if; +end; +/ +call at_test3(6); + at_test3 
+---------- + +(1 row) + +select * from at_tb2; + id | val +----+----------- + 1 | before s1 + 2 | after s1 +(2 rows) + +truncate table at_tb2; +create or replace procedure at_test4(i int) +AS +DECLARE +BEGIN + insert into at_tb2 values(3, 'klk'); + PERFORM at_test3(6); + insert into at_tb2 values(4, 'klk'); + PERFORM at_test3(15); +end; +/ +select at_test4(6); + at_test4 +---------- + +(1 row) + +select * from at_tb2; + id | val +----+----------- + 3 | klk + 1 | before s1 + 2 | after s1 + 4 | klk +(4 rows) + +truncate table at_tb2; +DECLARE +begin +insert into at_tb2 values(1, 'begin'); +PERFORM at_test3(6); +end; +/ +select * from at_tb2; + id | val +----+----------- + 1 | begin + 1 | before s1 + 2 | after s1 +(3 rows) + +truncate table at_tb2; +begin; +insert into at_tb2 values(1, 'begin'); +select * from at_tb2; + id | val +----+------- + 1 | begin +(1 row) + +call at_test3(6); + at_test3 +---------- + +(1 row) + +select * from at_tb2; + id | val +----+----------- + 1 | begin + 1 | before s1 + 2 | after s1 +(3 rows) + +rollback; +select * from at_tb2; + id | val +----+----------- + 1 | before s1 + 2 | after s1 +(2 rows) + +create table at_test1 (a int); +create or replace procedure autonomous_test() +AS +declare +PRAGMA AUTONOMOUS_TRANSACTION; +BEGIN + START TRANSACTION; + for i in 0..9 loop + if i % 2 = 0 then + execute 'insert into at_test1 values ('||i::integer||')'; + end if; + end loop; + commit; +end; +/ +truncate table at_test1; +begin; +insert into at_test1 values(1); +select * from at_test1; + a +--- + 1 +(1 row) + +call autonomous_test(); + autonomous_test +----------------- + +(1 row) + +select * from at_test1; + a +--- + 1 + 0 + 2 + 4 + 6 + 8 +(6 rows) + +rollback; +select * from at_test1; + a +--- + 0 + 2 + 4 + 6 + 8 +(5 rows) + +create or replace function autonomous_test2() returns integer +LANGUAGE plpgsql +as $$ +declare +PRAGMA AUTONOMOUS_TRANSACTION; +begin +START TRANSACTION; +for i in 0..9 loop + if i % 2 = 0 then + execute 'insert into 
at_test1 values ('||i::integer||')'; + end if; + end loop; + commit; + return 42; + end; +$$; +truncate table at_test1; +begin; +insert into at_test1 values(20); +select * from at_test1; + a +---- + 20 +(1 row) + +select autonomous_test2(); + autonomous_test2 +------------------ + 42 +(1 row) + +select * from at_test1; + a +---- + 20 + 0 + 2 + 4 + 6 + 8 +(6 rows) + +rollback; +select * from at_test1; + a +--- + 0 + 2 + 4 + 6 + 8 +(5 rows) + +create or replace function autonomous_test3() returns text +LANGUAGE plpgsql +as $$ +declare +PRAGMA AUTONOMOUS_TRANSACTION; +begin +START TRANSACTION; +for i in 0..9 loop + if i % 2 = 0 then + execute 'insert into at_test1 values ('||i::integer||')'; + end if; + end loop; + commit; + return 'autonomous_test3 end'; + end; +$$; +truncate table at_test1; +begin; +insert into at_test1 values(30); +select * from at_test1; + a +---- + 30 +(1 row) + +select autonomous_test3(); + autonomous_test3 +---------------------- + autonomous_test3 end +(1 row) + +select * from at_test1; + a +---- + 30 + 0 + 2 + 4 + 6 + 8 +(6 rows) + +rollback; +select * from at_test1; + a +--- + 0 + 2 + 4 + 6 + 8 +(5 rows) + +CREATE TABLE cp_test1 (a int, b text); +CREATE TABLE cp_test2 (a int, b text); +CREATE TABLE cp_test3 (a int, b text); +CREATE OR REPLACE FUNCTION autonomous_cp() RETURNS integer +LANGUAGE plpgsql +AS $$ +DECLARE + PRAGMA AUTONOMOUS_TRANSACTION; +BEGIN + START TRANSACTION; + insert into cp_test1 values(1,'a'),(2,'b'); + insert into cp_test2 values(1,'c'),(2,'d'); + with s1 as (select cp_test1.a, cp_test1.b from cp_test1 left join cp_test2 on cp_test1.a = cp_test2.a) insert into cp_test3 select * from s1; + COMMIT; + RETURN 42; +END; +$$; +select autonomous_cp(); + autonomous_cp +--------------- + 42 +(1 row) + +select * from cp_test3; + a | b +---+--- + 1 | a + 2 | b +(2 rows) + +CREATE TABLE tg_test1 (a int, b varchar(25), c timestamp, d int); +CREATE TABLE tg_test2 (a int, b varchar(25), c timestamp, d int); +CREATE OR REPLACE FUNCTION 
tri_insert_test2_func() RETURNS TRIGGER AS +$$ +DECLARE + PRAGMA AUTONOMOUS_TRANSACTION; +BEGIN +insert into tg_test2 values(new.a,new.b,new.c,new.d); +RETURN NEW; +commit; +END +$$ LANGUAGE PLPGSQL; +CREATE TRIGGER TG_TEST2_TEMP +before insert +ON tg_test1 +FOR EACH ROW +EXECUTE PROCEDURE tri_insert_test2_func(); +insert into tg_test1 values(1,'a','2020-08-13 09:00:00', 1); +ERROR: Un-support feature +DETAIL: Trigger doesnot support autonomous transaction +CONTEXT: PL/pgSQL function tri_insert_test2_func() line 4 at statement block + diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index 0b39cb6019..346a7a6d14 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -582,3 +582,6 @@ test: gtt_clean # procedure, Function Test test: create_procedure create_function pg_compatibility postgres_fdw + +# autonomous transaction Test +test: autonomous_transaction diff --git a/src/test/regress/sql/autonomous_transaction.sql b/src/test/regress/sql/autonomous_transaction.sql new file mode 100755 index 0000000000..e6593513a8 --- /dev/null +++ b/src/test/regress/sql/autonomous_transaction.sql @@ -0,0 +1,189 @@ +create table at_tb2(id int, val varchar(20)); +create or replace function at_test2(i int) returns integer +LANGUAGE plpgsql +as $$ +declare +pragma autonomous_transaction; +begin +START TRANSACTION; +insert into at_tb2 values(1, 'before s1'); +if i > 10 then +rollback; +else +commit; +end if; +return i; +end; +$$; +select at_test2(15); +select * from at_tb2; +select at_test2(5); +select * from at_tb2; + +truncate table at_tb2; +create or replace procedure at_test3(i int) +AS +DECLARE + PRAGMA AUTONOMOUS_TRANSACTION; +BEGIN + START TRANSACTION; + insert into at_tb2 values(1, 'before s1'); + insert into at_tb2 values(2, 'after s1'); + if i > 10 then + rollback; + else + commit; + end if; +end; +/ +call at_test3(6); +select * from at_tb2; + +truncate table at_tb2; +create or replace procedure at_test4(i 
int) +AS +DECLARE +BEGIN + insert into at_tb2 values(3, 'klk'); + PERFORM at_test3(6); + insert into at_tb2 values(4, 'klk'); + PERFORM at_test3(15); +end; +/ +select at_test4(6); +select * from at_tb2; + +truncate table at_tb2; +DECLARE +begin +insert into at_tb2 values(1, 'begin'); +PERFORM at_test3(6); +end; +/ +select * from at_tb2; + +truncate table at_tb2; +begin; +insert into at_tb2 values(1, 'begin'); +select * from at_tb2; +call at_test3(6); +select * from at_tb2; +rollback; +select * from at_tb2; + +create table at_test1 (a int); +create or replace procedure autonomous_test() +AS +declare +PRAGMA AUTONOMOUS_TRANSACTION; +BEGIN + START TRANSACTION; + for i in 0..9 loop + if i % 2 = 0 then + execute 'insert into at_test1 values ('||i::integer||')'; + end if; + end loop; + commit; +end; +/ + +truncate table at_test1; +begin; +insert into at_test1 values(1); +select * from at_test1; +call autonomous_test(); +select * from at_test1; +rollback; +select * from at_test1; + + +create or replace function autonomous_test2() returns integer +LANGUAGE plpgsql +as $$ +declare +PRAGMA AUTONOMOUS_TRANSACTION; +begin +START TRANSACTION; +for i in 0..9 loop + if i % 2 = 0 then + execute 'insert into at_test1 values ('||i::integer||')'; + end if; + end loop; + commit; + return 42; + end; +$$; +truncate table at_test1; +begin; +insert into at_test1 values(20); +select * from at_test1; +select autonomous_test2(); +select * from at_test1; +rollback; +select * from at_test1; + +create or replace function autonomous_test3() returns text +LANGUAGE plpgsql +as $$ +declare +PRAGMA AUTONOMOUS_TRANSACTION; +begin +START TRANSACTION; +for i in 0..9 loop + if i % 2 = 0 then + execute 'insert into at_test1 values ('||i::integer||')'; + end if; + end loop; + commit; + return 'autonomous_test3 end'; + end; +$$; +truncate table at_test1; +begin; +insert into at_test1 values(30); +select * from at_test1; +select autonomous_test3(); +select * from at_test1; +rollback; +select * from 
at_test1; + +CREATE TABLE cp_test1 (a int, b text); +CREATE TABLE cp_test2 (a int, b text); +CREATE TABLE cp_test3 (a int, b text); +CREATE OR REPLACE FUNCTION autonomous_cp() RETURNS integer +LANGUAGE plpgsql +AS $$ +DECLARE + PRAGMA AUTONOMOUS_TRANSACTION; +BEGIN + START TRANSACTION; + insert into cp_test1 values(1,'a'),(2,'b'); + insert into cp_test2 values(1,'c'),(2,'d'); + with s1 as (select cp_test1.a, cp_test1.b from cp_test1 left join cp_test2 on cp_test1.a = cp_test2.a) insert into cp_test3 select * from s1; + COMMIT; + RETURN 42; +END; +$$; +select autonomous_cp(); +select * from cp_test3; + +CREATE TABLE tg_test1 (a int, b varchar(25), c timestamp, d int); +CREATE TABLE tg_test2 (a int, b varchar(25), c timestamp, d int); +CREATE OR REPLACE FUNCTION tri_insert_test2_func() RETURNS TRIGGER AS +$$ +DECLARE + PRAGMA AUTONOMOUS_TRANSACTION; +BEGIN +insert into tg_test2 values(new.a,new.b,new.c,new.d); +RETURN NEW; +commit; +END +$$ LANGUAGE PLPGSQL; + +CREATE TRIGGER TG_TEST2_TEMP +before insert +ON tg_test1 +FOR EACH ROW +EXECUTE PROCEDURE tri_insert_test2_func(); +insert into tg_test1 values(1,'a','2020-08-13 09:00:00', 1); + -- Gitee From c235237a4c78b967d76869054236655722b58ed8 Mon Sep 17 00:00:00 2001 From: jiang_jianyu Date: Tue, 25 Aug 2020 22:50:30 +0800 Subject: [PATCH 5/6] remove redundant log --- src/common/backend/utils/init/miscinit.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/common/backend/utils/init/miscinit.cpp b/src/common/backend/utils/init/miscinit.cpp index f19f3bd2ef..8eb2842083 100755 --- a/src/common/backend/utils/init/miscinit.cpp +++ b/src/common/backend/utils/init/miscinit.cpp @@ -728,7 +728,6 @@ void InitializeSessionUserId(const char* role_name, Oid role_id) { HeapTuple role_tup; Form_pg_authid rform; - //Oid role_id; char* rname = NULL; /* Audit user login */ char details[PGAUDIT_MAXLENGTH]; @@ -764,16 +763,14 @@ void InitializeSessionUserId(const char* role_name, Oid role_id) role_tup = 
SearchSysCache1(AUTHOID, ObjectIdGetDatum(role_id)); if (!HeapTupleIsValid(role_tup)) { ereport(FATAL, - (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION), - errmsg("role with OID %u does not exist", role_id))); + (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION), + errmsg("role with OID %u does not exist", role_id))); } } rform = (Form_pg_authid)GETSTRUCT(role_tup); role_id = HeapTupleGetOid(role_tup); rname = NameStr(rform->rolname); - ereport(LOG, - (errmsg("InitializeSessionUserId role name: %s with OID %u", rname, role_id))); u_sess->misc_cxt.AuthenticatedUserId = role_id; u_sess->misc_cxt.AuthenticatedUserIsSuperuser = rform->rolsuper; -- Gitee From 9717117500b1e74ff2177003d8fdd9afada3a801 Mon Sep 17 00:00:00 2001 From: jiang_jianyu Date: Sat, 29 Aug 2020 10:41:31 +0800 Subject: [PATCH 6/6] forbidden nested autonomous transaction --- src/common/backend/libpq/pqmq.cpp | 7 +++++ src/common/pl/plpgsql/src/pl_exec.cpp | 26 +++++++++---------- src/gausskernel/process/tcop/autonomous.cpp | 20 +++++++++++--- .../process/threadpool/knl_thread.cpp | 7 +++++ src/include/knl/knl_thread.h | 8 ++++++ 5 files changed, 51 insertions(+), 17 deletions(-) diff --git a/src/common/backend/libpq/pqmq.cpp b/src/common/backend/libpq/pqmq.cpp index 274c285a0c..fd3197de2a 100644 --- a/src/common/backend/libpq/pqmq.cpp +++ b/src/common/backend/libpq/pqmq.cpp @@ -224,6 +224,13 @@ void pq_parse_errornotice(StringInfo msg, ErrorData *edata) case PG_DIAG_SEVERITY: /* ignore, trusting we'll get a nonlocalized version */ break; + case PG_DIAG_INTERNEL_ERRCODE: + /* ignore */ + break; + case PG_DIAG_MODULE_ID: + /* It is always MOD_MAX */ + edata->mod_id = MOD_MAX; + break; case PG_DIAG_SQLSTATE: if (strlen(value) != 5) { elog(ERROR, "invalid SQLSTATE: \"%s\"", value); diff --git a/src/common/pl/plpgsql/src/pl_exec.cpp b/src/common/pl/plpgsql/src/pl_exec.cpp index 15eca2aa7f..ac073f06d0 100755 --- a/src/common/pl/plpgsql/src/pl_exec.cpp +++ b/src/common/pl/plpgsql/src/pl_exec.cpp @@ 
-198,7 +198,6 @@ static int check_line_validity_in_for_query(PLpgSQL_stmt_forq* stmt, int, int); static void bind_cursor_with_portal(Portal portal, PLpgSQL_execstate *estate, int varno); static char* transform_anonymous_block(char* query); static bool need_recompile_plan(SPIPlanPtr plan); -static THR_LOCAL PLpgSQL_expr* sqlstmt = NULL; /* ---------- * plpgsql_check_line_validity Called by the debugger plugin for @@ -1420,6 +1419,9 @@ static int exec_stmt_block(PLpgSQL_execstate* estate, PLpgSQL_stmt_block* block) (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("Un-support feature"), errdetail("Trigger doesnot support autonomous transaction"))); + } else if (t_thrd.autonomous_cxt.isnested) { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("Un-support feature : Autonomous transaction doesnot support nesting"))); } else { estate->autonomous_session = AutonomousSessionStart(); } @@ -3905,7 +3907,7 @@ static int exec_stmt_execsql(PLpgSQL_execstate* estate, PLpgSQL_stmt_execsql* st Datum *values = NULL; bool *nulls = NULL; AutonomousResult *aresult = NULL; - sqlstmt = stmt->sqlstmt; + t_thrd.autonomous_cxt.sqlstmt = stmt->sqlstmt; build_symbol_table(estate, stmt->sqlstmt->ns, &nparams, ¶m_names, ¶m_types); astmt = AutonomousSessionPrepare(estate->autonomous_session, stmt->sqlstmt->query, (int16)nparams, param_types, param_names); @@ -5083,7 +5085,7 @@ static int exec_stmt_null(PLpgSQL_execstate* estate, PLpgSQL_stmt* stmt) static int exec_stmt_commit(PLpgSQL_execstate* estate, PLpgSQL_stmt_commit* stmt) { if (estate->autonomous_session) { - if (sqlstmt) { + if (t_thrd.autonomous_cxt.sqlstmt) { int nparams = 0; int i; const char **param_names = NULL; @@ -5093,7 +5095,7 @@ static int exec_stmt_commit(PLpgSQL_execstate* estate, PLpgSQL_stmt_commit* stmt bool *nulls = NULL; AutonomousResult *aresult = NULL; ereport(LOG, (errmsg("query COMMIT"))); - build_symbol_table(estate, sqlstmt->ns, &nparams, ¶m_names, ¶m_types); + build_symbol_table(estate, 
t_thrd.autonomous_cxt.sqlstmt->ns, &nparams, ¶m_names, ¶m_types); astmt = AutonomousSessionPrepare(estate->autonomous_session, "COMMIT", (int16)nparams, param_types, param_names); values = (Datum *)palloc(nparams * sizeof(*values)); @@ -5104,12 +5106,11 @@ static int exec_stmt_commit(PLpgSQL_execstate* estate, PLpgSQL_stmt_commit* stmt } aresult = AutonomousSessionExecutePrepared(astmt, (int16)nparams, values, nulls); exec_set_found(estate, (list_length(aresult->tuples) != 0)); - sqlstmt = NULL; + t_thrd.autonomous_cxt.sqlstmt = NULL; return PLPGSQL_RC_OK; } else { ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("syntax error"), - errdetail("In antonomous transaction, commit/rollback must match start transaction"))); + errmsg("Syntax error: In antonomous transaction, commit/rollback must match start transaction"))); } } @@ -5176,7 +5177,7 @@ static int exec_stmt_commit(PLpgSQL_execstate* estate, PLpgSQL_stmt_commit* stmt static int exec_stmt_rollback(PLpgSQL_execstate* estate, PLpgSQL_stmt_rollback* stmt) { if (estate->autonomous_session) { - if (sqlstmt) { + if (t_thrd.autonomous_cxt.sqlstmt) { int nparams = 0; int i; const char **param_names = NULL; @@ -5186,7 +5187,7 @@ static int exec_stmt_rollback(PLpgSQL_execstate* estate, PLpgSQL_stmt_rollback* bool *nulls = NULL; AutonomousResult *aresult = NULL; ereport(LOG, (errmsg("query ROLLBACK"))); - build_symbol_table(estate, sqlstmt->ns, &nparams, ¶m_names, ¶m_types); + build_symbol_table(estate, t_thrd.autonomous_cxt.sqlstmt->ns, &nparams, ¶m_names, ¶m_types); astmt = AutonomousSessionPrepare(estate->autonomous_session, "ROLLBACK", (int16)nparams, param_types, param_names); values = (Datum *)palloc(nparams * sizeof(*values)); @@ -5197,13 +5198,12 @@ static int exec_stmt_rollback(PLpgSQL_execstate* estate, PLpgSQL_stmt_rollback* } aresult = AutonomousSessionExecutePrepared(astmt, (int16)nparams, values, nulls); exec_set_found(estate, (list_length(aresult->tuples) != 0)); - sqlstmt = NULL; + 
t_thrd.autonomous_cxt.sqlstmt = NULL; return PLPGSQL_RC_OK; } else { ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("syntax error"), - errdetail("In antonomous transaction, commit/rollback must match start transaction"))); - } + errmsg("Syntax error: In antonomous transaction, commit/rollback must match start transaction"))); + } } const char* PORTAL = "Portal"; diff --git a/src/gausskernel/process/tcop/autonomous.cpp b/src/gausskernel/process/tcop/autonomous.cpp index 44b25d1d5d..e3f02bf6f1 100644 --- a/src/gausskernel/process/tcop/autonomous.cpp +++ b/src/gausskernel/process/tcop/autonomous.cpp @@ -65,6 +65,7 @@ #include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/resowner.h" +#include "utils/ps_status.h" /* Table-of-contents constants for our dynamic shared memory segment. */ #define AUTONOMOUS_MAGIC 0x50674267 @@ -524,6 +525,8 @@ void autonomous_worker_main(Datum main_arg) (void)gspqsignal(SIGTERM, die); BackgroundWorkerUnblockSignals(); + t_thrd.autonomous_cxt.isnested = true; + /* Set up a memory context and resource owner. 
*/ Assert(t_thrd.utils_cxt.CurrentResourceOwner == NULL); t_thrd.utils_cxt.CurrentResourceOwner = ResourceOwnerCreate(NULL, "autonomous"); @@ -533,7 +536,6 @@ void autonomous_worker_main(Datum main_arg) ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); - initStringInfo(&(*t_thrd.postgres_cxt.row_description_buf)); seg = (char *)DatumGetPointer(main_arg); if (seg == NULL) ereport(ERROR, @@ -586,9 +588,19 @@ void autonomous_worker_main(Datum main_arg) (void)MemoryContextSwitchTo(t_thrd.mem_cxt.msg_mem_cxt); MemoryContextResetAndDeleteChildren(t_thrd.mem_cxt.msg_mem_cxt); - ProcessCompletedNotifies(); - pgstat_report_stat(false); - pgstat_report_activity(STATE_IDLE, NULL); + if (IsAbortedTransactionBlockState()) { + set_ps_display("idle in transaction (aborted)", false); + pgstat_report_activity(STATE_IDLEINTRANSACTION_ABORTED, NULL); + } else if (IsTransactionOrTransactionBlock()) { + set_ps_display("idle in transaction", false); + pgstat_report_activity(STATE_IDLEINTRANSACTION, NULL); + } else { + ProcessCompletedNotifies(); + pgstat_report_stat(false); + + set_ps_display("idle", false); + pgstat_report_activity(STATE_IDLE, NULL); + } shm_mq_receive_stringinfo(command_qh, &msg); ereport(LOG, (errmsg("bgworker receive msg %s", msg.data))); diff --git a/src/gausskernel/process/threadpool/knl_thread.cpp b/src/gausskernel/process/threadpool/knl_thread.cpp index 1d0ca27a90..f5a026dd71 100755 --- a/src/gausskernel/process/threadpool/knl_thread.cpp +++ b/src/gausskernel/process/threadpool/knl_thread.cpp @@ -1381,6 +1381,12 @@ static void knl_t_heartbeat_init(knl_t_heartbeat_context* heartbeat_cxt) heartbeat_cxt->state = NULL; } +static void knl_t_autonomous_init(knl_t_autonomous_context* autonomous_cxt) +{ + autonomous_cxt->isnested = false; + autonomous_cxt->sqlstmt = NULL; +} + static void knl_t_mot_init(knl_t_mot_context* mot_cxt) { mot_cxt->last_error_code = 0; @@ -1498,6 +1504,7 @@ void knl_thread_init(knl_thread_role role) 
knl_t_heartbeat_init(&t_thrd.heartbeat_cxt); knl_t_poolcleaner_init(&t_thrd.poolcleaner_cxt); knl_t_mot_init(&t_thrd.mot_cxt); + knl_t_autonomous_init(&t_thrd.autonomous_cxt); } void knl_thread_set_name(const char* name) diff --git a/src/include/knl/knl_thread.h b/src/include/knl/knl_thread.h index 21a08dde19..a809352f29 100644 --- a/src/include/knl/knl_thread.h +++ b/src/include/knl/knl_thread.h @@ -2657,6 +2657,13 @@ typedef struct knl_t_heartbeat_context { struct heartbeat_state* state; } knl_t_heartbeat_context; +/* autonomous_transaction */ +struct PLpgSQL_expr; +typedef struct knl_t_autonomous_context { + PLpgSQL_expr* sqlstmt; + bool isnested; +} knl_t_autonomous_context; + /* MOT thread attributes */ #define MOT_MAX_ERROR_MESSAGE 256 #define MOT_MAX_ERROR_FRAMES 32 @@ -2731,6 +2738,7 @@ typedef struct knl_thrd_context { knl_t_arch_context arch; knl_t_async_context asy_cxt; knl_t_audit_context audit; + knl_t_autonomous_context autonomous_cxt; knl_t_autovacuum_context autovacuum_cxt; knl_t_basebackup_context basebackup_cxt; knl_t_bgwriter_context bgwriter_cxt; -- Gitee