From b3c0ecfb4c80342218ed0fc74ae2a374024da7a5 Mon Sep 17 00:00:00 2001 From: jiang_jianyu Date: Tue, 25 Aug 2020 21:17:24 +0800 Subject: [PATCH 1/6] support background workers from PG --- src/common/backend/port/unix_latch.cpp | 3 +- src/common/backend/utils/init/globals.cpp | 1 + src/common/backend/utils/init/miscinit.cpp | 45 +- src/common/backend/utils/init/postinit.cpp | 24 +- src/common/backend/utils/misc/guc.cpp | 34 + src/gausskernel/optimizer/commands/async.cpp | 2 - src/gausskernel/process/postmaster/Makefile | 2 +- .../process/postmaster/bgworker.cpp | 1309 +++++++++++++++++ .../process/postmaster/postmaster.cpp | 442 +++++- .../process/threadpool/knl_instance.cpp | 8 + .../process/threadpool/knl_thread.cpp | 6 + src/gausskernel/storage/ipc/ipci.cpp | 3 + src/gausskernel/storage/ipc/procsignal.cpp | 1 + src/gausskernel/storage/lmgr/lwlocknames.txt | 1 + src/gausskernel/storage/lmgr/proc.cpp | 38 +- src/include/gs_thread.h | 1 + .../knl/knl_guc/knl_instance_attr_storage.h | 1 + src/include/knl/knl_instance.h | 8 + src/include/knl/knl_thread.h | 7 + src/include/miscadmin.h | 5 +- src/include/postmaster/bgworker.h | 157 ++ src/include/postmaster/bgworker_internals.h | 64 + src/include/postmaster/postmaster.h | 1 + src/include/storage/pmsignal.h | 1 + src/include/storage/proc.h | 2 + src/include/threadpool/threadpool_worker.h | 1 + src/include/utils/postinit.h | 4 +- 27 files changed, 2130 insertions(+), 41 deletions(-) create mode 100644 src/gausskernel/process/postmaster/bgworker.cpp create mode 100644 src/include/postmaster/bgworker.h create mode 100644 src/include/postmaster/bgworker_internals.h diff --git a/src/common/backend/port/unix_latch.cpp b/src/common/backend/port/unix_latch.cpp index 2a02a0daff..44fd96d5c7 100644 --- a/src/common/backend/port/unix_latch.cpp +++ b/src/common/backend/port/unix_latch.cpp @@ -526,8 +526,9 @@ void ResetLatch(volatile Latch* latch) */ void latch_sigusr1_handler(void) { - if (waiting) + if (waiting) { 
sendSelfPipeByte(); + } } /* Send one byte to the self-pipe, to wake up WaitLatch */ diff --git a/src/common/backend/utils/init/globals.cpp b/src/common/backend/utils/init/globals.cpp index e0818b9a76..e0601f5665 100755 --- a/src/common/backend/utils/init/globals.cpp +++ b/src/common/backend/utils/init/globals.cpp @@ -51,6 +51,7 @@ THR_LOCAL object_access_hook_type object_access_hook = NULL; * These are initialized for the bootstrap/standalone case. */ THR_LOCAL bool IsUnderPostmaster = false; +THR_LOCAL bool IsBackgroundWorker = false; volatile ThreadId PostmasterPid = 0; bool IsPostmasterEnvironment = false; diff --git a/src/common/backend/utils/init/miscinit.cpp b/src/common/backend/utils/init/miscinit.cpp index 1aab5afed5..8eb2842083 100755 --- a/src/common/backend/utils/init/miscinit.cpp +++ b/src/common/backend/utils/init/miscinit.cpp @@ -724,11 +724,11 @@ bool has_rolvcadmin(Oid role_id) /* * Initialize user identity during normal backend startup */ -void InitializeSessionUserId(const char* role_name) +void InitializeSessionUserId(const char* role_name, Oid role_id) { HeapTuple role_tup; Form_pg_authid rform; - Oid role_id; + char* rname = NULL; /* Audit user login */ char details[PGAUDIT_MAXLENGTH]; @@ -744,23 +744,33 @@ void InitializeSessionUserId(const char* role_name) AssertState(!OidIsValid(u_sess->misc_cxt.AuthenticatedUserId)); } - role_tup = SearchSysCache1(AUTHNAME, PointerGetDatum(role_name)); - if (!HeapTupleIsValid(role_tup)) { - /* Audit user login */ - int rcs = snprintf_truncated_s(details, - sizeof(details), - "login db(%s) failed-the role(%s)does not exist", - u_sess->proc_cxt.MyProcPort->database_name, - role_name); - securec_check_ss(rcs, "", ""); - pgaudit_user_login(FALSE, u_sess->proc_cxt.MyProcPort->database_name, details); - - ereport(FATAL, (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION), - errmsg("Invalid username/password,login denied."))); + if (role_name != NULL) { + role_tup = SearchSysCache1(AUTHNAME, 
PointerGetDatum(role_name)); + if (!HeapTupleIsValid(role_tup)) { + /* Audit user login */ + int rcs = snprintf_truncated_s(details, + sizeof(details), + "login db(%s) failed-the role(%s)does not exist", + u_sess->proc_cxt.MyProcPort->database_name, + role_name); + securec_check_ss(rcs, "", ""); + pgaudit_user_login(FALSE, u_sess->proc_cxt.MyProcPort->database_name, details); + + ereport(FATAL, (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION), + errmsg("Invalid username/password,login denied."))); + } + } else { + role_tup = SearchSysCache1(AUTHOID, ObjectIdGetDatum(role_id)); + if (!HeapTupleIsValid(role_tup)) { + ereport(FATAL, + (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION), + errmsg("role with OID %u does not exist", role_id))); + } } rform = (Form_pg_authid)GETSTRUCT(role_tup); role_id = HeapTupleGetOid(role_tup); + rname = NameStr(rform->rolname); u_sess->misc_cxt.AuthenticatedUserId = role_id; u_sess->misc_cxt.AuthenticatedUserIsSuperuser = rform->rolsuper; @@ -832,10 +842,11 @@ void InitializeSessionUserIdStandalone(void) { /* * This function should only be called in single-user mode and in - * autovacuum workers. + * autovacuum workers, and in background workers. 
*/ AssertState(!IsUnderPostmaster || IsAutoVacuumWorkerProcess() || - IsJobSchedulerProcess() || IsJobWorkerProcess() || AM_WAL_SENDER); + IsJobSchedulerProcess() || IsJobWorkerProcess() || AM_WAL_SENDER || + IsBackgroundWorker); /* In pooler stateless reuse mode, to reset session userid */ if (!g_instance.attr.attr_network.PoolerStatelessReuse) { diff --git a/src/common/backend/utils/init/postinit.cpp b/src/common/backend/utils/init/postinit.cpp index b8b4a5fd59..2775810b90 100644 --- a/src/common/backend/utils/init/postinit.cpp +++ b/src/common/backend/utils/init/postinit.cpp @@ -683,7 +683,7 @@ void PostgresResetUsernamePgoption(const char* username) u_sess->proc_cxt.MyProcPort->user_name = (char*)GetSuperUserName((char*)username); } - InitializeSessionUserId(username); + InitializeSessionUserId(username, InvalidOid); am_superuser = superuser(); u_sess->misc_cxt.CurrentUserName = u_sess->proc_cxt.MyProcPort->user_name; } @@ -1059,6 +1059,7 @@ PostgresInitializer::PostgresInitializer() m_indbname = NULL; m_dboid = InvalidOid; m_username = NULL; + m_useroid = InvalidOid; m_isSuperUser = false; m_fullpath = NULL; memset_s(m_dbname, NAMEDATALEN, 0, NAMEDATALEN); @@ -1074,11 +1075,13 @@ PostgresInitializer::~PostgresInitializer() m_username = NULL; } -void PostgresInitializer::SetDatabaseAndUser(const char* in_dbname, Oid dboid, const char* username) +void PostgresInitializer::SetDatabaseAndUser( + const char* in_dbname, Oid dboid, const char* username, Oid useroid) { m_indbname = in_dbname; m_dboid = dboid; m_username = username; + m_useroid = useroid; } void PostgresInitializer::InitBootstrap() @@ -1489,12 +1492,19 @@ void PostgresInitializer::InitSession() StartXact(); - if (IsUnderPostmaster) { - CheckAuthentication(); - InitUser(); - } else { + if (!IsUnderPostmaster) { CheckAtLeastOneRoles(); SetSuperUserStandalone(); + } else if (IsBackgroundWorker) { + if (m_username == NULL && !OidIsValid(m_useroid)) { + InitializeSessionUserIdStandalone(); + m_isSuperUser = 
true; + } else { + InitUser(); + } + } else { + CheckAuthentication(); + InitUser(); } CheckConnPermission(); @@ -1626,7 +1636,7 @@ void PostgresInitializer::SetSuperUserAndDatabase() void PostgresInitializer::InitUser() { - InitializeSessionUserId(m_username); + InitializeSessionUserId(m_username, m_useroid); m_isSuperUser = superuser(); u_sess->misc_cxt.CurrentUserName = u_sess->proc_cxt.MyProcPort->user_name; } diff --git a/src/common/backend/utils/misc/guc.cpp b/src/common/backend/utils/misc/guc.cpp index e0d216deae..a7977f8784 100644 --- a/src/common/backend/utils/misc/guc.cpp +++ b/src/common/backend/utils/misc/guc.cpp @@ -459,6 +459,7 @@ static void assign_statistics_memory(int newval, void* extra); static void assign_history_memory(int newval, void* extra); static bool check_history_memory_limit(int* newval, void** extra, GucSource source); static bool check_autovacuum_max_workers(int* newval, void** extra, GucSource source); +static bool check_max_worker_processes(int* newval, void** extra, GucSource source); static bool check_job_max_workers(int* newval, void** extra, GucSource source); static bool check_effective_io_concurrency(int* newval, void** extra, GucSource source); static void assign_effective_io_concurrency(int newval, void* extra); @@ -7070,6 +7071,23 @@ static void init_configure_names_int() NULL, NULL }, + { + /* see max_connections */ + { + "max_background_workers", + PGC_POSTMASTER, + RESOURCES_ASYNCHRONOUS, + gettext_noop("Maximum number of concurrent background worker processes."), + NULL + }, + &g_instance.attr.attr_storage.max_background_workers, + 8, + 0, + MAX_BACKENDS, + check_max_worker_processes, + NULL, + NULL + }, { { "job_queue_processes", @@ -18748,6 +18766,7 @@ static bool check_maxconnections(int* newval, void** extra, GucSource source) } #endif if (*newval + g_instance.attr.attr_storage.autovacuum_max_workers + g_instance.attr.attr_sql.job_queue_processes + + g_instance.attr.attr_storage.max_background_workers + 
AUXILIARY_BACKENDS + AV_LAUNCHER_PROCS + g_instance.attr.attr_network.maxInnerToolConnections > MAX_BACKENDS) { return false; @@ -18758,6 +18777,7 @@ static bool check_maxconnections(int* newval, void** extra, GucSource source) static bool CheckMaxInnerToolConnections(int* newval, void** extra, GucSource source) { if (*newval + g_instance.attr.attr_storage.autovacuum_max_workers + g_instance.attr.attr_sql.job_queue_processes + + g_instance.attr.attr_storage.max_background_workers + g_instance.attr.attr_network.MaxConnections + AUXILIARY_BACKENDS + AV_LAUNCHER_PROCS > MAX_BACKENDS) { return false; } @@ -18767,6 +18787,7 @@ static bool CheckMaxInnerToolConnections(int* newval, void** extra, GucSource so static bool check_autovacuum_max_workers(int* newval, void** extra, GucSource source) { if (g_instance.attr.attr_network.MaxConnections + *newval + g_instance.attr.attr_sql.job_queue_processes + + g_instance.attr.attr_storage.max_background_workers + AUXILIARY_BACKENDS + AV_LAUNCHER_PROCS + g_instance.attr.attr_network.maxInnerToolConnections > MAX_BACKENDS) { return false; @@ -18774,6 +18795,18 @@ static bool check_autovacuum_max_workers(int* newval, void** extra, GucSource so return true; } +static bool check_max_worker_processes(int* newval, void** extra, GucSource source) +{ + if (g_instance.attr.attr_network.MaxConnections + g_instance.attr.attr_storage.autovacuum_max_workers + + g_instance.attr.attr_sql.job_queue_processes + *newval + + AUXILIARY_BACKENDS + AV_LAUNCHER_PROCS + g_instance.attr.attr_network.maxInnerToolConnections > + MAX_BACKENDS) { + return false; + } + return true; +} + + /* * Description: Check wheth out of max backends after max job worker threads. 
* @@ -18784,6 +18817,7 @@ static bool check_autovacuum_max_workers(int* newval, void** extra, GucSource so static bool check_job_max_workers(int* newval, void** extra, GucSource source) { if (g_instance.attr.attr_network.MaxConnections + g_instance.attr.attr_storage.autovacuum_max_workers + *newval + + g_instance.attr.attr_storage.max_background_workers + AUXILIARY_BACKENDS + AV_LAUNCHER_PROCS + g_instance.attr.attr_network.maxInnerToolConnections > MAX_BACKENDS) { return false; diff --git a/src/gausskernel/optimizer/commands/async.cpp b/src/gausskernel/optimizer/commands/async.cpp index 4668be049c..e04af32a11 100755 --- a/src/gausskernel/optimizer/commands/async.cpp +++ b/src/gausskernel/optimizer/commands/async.cpp @@ -200,8 +200,6 @@ typedef struct QueueBackendStatus { QueuePosition pos; /* backend has read queue up to here */ } QueueBackendStatus; -#define InvalidPid ((ThreadId)(-1)) - /* * Shared memory state for LISTEN/NOTIFY (excluding its SLRU stuff) * diff --git a/src/gausskernel/process/postmaster/Makefile b/src/gausskernel/process/postmaster/Makefile index b2eb420f7f..a491ec9048 100755 --- a/src/gausskernel/process/postmaster/Makefile +++ b/src/gausskernel/process/postmaster/Makefile @@ -32,7 +32,7 @@ ifneq "$(MAKECMDGOALS)" "clean" endif endif OBJS = autovacuum.o bgwriter.o fork_process.o pgarch.o pgstat.o postmaster.o gaussdb_version.o\ - startup.o syslogger.o walwriter.o checkpointer.o pgaudit.o alarmchecker.o \ + startup.o syslogger.o walwriter.o checkpointer.o pgaudit.o alarmchecker.o bgworker.o\ twophasecleaner.o aiocompleter.o fencedudf.o lwlockmonitor.o cbmwriter.o remoteservice.o pagewriter.o\ $(top_builddir)/src/lib/config/libconfig.a diff --git a/src/gausskernel/process/postmaster/bgworker.cpp b/src/gausskernel/process/postmaster/bgworker.cpp new file mode 100644 index 0000000000..580bf35a5c --- /dev/null +++ b/src/gausskernel/process/postmaster/bgworker.cpp @@ -0,0 +1,1309 @@ +/* 
-------------------------------------------------------------------- + * bgworker.cpp + * POSTGRES pluggable background workers implementation + * + * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/gausskernel/process/postmaster/bgworker.cpp + * + * ------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include + +#include "libpq/pqsignal.h" +#include "miscadmin.h" +#include "pgstat.h" +#include "postmaster/bgworker_internals.h" +#include "postmaster/postmaster.h" +#include "storage/ipc.h" +#include "storage/latch.h" +#include "storage/lwlock.h" +#include "storage/pg_shmem.h" +#include "storage/pmsignal.h" +#include "storage/proc.h" +#include "storage/procsignal.h" +#include "storage/shmem.h" +#include "tcop/tcopprot.h" +#include "utils/ascii.h" +#include "utils/ps_status.h" +#include "utils/postinit.h" + +/* + * The postmaster's list of registered background workers, in private memory. + */ +THR_LOCAL slist_head BackgroundWorkerList = SLIST_STATIC_INIT(BackgroundWorkerList); + +/* + * BackgroundWorkerSlots exist in shared memory and can be accessed (via + * the BackgroundWorkerArray) by both the postmaster and by regular backends. + * However, the postmaster cannot take locks, even spinlocks, because this + * might allow it to crash or become wedged if shared memory gets corrupted. + * Such an outcome is intolerable. Therefore, we need a lockless protocol + * for coordinating access to this data. + * + * The 'in_use' flag is used to hand off responsibility for the slot between + * the postmaster and the rest of the system. When 'in_use' is false, + * the postmaster will ignore the slot entirely, except for the 'in_use' flag + * itself, which it may read. In this state, regular backends may modify the + * slot. Once a backend sets 'in_use' to true, the slot becomes the + * responsibility of the postmaster. 
Regular backends may no longer modify it, + * but the postmaster may examine it. Thus, a backend initializing a slot + * must fully initialize the slot - and insert a write memory barrier - before + * marking it as in use. + * + * As an exception, however, even when the slot is in use, regular backends + * may set the 'terminate' flag for a slot, telling the postmaster not + * to restart it. Once the background worker is no longer running, the slot + * will be released for reuse. + * + * In addition to coordinating with the postmaster, backends modifying this + * data structure must coordinate with each other. Since they can take locks, + * this is straightforward: any backend wishing to manipulate a slot must + * take BackgroundWorkerLock in exclusive mode. Backends wishing to read + * data that might get concurrently modified by other backends should take + * this lock in shared mode. No matter what, backends reading this data + * structure must be able to tolerate concurrent modifications by the + * postmaster. + */ +typedef struct BackgroundWorkerSlot { + bool in_use; + bool terminate; + ThreadId pid; /* InvalidPid = not started yet; 0 = dead */ + uint64 generation; /* incremented when slot is recycled */ + BackgroundWorker worker; +} BackgroundWorkerSlot; + +/* + * In order to limit the total number of parallel workers (according to + * max_parallel_workers GUC), we maintain the number of active parallel + * workers. Since the postmaster cannot take locks, two variables are used for + * this purpose: the number of registered parallel workers (modified by the + * backends, protected by BackgroundWorkerLock) and the number of terminated + * parallel workers (modified only by the postmaster, lockless). The active + * number of parallel workers is the number of registered workers minus the + * terminated ones. These counters can of course overflow, but it's not + * important here since the subtraction will still give the right number. 
+ */ +typedef struct BackgroundWorkerArray { + int total_slots; + uint32 parallel_register_count; // For extension only + uint32 parallel_terminate_count; // For extension only + BackgroundWorkerSlot slot[FLEXIBLE_ARRAY_MEMBER]; +} BackgroundWorkerArray; + +struct BackgroundWorkerHandle { + int slot; + uint64 generation; +}; + +/* + * List of internal background worker entry points. We need this for + * reasons explained in LookupBackgroundWorkerFunction(), below. + */ +static const struct { + const char *fn_name; + bgworker_main_type fn_addr; +} InternalBGWorkers[] = + +{ +}; + +/* Private functions. */ +static bgworker_main_type LookupBackgroundWorkerFunction(const char *libraryname, const char *funcname); + +/* + * Calculate shared memory needed. + */ +Size BackgroundWorkerShmemSize(void) +{ + Size size; + + /* Array of workers is variably sized. */ + size = offsetof(BackgroundWorkerArray, slot); + size = add_size(size, mul_size((Size)g_instance.attr.attr_storage.max_background_workers, + sizeof(BackgroundWorkerSlot))); + + return size; +} + +/* + * Initialize shared memory. + */ +void BackgroundWorkerShmemInit(void) +{ + bool found; + + t_thrd.bgworker_cxt.background_worker_data = (BackgroundWorkerArray*)ShmemInitStruct("Background Worker Data", + BackgroundWorkerShmemSize(), + &found); + if (!IsUnderPostmaster) { + slist_iter siter; + int slotno = 0; + + t_thrd.bgworker_cxt.background_worker_data->total_slots = g_instance.attr.attr_storage.max_background_workers; + t_thrd.bgworker_cxt.background_worker_data->parallel_register_count = 0; + t_thrd.bgworker_cxt.background_worker_data->parallel_terminate_count = 0; + + /* + * Copy contents of worker list into shared memory. Record the shared + * memory slot assigned to each worker. This ensures a 1-to-1 + * correspondence between the postmaster's private list and the array + * in shared memory. 
+ */ + slist_foreach(siter, &BackgroundWorkerList) { + BackgroundWorkerSlot *slot = &t_thrd.bgworker_cxt.background_worker_data->slot[slotno]; + RegisteredBgWorker *rw; + + rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur); + Assert(slotno < g_instance.attr.attr_storage.max_background_workers); + slot->in_use = true; + slot->terminate = false; + slot->pid = InvalidPid; + slot->generation = 0; + rw->rw_shmem_slot = slotno; + rw->rw_worker.bgw_notify_pid = 0; /* might be reinit after crash */ + int ss_rc = memcpy_s(&slot->worker, sizeof(BackgroundWorker), &rw->rw_worker, sizeof(BackgroundWorker)); + securec_check(ss_rc, "\0", "\0"); + ++slotno; + } + + /* + * Mark any remaining slots as not in use. + */ + while (slotno < g_instance.attr.attr_storage.max_background_workers) { + BackgroundWorkerSlot *slot = &t_thrd.bgworker_cxt.background_worker_data->slot[slotno]; + + slot->in_use = false; + ++slotno; + } + } else { + Assert(found); + } +} + +/* + * Search the postmaster's backend-private list of RegisteredBgWorker objects + * for the one that maps to the given slot number. + */ +static RegisteredBgWorker * FindRegisteredWorkerBySlotNumber(int slotno) +{ + slist_iter siter; + + slist_foreach(siter, &BackgroundWorkerList) { + RegisteredBgWorker *rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur); + if (rw->rw_shmem_slot == slotno) { + return rw; + } + } + + return NULL; +} + +/* + * Notice changes to shared memory made by other backends. This code + * runs in the postmaster, so we must be very careful not to assume that + * shared memory contents are sane. Otherwise, a rogue backend could take + * out the postmaster. + */ +void BackgroundWorkerStateChange(void) +{ + int slotno; + + /* + * The total number of slots stored in shared memory should match our + * notion of max_background_workers. If it does not, something is very + * wrong. 
Further down, we always refer to this value as + * max_background_workers, in case shared memory gets corrupted while we're + * looping. + */ + if (g_instance.attr.attr_storage.max_background_workers != t_thrd.bgworker_cxt.background_worker_data->total_slots) { + elog(LOG, + "inconsistent background worker state (max_background_workers=%d, total_slots=%d", + g_instance.attr.attr_storage.max_background_workers, + t_thrd.bgworker_cxt.background_worker_data->total_slots); + return; + } + + /* + * Iterate through slots, looking for newly-registered workers or workers + * who must die. + */ + for (slotno = 0; slotno < g_instance.attr.attr_storage.max_background_workers; ++slotno) { + BackgroundWorkerSlot *slot = &t_thrd.bgworker_cxt.background_worker_data->slot[slotno]; + RegisteredBgWorker *rw = NULL; + + if (!slot->in_use) { + continue; + } + + /* + * Make sure we don't see the in_use flag before the updated slot + * contents. + */ + pg_read_barrier(); + + /* See whether we already know about this worker. */ + rw = FindRegisteredWorkerBySlotNumber(slotno); + if (rw != NULL) { + /* + * In general, the worker data can't change after it's initially + * registered. However, someone can set the terminate flag. + */ + if (slot->terminate && !rw->rw_terminate) { + rw->rw_terminate = true; + if (rw->rw_pid != 0) { + if (gs_signal_send(rw->rw_pid, SIGTERM) != 0) { + ereport(WARNING, + (errmsg("sending SIGTERM to %lu failed", rw->rw_pid))); + } + } else { + /* Report never-started, now-terminated worker as dead. */ + ReportBackgroundWorkerPID(rw); + } + } + continue; + } + + /* + * If the worker is marked for termination, we don't need to add it to + * the registered workers list; we can just free the slot. However, if + * bgw_notify_pid is set, the process that registered the worker may + * need to know that we've processed the terminate request, so be sure + * to signal it. 
+ */ + if (slot->terminate) { + /* + * We need a memory barrier here to make sure that the load of + * bgw_notify_pid and the update of parallel_terminate_count + * complete before the store to in_use. + */ + ThreadId notify_pid = slot->worker.bgw_notify_pid; + if ((slot->worker.bgw_flags & BGWORKER_CLASS_PARALLEL) != 0) { + t_thrd.bgworker_cxt.background_worker_data->parallel_terminate_count++; + } + pg_memory_barrier(); + slot->pid = 0; + slot->in_use = false; + if (notify_pid != 0) { + if (gs_signal_send(notify_pid, SIGUSR1) != 0) { + ereport(WARNING, + (errmsg("sending SIGUSR1 to %lu failed", notify_pid))); + } + } + + continue; + } + + /* + * Copy the registration data into the registered workers list. + */ + rw = (RegisteredBgWorker*)malloc(sizeof(RegisteredBgWorker)); + if (rw == NULL) { + ereport(LOG, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); + return; + } + + /* + * Copy strings in a paranoid way. If shared memory is corrupted, the + * source data might not even be NUL-terminated. + */ + ascii_safe_strlcpy(rw->rw_worker.bgw_name, + slot->worker.bgw_name, BGW_MAXLEN); + ascii_safe_strlcpy(rw->rw_worker.bgw_type, + slot->worker.bgw_type, BGW_MAXLEN); + ascii_safe_strlcpy(rw->rw_worker.bgw_library_name, + slot->worker.bgw_library_name, BGW_MAXLEN); + ascii_safe_strlcpy(rw->rw_worker.bgw_function_name, + slot->worker.bgw_function_name, BGW_MAXLEN); + + /* + * Copy various fixed-size fields. + * + * flags, start_time, and restart_time are examined by the postmaster, + * but nothing too bad will happen if they are corrupted. The + * remaining fields will only be examined by the child process. It + * might crash, but we won't. 
+ */ + rw->rw_worker.bgw_flags = slot->worker.bgw_flags; + rw->rw_worker.bgw_start_time = slot->worker.bgw_start_time; + rw->rw_worker.bgw_restart_time = slot->worker.bgw_restart_time; + rw->rw_worker.bgw_main_arg = slot->worker.bgw_main_arg; + int ss_rc = memcpy_s(rw->rw_worker.bgw_extra, BGW_EXTRALEN, slot->worker.bgw_extra, BGW_EXTRALEN); + securec_check(ss_rc, "\0", "\0"); + + /* + * Copy the PID to be notified about state changes, but only if the + * postmaster knows about a backend with that PID. It isn't an error + * if the postmaster doesn't know about the PID, because the backend + * that requested the worker could have died (or been killed) just + * after doing so. Nonetheless, at least until we get some experience + * with how this plays out in the wild, log a message at a relative + * high debug level. + */ + rw->rw_worker.bgw_notify_pid = slot->worker.bgw_notify_pid; + if (!PostmasterMarkPIDForWorkerNotify(rw->rw_worker.bgw_notify_pid)) { + elog(DEBUG1, "worker notification PID %lu is not valid", + rw->rw_worker.bgw_notify_pid); + rw->rw_worker.bgw_notify_pid = 0; + } + + /* Initialize postmaster bookkeeping. */ + rw->rw_backend = NULL; + rw->rw_pid = 0; + rw->rw_child_slot = 0; + rw->rw_crashed_at = 0; + rw->rw_shmem_slot = slotno; + rw->rw_terminate = false; + + /* Log it! */ + ereport(DEBUG1, + (errmsg("registering background worker \"%s\"", + rw->rw_worker.bgw_name))); + + slist_push_head(&BackgroundWorkerList, &rw->rw_lnode); + } +} + +/* + * Forget about a background worker that's no longer needed. + * + * The worker must be identified by passing an slist_mutable_iter that + * points to it. This convention allows deletion of workers during + * searches of the worker list, and saves having to search the list again. + * + * This function must be invoked only in the postmaster. 
+ */ +void ForgetBackgroundWorker(slist_mutable_iter *cur) +{ + RegisteredBgWorker *rw = NULL; + BackgroundWorkerSlot *slot = NULL; + + rw = slist_container(RegisteredBgWorker, rw_lnode, cur->cur); + + Assert(rw->rw_shmem_slot < g_instance.attr.attr_storage.max_background_workers); + slot = &t_thrd.bgworker_cxt.background_worker_data->slot[rw->rw_shmem_slot]; + if ((rw->rw_worker.bgw_flags & BGWORKER_CLASS_PARALLEL) != 0) { + t_thrd.bgworker_cxt.background_worker_data->parallel_terminate_count++; + } + + slot->in_use = false; + + ereport(DEBUG1, + (errmsg("unregistering background worker \"%s\"", + rw->rw_worker.bgw_name))); + + slist_delete_current(cur); + free(rw); +} + +/* + * Report the PID of a newly-launched background worker in shared memory. + * + * This function should only be called from the postmaster. + */ +void ReportBackgroundWorkerPID(const RegisteredBgWorker *rw) +{ + BackgroundWorkerSlot *slot; + + Assert(rw->rw_shmem_slot < g_instance.attr.attr_storage.max_background_workers); + slot = &t_thrd.bgworker_cxt.background_worker_data->slot[rw->rw_shmem_slot]; + slot->pid = rw->rw_pid; + ereport(LOG, + (errmsg("ReportBackgroundWorkerPID slot: %d, pid: %lu, bgw_notify_pid: %lu", + rw->rw_shmem_slot, slot->pid, rw->rw_worker.bgw_notify_pid))); + + if (rw->rw_worker.bgw_notify_pid != 0) { + int ret = gs_signal_send(rw->rw_worker.bgw_notify_pid, SIGUSR1); + ereport(LOG, + (errmsg("ReportBackgroundWorkerPID send SIGUSR1 to bgw_notify_pid: %lu, ret: %d", + rw->rw_worker.bgw_notify_pid, ret))); + } +} + +/* + * Report that the PID of a background worker is now zero because a + * previously-running background worker has exited. + * + * This function should only be called from the postmaster. 
+ */ +void ReportBackgroundWorkerExit(slist_mutable_iter *cur) +{ + RegisteredBgWorker *rw = slist_container(RegisteredBgWorker, rw_lnode, cur->cur); + + Assert(rw->rw_shmem_slot < g_instance.attr.attr_storage.max_background_workers); + BackgroundWorkerSlot *slot = &t_thrd.bgworker_cxt.background_worker_data->slot[rw->rw_shmem_slot]; + slot->pid = rw->rw_pid; + ThreadId notify_pid = rw->rw_worker.bgw_notify_pid; + + /* + * If this worker is slated for deregistration, do that before notifying + * the process which started it. Otherwise, if that process tries to + * reuse the slot immediately, it might not be available yet. In theory + * that could happen anyway if the process checks slot->pid at just the + * wrong moment, but this makes the window narrower. + */ + if (rw->rw_terminate || + rw->rw_worker.bgw_restart_time == BGW_NEVER_RESTART) { + ForgetBackgroundWorker(cur); + } + + if (notify_pid != 0) { + int ret = gs_signal_send(notify_pid, SIGUSR1); + ereport(LOG, + (errmsg("ReportBackgroundWorkerExit send SIGUSR1 to bgw_notify_pid: %lu, ret: %d", + notify_pid, ret))); + } +} + +/* + * Cancel SIGUSR1 notifications for a PID belonging to an exiting backend. + * + * This function should only be called from the postmaster. + */ +void BackgroundWorkerStopNotifications(ThreadId pid) +{ + slist_iter siter; + + slist_foreach(siter, &BackgroundWorkerList) + { + RegisteredBgWorker *rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur); + if (rw->rw_worker.bgw_notify_pid == pid) { + rw->rw_worker.bgw_notify_pid = 0; + } + } +} + +/* + * Reset background worker crash state. + * + * We assume that, after a crash-and-restart cycle, background workers without + * the never-restart flag should be restarted immediately, instead of waiting + * for bgw_restart_time to elapse. 
+ */ +void ResetBackgroundWorkerCrashTimes(void) +{ + slist_mutable_iter iter; + + slist_foreach_modify(iter, &BackgroundWorkerList) + { + RegisteredBgWorker *rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur); + + if (rw->rw_worker.bgw_restart_time == BGW_NEVER_RESTART) { + /* + * Workers marked BGW_NEVER_RESTART shouldn't get relaunched after + * the crash, so forget about them. (If we wait until after the + * crash to forget about them, and they are parallel workers, + * parallel_terminate_count will get incremented after we've + * already zeroed parallel_register_count, which would be bad.) + */ + ForgetBackgroundWorker(&iter); + } else { + /* + * The accounting which we do via parallel_register_count and + * parallel_terminate_count would get messed up if a worker marked + * parallel could survive a crash and restart cycle. All such + * workers should be marked BGW_NEVER_RESTART, and thus control + * should never reach this branch. + */ + Assert((rw->rw_worker.bgw_flags & BGWORKER_CLASS_PARALLEL) == 0); + + /* + * Allow this worker to be restarted immediately after we finish + * resetting. + */ + rw->rw_crashed_at = 0; + } + } +} + +#ifdef EXEC_BACKEND +/* + * In EXEC_BACKEND mode, return address of the corresponding slot in + * shared memory. + */ +void* GetBackgroundWorkerShmAddr(int slotno) +{ + Assert(slotno < t_thrd.bgworker_cxt.background_worker_data->total_slots); + return (void*)&t_thrd.bgworker_cxt.background_worker_data->slot[slotno]; +} + +/* + * In EXEC_BACKEND mode, workers use this to retrieve their details from + * shared memory. 
+ */ +BackgroundWorker* BackgroundWorkerEntry(const BackgroundWorkerSlot* bgWorkerSlotShmAddr) +{ + static THR_LOCAL BackgroundWorker myEntry; + + Assert(bgWorkerSlotShmAddr != NULL); + Assert(bgWorkerSlotShmAddr->in_use); + + /* must copy this in case we don't intend to retain shmem access */ + int ss_rc = memcpy_s(&myEntry, sizeof(myEntry), &bgWorkerSlotShmAddr->worker, sizeof(myEntry)); + securec_check(ss_rc, "\0", "\0"); + return &myEntry; +} +#endif + +/* + * Complain about the BackgroundWorker definition using error level elevel. + * Return true if it looks ok, false if not (unless elevel >= ERROR, in + * which case we won't return at all in the not-OK case). + */ +static bool SanityCheckBackgroundWorker(BackgroundWorker *worker, int elevel) +{ + /* sanity check for flags */ + if (worker->bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION) { + if (!(worker->bgw_flags & BGWORKER_SHMEM_ACCESS)) { + ereport(elevel, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("background worker \"%s\": must attach to shared memory in order to request a database connection", + worker->bgw_name))); + return false; + } + + if (worker->bgw_start_time == BgWorkerStart_PostmasterStart) { + ereport(elevel, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("background worker \"%s\": cannot request database access if starting at postmaster start", + worker->bgw_name))); + return false; + } + + /* XXX other checks? */ + } + + if ((worker->bgw_restart_time < 0 && + worker->bgw_restart_time != BGW_NEVER_RESTART) || + (worker->bgw_restart_time > USECS_PER_DAY / 1000)) { + ereport(elevel, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("background worker \"%s\": invalid restart interval", + worker->bgw_name))); + return false; + } + + /* + * Parallel workers may not be configured for restart, because the + * parallel_register_count/parallel_terminate_count accounting can't + * handle parallel workers lasting through a crash-and-restart cycle. 
+ */ + if (worker->bgw_restart_time != BGW_NEVER_RESTART && + (worker->bgw_flags & BGWORKER_CLASS_PARALLEL) != 0) { + ereport(elevel, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("background worker \"%s\": parallel workers may not be configured for restart", + worker->bgw_name))); + return false; + } + + /* + * If bgw_type is not filled in, use bgw_name. + */ + if (strcmp(worker->bgw_type, "") == 0) { + int rd = strncpy_s(worker->bgw_type, BGW_MAXLEN, worker->bgw_name, BGW_MAXLEN); + securec_check(rd, "\0", "\0"); + } + + return true; +} + +static void bgworker_quickdie(SIGNAL_ARGS) +{ + /* + * We DO NOT want to run proc_exit() or atexit() callbacks -- we're here + * because shared memory may be corrupted, so we don't want to try to + * clean up our transaction. Just nail the windows shut and get out of + * town. The callbacks wouldn't be safe to run from a signal handler, + * anyway. + * + * Note we do _exit(2) not _exit(0). This is to force the postmaster into + * a system reset cycle if someone sends a manual SIGQUIT to a random + * backend. This is necessary precisely because we don't clean up our + * shared memory state. (The "dead man switch" mechanism in pmsignal.c + * should ensure the postmaster sees this as a crash, too, but no harm in + * being doubly sure.) + */ + _exit(2); +} + +/* + * Standard SIGTERM handler for background workers + */ +static void bgworker_die(SIGNAL_ARGS) +{ + (void)PG_SETMASK(&t_thrd.libpq_cxt.BlockSig); + + ereport(FATAL, + (errcode(ERRCODE_ADMIN_SHUTDOWN), + errmsg("terminating background worker \"%s\" due to administrator command", + t_thrd.bgworker_cxt.my_bgworker_entry->bgw_type))); +} + +/* + * Standard SIGUSR1 handler for unconnected workers + * + * Here, we want to make sure an unconnected worker will at least heed + * latch activity. 
+ */
+static void bgworker_sigusr1_handler(SIGNAL_ARGS)
+{
+    int save_errno = errno;
+
+    latch_sigusr1_handler();
+
+    errno = save_errno;
+}
+
+/*
+ * Start a new background worker
+ *
+ * This is the main entry point for background worker, to be called from
+ * postmaster.
+ */
+void StartBackgroundWorker(void* bgWorkerSlotShmAddr)
+{
+    sigjmp_buf local_sigjmp_buf;
+    t_thrd.bgworker_cxt.my_bgworker_entry = BackgroundWorkerEntry((BackgroundWorkerSlot *)bgWorkerSlotShmAddr);
+    BackgroundWorker *worker = t_thrd.bgworker_cxt.my_bgworker_entry;
+    bgworker_main_type entrypt;
+
+    /*
+     * Create memory context and buffer used for RowDescription messages. As
+     * SendRowDescriptionMessage(), via exec_describe_statement_message(), is
+     * frequently executed for every single statement, we don't want to
+     * allocate a separate buffer every time.
+     */
+    t_thrd.mem_cxt.row_desc_mem_cxt = AllocSetContextCreate(t_thrd.top_mem_cxt,
+        "RowDescriptionContext",
+        ALLOCSET_DEFAULT_MINSIZE,
+        ALLOCSET_DEFAULT_INITSIZE,
+        ALLOCSET_DEFAULT_MAXSIZE);
+    MemoryContext old_mc = MemoryContextSwitchTo(t_thrd.mem_cxt.row_desc_mem_cxt);
+    initStringInfo(&(*t_thrd.postgres_cxt.row_description_buf));
+    (void)MemoryContextSwitchTo(old_mc);
+
+    t_thrd.mem_cxt.mask_password_mem_cxt = AllocSetContextCreate(t_thrd.top_mem_cxt,
+        "MaskPasswordCtx",
+        ALLOCSET_DEFAULT_MINSIZE,
+        ALLOCSET_DEFAULT_INITSIZE,
+        ALLOCSET_DEFAULT_MAXSIZE);
+
+    if (worker == NULL) {
+        ereport(FATAL,
+            (errmsg("unable to find bgworker entry")));
+    }
+
+    IsBackgroundWorker = true;
+
+    /* Identify myself via ps */
+    init_ps_display(worker->bgw_name, "", "", "");
+
+    SetProcessingMode(InitProcessing);
+
+    /*
+     * Set up signal handlers.
+ */ + if (worker->bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION) { + /* + * SIGINT is used to signal canceling the current action + */ + (void)gspqsignal(SIGINT, StatementCancelHandler); + (void)gspqsignal(SIGUSR1, procsignal_sigusr1_handler); + (void)gspqsignal(SIGFPE, FloatExceptionHandler); + + /* XXX Any other handlers needed here? */ + } else { + (void)gspqsignal(SIGINT, SIG_IGN); + (void)gspqsignal(SIGUSR1, bgworker_sigusr1_handler); + (void)gspqsignal(SIGFPE, SIG_IGN); + } + (void)gspqsignal(SIGTERM, bgworker_die); + (void)gspqsignal(SIGHUP, SIG_IGN); + + (void)gspqsignal(SIGQUIT, bgworker_quickdie); + (void)gspqsignal(SIGALRM, handle_sig_alarm); + + (void)gspqsignal(SIGPIPE, SIG_IGN); + (void)gspqsignal(SIGUSR2, SIG_IGN); + (void)gspqsignal(SIGCHLD, SIG_DFL); + + /* + * If an exception is encountered, processing resumes here. + * + * See notes in postgres.c about the design of this coding. + */ + if (sigsetjmp(local_sigjmp_buf, 1) != 0) { + /* Since not using PG_TRY, must reset error stack by hand */ + t_thrd.log_cxt.error_context_stack = NULL; + + /* Prevent interrupts while cleaning up */ + HOLD_INTERRUPTS(); + + /* Report the error to the server log */ + EmitErrorReport(); + + /* + * Do we need more cleanup here? For shmem-connected bgworkers, we + * will call InitProcess below, which will install ProcKill as exit + * callback. That will take care of releasing locks, etc. + */ + + /* and go away */ + proc_exit(1); + } + + /* We can now handle ereport(ERROR) */ + t_thrd.log_cxt.PG_exception_stack = &local_sigjmp_buf; + + /* + * If the background worker request shared memory access, set that up now; + * else, detach all shared memory segments. + */ + if (worker->bgw_flags & BGWORKER_SHMEM_ACCESS) { + /* + * Early initialization. Some of this could be useful even for + * background workers that aren't using shared memory, but they can + * call the individual startup routines for those subsystems if + * needed. 
+ */ + BaseInit(); + + /* + * Create a per-backend PGPROC struct in shared memory, except in the + * EXEC_BACKEND case where this was done in SubPostmasterMain. We must + * do this before we can use LWLocks (and in the EXEC_BACKEND case we + * already had to do some stuff with LWLocks). + */ +#ifndef EXEC_BACKEND + InitProcess(); +#endif + } + + /* + * Look up the entry point function, loading its library if necessary. + */ + entrypt = LookupBackgroundWorkerFunction(worker->bgw_library_name, + worker->bgw_function_name); + + /* + * Note that in normal processes, we would call InitPostgres here. For a + * worker, however, we don't know what database to connect to, yet; so we + * need to wait until the user code does it via + * BackgroundWorkerInitializeConnection(). + */ + + /* + * Now invoke the user-defined worker code + */ + entrypt(worker->bgw_main_arg); + + /* ... and if it returns, we're done */ + proc_exit(0); +} + +/* + * Register a new static background worker. + * + * This can only be called directly from postmaster or in the _PG_init + * function of a module library that's loaded by shared_preload_libraries; + * otherwise it will have no effect. 
+ */ +void RegisterBackgroundWorker(BackgroundWorker *worker) +{ + RegisteredBgWorker *rw; + static THR_LOCAL int numworkers = 0; + + if (!IsUnderPostmaster) { + ereport(DEBUG1, + (errmsg("registering background worker \"%s\"", worker->bgw_name))); + } + + if (!u_sess->misc_cxt.process_shared_preload_libraries_in_progress && + strcmp(worker->bgw_library_name, "postgres") != 0) { + if (!IsUnderPostmaster) { + ereport(LOG, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("background worker \"%s\": must be registered in shared_preload_libraries", + worker->bgw_name))); + } + return; + } + + if (!SanityCheckBackgroundWorker(worker, LOG)) { + return; + } + + if (worker->bgw_notify_pid != 0) { + ereport(LOG, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("background worker \"%s\": only dynamic background workers can request notification", + worker->bgw_name))); + return; + } + + /* + * Enforce maximum number of workers. Note this is overly restrictive: we + * could allow more non-shmem-connected workers, because these don't count + * towards the MAX_BACKENDS limit elsewhere. For now, it doesn't seem + * important to relax this restriction. + */ + if (++numworkers > g_instance.attr.attr_storage.max_background_workers) { + ereport(LOG, + (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED), + errmsg("too many background workers"), + errdetail_plural("Up to %d background worker can be registered with the current settings.", + "Up to %d background workers can be registered with the current settings.", + g_instance.attr.attr_storage.max_background_workers, + g_instance.attr.attr_storage.max_background_workers), + errhint("Consider increasing the configuration parameter \"max_background_workers\"."))); + return; + } + + /* + * Copy the registration data into the registered workers list. 
+ */ + rw = (RegisteredBgWorker*)malloc(sizeof(RegisteredBgWorker)); + if (rw == NULL) { + ereport(LOG, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); + return; + } + + rw->rw_worker = *worker; + rw->rw_backend = NULL; + rw->rw_pid = 0; + rw->rw_child_slot = 0; + rw->rw_crashed_at = 0; + rw->rw_terminate = false; + + slist_push_head(&BackgroundWorkerList, &rw->rw_lnode); +} + +/* + * Register a new background worker from a regular backend. + * + * Returns true on success and false on failure. Failure typically indicates + * that no background worker slots are currently available. + * + * If handle != NULL, we'll set *handle to a pointer that can subsequently + * be used as an argument to GetBackgroundWorkerPid(). The caller can + * free this pointer using pfree(), if desired. + */ +bool RegisterDynamicBackgroundWorker(BackgroundWorker *worker, + BackgroundWorkerHandle **handle) +{ + int slotno; + bool success = false; + bool parallel; + uint64 generation = 0; + + /* + * We can't register dynamic background workers from the postmaster. If + * this is a standalone backend, we're the only process and can't start + * any more. In a multi-process environment, it might be theoretically + * possible, but we don't currently support it due to locking + * considerations; see comments on the BackgroundWorkerSlot data + * structure. + */ + if (!IsUnderPostmaster) { + return false; + } + + if (!SanityCheckBackgroundWorker(worker, ERROR)) { + return false; + } + + parallel = (worker->bgw_flags & BGWORKER_CLASS_PARALLEL) != 0; + + (void)LWLockAcquire(BackgroundWorkerLock, LW_EXCLUSIVE); + + /* + * If this is a parallel worker, check whether there are already too many + * parallel workers; if so, don't register another one. Our view of + * parallel_terminate_count may be slightly stale, but that doesn't really + * matter: we would have gotten the same result if we'd arrived here + * slightly earlier anyway. 
There's no help for it, either, since the + * postmaster must not take locks; a memory barrier wouldn't guarantee + * anything useful. + */ + if (parallel && (int)(t_thrd.bgworker_cxt.background_worker_data->parallel_register_count - + t_thrd.bgworker_cxt.background_worker_data->parallel_terminate_count) >= + g_instance.shmem_cxt.max_parallel_workers) { + Assert(t_thrd.bgworker_cxt.background_worker_data->parallel_register_count - + t_thrd.bgworker_cxt.background_worker_data->parallel_terminate_count <= + MAX_PARALLEL_WORKER_LIMIT); + LWLockRelease(BackgroundWorkerLock); + return false; + } + + /* + * Look for an unused slot. If we find one, grab it. + */ + for (slotno = 0; slotno < t_thrd.bgworker_cxt.background_worker_data->total_slots; ++slotno) { + BackgroundWorkerSlot *slot = &t_thrd.bgworker_cxt.background_worker_data->slot[slotno]; + + if (!slot->in_use) { + int ss_rc = memcpy_s(&slot->worker, sizeof(BackgroundWorker), worker, sizeof(BackgroundWorker)); + securec_check(ss_rc, "\0", "\0"); + slot->pid = InvalidPid; /* indicates not started yet */ + slot->generation++; + slot->terminate = false; + generation = slot->generation; + if (parallel) + t_thrd.bgworker_cxt.background_worker_data->parallel_register_count++; + + /* + * Make sure postmaster doesn't see the slot as in use before it + * sees the new contents. + */ + pg_write_barrier(); + + slot->in_use = true; + success = true; + break; + } + } + + LWLockRelease(BackgroundWorkerLock); + + /* If we found a slot, tell the postmaster to notice the change. */ + if (success) { + SendPostmasterSignal(PMSIGNAL_BACKGROUND_WORKER_CHANGE); + } + + /* + * If we found a slot and the user has provided a handle, initialize it. + */ + if (success && handle) { + *handle = (BackgroundWorkerHandle*)palloc(sizeof(BackgroundWorkerHandle)); + (*handle)->slot = slotno; + (*handle)->generation = generation; + } + + return success; +} + +/* + * Get the PID of a dynamically-registered background worker. 
+ * + * If the worker is determined to be running, the return value will be + * BGWH_STARTED and *pidp will get the PID of the worker process. If the + * postmaster has not yet attempted to start the worker, the return value will + * be BGWH_NOT_YET_STARTED. Otherwise, the return value is BGWH_STOPPED. + * + * BGWH_STOPPED can indicate either that the worker is temporarily stopped + * (because it is configured for automatic restart and exited non-zero), + * or that the worker is permanently stopped (because it exited with exit + * code 0, or was not configured for automatic restart), or even that the + * worker was unregistered without ever starting (either because startup + * failed and the worker is not configured for automatic restart, or because + * TerminateBackgroundWorker was used before the worker was successfully + * started). + */ +BgwHandleStatus GetBackgroundWorkerPid(const BackgroundWorkerHandle *handle, ThreadId *pidp) +{ + ThreadId pid = InvalidPid; + + Assert(handle->slot < g_instance.attr.attr_storage.max_background_workers); + BackgroundWorkerSlot* slot = &t_thrd.bgworker_cxt.background_worker_data->slot[handle->slot]; + + /* + * We could probably arrange to synchronize access to data using memory + * barriers only, but for now, let's just keep it simple and grab the + * lock. It seems unlikely that there will be enough traffic here to + * result in meaningful contention. + */ + (void)LWLockAcquire(BackgroundWorkerLock, LW_SHARED); + + /* + * The generation number can't be concurrently changed while we hold the + * lock. The pid, which is updated by the postmaster, can change at any + * time, but we assume such changes are atomic. So the value we read + * won't be garbage, but it might be out of date by the time the caller + * examines it (but that's unavoidable anyway). + * + * The in_use flag could be in the process of changing from true to false, + * but if it is already false then it can't change further. 
+ */
+    if (handle->generation != slot->generation || !slot->in_use) {
+        pid = 0;
+    } else {
+        pid = slot->pid;
+    }
+
+    /* All done. */
+    LWLockRelease(BackgroundWorkerLock);
+
+    ereport(LOG,
+        (errmsg("GetBackgroundWorkerPid slot: %d, pid: %lu",
+            handle->slot, pid)));
+    if (pid == 0) {
+        return BGWH_STOPPED;
+    } else if (pid == InvalidPid) {
+        return BGWH_NOT_YET_STARTED;
+    }
+    *pidp = pid;
+    return BGWH_STARTED;
+}
+
+/*
+ * Wait for a background worker to start up.
+ *
+ * This is like GetBackgroundWorkerPid(), except that if the worker has not
+ * yet started, we wait for it to do so; thus, BGWH_NOT_YET_STARTED is never
+ * returned. However, if the postmaster has died, we give up and return
+ * BGWH_POSTMASTER_DIED, since in that case we know that startup will not
+ * take place.
+ */
+BgwHandleStatus WaitForBackgroundWorkerStartup(const BackgroundWorkerHandle *handle, ThreadId *pidp)
+{
+    BgwHandleStatus status;
+    int rc;
+    volatile knl_thrd_context* localThrd = &t_thrd;
+
+    for (;;) {
+        ThreadId pid = 0;
+
+        CHECK_FOR_INTERRUPTS();
+
+        status = GetBackgroundWorkerPid(handle, &pid);
+        ereport(LOG,
+            (errmsg("WaitForBackgroundWorkerStartup slot: %d, pid: %lu, status: %u, mypid: %lu",
+                handle->slot, pid, status, t_thrd.proc_cxt.MyProcPid)));
+        ereport(LOG,
+            (errmsg("WaitForBackgroundWorkerStartup addr: %p", localThrd)));
+        if (status == BGWH_STARTED) {
+            *pidp = pid;
+        }
+        if (status != BGWH_NOT_YET_STARTED) {
+            break;
+        }
+
+        rc = WaitLatch(&t_thrd.proc->procLatch,
+            WL_LATCH_SET | WL_POSTMASTER_DEATH, 0);
+
+        if (rc & WL_POSTMASTER_DEATH) {
+            status = BGWH_POSTMASTER_DIED;
+            break;
+        }
+
+        ResetLatch(&t_thrd.proc->procLatch);
+    }
+
+    return status;
+}
+
+/*
+ * Wait for a background worker to stop.
+ *
+ * If the worker hasn't yet started, or is running, we wait for it to stop
+ * and then return BGWH_STOPPED.
However, if the postmaster has died, we give
+ * up and return BGWH_POSTMASTER_DIED, because it's the postmaster that
+ * notifies us when a worker's state changes.
+ */
+BgwHandleStatus WaitForBackgroundWorkerShutdown(const BackgroundWorkerHandle *handle)
+{
+    BgwHandleStatus status;
+    int rc;
+
+    for (;;) {
+        ThreadId pid = InvalidPid;
+
+        CHECK_FOR_INTERRUPTS();
+
+        status = GetBackgroundWorkerPid(handle, &pid);
+        if (status == BGWH_STOPPED) {
+            break;
+        }
+
+        rc = WaitLatch(&t_thrd.proc->procLatch,
+            WL_LATCH_SET | WL_POSTMASTER_DEATH, 0);
+
+        if (rc & WL_POSTMASTER_DEATH) {
+            status = BGWH_POSTMASTER_DIED;
+            break;
+        }
+
+        ResetLatch(&t_thrd.proc->procLatch);
+    }
+
+    return status;
+}
+
+/*
+ * Instruct the postmaster to terminate a background worker.
+ *
+ * Note that it's safe to do this without regard to whether the worker is
+ * still running, or even if the worker may already have exited and been
+ * unregistered.
+ */
+void TerminateBackgroundWorker(const BackgroundWorkerHandle *handle)
+{
+    bool signal_postmaster = false;
+
+    Assert(handle->slot < g_instance.attr.attr_storage.max_background_workers);
+    BackgroundWorkerSlot* slot = &t_thrd.bgworker_cxt.background_worker_data->slot[handle->slot];
+
+    /* Set terminate flag in shared memory, unless slot has been reused. */
+    (void)LWLockAcquire(BackgroundWorkerLock, LW_EXCLUSIVE);
+    if (handle->generation == slot->generation) {
+        slot->terminate = true;
+        signal_postmaster = true;
+    }
+    LWLockRelease(BackgroundWorkerLock);
+
+    /* Make sure the postmaster notices the change to shared memory. */
+    if (signal_postmaster) {
+        SendPostmasterSignal(PMSIGNAL_BACKGROUND_WORKER_CHANGE);
+    }
+}
+
+/*
+ * Look up (and possibly load) a bgworker entry point function.
+ *
+ * For functions contained in the core code, we use library name "postgres"
+ * and consult the InternalBGWorkers array.  External functions are
+ * looked up, and loaded if necessary, using load_external_function().
+ * + * The point of this is to pass function names as strings across process + * boundaries. We can't pass actual function addresses because of the + * possibility that the function has been loaded at a different address + * in a different process. This is obviously a hazard for functions in + * loadable libraries, but it can happen even for functions in the core code + * on platforms using EXEC_BACKEND (e.g., Windows). + * + * At some point it might be worthwhile to get rid of InternalBGWorkers[] + * in favor of applying load_external_function() for core functions too; + * but that raises portability issues that are not worth addressing now. + */ +static bgworker_main_type LookupBackgroundWorkerFunction(const char *libraryname, const char *funcname) +{ + /* + * If the function is to be loaded from postgres itself, search the + * InternalBGWorkers array. + */ + if (strcmp(libraryname, "postgres") == 0) { + size_t i; + for (i = 0; i < lengthof(InternalBGWorkers); i++) { + if (strcmp(InternalBGWorkers[i].fn_name, funcname) == 0) { + return InternalBGWorkers[i].fn_addr; + } + } + + /* We can only reach this by programming error. */ + elog(ERROR, "internal function \"%s\" not found", funcname); + } + + /* Otherwise load from external library. */ + return (bgworker_main_type) + load_external_function(libraryname, (char*)funcname, true, true).user_fn; +} + +/* + * Given a PID, get the bgw_type of the background worker. Returns NULL if + * not a valid background worker. + * + * The return value is in static memory belonging to this function, so it has + * to be used before calling this function again. This is so that the caller + * doesn't have to worry about the background worker locking protocol. 
+ */ +const char * GetBackgroundWorkerTypeByPid(ThreadId pid) +{ + int slotno; + bool found = false; + static THR_LOCAL char result[BGW_MAXLEN]; + + (void)LWLockAcquire(BackgroundWorkerLock, LW_SHARED); + + for (slotno = 0; slotno < t_thrd.bgworker_cxt.background_worker_data->total_slots; slotno++) { + BackgroundWorkerSlot *slot = &t_thrd.bgworker_cxt.background_worker_data->slot[slotno]; + + if (slot->pid > 0 && slot->pid == pid) { + int rd = strncpy_s(result, BGW_MAXLEN, slot->worker.bgw_type, BGW_MAXLEN); + securec_check(rd, "\0", "\0"); + found = true; + break; + } + } + + LWLockRelease(BackgroundWorkerLock); + + if (!found) { + return NULL; + } + + return result; +} + +/* + * Connect background worker to a database. + */ +void BackgroundWorkerInitializeConnection(const char *dbname, const char *username, uint32 flags) +{ + BackgroundWorker *worker = t_thrd.bgworker_cxt.my_bgworker_entry; + + /* XXX is this the right errcode? */ + if (!(worker->bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION)) { + ereport(FATAL, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("database connection requirement not indicated during registration"))); + } + + t_thrd.proc_cxt.PostInit->SetDatabaseAndUser(dbname, InvalidOid, username, InvalidOid); + t_thrd.proc_cxt.PostInit->InitBackendWorker(); + + /* it had better not gotten out of "init" mode yet */ + if (!IsInitProcessingMode()) { + ereport(ERROR, + (errmsg("invalid processing mode in background worker"))); + } + SetProcessingMode(NormalProcessing); +} + +/* + * Connect background worker to a database using OIDs. + */ +void BackgroundWorkerInitializeConnectionByOid(Oid dboid, Oid useroid, uint32 flags) +{ + BackgroundWorker *worker = t_thrd.bgworker_cxt.my_bgworker_entry; + + /* XXX is this the right errcode? 
*/ + if (!(worker->bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION)) { + ereport(FATAL, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("database connection requirement not indicated during registration"))); + } + + t_thrd.proc_cxt.PostInit->SetDatabaseAndUser(NULL, dboid, NULL, useroid); + t_thrd.proc_cxt.PostInit->InitBackendWorker(); + + /* it had better not gotten out of "init" mode yet */ + if (!IsInitProcessingMode()) { + ereport(ERROR, + (errmsg("invalid processing mode in background worker"))); + } + SetProcessingMode(NormalProcessing); +} + +/* + * Block/unblock signals in a background worker + */ +void BackgroundWorkerBlockSignals(void) +{ + (void)PG_SETMASK(&t_thrd.libpq_cxt.BlockSig); +} + +void BackgroundWorkerUnblockSignals(void) +{ + (void)PG_SETMASK(&t_thrd.libpq_cxt.UnBlockSig); +} + + diff --git a/src/gausskernel/process/postmaster/postmaster.cpp b/src/gausskernel/process/postmaster/postmaster.cpp index d01e8fcd51..8a0da6b553 100755 --- a/src/gausskernel/process/postmaster/postmaster.cpp +++ b/src/gausskernel/process/postmaster/postmaster.cpp @@ -110,6 +110,7 @@ #include "job/job_scheduler.h" #include "job/job_worker.h" #include "postmaster/autovacuum.h" +#include "postmaster/bgworker_internals.h" #include "postmaster/pagewriter.h" #include "postmaster/fork_process.h" #include "postmaster/pgarch.h" @@ -311,6 +312,7 @@ static void reaper(SIGNAL_ARGS); static void sigusr1_handler(SIGNAL_ARGS); static void dummy_handler(SIGNAL_ARGS); static void CleanupBackend(ThreadId pid, int exitstatus); +static bool CleanupBackgroundWorker(ThreadId pid, int exitstatus); static const char* GetProcName(ThreadId pid); static void LogChildExit(int lev, const char* procname, ThreadId pid, int exitstatus); static void PostmasterStateMachine(void); @@ -366,6 +368,8 @@ static void check_and_reset_ha_listen_port(void); static void* cJSON_internal_malloc(size_t size); static bool NeedHeartbeat(); static ServerMode GetHaShmemMode(void); +static bool 
assign_backendlist_entry(RegisteredBgWorker *rw); +static void maybe_start_bgworkers(void); bool PMstateIsRun(void); @@ -380,6 +384,7 @@ bool PMstateIsRun(void); #define BACKEND_TYPE_TEMPBACKEND \ 0x0010 /* temp thread processing cancel signal \ or stream connection */ + #define BACKEND_TYPE_ALL 0x001F /* OR of all the above */ static int CountChildren(int target); @@ -1019,6 +1024,7 @@ void SetShmemCxt(void) g_instance.shmem_cxt.MaxBackends = g_instance.shmem_cxt.MaxConnections + g_instance.attr.attr_sql.job_queue_processes + g_instance.attr.attr_storage.autovacuum_max_workers + + g_instance.attr.attr_storage.max_background_workers + AUXILIARY_BACKENDS + AV_LAUNCHER_PROCS; g_instance.shmem_cxt.MaxReserveBackendId = g_instance.attr.attr_sql.job_queue_processes + @@ -5464,6 +5470,14 @@ static void reaper(SIGNAL_ARGS) continue; } + /* Was it one of our background workers? */ + if (CleanupBackgroundWorker(pid, (int)exitstatus)) + { + /* have it be restarted */ + g_instance.bgworker_cxt.have_crashed_worker = true; + continue; + } + /* * Else do standard backend child cleanup. */ @@ -5566,6 +5580,101 @@ static const char* GetProcName(ThreadId pid) } } +/* + * Scan the bgworkers list and see if the given PID (which has just stopped + * or crashed) is in it. Handle its shutdown if so, and return true. If not a + * bgworker, return false. + * + * This is heavily based on CleanupBackend. One important difference is that + * we don't know yet that the dying process is a bgworker, so we must be silent + * until we're sure it is. 
+ */ +static bool CleanupBackgroundWorker(ThreadId pid, + int exitstatus) /* child's exit status */ +{ + char namebuf[MAXPGPATH]; + slist_mutable_iter iter; + + slist_foreach_modify(iter, &BackgroundWorkerList) { + RegisteredBgWorker *rw; + + rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur); + + if (rw->rw_pid != pid) { + continue; + } + +#ifdef WIN32 + /* see CleanupBackend */ + if (exitstatus == ERROR_WAIT_NO_CHILDREN) { + exitstatus = 0; + } +#endif + + int rc = snprintf_s(namebuf, MAXPGPATH, MAXPGPATH - 1, _("background worker \"%s\""), rw->rw_worker.bgw_type); + securec_check_ss_c(rc, "\0", "\0"); + + if (!EXIT_STATUS_0(exitstatus)) { + /* Record timestamp, so we know when to restart the worker. */ + rw->rw_crashed_at = GetCurrentTimestamp(); + } else { + /* Zero exit status means terminate */ + rw->rw_crashed_at = 0; + rw->rw_terminate = true; + } + + /* + * Additionally, for shared-memory-connected workers, just like a + * backend, any exit status other than 0 or 1 is considered a crash + * and causes a system-wide restart. + */ + if ((rw->rw_worker.bgw_flags & BGWORKER_SHMEM_ACCESS) != 0) { + if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus)) { + HandleChildCrash(pid, exitstatus, namebuf); + return true; + } + } + + /* + * We must release the postmaster child slot whether this worker is + * connected to shared memory or not, but we only treat it as a crash + * if it is in fact connected. + */ + if (!ReleasePostmasterChildSlot(rw->rw_child_slot) && + (rw->rw_worker.bgw_flags & BGWORKER_SHMEM_ACCESS) != 0) { + HandleChildCrash(pid, exitstatus, namebuf); + return true; + } + + /* Get it out of the BackendList and clear out remaining data */ + DLRemove(&rw->rw_backend->elem); + + /* + * It's possible that this background worker started some OTHER + * background worker and asked to be notified when that worker started + * or stopped. If so, cancel any notifications destined for the + * now-dead backend. 
+ */ + if (rw->rw_backend->bgworker_notify) { + BackgroundWorkerStopNotifications(rw->rw_pid); + } + + BackendArrayRemove(rw->rw_backend); + + rw->rw_backend = NULL; + rw->rw_pid = 0; + rw->rw_child_slot = 0; + ReportBackgroundWorkerExit(&iter); /* report child death */ + + LogChildExit(EXIT_STATUS_0(exitstatus) ? DEBUG1 : LOG, + namebuf, pid, exitstatus); + + return true; + } + + return false; +} + /* * CleanupBackend -- cleanup after terminated backend. * @@ -5629,6 +5738,18 @@ static void CleanupBackend(ThreadId pid, int exitstatus) /* child's exit status. BackendArrayRemove(bp); } + if (bp->bgworker_notify) + { + /* + * This backend may have been slated to receive SIGUSR1 when + * some background worker started or stopped. Cancel those + * notifications, as we don't want to signal PIDs that are not + * PostgreSQL backends. This gets skipped in the (probably + * very common) case where the backend has never requested any + * such notifications. + */ + BackgroundWorkerStopNotifications(bp->pid); + } DLRemove(curr); break; } @@ -6881,6 +7002,16 @@ static void sigusr1_handler(SIGNAL_ARGS) gs_signal_setmask(&t_thrd.libpq_cxt.BlockSig, NULL); + /* Process background worker state change. */ + if (CheckPostmasterSignal(PMSIGNAL_BACKGROUND_WORKER_CHANGE)) + { + BackgroundWorkerStateChange(); + g_instance.bgworker_cxt.start_worker_needed = true; + } + if (g_instance.bgworker_cxt.start_worker_needed || g_instance.bgworker_cxt.have_crashed_worker) { + maybe_start_bgworkers(); + } + /* * RECOVERY_STARTED and BEGIN_HOT_STANDBY signals are ignored in * unexpected states. If the startup process quickly starts up, completes @@ -7699,6 +7830,300 @@ int MaxLivePostmasterChildren(void) return 6 * g_instance.shmem_cxt.MaxBackends; } +/* + * Start a new bgworker. + * Starting time conditions must have been checked already. + * + * Returns true on success, false on failure. + * In either case, update the RegisteredBgWorker's state appropriately. 
+ * + * This code is heavily based on autovacuum.c, q.v. + */ +static bool do_start_bgworker(RegisteredBgWorker *rw) +{ + ThreadId worker_pid = InvalidPid; + + Assert(rw->rw_pid == 0); + + /* + * Allocate and assign the Backend element. Note we must do this before + * forking, so that we can handle failures (out of memory or child-process + * slots) cleanly. + * + * Treat failure as though the worker had crashed. That way, the + * postmaster will wait a bit before attempting to start it again; if we + * tried again right away, most likely we'd find ourselves hitting the + * same resource-exhaustion condition. + */ + if (!assign_backendlist_entry(rw)) { + rw->rw_crashed_at = GetCurrentTimestamp(); + return false; + } + + ereport(DEBUG1, + (errmsg("starting background worker process \"%s\"", + rw->rw_worker.bgw_name))); + + Backend* bn = rw->rw_backend; + void* bgWorkerShmAddr = GetBackgroundWorkerShmAddr(rw->rw_shmem_slot); + switch ((worker_pid = initialize_util_thread(BACKGROUND_WORKER, bgWorkerShmAddr))) { + case (ThreadId)-1: + /* in postmaster, fork failed ... */ + ereport(LOG, + (errmsg("could not fork worker process: %m"))); + /* undo what assign_backendlist_entry did */ + (void)ReleasePostmasterChildSlot(rw->rw_child_slot); + bn->pid = 0; + rw->rw_child_slot = 0; + rw->rw_backend = NULL; + /* mark entry as crashed, so we'll try again later */ + rw->rw_crashed_at = GetCurrentTimestamp(); + break; + + default: + /* in postmaster, fork successful ... */ + rw->rw_pid = worker_pid; + bn->pid = rw->rw_pid; + ReportBackgroundWorkerPID(rw); + /* add new worker to lists of backends */ + DLInitElem(&bn->elem, bn); + DLAddHead(g_instance.backend_list, &bn->elem); + + return true; + } + + return false; +} + +/* + * Does the current postmaster state require starting a worker with the + * specified start_time? 
+ */ +static bool +bgworker_should_start_now(BgWorkerStartTime start_time) +{ + switch (pmState) { + case PM_NO_CHILDREN: + case PM_WAIT_DEAD_END: + case PM_SHUTDOWN_2: + case PM_SHUTDOWN: + case PM_WAIT_BACKENDS: + case PM_WAIT_READONLY: + case PM_WAIT_BACKUP: + break; + + case PM_RUN: + if (start_time == BgWorkerStart_RecoveryFinished) { + return true; + } + /* fall through */ + case PM_HOT_STANDBY: + if (start_time == BgWorkerStart_ConsistentState) { + return true; + } + /* fall through */ + case PM_RECOVERY: + case PM_STARTUP: + case PM_INIT: + if (start_time == BgWorkerStart_PostmasterStart) { + return true; + } + /* fall through */ + } + + return false; +} + +/* + * Allocate the Backend struct for a connected background worker, but don't + * add it to the list of backends just yet. + * + * On failure, return false without changing any worker state. + * + * Some info from the Backend is copied into the passed rw. + */ +static bool +assign_backendlist_entry(RegisteredBgWorker *rw) +{ + Backend* bn = NULL; + + /* + * Check that database state allows another connection. Currently the + * only possible failure is CAC_TOOMANY, so we just log an error message + * based on that rather than checking the error code precisely. + */ + if (canAcceptConnections(false) != CAC_OK) + { + ereport(LOG, + (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED), + errmsg("no slot available for new worker process"))); + return false; + } + + int slot = AssignPostmasterChildSlot(); + + bn = AssignFreeBackEnd(slot); + + if (bn == NULL) { + ereport(LOG, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory"))); + return false; + } + + /* + * Compute the cancel key that will be assigned to this session. We + * probably don't need cancel keys for background workers, but we'd better + * have something random in the field to prevent unfriendly people from + * sending cancels to them. 
+ */ + GenerateCancelKey(false); + bn->cancel_key = t_thrd.proc_cxt.MyCancelKey; + bn->child_slot = t_thrd.proc_cxt.MyPMChildSlot = slot; + bn->is_autovacuum = false; + bn->dead_end = false; + bn->bgworker_notify = false; + rw->rw_backend = bn; + rw->rw_child_slot = bn->child_slot; + + return true; +} + +/* + * If the time is right, start background worker(s). + * + * As a side effect, the bgworker control variables are set or reset + * depending on whether more workers may need to be started. + * + * We limit the number of workers started per call, to avoid consuming the + * postmaster's attention for too long when many such requests are pending. + * As long as start_worker_needed is true, ServerLoop will not block and will + * call this function again after dealing with any other issues. + */ +static void maybe_start_bgworkers(void) +{ +#define MAX_BGWORKERS_TO_LAUNCH 100 + int num_launched = 0; + TimestampTz now = 0; + slist_mutable_iter iter; + + /* + * During crash recovery, we have no need to be called until the state + * transition out of recovery. + */ + if (g_instance.fatal_error) { + g_instance.bgworker_cxt.start_worker_needed = false; + g_instance.bgworker_cxt.have_crashed_worker = false; + return; + } + + /* Don't need to be called again unless we find a reason for it below */ + g_instance.bgworker_cxt.start_worker_needed = false; + g_instance.bgworker_cxt.have_crashed_worker = false; + + slist_foreach_modify(iter, &BackgroundWorkerList) { + RegisteredBgWorker *rw; + + rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur); + + /* ignore if already running */ + if (rw->rw_pid != 0) { + continue; + } + + /* if marked for death, clean up and remove from list */ + if (rw->rw_terminate) { + ForgetBackgroundWorker(&iter); + continue; + } + + /* + * If this worker has crashed previously, maybe it needs to be + * restarted (unless on registration it specified it doesn't want to + * be restarted at all). Check how long ago did a crash last happen. 
+ * If the last crash is too recent, don't start it right away; let it + * be restarted once enough time has passed. + */ + if (rw->rw_crashed_at != 0) { + if (rw->rw_worker.bgw_restart_time == BGW_NEVER_RESTART) { + ThreadId notify_pid = rw->rw_worker.bgw_notify_pid; + + ForgetBackgroundWorker(&iter); + + /* Report worker is gone now. */ + if (notify_pid != 0) { + (void)gs_signal_send(notify_pid, SIGUSR1); + } + + continue; + } + + /* read system time only when needed */ + if (now == 0) { + now = GetCurrentTimestamp(); + } + + if (!TimestampDifferenceExceeds(rw->rw_crashed_at, now, + rw->rw_worker.bgw_restart_time * 1000)) { + /* Set flag to remember that we have workers to start later */ + g_instance.bgworker_cxt.have_crashed_worker = true; + continue; + } + } + + if (bgworker_should_start_now(rw->rw_worker.bgw_start_time)) { + /* reset crash time before trying to start worker */ + rw->rw_crashed_at = 0; + + /* + * Try to start the worker. + * + * On failure, give up processing workers for now, but set + * start_worker_needed so we'll come back here on the next iteration + * of ServerLoop to try again. (We don't want to wait, because + * there might be additional ready-to-run workers.) We could set + * have_crashed_worker as well, since this worker is now marked + * crashed, but there's no need because the next run of this + * function will do that. + */ + if (!do_start_bgworker(rw)) { + g_instance.bgworker_cxt.start_worker_needed = true; + return; + } + + /* + * If we've launched as many workers as allowed, quit, but have + * ServerLoop call us again to look for additional ready-to-run + * workers. There might not be any, but we'll find out the next + * time we run. + */ + if (++num_launched >= MAX_BGWORKERS_TO_LAUNCH) { + g_instance.bgworker_cxt.start_worker_needed = true; + return; + } + } + } +} + +/* + * When a backend asks to be notified about worker state changes, we + * set a flag in its backend entry. 
The background worker machinery needs + * to know when such backends exit. + */ +bool +PostmasterMarkPIDForWorkerNotify(ThreadId pid) +{ + int count = MaxLivePostmasterChildren(); + for (int i = 0; i < count; ++i) { + Backend* bp = &g_instance.backend_array[i]; + if (bp->pid == pid) + { + bp->bgworker_notify = true; + return true; + } + } + return false; +} + + #ifdef EXEC_BACKEND #ifndef WIN32 #define write_inheritable_socket(dest, src) ((*(dest) = (src))) @@ -7965,6 +8390,7 @@ Backend* AssignFreeBackEnd(int slot) bn->pid = 0; bn->cancel_key = 0; bn->dead_end = false; + bn->bgworker_notify = false; return bn; } @@ -9952,7 +10378,7 @@ int GaussDbThreadMain(knl_thread_arg* arg) commAuxiliaryMain(); proc_exit(0); } break; - + #ifdef ENABLE_MULTIPLE_NODES case COMM_POOLER_CLEAN: { InitProcessAndShareMemory(); @@ -9960,6 +10386,14 @@ int GaussDbThreadMain(knl_thread_arg* arg) proc_exit(0); } break; #endif + + case BACKGROUND_WORKER: { + IsBackgroundWorker = true; + InitProcessAndShareMemory(); + StartBackgroundWorker(arg->payload); + proc_exit(0); + } break; + default: ereport(PANIC, (errmsg("unsupport thread role type %d", arg->role))); break; @@ -10011,7 +10445,8 @@ static GaussdbThreadEntry GaussdbThreadEntryGate[] = {GaussDbThreadMain, GaussDbThreadMain, GaussDbThreadMain, GaussDbThreadMain, - GaussDbThreadMain}; + GaussDbThreadMain, + GaussDbThreadMain}; const char* GaussdbThreadName[] = {"main", "worker", @@ -10055,7 +10490,8 @@ const char* GaussdbThreadName[] = {"main", "communicator receiver flower", "communicator receiver loop", "communicator auxiliary", - "communicator pooler auto cleaner"}; + "communicator pooler auto cleaner", + "background worker"}; GaussdbThreadEntry GetThreadEntry(knl_thread_role role) { diff --git a/src/gausskernel/process/threadpool/knl_instance.cpp b/src/gausskernel/process/threadpool/knl_instance.cpp index 38794aac49..dad5820202 100755 --- a/src/gausskernel/process/threadpool/knl_instance.cpp +++ 
b/src/gausskernel/process/threadpool/knl_instance.cpp @@ -290,6 +290,7 @@ static void knl_g_wlm_init(knl_g_wlm_context* wlm_cxt) static void knl_g_shmem_init(knl_g_shmem_context* shmem_cxt) { + shmem_cxt->max_parallel_workers = 8; shmem_cxt->MaxBackends = 100; shmem_cxt->MaxReserveBackendId = (AUXILIARY_BACKENDS + AV_LAUNCHER_PROCS); shmem_cxt->ThreadPoolGroupNum = 0; @@ -316,6 +317,12 @@ static void knl_g_numa_init(knl_g_numa_context* numa_cxt) numa_cxt->allocIndex = 0; } +static void knl_g_bgworker_init(knl_g_bgworker_context* bgworker_cxt) +{ + bgworker_cxt->start_worker_needed = true; + bgworker_cxt->have_crashed_worker = false; +} + void knl_instance_init() { g_instance.binaryupgrade = false; @@ -363,6 +370,7 @@ void knl_instance_init() knl_g_dw_init(&g_instance.dw_cxt); knl_g_xlog_init(&g_instance.xlog_cxt); knl_g_numa_init(&g_instance.numa_cxt); + knl_g_bgworker_init(&g_instance.bgworker_cxt); MemoryContextSwitchTo(old_cxt); diff --git a/src/gausskernel/process/threadpool/knl_thread.cpp b/src/gausskernel/process/threadpool/knl_thread.cpp index 3822f053bb..1d0ca27a90 100755 --- a/src/gausskernel/process/threadpool/knl_thread.cpp +++ b/src/gausskernel/process/threadpool/knl_thread.cpp @@ -1405,6 +1405,12 @@ void knl_thread_mot_init() knl_t_mot_init(&t_thrd.mot_cxt); } +void knl_t_bgworker_init(knl_t_bgworker_context* bgworker_cxt) +{ + bgworker_cxt->background_worker_data = NULL; + bgworker_cxt->my_bgworker_entry = NULL; +} + void knl_thread_init(knl_thread_role role) { t_thrd.role = role; diff --git a/src/gausskernel/storage/ipc/ipci.cpp b/src/gausskernel/storage/ipc/ipci.cpp index 508a9abcbd..fa8a0df839 100755 --- a/src/gausskernel/storage/ipc/ipci.cpp +++ b/src/gausskernel/storage/ipc/ipci.cpp @@ -37,6 +37,7 @@ #include "pgxc/nodemgr.h" #endif #include "postmaster/autovacuum.h" +#include "postmaster/bgworker_internals.h" #include "postmaster/bgwriter.h" #include "postmaster/postmaster.h" #include "replication/slot.h" @@ -136,6 +137,7 @@ void 
CreateSharedMemoryAndSemaphores(bool makePrivate, int port) size = add_size(size, CLOGShmemSize()); size = add_size(size, CSNLOGShmemSize()); size = add_size(size, TwoPhaseShmemSize()); + size = add_size(size, BackgroundWorkerShmemSize()); size = add_size(size, MultiXactShmemSize()); size = add_size(size, LWLockShmemSize()); size = add_size(size, ProcArrayShmemSize()); @@ -275,6 +277,7 @@ void CreateSharedMemoryAndSemaphores(bool makePrivate, int port) { TwoPhaseShmemInit(); } + BackgroundWorkerShmemInit(); /* * Set up shared-inval messaging diff --git a/src/gausskernel/storage/ipc/procsignal.cpp b/src/gausskernel/storage/ipc/procsignal.cpp index d7d92a9893..3308fe14e4 100644 --- a/src/gausskernel/storage/ipc/procsignal.cpp +++ b/src/gausskernel/storage/ipc/procsignal.cpp @@ -306,6 +306,7 @@ void procsignal_sigusr1_handler(SIGNAL_ARGS) if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN)) RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN); + SetLatch(&t_thrd.proc->procLatch); latch_sigusr1_handler(); errno = save_errno; diff --git a/src/gausskernel/storage/lmgr/lwlocknames.txt b/src/gausskernel/storage/lmgr/lwlocknames.txt index 602047f4d6..1489b44f6d 100644 --- a/src/gausskernel/storage/lmgr/lwlocknames.txt +++ b/src/gausskernel/storage/lmgr/lwlocknames.txt @@ -96,3 +96,4 @@ GPCCommitLock 88 GPCClearLock 89 GPCTimelineLock 90 TsTagsCacheLock 91 +BackgroundWorkerLock 92 \ No newline at end of file diff --git a/src/gausskernel/storage/lmgr/proc.cpp b/src/gausskernel/storage/lmgr/proc.cpp index ce2eb19b01..aba442a0cc 100755 --- a/src/gausskernel/storage/lmgr/proc.cpp +++ b/src/gausskernel/storage/lmgr/proc.cpp @@ -212,7 +212,9 @@ static void FiniNuma(int code, Datum arg) * So, now we grab enough semaphores to support the desired max number * of backends immediately at initialization --- if the sysadmin has set * MaxConnections or autovacuum_max_workers higher than his kernel will - * support, he'll find out sooner rather than later. 
+ * support, he'll find out sooner rather than later. (The number of + * background worker processes registered by loadable modules is also taken + * into consideration.) * * Another reason for creating semaphores here is that the semaphore * implementation typically requires us to create semaphores in the @@ -240,6 +242,7 @@ void InitProcGlobal(void) #endif g_instance.proc_base->freeProcs = NULL; g_instance.proc_base->autovacFreeProcs = NULL; + g_instance.proc_base->bgworkerFreeProcs = NULL; g_instance.proc_base->pgjobfreeProcs = NULL; g_instance.proc_base->startupProc = NULL; g_instance.proc_base->startupProcPid = 0; @@ -252,10 +255,10 @@ void InitProcGlobal(void) /* * Create and initialize all the PGPROC structures we'll need. There are - * four separate consumers: (1) normal backends, (2) autovacuum workers - * and the autovacuum launcher, (3) auxiliary processes, and (4) prepared - * transactions. Each PGPROC structure is dedicated to exactly one of - * these purposes, and they do not move between groups. + * five separate consumers: (1) normal backends, (2) autovacuum workers + * and the autovacuum launcher, (3) background workers, (4) auxiliary processes, + * and (5) prepared transactions. Each PGPROC structure is dedicated to exactly + * one of these purposes, and they do not move between groups. */ PGPROC *initProcs[MAX_NUMA_NODE] = {0}; @@ -331,7 +334,7 @@ void InitProcGlobal(void) procs[i]->nodeno = i % nNumaNodes; /* - * Newly created PGPROCs for normal backends or for autovacuum must be + * Newly created PGPROCs for normal backends, autovacuum and bgworkers must be * queued up on the appropriate free list. 
Because there can only * ever be a small, fixed number of auxiliary processes, no free list * is used in that case; InitAuxiliaryProcess() instead uses a linear @@ -347,13 +350,23 @@ void InitProcGlobal(void) /* PGPROC for pg_job backend, add to pgjobfreeProcs list, 1 for Job Schedule Lancher */ procs[i]->links.next = (SHM_QUEUE *)g_instance.proc_base->pgjobfreeProcs; g_instance.proc_base->pgjobfreeProcs = procs[i]; - } else if (i < g_instance.shmem_cxt.MaxBackends) { + } else if (i < g_instance.shmem_cxt.MaxConnections + AUXILIARY_BACKENDS + + g_instance.attr.attr_sql.job_queue_processes + 1 + + g_instance.attr.attr_storage.autovacuum_max_workers + + AV_LAUNCHER_PROCS) { /* * PGPROC for AV launcher/worker, add to autovacFreeProcs list - * list size is autovacuum_max_workers + AUTOVACUUM_LAUNCHERS + * list size is autovacuum_max_workers + AV_LAUNCHER_PROCS */ procs[i]->links.next = (SHM_QUEUE *)g_instance.proc_base->autovacFreeProcs; g_instance.proc_base->autovacFreeProcs = procs[i]; + } else if (i < g_instance.shmem_cxt.MaxBackends) { + /* + * PGPROC for bgworker, add to bgworkerFreeProcs list + * list size is max_background_workers + */ + procs[i]->links.next = (SHM_QUEUE *)g_instance.proc_base->bgworkerFreeProcs; + g_instance.proc_base->bgworkerFreeProcs = procs[i]; } /* Initialize myProcLocks[] shared memory queues. 
*/ @@ -463,6 +476,8 @@ void InitProcess(void) t_thrd.proc = g_instance.proc_base->autovacFreeProcs; else if (IsJobSchedulerProcess() || IsJobWorkerProcess()) t_thrd.proc = g_instance.proc_base->pgjobfreeProcs; + else if (IsBackgroundWorker) + t_thrd.proc = g_instance.proc_base->bgworkerFreeProcs; else { #ifndef __USE_NUMA t_thrd.proc = g_instance.proc_base->freeProcs; @@ -478,6 +493,8 @@ void InitProcess(void) g_instance.proc_base->autovacFreeProcs = (PGPROC *)t_thrd.proc->links.next; else if (IsJobSchedulerProcess() || IsJobWorkerProcess()) g_instance.proc_base->pgjobfreeProcs = (PGPROC *)t_thrd.proc->links.next; + else if (IsBackgroundWorker) + g_instance.proc_base->bgworkerFreeProcs = (PGPROC *)t_thrd.proc->links.next; else { #ifndef __USE_NUMA g_instance.proc_base->freeProcs = (PGPROC *)t_thrd.proc->links.next; @@ -1036,6 +1053,11 @@ static void ProcKill(int code, Datum arg) } else if (IsJobSchedulerProcess() || IsJobWorkerProcess()) { t_thrd.proc->links.next = (SHM_QUEUE *)g_instance.proc_base->pgjobfreeProcs; g_instance.proc_base->pgjobfreeProcs = t_thrd.proc; + } + else if (IsBackgroundWorker) + { + t_thrd.proc->links.next = (SHM_QUEUE *)g_instance.proc_base->bgworkerFreeProcs; + g_instance.proc_base->bgworkerFreeProcs = t_thrd.proc; } else { t_thrd.proc->links.next = (SHM_QUEUE *)g_instance.proc_base->freeProcs; g_instance.proc_base->freeProcs = t_thrd.proc; diff --git a/src/include/gs_thread.h b/src/include/gs_thread.h index e32e21efe8..dc0bfaf1bd 100755 --- a/src/include/gs_thread.h +++ b/src/include/gs_thread.h @@ -97,6 +97,7 @@ typedef enum knl_thread_role { COMM_RECEIVER, COMM_AUXILIARY, COMM_POOLER_CLEAN, + BACKGROUND_WORKER, // should be last valid thread. 
THREAD_ENTRY_BOUND, diff --git a/src/include/knl/knl_guc/knl_instance_attr_storage.h b/src/include/knl/knl_guc/knl_instance_attr_storage.h index fc9805f801..ff905a4b6d 100755 --- a/src/include/knl/knl_guc/knl_instance_attr_storage.h +++ b/src/include/knl/knl_guc/knl_instance_attr_storage.h @@ -68,6 +68,7 @@ typedef struct knl_instance_attr_storage { int max_replication_slots; int replication_type; int autovacuum_max_workers; + int max_background_workers; int64 autovacuum_freeze_max_age; int wal_level; /* User specified maximum number of recovery threads. */ diff --git a/src/include/knl/knl_instance.h b/src/include/knl/knl_instance.h index a7eba7c4c8..edac6dfd48 100644 --- a/src/include/knl/knl_instance.h +++ b/src/include/knl/knl_instance.h @@ -485,6 +485,7 @@ typedef struct knl_g_libpq_context { typedef struct knl_g_shmem_context { int MaxConnections; + int max_parallel_workers; int MaxBackends; int MaxReserveBackendId; int ThreadPoolGroupNum; @@ -511,6 +512,12 @@ typedef struct knl_g_numa_context { size_t allocIndex; } knl_g_numa_context; +typedef struct knl_g_bgworker_context { + /* set when there's a worker that needs to be started up */ + volatile bool start_worker_needed; + volatile bool have_crashed_worker; +} knl_g_bgworker_context; + typedef struct knl_instance_context { knl_virtual_role role; volatile int status; @@ -582,6 +589,7 @@ typedef struct knl_instance_context { knl_g_rto_context rto_cxt; knl_g_xlog_context xlog_cxt; knl_g_numa_context numa_cxt; + knl_g_bgworker_context bgworker_cxt; } knl_instance_context; extern void knl_instance_init(); diff --git a/src/include/knl/knl_thread.h b/src/include/knl/knl_thread.h index 4a2939477e..21a08dde19 100644 --- a/src/include/knl/knl_thread.h +++ b/src/include/knl/knl_thread.h @@ -69,6 +69,7 @@ #include "openssl/ossl_typ.h" #include "workload/qnode.h" #include "tcop/dest.h" +#include "postmaster/bgworker.h" #define MAX_PATH_LEN 1024 @@ -2702,6 +2703,11 @@ typedef struct knl_t_mot_context { unsigned int 
mbindFlags; } knl_t_mot_context; +typedef struct knl_t_bgworker_context { + BackgroundWorkerArray *background_worker_data; + BackgroundWorker *my_bgworker_entry; +} knl_t_bgworker_context; + /* thread context. */ typedef struct knl_thrd_context { knl_thread_role role; @@ -2799,6 +2805,7 @@ typedef struct knl_thrd_context { knl_t_heartbeat_context heartbeat_cxt; knl_t_poolcleaner_context poolcleaner_cxt; knl_t_mot_context mot_cxt; + knl_t_bgworker_context bgworker_cxt; } knl_thrd_context; extern void knl_thread_mot_init(); diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h index 3ca70ddc20..6d73c6960d 100755 --- a/src/include/miscadmin.h +++ b/src/include/miscadmin.h @@ -28,6 +28,8 @@ #include "pgtime.h" /* for pg_time_t */ #include "libpq/libpq-be.h" +#define InvalidPid ((ThreadId)(-1)) + #define PG_BACKEND_VERSIONSTR "gaussdb " DEF_GS_VERSION "\n" /***************************************************************************** @@ -129,6 +131,7 @@ extern bool InplaceUpgradePrecommit; extern THR_LOCAL PGDLLIMPORT bool IsUnderPostmaster; extern THR_LOCAL PGDLLIMPORT char my_exec_path[]; +extern THR_LOCAL PGDLLIMPORT bool IsBackgroundWorker; #define MAX_QUERY_DOP (64) #define MIN_QUERY_DOP -(MAX_QUERY_DOP) @@ -232,7 +235,7 @@ extern bool InLocalUserIdChange(void); extern bool InSecurityRestrictedOperation(void); extern void GetUserIdAndContext(Oid* userid, bool* sec_def_context); extern void SetUserIdAndContext(Oid userid, bool sec_def_context); -extern void InitializeSessionUserId(const char* rolename); +extern void InitializeSessionUserId(const char* rolename, Oid role_id); extern void InitializeSessionUserIdStandalone(void); extern void SetSessionAuthorization(Oid userid, bool is_superuser); extern Oid GetCurrentRoleId(void); diff --git a/src/include/postmaster/bgworker.h b/src/include/postmaster/bgworker.h new file mode 100644 index 0000000000..2c3d5c6e9d --- /dev/null +++ b/src/include/postmaster/bgworker.h @@ -0,0 +1,157 @@ +/* 
-------------------------------------------------------------------- + * bgworker.h + * POSTGRES pluggable background workers interface + * + * A background worker is a process able to run arbitrary, user-supplied code, + * including normal transactions. + * + * Any external module loaded via shared_preload_libraries can register a + * worker. Workers can also be registered dynamically at runtime. In either + * case, the worker process is forked from the postmaster and runs the + * user-supplied "main" function. This code may connect to a database and + * run transactions. Workers can remain active indefinitely, but will be + * terminated if a shutdown or crash occurs. + * + * If the fork() call fails in the postmaster, it will try again later. Note + * that the failure can only be transient (fork failure due to high load, + * memory pressure, too many processes, etc); more permanent problems, like + * failure to connect to a database, are detected later in the worker and dealt + * with just by having the worker exit normally. A worker which exits with + * a return code of 0 will never be restarted and will be removed from worker + * list. A worker which exits with a return code of 1 will be restarted after + * the configured restart interval (unless that interval is BGW_NEVER_RESTART). + * The TerminateBackgroundWorker() function can be used to terminate a + * dynamically registered background worker; the worker will be sent a SIGTERM + * and will not be restarted after it exits. Whenever the postmaster knows + * that a worker will not be restarted, it unregisters the worker, freeing up + * that worker's slot for use by a new worker. + * + * Note that there might be more than one worker in a database concurrently, + * and the same module may request more than one worker running the same (or + * different) code. 
+ * + * + * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/include/postmaster/bgworker.h + * -------------------------------------------------------------------- + */ +#include "gs_thread.h" +#ifndef BGWORKER_H +#define BGWORKER_H + +/*--------------------------------------------------------------------- + * External module API. + *--------------------------------------------------------------------- + */ + +/* + * Pass this flag to have your worker be able to connect to shared memory. + */ +#define BGWORKER_SHMEM_ACCESS 0x0001 + +/* + * This flag means the bgworker requires a database connection. The connection + * is not established automatically; the worker must establish it later. + * It requires that BGWORKER_SHMEM_ACCESS was passed too. + */ +#define BGWORKER_BACKEND_DATABASE_CONNECTION 0x0002 + +/* + * This class is used internally for parallel queries, to keep track of the + * number of active parallel workers and make sure we never launch more than + * max_parallel_workers parallel workers at the same time. Third party + * background workers should not use this class. 
+ */ +#define BGWORKER_CLASS_PARALLEL 0x0010 + +/* add additional bgworker classes here */ + +typedef void (*bgworker_main_type) (Datum main_arg); + +/* + * Points in time at which a bgworker can request to be started + */ +typedef enum { + BgWorkerStart_PostmasterStart, + BgWorkerStart_ConsistentState, + BgWorkerStart_RecoveryFinished +} BgWorkerStartTime; + +#define BGW_DEFAULT_RESTART_INTERVAL 60 +#define BGW_NEVER_RESTART -1 +#define BGW_MAXLEN 96 +#define BGW_EXTRALEN 128 + +typedef struct BackgroundWorker { + char bgw_name[BGW_MAXLEN]; + char bgw_type[BGW_MAXLEN]; + int bgw_flags; + BgWorkerStartTime bgw_start_time; + int bgw_restart_time; /* in seconds, or BGW_NEVER_RESTART */ + char bgw_library_name[BGW_MAXLEN]; + char bgw_function_name[BGW_MAXLEN]; + Datum bgw_main_arg; + char bgw_extra[BGW_EXTRALEN]; + ThreadId bgw_notify_pid; /* SIGUSR1 this backend on start/stop */ +} BackgroundWorker; + +typedef enum BgwHandleStatus { + BGWH_STARTED, /* worker is running */ + BGWH_NOT_YET_STARTED, /* worker hasn't been started yet */ + BGWH_STOPPED, /* worker has exited */ + BGWH_POSTMASTER_DIED /* postmaster died; worker status unclear */ +} BgwHandleStatus; + +struct BackgroundWorkerHandle; +typedef struct BackgroundWorkerHandle BackgroundWorkerHandle; +struct BackgroundWorkerArray; + +/* Register a new bgworker during shared_preload_libraries */ +extern void RegisterBackgroundWorker(BackgroundWorker *worker); + +/* Register a new bgworker from a regular backend */ +extern bool RegisterDynamicBackgroundWorker(BackgroundWorker *worker, + BackgroundWorkerHandle **handle); + +/* Query the status of a bgworker */ +extern BgwHandleStatus GetBackgroundWorkerPid(const BackgroundWorkerHandle *handle, + ThreadId *pidp); +extern BgwHandleStatus WaitForBackgroundWorkerStartup(const BackgroundWorkerHandle *handle, ThreadId *pid); +extern BgwHandleStatus WaitForBackgroundWorkerShutdown(const BackgroundWorkerHandle *handle); +extern const char 
*GetBackgroundWorkerTypeByPid(ThreadId pid); + +/* Terminate a bgworker */ +extern void TerminateBackgroundWorker(const BackgroundWorkerHandle *handle); + +/* + * Connect to the specified database, as the specified user. Only a worker + * that passed BGWORKER_BACKEND_DATABASE_CONNECTION during registration may + * call this. + * + * If username is NULL, bootstrapping superuser is used. + * If dbname is NULL, connection is made to no specific database; + * only shared catalogs can be accessed. + */ +extern void BackgroundWorkerInitializeConnection(const char *dbname, const char *username, uint32 flags = 1); + +/* Just like the above, but specifying database and user by OID. */ +extern void BackgroundWorkerInitializeConnectionByOid(Oid dboid, Oid useroid, uint32 flags = 1); + +/* + * Flags to BackgroundWorkerInitializeConnection et al + * + * + * Allow bypassing datallowconn restrictions when connecting to database + */ +#define BGWORKER_BYPASS_ALLOWCONN 1 + + +/* Block/unblock signals in a background worker process */ +extern void BackgroundWorkerBlockSignals(void); +extern void BackgroundWorkerUnblockSignals(void); + +#endif /* BGWORKER_H */ + diff --git a/src/include/postmaster/bgworker_internals.h b/src/include/postmaster/bgworker_internals.h new file mode 100644 index 0000000000..fa57bdc4da --- /dev/null +++ b/src/include/postmaster/bgworker_internals.h @@ -0,0 +1,64 @@ +/* -------------------------------------------------------------------- + * bgworker_internals.h + * POSTGRES pluggable background workers internals + * + * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/include/postmaster/bgworker_internals.h + * -------------------------------------------------------------------- + */ +#ifndef BGWORKER_INTERNALS_H +#define BGWORKER_INTERNALS_H + +#include "datatype/timestamp.h" +#include "lib/ilist.h" +#include "postmaster/bgworker.h" 
+ +/* + * Maximum possible value of parallel workers. + */ +#define MAX_PARALLEL_WORKER_LIMIT 1024 + +struct BackgroundWorkerSlot; + +/* + * List of background workers, private to postmaster. + * + * A worker that requests a database connection during registration will have + * rw_backend set, and will be present in BackendList. Note: do not rely on + * rw_backend being non-NULL for shmem-connected workers! + */ +typedef struct RegisteredBgWorker { + BackgroundWorker rw_worker; /* its registry entry */ + struct Backend *rw_backend; /* its BackendList entry, or NULL */ + ThreadId rw_pid; /* 0 if not running */ + int rw_child_slot; + TimestampTz rw_crashed_at; /* if not 0, time it last crashed */ + int rw_shmem_slot; + bool rw_terminate; + slist_node rw_lnode; /* list link */ +} RegisteredBgWorker; + +extern THR_LOCAL slist_head BackgroundWorkerList; + +extern Size BackgroundWorkerShmemSize(void); +extern void BackgroundWorkerShmemInit(void); +extern void BackgroundWorkerStateChange(void); +extern void ForgetBackgroundWorker(slist_mutable_iter *cur); +extern void ReportBackgroundWorkerPID(const RegisteredBgWorker *); +extern void ReportBackgroundWorkerExit(slist_mutable_iter *cur); +extern void BackgroundWorkerStopNotifications(ThreadId pid); +extern void ResetBackgroundWorkerCrashTimes(void); + +/* Function to start a background worker, called from postmaster.c */ +extern void StartBackgroundWorker(void* bgWorkerSlotShmAddr) ; + +#ifdef EXEC_BACKEND +extern void* GetBackgroundWorkerShmAddr(int slotno); +extern BackgroundWorker *BackgroundWorkerEntry(const BackgroundWorkerSlot* bgWorkerSlotShmAddr); +#endif + +#endif /* BGWORKER_INTERNALS_H */ + diff --git a/src/include/postmaster/postmaster.h b/src/include/postmaster/postmaster.h index 6c2ec7c0f1..5070a79961 100644 --- a/src/include/postmaster/postmaster.h +++ b/src/include/postmaster/postmaster.h @@ -249,4 +249,5 @@ extern void GenerateCancelKey(bool isThreadPoolSession); extern bool SignalCancelAllBackEnd(); 
extern bool IsLocalAddr(Port* port); extern uint64_t mc_timers_us(void); +extern bool PostmasterMarkPIDForWorkerNotify(ThreadId); #endif /* _POSTMASTER_H */ diff --git a/src/include/storage/pmsignal.h b/src/include/storage/pmsignal.h index 07e213fac6..e78bebb203 100755 --- a/src/include/storage/pmsignal.h +++ b/src/include/storage/pmsignal.h @@ -43,6 +43,7 @@ typedef enum { PMSIGNAL_ROLLBACK_STANDBY_PROMOTE, /* roll back standby promoting */ PMSIGNAL_START_PAGE_WRITER, /* start a new page writer thread */ PMSIGNAL_START_THREADPOOL_WORKER, /* start thread pool woker */ + PMSIGNAL_BACKGROUND_WORKER_CHANGE, /* background worker state change */ NUM_PMSIGNALS /* Must be last value of enum! */ } PMSignalReason; diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index b0a14ae3e5..e76d80d863 100644 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -304,6 +304,8 @@ typedef struct PROC_HDR { PGPROC* freeProcs; /* Head of list of autovacuum's free PGPROC structures */ PGPROC* autovacFreeProcs; + /* Head of list of bgworker free PGPROC structures */ + PGPROC* bgworkerFreeProcs; /* Head of list of pg_job's free PGPROC structures */ PGPROC* pgjobfreeProcs; /* First pgproc waiting for group XID clear */ diff --git a/src/include/threadpool/threadpool_worker.h b/src/include/threadpool/threadpool_worker.h index 7ec2cf6479..2c22e5ce08 100755 --- a/src/include/threadpool/threadpool_worker.h +++ b/src/include/threadpool/threadpool_worker.h @@ -54,6 +54,7 @@ typedef struct Backend { bool is_autovacuum; /* is it an autovacuum process? */ volatile bool dead_end; /* is it going to send an quit? 
*/ volatile int flag; + bool bgworker_notify; /* gets bgworker start/stop notifications */ Dlelem elem; /* list link in BackendList */ } Backend; diff --git a/src/include/utils/postinit.h b/src/include/utils/postinit.h index 2525a8fecc..dccc83079d 100755 --- a/src/include/utils/postinit.h +++ b/src/include/utils/postinit.h @@ -54,7 +54,7 @@ public: ~PostgresInitializer(); - void SetDatabaseAndUser(const char* in_dbname, Oid dboid, const char* username); + void SetDatabaseAndUser(const char* in_dbname, Oid dboid, const char* username, Oid useroid = InvalidOid); void GetDatabaseName(char* out_dbname); @@ -91,6 +91,8 @@ public: const char* m_username; + Oid m_useroid; + private: void InitThread(); -- Gitee From 4ba086877c61fa7e069f42e0365f4d2200e5352f Mon Sep 17 00:00:00 2001 From: jiang_jianyu Date: Tue, 25 Aug 2020 21:27:33 +0800 Subject: [PATCH 2/6] add message queue from PG --- src/common/backend/libpq/Makefile | 2 +- src/common/backend/libpq/pqcomm.cpp | 38 +- src/common/backend/libpq/pqformat.cpp | 28 + src/common/backend/libpq/pqmq.cpp | 270 ++++++ src/gausskernel/storage/ipc/Makefile | 2 +- src/gausskernel/storage/ipc/shm_mq.cpp | 1158 ++++++++++++++++++++++++ src/include/libpq/libpq.h | 40 +- src/include/libpq/pqformat.h | 1 + src/include/libpq/pqmq.h | 25 + src/include/storage/procsignal.h | 1 + src/include/storage/shm_mq.h | 82 ++ 11 files changed, 1625 insertions(+), 22 deletions(-) create mode 100644 src/common/backend/libpq/pqmq.cpp create mode 100644 src/gausskernel/storage/ipc/shm_mq.cpp create mode 100644 src/include/libpq/pqmq.h create mode 100644 src/include/storage/shm_mq.h diff --git a/src/common/backend/libpq/Makefile b/src/common/backend/libpq/Makefile index cfab9c855a..3969dd94a8 100644 --- a/src/common/backend/libpq/Makefile +++ b/src/common/backend/libpq/Makefile @@ -23,6 +23,6 @@ ifneq "$(MAKECMDGOALS)" "clean" endif endif OBJS = be-fsstubs.o be-secure.o auth.o crypt.o hba.o ip.o md5.o sha2.o pqcomm.o \ - pqformat.o pqsignal.o + pqformat.o 
pqsignal.o pqmq.o include $(top_srcdir)/src/gausskernel/common.mk diff --git a/src/common/backend/libpq/pqcomm.cpp b/src/common/backend/libpq/pqcomm.cpp index 45e28cf246..b6190904df 100644 --- a/src/common/backend/libpq/pqcomm.cpp +++ b/src/common/backend/libpq/pqcomm.cpp @@ -145,6 +145,28 @@ static int Lock_AF_UNIX(unsigned short portNumber, const char* unixSocketName, b static int Setup_AF_UNIX(bool is_create_psql_sock); #endif /* HAVE_UNIX_SOCKETS */ +static void socket_comm_reset(void); +static int socket_flush(void); +static int socket_flush_if_writable(void); +static bool socket_is_send_pending(void); +static int socket_putmessage(char msgtype, const char *s, size_t len); +static int socket_putmessage_noblock(char msgtype, const char *s, size_t len); +static void socket_startcopyout(void); +static void socket_endcopyout(bool errorAbort); + +static PQcommMethods PqCommSocketMethods = { + socket_comm_reset, + socket_flush, + socket_flush_if_writable, + socket_is_send_pending, + socket_putmessage, + socket_putmessage_noblock, + socket_startcopyout, + socket_endcopyout +}; + +THR_LOCAL PQcommMethods *PqCommMethods = &PqCommSocketMethods; + extern bool FencedUDFMasterMode; /* -------------------------------- @@ -458,7 +480,7 @@ void pq_init(void) * inside a pqcomm.c routine (which ideally will never happen, but...) * -------------------------------- */ -void pq_comm_reset(void) +static void socket_comm_reset(void) { /* Do not throw away pending data, but do reset the busy flag */ t_thrd.libpq_cxt.PqCommBusy = false; @@ -1612,7 +1634,7 @@ static int internal_putbytes(const char* s, size_t len) * returns 0 if OK, EOF if trouble * -------------------------------- */ -int pq_flush(void) +static int socket_flush(void) { int res = 0; @@ -1769,7 +1791,7 @@ static int internal_flush(void) * Returns 0 if OK, or EOF if trouble. 
* -------------------------------- */ -int pq_flush_if_writable(void) +static int socket_flush_if_writable(void) { int res; @@ -1868,7 +1890,7 @@ void pq_flush_timedwait(int timeout) * pq_is_send_pending - is there any pending data in the output buffer? * -------------------------------- */ -bool pq_is_send_pending(void) +static bool socket_is_send_pending(void) { return (t_thrd.libpq_cxt.PqSendStart < t_thrd.libpq_cxt.PqSendPointer); } @@ -1905,7 +1927,7 @@ bool pq_is_send_pending(void) * returns 0 if OK, EOF if trouble * -------------------------------- */ -int pq_putmessage(char msgtype, const char* s, size_t len) +static int socket_putmessage(char msgtype, const char* s, size_t len) { if (t_thrd.libpq_cxt.DoingCopyOut || t_thrd.libpq_cxt.PqCommBusy) { return 0; @@ -1941,7 +1963,7 @@ fail: * If the output buffer is too small to hold the message, the buffer * is enlarged. */ -int pq_putmessage_noblock(char msgtype, const char* s, size_t len) +static int socket_putmessage_noblock(char msgtype, const char* s, size_t len) { int res; int required; @@ -1967,7 +1989,7 @@ int pq_putmessage_noblock(char msgtype, const char* s, size_t len) * is beginning * -------------------------------- */ -void pq_startcopyout(void) +static void socket_startcopyout(void) { t_thrd.libpq_cxt.DoingCopyOut = true; } @@ -1982,7 +2004,7 @@ void pq_startcopyout(void) * not allow binary transfers, so a textual terminator is always correct. 
* -------------------------------- */ -void pq_endcopyout(bool errorAbort) +static void socket_endcopyout(bool errorAbort) { if (!t_thrd.libpq_cxt.DoingCopyOut) { return; diff --git a/src/common/backend/libpq/pqformat.cpp b/src/common/backend/libpq/pqformat.cpp index 0828753959..3509790d09 100644 --- a/src/common/backend/libpq/pqformat.cpp +++ b/src/common/backend/libpq/pqformat.cpp @@ -610,6 +610,34 @@ const char* pq_getmsgstring(StringInfo msg) return pg_client_to_server(str, slen); } +/* -------------------------------- + * pq_getmsgrawstring - get a null-terminated text string - NO conversion + * + * Returns a pointer directly into the message buffer. + * -------------------------------- + */ +const char *pq_getmsgrawstring(StringInfo msg) +{ + char *str; + int slen; + + str = &msg->data[msg->cursor]; + + /* + * It's safe to use strlen() here because a StringInfo is guaranteed to + * have a trailing null byte. But check we found a null inside the + * message. + */ + slen = strlen(str); + if (msg->cursor + slen >= msg->len) + ereport(ERROR, + (errcode(ERRCODE_PROTOCOL_VIOLATION), + errmsg("invalid string in message"))); + msg->cursor += slen + 1; + + return str; +} + /* -------------------------------- * pq_getmsgend - verify message fully consumed * -------------------------------- diff --git a/src/common/backend/libpq/pqmq.cpp b/src/common/backend/libpq/pqmq.cpp new file mode 100644 index 0000000000..274c285a0c --- /dev/null +++ b/src/common/backend/libpq/pqmq.cpp @@ -0,0 +1,270 @@ +/*------------------------------------------------------------------------- + * + * pqmq.cpp + * Use the frontend/backend protocol for communication over a shm_mq + * + * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/common/backend/libpq/pqmq.cpp + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include 
"libpq/libpq.h" +#include "libpq/pqformat.h" +#include "libpq/pqmq.h" +#include "miscadmin.h" +#include "pgstat.h" +#include "tcop/tcopprot.h" +#include "utils/builtins.h" + +static THR_LOCAL shm_mq *pq_mq; +static THR_LOCAL shm_mq_handle *pq_mq_handle; +static THR_LOCAL bool pq_mq_busy = false; +static THR_LOCAL ThreadId pq_mq_parallel_master_pid = 0; +static THR_LOCAL BackendId pq_mq_parallel_master_backend_id = InvalidBackendId; + +static void mq_comm_reset(void); +static int mq_flush(void); +static int mq_flush_if_writable(void); +static bool mq_is_send_pending(void); +static int mq_putmessage(char msgtype, const char *s, size_t len); +static int mq_putmessage_noblock(char msgtype, const char *s, size_t len); +static void mq_startcopyout(void); +static void mq_endcopyout(bool errorAbort); + +static THR_LOCAL PQcommMethods PqCommMqMethods = { + mq_comm_reset, + mq_flush, + mq_flush_if_writable, + mq_is_send_pending, + mq_putmessage, + mq_putmessage_noblock, + mq_startcopyout, + mq_endcopyout +}; + +static THR_LOCAL PQcommMethods *save_PqCommMethods; +static THR_LOCAL CommandDest save_whereToSendOutput; +static THR_LOCAL ProtocolVersion save_FrontendProtocol; + +/* + * Arrange to redirect frontend/backend protocol messages to a message queue. 
+ */ +void pq_redirect_to_shm_mq(shm_mq_handle *mqh) +{ + save_PqCommMethods = PqCommMethods; + save_whereToSendOutput = CommandDest(t_thrd.postgres_cxt.whereToSendOutput); + save_FrontendProtocol = FrontendProtocol; + + PqCommMethods = &PqCommMqMethods; + pq_mq_handle = mqh; + t_thrd.postgres_cxt.whereToSendOutput = static_cast(DestRemote); + FrontendProtocol = PG_PROTOCOL_LATEST; +} + +void pq_stop_redirect_to_shm_mq(void) +{ + PqCommMethods = save_PqCommMethods; + t_thrd.postgres_cxt.whereToSendOutput = static_cast(save_whereToSendOutput); + FrontendProtocol = save_FrontendProtocol; + pq_mq = NULL; + pq_mq_handle = NULL; +} + +/* + * Arrange to SendProcSignal() to the parallel master each time we transmit + * message data via the shm_mq. + */ +void pq_set_parallel_master(ThreadId pid, BackendId backend_id) +{ + Assert(PqCommMethods == &PqCommMqMethods); + pq_mq_parallel_master_pid = pid; + pq_mq_parallel_master_backend_id = backend_id; +} + +static void mq_comm_reset(void) +{ + /* Nothing to do. */ +} + +static int mq_flush(void) +{ + /* Nothing to do. */ + return 0; +} + +static int mq_flush_if_writable(void) +{ + /* Nothing to do. */ + return 0; +} + +static bool mq_is_send_pending(void) +{ + /* There's never anything pending. */ + return false; +} + +/* + * Transmit a libpq protocol message to the shared memory message queue + * selected via pq_mq_handle. We don't include a length word, because the + * receiver will know the length of the message from shm_mq_receive(). + */ +static int mq_putmessage(char msgtype, const char *s, size_t len) +{ + shm_mq_iovec iov[2]; + shm_mq_result result; + + /* + * If we're sending a message, and we have to wait because the queue is + * full, and then we get interrupted, and that interrupt results in trying + * to send another message, we respond by detaching the queue. 
There's no + * way to return to the original context, but even if there were, just + * queueing the message would amount to indefinitely postponing the + * response to the interrupt. So we do this instead. + */ + if (pq_mq_busy) { + if (pq_mq_handle != NULL) + shm_mq_detach(pq_mq_handle); + pq_mq_handle = NULL; + return EOF; + } + + /* + * If the message queue is already gone, just ignore the message. This + * doesn't necessarily indicate a problem; for example, DEBUG messages can + * be generated late in the shutdown sequence, after all DSMs have already + * been detached. + */ + if (pq_mq_handle == NULL) + return 0; + + pq_mq_busy = true; + + iov[0].data = &msgtype; + iov[0].len = 1; + iov[1].data = s; + iov[1].len = len; + + Assert(pq_mq_handle != NULL); + + for (;;) { + result = shm_mq_sendv(pq_mq_handle, iov, 2, true); + + if (pq_mq_parallel_master_pid != 0) + (void)SendProcSignal(pq_mq_parallel_master_pid,PROCSIG_PARALLEL_MESSAGE, + pq_mq_parallel_master_backend_id); + + if (result != SHM_MQ_WOULD_BLOCK) + break; + + (void)WaitLatch(&t_thrd.proc->procLatch, WL_LATCH_SET, 0); + ResetLatch(&t_thrd.proc->procLatch); + CHECK_FOR_INTERRUPTS(); + } + + pq_mq_busy = false; + + Assert(result == SHM_MQ_SUCCESS || result == SHM_MQ_DETACHED); + if (result != SHM_MQ_SUCCESS) + return EOF; + return 0; +} + +static int mq_putmessage_noblock(char msgtype, const char *s, size_t len) +{ + /* + * While the shm_mq machinery does support sending a message in + * non-blocking mode, there's currently no way to try sending beginning to + * send the message that doesn't also commit us to completing the + * transmission. This could be improved in the future, but for now we + * don't need it. + */ + elog(ERROR, "not currently supported"); + return 0; +} + +static void mq_startcopyout(void) +{ + /* Nothing to do. */ +} + +static void mq_endcopyout(bool errorAbort) +{ + /* Nothing to do. 
*/ +} + +/* + * Parse an ErrorResponse or NoticeResponse payload and populate an ErrorData + * structure with the results. + */ +void pq_parse_errornotice(StringInfo msg, ErrorData *edata) +{ + /* Initialize edata with reasonable defaults. */ + errno_t rc = memset_s(edata, sizeof(ErrorData), 0, sizeof(ErrorData)); + securec_check(rc, "\0", "\0"); + edata->elevel = ERROR; + + /* Loop over fields and extract each one. */ + for (;;) { + char code = pq_getmsgbyte(msg); + const char *value = NULL; + + if (code == '\0') { + pq_getmsgend(msg); + break; + } + value = pq_getmsgrawstring(msg); + + switch (code) { + case PG_DIAG_SEVERITY: + /* ignore, trusting we'll get a nonlocalized version */ + break; + case PG_DIAG_SQLSTATE: + if (strlen(value) != 5) { + elog(ERROR, "invalid SQLSTATE: \"%s\"", value); + } + edata->sqlerrcode = MAKE_SQLSTATE(value[0], value[1], value[2], + value[3], value[4]); + break; + case PG_DIAG_MESSAGE_PRIMARY: + edata->message = pstrdup(value); + break; + case PG_DIAG_MESSAGE_DETAIL: + edata->detail = pstrdup(value); + break; + case PG_DIAG_MESSAGE_HINT: + edata->hint = pstrdup(value); + break; + case PG_DIAG_STATEMENT_POSITION: + edata->cursorpos = pg_atoi(const_cast(value), sizeof(int), '\0'); + break; + case PG_DIAG_INTERNAL_POSITION: + edata->internalpos = pg_atoi(const_cast(value), sizeof(int), '\0'); + break; + case PG_DIAG_INTERNAL_QUERY: + edata->internalquery = pstrdup(value); + break; + case PG_DIAG_CONTEXT: + edata->context = pstrdup(value); + break; + case PG_DIAG_SOURCE_FILE: + edata->filename = pstrdup(value); + break; + case PG_DIAG_SOURCE_LINE: + edata->lineno = pg_atoi(const_cast(value), sizeof(int), '\0'); + break; + case PG_DIAG_SOURCE_FUNCTION: + edata->funcname = pstrdup(value); + break; + default: + elog(ERROR, "unrecognized error field code: %d", (int) code); + break; + } + } +} + diff --git a/src/gausskernel/storage/ipc/Makefile b/src/gausskernel/storage/ipc/Makefile index 5ce67f7673..010ea8de9a 100644 --- 
a/src/gausskernel/storage/ipc/Makefile +++ b/src/gausskernel/storage/ipc/Makefile @@ -17,6 +17,6 @@ ifneq "$(MAKECMDGOALS)" "clean" endif endif OBJS = ipc.o ipci.o pmsignal.o procarray.o procsignal.o shmem.o shmqueue.o \ - sinval.o sinvaladt.o standby.o + sinval.o sinvaladt.o standby.o shm_mq.o include $(top_srcdir)/src/gausskernel/common.mk diff --git a/src/gausskernel/storage/ipc/shm_mq.cpp b/src/gausskernel/storage/ipc/shm_mq.cpp new file mode 100644 index 0000000000..7c731336b3 --- /dev/null +++ b/src/gausskernel/storage/ipc/shm_mq.cpp @@ -0,0 +1,1158 @@ +/*------------------------------------------------------------------------- + * + * shm_mq.cpp + * single-reader, single-writer message queue + * + * Both the sender and the receiver must have a PGPROC; their respective + * process latches are used for synchronization. Only the sender may send, + * and only the receiver may receive. This is intended to allow a user + * backend to communicate with worker backends that it has registered. + * + * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/gausskernel/storage/ipc/shm_mq.cpp + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "miscadmin.h" +#include "pgstat.h" +#include "postmaster/bgworker.h" +#include "storage/procsignal.h" +#include "storage/shm_mq.h" +#include "storage/spin.h" +#include "gs_threadlocal.h" +#include "gs_thread.h" + +/* + * This structure represents the actual queue. + * + * Some notes on synchronization: + * + * mq_receiver and mq_bytes_read can only be changed by the receiver; and + * mq_sender and mq_bytes_written can only be changed by the sender. + * mq_receiver and mq_sender are protected by mq_mutex, although, importantly, + * they cannot change once set, and thus may be read without a lock once this + * is known to be the case. 
+ * + * mq_bytes_read and mq_bytes_written are not protected by the mutex. Instead, + * they are written atomically using 8 byte loads and stores. Memory barriers + * must be carefully used to synchronize reads and writes of these values with + * reads and writes of the actual data in mq_ring. + * + * mq_detached needs no locking. It can be set by either the sender or the + * receiver, but only ever from false to true, so redundant writes don't + * matter. It is important that if we set mq_detached and then set the + * counterparty's latch, the counterparty must be certain to see the change + * after waking up. Since SetLatch begins with a memory barrier and ResetLatch + * ends with one, this should be OK. + * + * mq_ring_size and mq_ring_offset never change after initialization, and + * can therefore be read without the lock. + * + * Importantly, mq_ring can be safely read and written without a lock. + * At any given time, the difference between mq_bytes_read and + * mq_bytes_written defines the number of bytes within mq_ring that contain + * unread data, and mq_bytes_read defines the position where those bytes + * begin. The sender can increase the number of unread bytes at any time, + * but only the receiver can give license to overwrite those bytes, by + * incrementing mq_bytes_read. Therefore, it's safe for the receiver to read + * the unread bytes it knows to be present without the lock. Conversely, + * the sender can write to the unused portion of the ring buffer without + * the lock, because nobody else can be reading or writing those bytes. The + * receiver could be making more bytes unused by incrementing mq_bytes_read, + * but that's OK. Note that it would be unsafe for the receiver to read any + * data it's already marked as read, or to write any data; and it would be + * unsafe for the sender to reread any data after incrementing + * mq_bytes_written, but fortunately there's no need for any of that. 
+ */ +struct shm_mq { + slock_t mq_mutex; + PGPROC *mq_receiver; + PGPROC *mq_sender; + pg_atomic_uint64 mq_bytes_read; + pg_atomic_uint64 mq_bytes_written; + Size mq_ring_size; + bool mq_detached; + uint8 mq_ring_offset; + char mq_ring[FLEXIBLE_ARRAY_MEMBER]; +}; + +/* + * This structure is a backend-private handle for access to a queue. + * + * mqh_queue is a pointer to the queue we've attached. + * + * If this queue is intended to connect the current process with a background + * worker that started it, the user can pass a pointer to the worker handle + * to shm_mq_attach(), and we'll store it in mqh_handle. The point of this + * is to allow us to begin sending to or receiving from that queue before the + * process we'll be communicating with has even been started. If it fails + * to start, the handle will allow us to notice that and fail cleanly, rather + * than waiting forever; see shm_mq_wait_internal. This is mostly useful in + * simple cases - e.g. where there are just 2 processes communicating; in + * more complex scenarios, every process may not have a BackgroundWorkerHandle + * available, or may need to watch for the failure of more than one other + * process at a time. + * + * When a message exists as a contiguous chunk of bytes in the queue - that is, + * it is smaller than the size of the ring buffer and does not wrap around + * the end - we return the message to the caller as a pointer into the buffer. + * For messages that are larger or happen to wrap, we reassemble the message + * locally by copying the chunks into a backend-local buffer. mqh_buffer is + * the buffer, and mqh_buflen is the number of bytes allocated for it. + * + * mqh_partial_bytes, mqh_expected_bytes, and mqh_length_word_complete + * are used to track the state of non-blocking operations. 
When the caller + * attempts a non-blocking operation that returns SHM_MQ_WOULD_BLOCK, they + * are expected to retry the call at a later time with the same argument; + * we need to retain enough state to pick up where we left off. + * mqh_length_word_complete tracks whether we are done sending or receiving + * (whichever we're doing) the entire length word. mqh_partial_bytes tracks + * the number of bytes read or written for either the length word or the + * message itself, and mqh_expected_bytes - which is used only for reads - + * tracks the expected total size of the payload. + * + * mqh_counterparty_attached tracks whether we know the counterparty to have + * attached to the queue at some previous point. This lets us avoid some + * mutex acquisitions. + * + * mqh_context is the memory context in effect at the time we attached to + * the shm_mq. The shm_mq_handle itself is allocated in this context, and + * we make sure any other allocations we do happen in this context as well, + * to avoid nasty surprises. 
+ */ +struct shm_mq_handle { + shm_mq *mqh_queue; + char *mqh_segment; + BackgroundWorkerHandle *mqh_handle; + char *mqh_buffer; + Size mqh_buflen; + Size mqh_consume_pending; + Size mqh_partial_bytes; + Size mqh_expected_bytes; + bool mqh_length_word_complete; + bool mqh_counterparty_attached; + MemoryContext mqh_context; +}; + +static void shm_mq_detach_internal(shm_mq *mq); +static shm_mq_result shm_mq_send_bytes(shm_mq_handle *mqh, Size nbytes, + const void *data, bool nowait, Size *bytes_written); +static shm_mq_result shm_mq_receive_bytes(shm_mq_handle *mqh,Size bytes_needed, bool nowait, + Size *nbytesp, void **datap); +static bool shm_mq_counterparty_gone(shm_mq *mq, + BackgroundWorkerHandle *handle); +static bool shm_mq_wait_internal(shm_mq *mq, PGPROC **ptr, + BackgroundWorkerHandle *handle); +static void shm_mq_inc_bytes_read(shm_mq *mq, Size n); +static void shm_mq_inc_bytes_written(shm_mq *mq, Size n); + +/* Minimum queue size is enough for header and at least one chunk of data. */ +const Size shm_mq_minimum_size = MAXALIGN(offsetof(shm_mq, mq_ring)) + MAXIMUM_ALIGNOF; + +#define MQH_INITIAL_BUFSIZE 8192 + +/* + * Initialize a new shared message queue. + */ +shm_mq *shm_mq_create(void *address, Size size) +{ + shm_mq *mq = (shm_mq*)address; + Size data_offset = MAXALIGN(offsetof(shm_mq, mq_ring)); + + /* If the size isn't MAXALIGN'd, just discard the odd bytes. */ + size = MAXALIGN_DOWN(size); + + /* Queue size must be large enough to hold some data. */ + Assert(size > data_offset); + + /* Initialize queue header. */ + SpinLockInit(&mq->mq_mutex); + mq->mq_receiver = NULL; + mq->mq_sender = NULL; + pg_atomic_init_u64(&mq->mq_bytes_read, 0); + pg_atomic_init_u64(&mq->mq_bytes_written, 0); + mq->mq_ring_size = size - data_offset; + mq->mq_detached = false; + mq->mq_ring_offset = data_offset - offsetof(shm_mq, mq_ring); + + return mq; +} + +/* + * Set the identity of the process that will receive from a shared message + * queue. 
+ */ +void shm_mq_set_receiver(shm_mq *mq, PGPROC *proc) +{ + PGPROC *sender = NULL; + + SpinLockAcquire(&mq->mq_mutex); + Assert(mq->mq_receiver == NULL); + mq->mq_receiver = proc; + sender = mq->mq_sender; + SpinLockRelease(&mq->mq_mutex); + + if (sender != NULL) + SetLatch(&sender->procLatch); +} + +/* + * Set the identity of the process that will send to a shared message queue. + */ +void shm_mq_set_sender(shm_mq *mq, PGPROC *proc) +{ + PGPROC *receiver = NULL; + + SpinLockAcquire(&mq->mq_mutex); + Assert(mq->mq_sender == NULL); + mq->mq_sender = proc; + receiver = mq->mq_receiver; + SpinLockRelease(&mq->mq_mutex); + + if (receiver != NULL) + SetLatch(&receiver->procLatch); +} + +/* + * Get the configured receiver. + */ +PGPROC *shm_mq_get_receiver(shm_mq *mq) +{ + PGPROC *receiver = NULL; + + SpinLockAcquire(&mq->mq_mutex); + receiver = mq->mq_receiver; + SpinLockRelease(&mq->mq_mutex); + + return receiver; +} + +/* + * Get the configured sender. + */ +PGPROC *shm_mq_get_sender(shm_mq *mq) +{ + PGPROC *sender = NULL; + + SpinLockAcquire(&mq->mq_mutex); + sender = mq->mq_sender; + SpinLockRelease(&mq->mq_mutex); + + return sender; +} + +/* + * Attach to a shared message queue so we can send or receive messages. + * + * The memory context in effect at the time this function is called should + * be one which will last for at least as long as the message queue itself. + * We'll allocate the handle in that context, and future allocations that + * are needed to buffer incoming data will happen in that context as well. + * + * + * If handle != NULL, the queue can be read or written even before the + * other process has attached. We'll wait for it to do so if needed. The + * handle must be for a background worker initialized with bgw_notify_pid + * equal to our PID. + * + * shm_mq_detach() should be called when done. 
This will free the + * shm_mq_handle and mark the queue itself as detached, so that our + * counterpart won't get stuck waiting for us to fill or drain the queue + * after we've already lost interest. + */ +shm_mq_handle *shm_mq_attach(shm_mq *mq, char *seg, BackgroundWorkerHandle *handle) +{ + shm_mq_handle *mqh = (shm_mq_handle*)palloc(sizeof(shm_mq_handle)); + + Assert(mq->mq_receiver == t_thrd.proc || mq->mq_sender == t_thrd.proc); + mqh->mqh_queue = mq; + mqh->mqh_segment = seg; + mqh->mqh_handle = handle; + mqh->mqh_buffer = NULL; + mqh->mqh_buflen = 0; + mqh->mqh_consume_pending = 0; + mqh->mqh_partial_bytes = 0; + mqh->mqh_expected_bytes = 0; + mqh->mqh_length_word_complete = false; + mqh->mqh_counterparty_attached = false; + mqh->mqh_context = CurrentMemoryContext; + + return mqh; +} + +/* + * Associate a BackgroundWorkerHandle with a shm_mq_handle just as if it had + * been passed to shm_mq_attach. + */ +void shm_mq_set_handle(shm_mq_handle *mqh, BackgroundWorkerHandle *handle) +{ + Assert(mqh->mqh_handle == NULL); + mqh->mqh_handle = handle; +} + +/* + * Write a message into a shared message queue. + */ +shm_mq_result shm_mq_send(shm_mq_handle *mqh, Size nbytes, const void *data, bool nowait) +{ + shm_mq_iovec iov; + + iov.data = (const char*)data; + iov.len = nbytes; + + return shm_mq_sendv(mqh, &iov, 1, nowait); +} + +/* + * Write a message into a shared message queue, gathered from multiple + * addresses. + * + * When nowait = false, we'll wait on our process latch when the ring buffer + * fills up, and then continue writing once the receiver has drained some data. + * The process latch is reset after each wait. + * + * When nowait = true, we do not manipulate the state of the process latch; + * instead, if the buffer becomes full, we return SHM_MQ_WOULD_BLOCK. In + * this case, the caller should call this function again, with the same + * arguments, each time the process latch is set. 
(Once begun, the sending + * of a message cannot be aborted except by detaching from the queue; changing + * the length or payload will corrupt the queue.) + */ +shm_mq_result shm_mq_sendv(shm_mq_handle *mqh, shm_mq_iovec *iov, int iovcnt, bool nowait) +{ + shm_mq_result res; + shm_mq *mq = mqh->mqh_queue; + PGPROC *receiver = NULL; + Size nbytes = 0; + Size bytes_written; + int i; + int which_iov = 0; + Size offset; + + Assert(mq->mq_sender == t_thrd.proc); + + /* Compute total size of write. */ + for (i = 0; i < iovcnt; ++i) + nbytes += iov[i].len; + + /* Try to write, or finish writing, the length word into the buffer. */ + while (!mqh->mqh_length_word_complete) { + Assert(mqh->mqh_partial_bytes < sizeof(Size)); + res = shm_mq_send_bytes(mqh, sizeof(Size) - mqh->mqh_partial_bytes, + ((char *)&nbytes) + mqh->mqh_partial_bytes, + nowait, &bytes_written); + if (res == SHM_MQ_DETACHED) { + /* Reset state in case caller tries to send another message. */ + mqh->mqh_partial_bytes = 0; + mqh->mqh_length_word_complete = false; + return res; + } + mqh->mqh_partial_bytes += bytes_written; + + if (mqh->mqh_partial_bytes >= sizeof(Size)) { + Assert(mqh->mqh_partial_bytes == sizeof(Size)); + + mqh->mqh_partial_bytes = 0; + mqh->mqh_length_word_complete = true; + } + + if (res != SHM_MQ_SUCCESS) + return res; + + /* Length word can't be split unless bigger than required alignment. */ + Assert(mqh->mqh_length_word_complete || sizeof(Size) > MAXIMUM_ALIGNOF); + } + + /* Write the actual data bytes into the buffer. */ + Assert(mqh->mqh_partial_bytes <= nbytes); + offset = mqh->mqh_partial_bytes; + do { + Size chunksize; + + /* Figure out which bytes need to be sent next. */ + if (offset >= iov[which_iov].len) { + offset -= iov[which_iov].len; + ++which_iov; + if (which_iov >= iovcnt) + break; + continue; + } + + /* + * We want to avoid copying the data if at all possible, but every + * chunk of bytes we write into the queue has to be MAXALIGN'd, except + * the last. 
Thus, if a chunk other than the last one ends on a + * non-MAXALIGN'd boundary, we have to combine the tail end of its + * data with data from one or more following chunks until we either + * reach the last chunk or accumulate a number of bytes which is + * MAXALIGN'd. + */ + if (which_iov + 1 < iovcnt && + offset + MAXIMUM_ALIGNOF > iov[which_iov].len) { + char tmpbuf[MAXIMUM_ALIGNOF]; + Size j = 0; + + for (;;) { + if (offset < iov[which_iov].len) { + tmpbuf[j] = iov[which_iov].data[offset]; + j++; + offset++; + if (j == MAXIMUM_ALIGNOF) + break; + } else { + offset -= iov[which_iov].len; + which_iov++; + if (which_iov >= iovcnt) + break; + } + } + + res = shm_mq_send_bytes(mqh, j, tmpbuf, nowait, &bytes_written); + if (res == SHM_MQ_DETACHED) { + /* Reset state in case caller tries to send another message. */ + mqh->mqh_partial_bytes = 0; + mqh->mqh_length_word_complete = false; + return res; + } + + mqh->mqh_partial_bytes += bytes_written; + if (res != SHM_MQ_SUCCESS) + return res; + continue; + } + + /* + * If this is the last chunk, we can write all the data, even if it + * isn't a multiple of MAXIMUM_ALIGNOF. Otherwise, we need to + * MAXALIGN_DOWN the write size. + */ + chunksize = iov[which_iov].len - offset; + if (which_iov + 1 < iovcnt) + chunksize = MAXALIGN_DOWN(chunksize); + res = shm_mq_send_bytes(mqh, chunksize, &iov[which_iov].data[offset], + nowait, &bytes_written); + if (res == SHM_MQ_DETACHED) { + /* Reset state in case caller tries to send another message. */ + mqh->mqh_length_word_complete = false; + mqh->mqh_partial_bytes = 0; + return res; + } + + mqh->mqh_partial_bytes += bytes_written; + offset += bytes_written; + if (res != SHM_MQ_SUCCESS) + return res; + } while (mqh->mqh_partial_bytes < nbytes); + + /* Reset for next message. */ + mqh->mqh_partial_bytes = 0; + mqh->mqh_length_word_complete = false; + + /* If queue has been detached, let caller know. 
*/ + if (mq->mq_detached) + return SHM_MQ_DETACHED; + + /* + * If the counterparty is known to have attached, we can read mq_receiver + * without acquiring the spinlock and assume it isn't NULL. Otherwise, + * more caution is needed. + */ + if (mqh->mqh_counterparty_attached) { + receiver = mq->mq_receiver; + } else { + SpinLockAcquire(&mq->mq_mutex); + receiver = mq->mq_receiver; + SpinLockRelease(&mq->mq_mutex); + if (receiver == NULL) + return SHM_MQ_SUCCESS; + mqh->mqh_counterparty_attached = true; + } + + /* Notify receiver of the newly-written data, and return. */ + SetLatch(&receiver->procLatch); + return SHM_MQ_SUCCESS; +} + +/* + * Receive a message from a shared message queue. + * + * We set *nbytes to the message length and *data to point to the message + * payload. If the entire message exists in the queue as a single, + * contiguous chunk, *data will point directly into shared memory; otherwise, + * it will point to a temporary buffer. This mostly avoids data copying in + * the hoped-for case where messages are short compared to the buffer size, + * while still allowing longer messages. In either case, the return value + * remains valid until the next receive operation is performed on the queue. + * + * When nowait = false, we'll wait on our process latch when the ring buffer + * is empty and we have not yet received a full message. The sender will + * set our process latch after more data has been written, and we'll resume + * processing. Each call will therefore return a complete message + * (unless the sender detaches the queue). + * + * When nowait = true, we do not manipulate the state of the process latch; + * instead, whenever the buffer is empty and we need to read from it, we + * return SHM_MQ_WOULD_BLOCK. In this case, the caller should call this + * function again after the process latch has been set. 
+ */ +shm_mq_result shm_mq_receive(shm_mq_handle *mqh, Size *nbytesp, void **datap, bool nowait) +{ + shm_mq *mq = mqh->mqh_queue; + shm_mq_result res; + Size rb = 0; + Size nbytes; + void *rawdata = NULL; + + Assert(mq->mq_receiver == t_thrd.proc); + + /* We can't receive data until the sender has attached. */ + if (!mqh->mqh_counterparty_attached) { + if (nowait) { + int counterparty_gone; + + /* + * We shouldn't return at this point at all unless the sender + * hasn't attached yet. However, the correct return value depends + * on whether the sender is still attached. If we first test + * whether the sender has ever attached and then test whether the + * sender has detached, there's a race condition: a sender that + * attaches and detaches very quickly might fool us into thinking + * the sender never attached at all. So, test whether our + * counterparty is definitively gone first, and only afterwards + * check whether the sender ever attached in the first place. + */ + counterparty_gone = (int)shm_mq_counterparty_gone(mq, mqh->mqh_handle); + if (shm_mq_get_sender(mq) == NULL) { + if (counterparty_gone) + return SHM_MQ_DETACHED; + else + return SHM_MQ_WOULD_BLOCK; + } + } else if (!shm_mq_wait_internal(mq, &mq->mq_sender, mqh->mqh_handle) + && shm_mq_get_sender(mq) == NULL) { + mq->mq_detached = true; + return SHM_MQ_DETACHED; + } + mqh->mqh_counterparty_attached = true; + } + + /* + * If we've consumed an amount of data greater than 1/4th of the ring + * size, mark it consumed in shared memory. We try to avoid doing this + * unnecessarily when only a small amount of data has been consumed, + * because SetLatch() is fairly expensive and we don't want to do it too + * often. + */ + if (mqh->mqh_consume_pending > mq->mq_ring_size / 4) { + shm_mq_inc_bytes_read(mq, mqh->mqh_consume_pending); + mqh->mqh_consume_pending = 0; + } + + /* Try to read, or finish reading, the length word from the buffer. 
*/ + while (!mqh->mqh_length_word_complete) { + /* Try to receive the message length word. */ + Assert(mqh->mqh_partial_bytes < sizeof(Size)); + res = shm_mq_receive_bytes(mqh, sizeof(Size) - mqh->mqh_partial_bytes, + nowait, &rb, &rawdata); + if (res != SHM_MQ_SUCCESS) + return res; + + /* + * Hopefully, we'll receive the entire message length word at once. + * But if sizeof(Size) > MAXIMUM_ALIGNOF, then it might be split over + * multiple reads. + */ + if (mqh->mqh_partial_bytes == 0 && rb >= sizeof(Size)) { + Size needed; + + nbytes = *(Size *)rawdata; + + /* If we've already got the whole message, we're done. */ + needed = MAXALIGN(sizeof(Size)) + MAXALIGN(nbytes); + if (rb >= needed) { + mqh->mqh_consume_pending += needed; + *nbytesp = nbytes; + *datap = ((char *)rawdata) + MAXALIGN(sizeof(Size)); + return SHM_MQ_SUCCESS; + } + + /* + * We don't have the whole message, but we at least have the whole + * length word. + */ + mqh->mqh_expected_bytes = nbytes; + mqh->mqh_length_word_complete = true; + mqh->mqh_consume_pending += MAXALIGN(sizeof(Size)); + rb -= MAXALIGN(sizeof(Size)); + } else { + Size lengthbytes; + + /* Can't be split unless bigger than required alignment. */ + Assert(sizeof(Size) > MAXIMUM_ALIGNOF); + + /* Message word is split; need buffer to reassemble. */ + if (mqh->mqh_buffer == NULL) { + mqh->mqh_buffer = (char*)MemoryContextAlloc(mqh->mqh_context, + MQH_INITIAL_BUFSIZE); + mqh->mqh_buflen = MQH_INITIAL_BUFSIZE; + } + Assert(mqh->mqh_buflen >= sizeof(Size)); + + /* Copy partial length word; remember to consume it. 
*/ + if (mqh->mqh_partial_bytes + rb > sizeof(Size)) + lengthbytes = sizeof(Size) - mqh->mqh_partial_bytes; + else + lengthbytes = rb; + errno_t rc = memcpy_s(&mqh->mqh_buffer[mqh->mqh_partial_bytes], lengthbytes, + rawdata, lengthbytes); + securec_check(rc, "\0", "\0"); + mqh->mqh_partial_bytes += lengthbytes; + mqh->mqh_consume_pending += MAXALIGN(lengthbytes); + rb -= lengthbytes; + + /* If we now have the whole word, we're ready to read payload. */ + if (mqh->mqh_partial_bytes >= sizeof(Size)) { + Assert(mqh->mqh_partial_bytes == sizeof(Size)); + mqh->mqh_expected_bytes = *(Size *)mqh->mqh_buffer; + mqh->mqh_length_word_complete = true; + mqh->mqh_partial_bytes = 0; + } + } + } + nbytes = mqh->mqh_expected_bytes; + + if (mqh->mqh_partial_bytes == 0) { + /* + * Try to obtain the whole message in a single chunk. If this works, + * we need not copy the data and can return a pointer directly into + * shared memory. + */ + res = shm_mq_receive_bytes(mqh, nbytes, nowait, &rb, &rawdata); + if (res != SHM_MQ_SUCCESS) + return res; + if (rb >= nbytes) { + mqh->mqh_length_word_complete = false; + mqh->mqh_consume_pending += MAXALIGN(nbytes); + *nbytesp = nbytes; + *datap = rawdata; + return SHM_MQ_SUCCESS; + } + + /* + * The message has wrapped the buffer. We'll need to copy it in order + * to return it to the client in one chunk. First, make sure we have + * a large enough buffer available. + */ + if (mqh->mqh_buflen < nbytes) { + Size newbuflen = Max(mqh->mqh_buflen, MQH_INITIAL_BUFSIZE); + + while (newbuflen < nbytes) + newbuflen *= 2; + + if (mqh->mqh_buffer != NULL) { + pfree(mqh->mqh_buffer); + mqh->mqh_buffer = NULL; + mqh->mqh_buflen = 0; + } + mqh->mqh_buffer = (char*)MemoryContextAlloc(mqh->mqh_context, newbuflen); + mqh->mqh_buflen = newbuflen; + } + } + + /* Loop until we've copied the entire message. */ + for (;;) { + Size still_needed; + + /* Copy as much as we can. 
*/ + Assert(mqh->mqh_partial_bytes + rb <= nbytes); + errno_t rc = memcpy_s(&mqh->mqh_buffer[mqh->mqh_partial_bytes], rb, rawdata, rb); + securec_check(rc, "\0", "\0"); + mqh->mqh_partial_bytes += rb; + + /* + * Update count of bytes that can be consumed, accounting for + * alignment padding. Note that this will never actually insert any + * padding except at the end of a message, because the buffer size is + * a multiple of MAXIMUM_ALIGNOF, and each read and write is as well. + */ + Assert(mqh->mqh_partial_bytes == nbytes || rb == MAXALIGN(rb)); + mqh->mqh_consume_pending += MAXALIGN(rb); + + /* If we got all the data, exit the loop. */ + if (mqh->mqh_partial_bytes >= nbytes) + break; + + /* Wait for some more data. */ + still_needed = nbytes - mqh->mqh_partial_bytes; + res = shm_mq_receive_bytes(mqh, still_needed, nowait, &rb, &rawdata); + if (res != SHM_MQ_SUCCESS) + return res; + if (rb > still_needed) + rb = still_needed; + } + + /* Return the complete message, and reset for next message. */ + *nbytesp = nbytes; + *datap = mqh->mqh_buffer; + mqh->mqh_length_word_complete = false; + mqh->mqh_partial_bytes = 0; + return SHM_MQ_SUCCESS; +} + +/* + * Wait for the other process that's supposed to use this queue to attach + * to it. + * + * The return value is SHM_MQ_DETACHED if the worker has already detached or + * if it dies; it is SHM_MQ_SUCCESS if we detect that the worker has attached. + * Note that we will only be able to detect that the worker has died before + * attaching if a background worker handle was passed to shm_mq_attach(). 
+ */ +shm_mq_result shm_mq_wait_for_attach(shm_mq_handle *mqh) +{ + shm_mq *mq = mqh->mqh_queue; + PGPROC **victim; + + if (shm_mq_get_receiver(mq) == t_thrd.proc) { + victim = &mq->mq_sender; + } else { + Assert(shm_mq_get_sender(mq) == t_thrd.proc); + victim = &mq->mq_receiver; + } + + if (shm_mq_wait_internal(mq, victim, mqh->mqh_handle)) + return SHM_MQ_SUCCESS; + else + return SHM_MQ_DETACHED; +} + +/* + * Detach from a shared message queue, and destroy the shm_mq_handle. + */ +void shm_mq_detach(shm_mq_handle *mqh) +{ + /* Notify counterparty that we're outta here. */ + shm_mq_detach_internal(mqh->mqh_queue); + + /* Release local memory associated with handle. */ + if (mqh->mqh_buffer != NULL) + pfree(mqh->mqh_buffer); + pfree(mqh); +} + +/* + * Notify counterparty that we're detaching from shared message queue. + * + * The purpose of this function is to make sure that the process + * with which we're communicating doesn't block forever waiting for us to + * fill or drain the queue once we've lost interest. When the sender + * detaches, the receiver can read any messages remaining in the queue; + * further reads will return SHM_MQ_DETACHED. If the receiver detaches, + * further attempts to send messages will likewise return SHM_MQ_DETACHED. + * + * This is separated out from shm_mq_detach() because if the on_dsm_detach + * callback fires, we only want to do this much. We do not try to touch + * the local shm_mq_handle, as it may have been pfree'd already. + */ +static void shm_mq_detach_internal(shm_mq *mq) +{ + PGPROC *victim = NULL; + + SpinLockAcquire(&mq->mq_mutex); + if (mq->mq_sender == t_thrd.proc) { + victim = mq->mq_receiver; + } else { + Assert(mq->mq_receiver == t_thrd.proc); + victim = mq->mq_sender; + } + mq->mq_detached = true; + SpinLockRelease(&mq->mq_mutex); + + if (victim != NULL) { + SetLatch(&victim->procLatch); + } +} + +/* + * Get the shm_mq from handle. 
+ */ +shm_mq *shm_mq_get_queue(shm_mq_handle *mqh) +{ + return mqh->mqh_queue; +} + +/* + * Write bytes into a shared message queue. + */ +static shm_mq_result shm_mq_send_bytes(shm_mq_handle *mqh, Size nbytes, const void *data, + bool nowait, Size *bytes_written) +{ + shm_mq *mq = mqh->mqh_queue; + Size sent = 0; + uint64 used; + Size ringsize = mq->mq_ring_size; + Size available; + + while (sent < nbytes) { + uint64 rb; + uint64 wb; + + /* Compute number of ring buffer bytes used and available. */ + rb = pg_atomic_read_u64(&mq->mq_bytes_read); + wb = pg_atomic_read_u64(&mq->mq_bytes_written); + Assert(wb >= rb); + used = wb - rb; + Assert(used <= ringsize); + available = Min(ringsize - used, nbytes - sent); + + /* + * Bail out if the queue has been detached. Note that we would be in + * trouble if the compiler decided to cache the value of + * mq->mq_detached in a register or on the stack across loop + * iterations. It probably shouldn't do that anyway since we'll + * always return, call an external function that performs a system + * call, or reach a memory barrier at some point later in the loop, + * but just to be sure, insert a compiler barrier here. + */ + pg_compiler_barrier(); + if (mq->mq_detached) { + *bytes_written = sent; + return SHM_MQ_DETACHED; + } + + if (available == 0 && !mqh->mqh_counterparty_attached) { + /* + * The queue is full, so if the receiver isn't yet known to be + * attached, we must wait for that to happen. 
+ */ + if (nowait) { + if (shm_mq_counterparty_gone(mq, mqh->mqh_handle)) { + *bytes_written = sent; + return SHM_MQ_DETACHED; + } + if (shm_mq_get_receiver(mq) == NULL) { + *bytes_written = sent; + return SHM_MQ_WOULD_BLOCK; + } + } else if (!shm_mq_wait_internal(mq, &mq->mq_receiver, mqh->mqh_handle)) { + mq->mq_detached = true; + *bytes_written = sent; + return SHM_MQ_DETACHED; + } + mqh->mqh_counterparty_attached = true; + + /* + * The receiver may have read some data after attaching, so we + * must not wait without rechecking the queue state. + */ + } else if (available == 0) { + /* + * Since mq->mqh_counterparty_attached is known to be true at this + * point, mq_receiver has been set, and it can't change once set. + * Therefore, we can read it without acquiring the spinlock. + */ + Assert(mqh->mqh_counterparty_attached); + SetLatch(&mq->mq_receiver->procLatch); + + /* Skip manipulation of our latch if nowait = true. */ + if (nowait) { + *bytes_written = sent; + return SHM_MQ_WOULD_BLOCK; + } + + /* + * Wait for our latch to be set. It might already be set for some + * unrelated reason, but that'll just result in one extra trip + * through the loop. It's worth it to avoid resetting the latch + * at top of loop, because setting an already-set latch is much + * cheaper than setting one that has been reset. + */ + (void)WaitLatch(&t_thrd.proc->procLatch, WL_LATCH_SET, 0); + + /* Reset the latch so we don't spin. */ + ResetLatch(&t_thrd.proc->procLatch); + + /* An interrupt may have occurred while we were waiting. */ + CHECK_FOR_INTERRUPTS(); + } else { + Size offset; + Size sendnow; + + offset = wb % (uint64)ringsize; + sendnow = Min(available, ringsize - offset); + + /* + * Write as much data as we can via a single memcpy(). Make sure + * these writes happen after the read of mq_bytes_read, above. + * This barrier pairs with the one in shm_mq_inc_bytes_read. 
+ * (Since we're separating the read of mq_bytes_read from a + * subsequent write to mq_ring, we need a full barrier here.) + */ + pg_memory_barrier(); + errno_t rc = memcpy_s(&mq->mq_ring[mq->mq_ring_offset + offset], sendnow, + (char*)data + sent, sendnow); + securec_check(rc, "\0", "\0"); + sent += sendnow; + + /* + * Update count of bytes written, with alignment padding. Note + * that this will never actually insert any padding except at the + * end of a run of bytes, because the buffer size is a multiple of + * MAXIMUM_ALIGNOF, and each read is as well. + */ + Assert(sent == nbytes || sendnow == MAXALIGN(sendnow)); + shm_mq_inc_bytes_written(mq, MAXALIGN(sendnow)); + + /* + * For efficiency, we don't set the reader's latch here. We'll do + * that only when the buffer fills up or after writing an entire + * message. + */ + } + } + + *bytes_written = sent; + return SHM_MQ_SUCCESS; +} + +/* + * Wait until at least *nbytesp bytes are available to be read from the + * shared message queue, or until the buffer wraps around. If the queue is + * detached, returns SHM_MQ_DETACHED. If nowait is specified and a wait + * would be required, returns SHM_MQ_WOULD_BLOCK. Otherwise, *datap is set + * to the location at which data bytes can be read, *nbytesp is set to the + * number of bytes which can be read at that address, and the return value + * is SHM_MQ_SUCCESS. + */ +static shm_mq_result shm_mq_receive_bytes(shm_mq_handle *mqh, Size bytes_needed, bool nowait, + Size *nbytesp, void **datap) +{ + shm_mq *mq = mqh->mqh_queue; + Size ringsize = mq->mq_ring_size; + uint64 used; + uint64 written; + + for (;;) { + Size offset; + uint64 read; + + /* Get bytes written, so we can compute what's available to read. */ + written = pg_atomic_read_u64(&mq->mq_bytes_written); + + /* + * Get bytes read. Include bytes we could consume but have not yet + * consumed. 
+ */ + read = pg_atomic_read_u64(&mq->mq_bytes_read) + + mqh->mqh_consume_pending; + used = written - read; + Assert(used <= ringsize); + offset = read % (uint64)ringsize; + + /* If we have enough data or buffer has wrapped, we're done. */ + if (used >= bytes_needed || offset + used >= ringsize) { + *nbytesp = Min(used, ringsize - offset); + *datap = &mq->mq_ring[mq->mq_ring_offset + offset]; + + /* + * Separate the read of mq_bytes_written, above, from caller's + * attempt to read the data itself. Pairs with the barrier in + * shm_mq_inc_bytes_written. + */ + pg_read_barrier(); + return SHM_MQ_SUCCESS; + } + + /* + * Fall out before waiting if the queue has been detached. + * + * Note that we don't check for this until *after* considering whether + * the data already available is enough, since the receiver can finish + * receiving a message stored in the buffer even after the sender has + * detached. + */ + if (mq->mq_detached) { + /* + * If the writer advanced mq_bytes_written and then set + * mq_detached, we might not have read the final value of + * mq_bytes_written above. Insert a read barrier and then check + * again if mq_bytes_written has advanced. + */ + pg_read_barrier(); + if (written != pg_atomic_read_u64(&mq->mq_bytes_written)) + continue; + + return SHM_MQ_DETACHED; + } + + /* + * We didn't get enough data to satisfy the request, so mark any data + * previously-consumed as read to make more buffer space. + */ + if (mqh->mqh_consume_pending > 0) { + shm_mq_inc_bytes_read(mq, mqh->mqh_consume_pending); + mqh->mqh_consume_pending = 0; + } + + /* Skip manipulation of our latch if nowait = true. */ + if (nowait) + return SHM_MQ_WOULD_BLOCK; + + /* + * Wait for our latch to be set. It might already be set for some + * unrelated reason, but that'll just result in one extra trip through + * the loop. It's worth it to avoid resetting the latch at top of + * loop, because setting an already-set latch is much cheaper than + * setting one that has been reset. 
+ */ + (void)WaitLatch(&t_thrd.proc->procLatch, WL_LATCH_SET, 0); + + /* Reset the latch so we don't spin. */ + ResetLatch(&t_thrd.proc->procLatch); + + /* An interrupt may have occurred while we were waiting. */ + CHECK_FOR_INTERRUPTS(); + } +} + +/* + * Test whether a counterparty who may not even be alive yet is definitely gone. + */ +static bool shm_mq_counterparty_gone(shm_mq *mq, BackgroundWorkerHandle *handle) +{ + ThreadId pid; + + /* If the queue has been detached, counterparty is definitely gone. */ + if (mq->mq_detached) { + return true; + } + + /* If there's a handle, check worker status. */ + if (handle != NULL) { + BgwHandleStatus status; + + /* Check for unexpected worker death. */ + status = GetBackgroundWorkerPid(handle, &pid); + if (status != BGWH_STARTED && status != BGWH_NOT_YET_STARTED) { + /* Mark it detached, just to make it official. */ + mq->mq_detached = true; + return true; + } + } + + /* Counterparty is not definitively gone. */ + return false; +} + +/* + * This is used when a process is waiting for its counterpart to attach to the + * queue. We exit when the other process attaches as expected, or, if + * handle != NULL, when the referenced background process or the postmaster + * dies. Note that if handle == NULL, and the process fails to attach, we'll + * potentially get stuck here forever waiting for a process that may never + * start. We do check for interrupts, though. + * + * ptr is a pointer to the memory address that we're expecting to become + * non-NULL when our counterpart attaches to the queue. + */ +static bool shm_mq_wait_internal(shm_mq *mq, PGPROC **ptr, BackgroundWorkerHandle *handle) +{ + bool result = false; + + for (;;) { + BgwHandleStatus status; + ThreadId pid; + + /* Acquire the lock just long enough to check the pointer. */ + SpinLockAcquire(&mq->mq_mutex); + result = (*ptr != NULL); + SpinLockRelease(&mq->mq_mutex); + + /* Fail if detached; else succeed if initialized. 
*/ + if (mq->mq_detached) { + result = false; + break; + } + if (result) { + break; + } + if (handle != NULL) { + /* Check for unexpected worker death. */ + status = GetBackgroundWorkerPid(handle, &pid); + if (status != BGWH_STARTED && status != BGWH_NOT_YET_STARTED) { + result = false; + break; + } + } + + /* Wait to be signalled. */ + (void)WaitLatch(&t_thrd.proc->procLatch, WL_LATCH_SET, 0); + + /* Reset the latch so we don't spin. */ + ResetLatch(&t_thrd.proc->procLatch); + + /* An interrupt may have occurred while we were waiting. */ + CHECK_FOR_INTERRUPTS(); + } + + return result; +} + +/* + * Increment the number of bytes read. + */ +static void shm_mq_inc_bytes_read(shm_mq *mq, Size n) +{ + PGPROC *sender = NULL; + + /* + * Separate prior reads of mq_ring from the increment of mq_bytes_read + * which follows. This pairs with the full barrier in + * shm_mq_send_bytes(). We only need a read barrier here because the + * increment of mq_bytes_read is actually a read followed by a dependent + * write. + */ + pg_read_barrier(); + + /* + * There's no need to use pg_atomic_fetch_add_u64 here, because nobody + * else can be changing this value. This method should be cheaper. + */ + pg_atomic_write_u64(&mq->mq_bytes_read, + pg_atomic_read_u64(&mq->mq_bytes_read) + n); + + /* + * We shouldn't have any bytes to read without a sender, so we can read + * mq_sender here without a lock. Once it's initialized, it can't change. + */ + sender = mq->mq_sender; + Assert(sender != NULL); + SetLatch(&sender->procLatch); +} + +/* + * Increment the number of bytes written. + */ +static void shm_mq_inc_bytes_written(shm_mq *mq, Size n) +{ + /* + * Separate prior reads of mq_ring from the write of mq_bytes_written + * which we're about to do. Pairs with the read barrier found in + * shm_mq_get_receive_bytes. + */ + pg_write_barrier(); + + /* + * There's no need to use pg_atomic_fetch_add_u64 here, because nobody + * else can be changing this value. 
This method avoids taking the bus + * lock unnecessarily. + */ + pg_atomic_write_u64(&mq->mq_bytes_written, + pg_atomic_read_u64(&mq->mq_bytes_written) + n); +} + diff --git a/src/include/libpq/libpq.h b/src/include/libpq/libpq.h index e04ea17ae9..3988224f46 100755 --- a/src/include/libpq/libpq.h +++ b/src/include/libpq/libpq.h @@ -1,7 +1,7 @@ /* ------------------------------------------------------------------------- * * libpq.h - * POSTGRES LIBPQ buffer structure definitions. + * POSTGRES LIBPQ buffer structure definitions. * * * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group @@ -22,19 +22,43 @@ /* ---------------- * PQArgBlock - * Information (pointer to array of this structure) required - * for the PQfn() call. (This probably ought to go somewhere else...) + * Information (pointer to array of this structure) required + * for the PQfn() call. (This probably ought to go somewhere else...) * ---------------- */ typedef struct { int len; int isint; union { - int* ptr; /* can't use void (dec compiler barfs) */ + int *ptr; /* can't use void (dec compiler barfs) */ int integer; } u; } PQArgBlock; +typedef struct { + void (*comm_reset) (void); + int (*flush) (void); + int (*flush_if_writable) (void); + bool (*is_send_pending) (void); + int (*putmessage) (char msgtype, const char* s, size_t len); + int (*putmessage_noblock) (char msgtype, const char* s, size_t len); + void (*startcopyout) (void); + void (*endcopyout) (bool errorAbort); +} PQcommMethods; + +extern PGDLLIMPORT THR_LOCAL PQcommMethods *PqCommMethods; + +#define pq_comm_reset() (PqCommMethods->comm_reset()) +#define pq_flush() (PqCommMethods->flush()) +#define pq_flush_if_writable() (PqCommMethods->flush_if_writable()) +#define pq_is_send_pending() (PqCommMethods->is_send_pending()) +#define pq_putmessage(msgtype, s, len) \ + (PqCommMethods->putmessage(msgtype, s, len)) +#define pq_putmessage_noblock(msgtype, s, len) \ + (PqCommMethods->putmessage_noblock(msgtype, s, len)) +#define 
pq_startcopyout() (PqCommMethods->startcopyout()) +#define pq_endcopyout(errorAbort) (PqCommMethods->endcopyout(errorAbort)) + /* * External functions. */ @@ -49,7 +73,6 @@ extern int StreamConnection(pgsocket server_fd, Port* port); extern void StreamClose(pgsocket sock); extern void TouchSocketFile(void); extern void pq_init(void); -extern void pq_comm_reset(void); extern int pq_getbytes(char* s, size_t len); extern int pq_getstring(StringInfo s); extern int pq_getmessage(StringInfo s, int maxlen); @@ -57,14 +80,7 @@ extern int pq_getbyte(void); extern int pq_peekbyte(void); extern int pq_getbyte_if_available(unsigned char* c); extern int pq_putbytes(const char* s, size_t len); -extern int pq_flush(void); -extern int pq_flush_if_writable(void); extern void pq_flush_timedwait(int timeout); -extern bool pq_is_send_pending(void); -extern int pq_putmessage(char msgtype, const char* s, size_t len); -extern int pq_putmessage_noblock(char msgtype, const char* s, size_t len); -extern void pq_startcopyout(void); -extern void pq_endcopyout(bool errorAbort); extern bool pq_select(int timeout_ms); extern void pq_abandon_sendbuffer(void); extern void pq_abandon_recvbuffer(void); diff --git a/src/include/libpq/pqformat.h b/src/include/libpq/pqformat.h index 28f9eca5f2..0a14b3b39e 100755 --- a/src/include/libpq/pqformat.h +++ b/src/include/libpq/pqformat.h @@ -46,6 +46,7 @@ extern const char* pq_getmsgbytes(StringInfo msg, int datalen); extern void pq_copymsgbytes(StringInfo msg, char* buf, int datalen); extern char* pq_getmsgtext(StringInfo msg, int rawbytes, int* nbytes); extern const char* pq_getmsgstring(StringInfo msg); +extern const char* pq_getmsgrawstring(StringInfo msg); extern void pq_getmsgend(StringInfo msg); /* diff --git a/src/include/libpq/pqmq.h b/src/include/libpq/pqmq.h new file mode 100644 index 0000000000..2a749790ee --- /dev/null +++ b/src/include/libpq/pqmq.h @@ -0,0 +1,25 @@ +/*------------------------------------------------------------------------- + * 
+ * pqmq.h + * Use the frontend/backend protocol for communication over a shm_mq + * + * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/libpq/pqmq.h + * + *------------------------------------------------------------------------- + */ +#ifndef PQMQ_H +#define PQMQ_H + +#include "lib/stringinfo.h" +#include "storage/shm_mq.h" + +extern void pq_redirect_to_shm_mq(shm_mq_handle* mqh); +extern void pq_stop_redirect_to_shm_mq(void); +extern void pq_set_parallel_master(pid_t pid, BackendId backend_id); + +extern void pq_parse_errornotice(StringInfo str, ErrorData* edata); + +#endif /* PQMQ_H */ diff --git a/src/include/storage/procsignal.h b/src/include/storage/procsignal.h index 0a3efda844..1c2df987d4 100644 --- a/src/include/storage/procsignal.h +++ b/src/include/storage/procsignal.h @@ -55,6 +55,7 @@ typedef enum { PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK, PROCSIG_EXECUTOR_FLAG, + PROCSIG_PARALLEL_MESSAGE, /* message from cooperating parallel backend */ NUM_PROCSIGNALS /* Must be last! */ } ProcSignalReason; diff --git a/src/include/storage/shm_mq.h b/src/include/storage/shm_mq.h new file mode 100644 index 0000000000..27f98af449 --- /dev/null +++ b/src/include/storage/shm_mq.h @@ -0,0 +1,82 @@ +/*------------------------------------------------------------------------- + * + * shm_mq.h + * single-reader, single-writer shared memory message queue + * + * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/shm_mq.h + * + *------------------------------------------------------------------------- + */ +#ifndef SHM_MQ_H +#define SHM_MQ_H + +#include "postmaster/bgworker.h" +#include "storage/proc.h" + +/* The queue itself, in shared memory. */ +struct shm_mq; +typedef struct shm_mq shm_mq; + +/* Backend-private state. 
*/ +struct shm_mq_handle; +typedef struct shm_mq_handle shm_mq_handle; + +/* Descriptors for a single write spanning multiple locations. */ +typedef struct { + const char *data; + Size len; +} shm_mq_iovec; + +/* Possible results of a send or receive operation. */ +typedef enum { + SHM_MQ_SUCCESS, /* Sent or received a message. */ + SHM_MQ_WOULD_BLOCK, /* Not completed; retry later. */ + SHM_MQ_DETACHED /* Other process has detached queue. */ +} shm_mq_result; + +/* + * Primitives to create a queue and set the sender and receiver. + * + * Both the sender and the receiver must be set before any messages are read + * or written, but they need not be set by the same process. Each must be + * set exactly once. + */ +extern shm_mq *shm_mq_create(void *address, Size size); +extern void shm_mq_set_receiver(shm_mq *mq, PGPROC *); +extern void shm_mq_set_sender(shm_mq *mq, PGPROC *); + +/* Accessor methods for sender and receiver. */ +extern PGPROC *shm_mq_get_receiver(shm_mq *); +extern PGPROC *shm_mq_get_sender(shm_mq *); + +/* Set up backend-local queue state. */ +extern shm_mq_handle *shm_mq_attach(shm_mq *mq, char *seg, + BackgroundWorkerHandle *handle); + +/* Associate worker handle with shm_mq. */ +extern void shm_mq_set_handle(shm_mq_handle *, BackgroundWorkerHandle *); + +/* Break connection, release handle resources. */ +extern void shm_mq_detach(shm_mq_handle *mqh); + +/* Get the shm_mq from handle. */ +extern shm_mq *shm_mq_get_queue(shm_mq_handle *mqh); + +/* Send or receive messages. */ +extern shm_mq_result shm_mq_send(shm_mq_handle *mqh, + Size nbytes, const void *data, bool nowait); +extern shm_mq_result shm_mq_sendv(shm_mq_handle *mqh, + shm_mq_iovec *iov, int iovcnt, bool nowait); +extern shm_mq_result shm_mq_receive(shm_mq_handle *mqh, + Size *nbytesp, void **datap, bool nowait); + +/* Wait for our counterparty to attach to the queue. */ +extern shm_mq_result shm_mq_wait_for_attach(shm_mq_handle *mqh); + +/* Smallest possible queue. 
*/ +extern PGDLLIMPORT const Size shm_mq_minimum_size; + +#endif /* SHM_MQ_H */ -- Gitee From ecbb5313784d6c7103c16df6ce173ce782e0375c Mon Sep 17 00:00:00 2001 From: jiang_jianyu Date: Tue, 25 Aug 2020 21:36:32 +0800 Subject: [PATCH 3/6] GUC serialization for Autonomous Transaction --- src/common/backend/utils/misc/guc.cpp | 548 ++++++++++++++++++++++++++ src/include/utils/guc.h | 4 + 2 files changed, 552 insertions(+) diff --git a/src/common/backend/utils/misc/guc.cpp b/src/common/backend/utils/misc/guc.cpp index a7977f8784..000be09fb8 100644 --- a/src/common/backend/utils/misc/guc.cpp +++ b/src/common/backend/utils/misc/guc.cpp @@ -198,6 +198,18 @@ #define MAX_PASSWORD_ASSIGNED_CHARACTER 999 /* max length of password */ #define MAX_PASSWORD_LENGTH 999 +/* + * Precision with which REAL type guc values are to be printed for GUC + * serialization. + */ +static const int REALTYPE_PRECISION = 17; + +static const int TYPICAL_LEN_RANGE_OF_VALUE = 1000; +static const int MAX_DISPLAY_LEN_OF_BOOL = 5; +static const int TYPICAL_DISPLAY_LEN_OF_INT = 4; +static const int MAX_DISPLAY_LEN_OF_INT = 11; +static const int MAX_DISPLAY_LEN_OF_INT64 = 20; +static const int LEN_OF_REAL_EXCEPT_PRECISION = 8; extern volatile int synchronous_commit; extern volatile bool most_available_sync; @@ -17767,6 +17779,542 @@ ArrayType* GUCArrayReset(ArrayType* array) return newarray; } +/* GUC serialization */ +static bool CanSkipGucvar(const struct config_generic* gconf); +static Size EstimateVariableSize(const struct config_generic* gconf); +static void DoSerialize(char** destptr, Size& maxbytes, const char* fmt, ...); +static void DoSerializeBinary(char** destptr, Size& maxbytes, const char* val, Size valsize); +static void SerializeVariable(char** destptr, Size& maxbytes, const struct config_generic* gconf); +static void InitializeOneGUCOption(struct config_generic& gconf); +static char* ReadGucstate(char** srcptr, const char* srcend); +static void ReadGucstateBinary(char** srcptr, const 
char* srcend, char* dest, Size size); + +/* + * CanSkipGucvar: + * When serializing, determine whether to skip this GUC. When restoring, the + * negation of this test determines whether to restore the compiled-in default + * value before processing serialized values. + * + * A PGC_S_DEFAULT setting on the serialize side will typically match new + * postmaster children, but that can be false when got_SIGHUP == true and the + * pending configuration change modifies this setting. Nonetheless, we omit + * PGC_S_DEFAULT settings from serialization and make up for that by restoring + * defaults before applying serialized values. + * + * PGC_POSTMASTER variables always have the same value in every child of a + * particular postmaster. Most PGC_INTERNAL variables are compile-time + * constants; a few, like server_encoding and lc_ctype, are handled specially + * outside the serialize/restore procedure. Therefore, SerializeGUCState() + * never sends these, and RestoreGUCState() never changes them. + */ +static bool CanSkipGucvar(const struct config_generic* gconf) +{ + return gconf->context == PGC_POSTMASTER || + gconf->context == PGC_INTERNAL || gconf->source == PGC_S_DEFAULT || + strcmp(gconf->name, "role") == 0; +} + + +/* + * EstimateVariableSize: + * Estimate max size for dumping the given GUC variable. + */ +static Size EstimateVariableSize(const struct config_generic* gconf) +{ + Size size; + Size valsize = 0; + + if (CanSkipGucvar(gconf)) { + return 0; + } + + size = strlen(gconf->name) + 1; + + /* Get the maximum display length of the GUC value. */ + switch (gconf->vartype) { + case PGC_BOOL: { + valsize = MAX_DISPLAY_LEN_OF_BOOL; + break; + } + + case PGC_INT: { + const struct config_int* conf = (const struct config_int*)gconf; + + /* + * Instead of getting the exact display length, use max + * length. Also reduce the max length for typical ranges of + * small values. Maximum value is 2147483647, i.e. 10 chars. + * Include one byte for sign. 
+             */
+            if (Abs(*conf->variable) < TYPICAL_LEN_RANGE_OF_VALUE) {
+                valsize = TYPICAL_DISPLAY_LEN_OF_INT;
+            } else {
+                valsize = MAX_DISPLAY_LEN_OF_INT;
+            }
+            break;
+        }
+
+        case PGC_INT64: {
+            const struct config_int64* conf = (const struct config_int64*)gconf;
+
+            if (Abs(*conf->variable) < TYPICAL_LEN_RANGE_OF_VALUE) {
+                valsize = TYPICAL_DISPLAY_LEN_OF_INT;
+            } else {
+                valsize = MAX_DISPLAY_LEN_OF_INT64; /* Maximum value is 9,223,372,036,854,775,807, i.e. 19 chars. */
+            }
+            break;
+        }
+
+        case PGC_REAL: {
+            /*
+             * We are going to print it with %.17g. Account for sign,
+             * decimal point, and e+nnn notation. E.g.
+             * -3.9932904234000002e+110
+             */
+            valsize = LEN_OF_REAL_EXCEPT_PRECISION + REALTYPE_PRECISION;
+            break;
+        }
+
+        case PGC_STRING: {
+            const struct config_string* conf = (const struct config_string*)gconf;
+            /*
+             * If the value is NULL, we transmit it as an empty string.
+             * Although this is not physically the same value, GUC
+             * generally treats a NULL the same as empty string.
+             */
+            if (*conf->variable) {
+                valsize = strlen(*conf->variable);
+            } else {
+                valsize = 0;
+            }
+            break;
+        }
+
+        case PGC_ENUM: {
+            struct config_enum* conf = (struct config_enum*) gconf;
+            valsize = strlen(config_enum_lookup_by_value(conf, *conf->variable));
+            break;
+        }
+        default:
+            break;
+    }
+
+    /* Allow space for terminating zero-byte */
+    size = add_size(size, valsize + 1);
+
+    if (gconf->sourcefile) {
+        size = add_size(size, strlen(gconf->sourcefile));
+    }
+
+    /* Allow space for terminating zero-byte */
+    size = add_size(size, 1);
+
+    /* Include line whenever we include file. 
*/
+    if (gconf->sourcefile && gconf->sourcefile[0]) {
+        size = add_size(size, sizeof(gconf->sourceline));
+    }
+
+    size = add_size(size, sizeof(gconf->source));
+    size = add_size(size, sizeof(gconf->scontext));
+
+    return size;
+}
+
+/*
+ * EstimateGUCStateSpace:
+ * Returns the size needed to store the GUC state for the current process
+ */
+Size EstimateGUCStateSpace(void)
+{
+    Size size;
+    int i;
+
+    /* Add space reqd for saving the data size of the guc state */
+    size = sizeof(Size);
+
+    /* Add up the space needed for each GUC variable */
+    for (i = 0; i < u_sess->num_guc_variables; i++) {
+        size = add_size(size, EstimateVariableSize(u_sess->guc_variables[i]));
+    }
+
+    return size;
+}
+
+/*
+ * DoSerialize:
+ * Copies the formatted string into the destination. Moves ahead the
+ * destination pointer, and decrements the maxbytes by that many bytes. If
+ * maxbytes is not sufficient to copy the string, error out.
+ */
+static void DoSerialize(char** destptr, Size& maxbytes, const char* fmt, ...)
+{
+    va_list vargs;
+    int nRet;
+
+    if (maxbytes == 0) {
+        elog(ERROR, "not enough space to serialize GUC state");
+    }
+
+    va_start(vargs, fmt);
+    nRet = vsnprintf_s(*destptr, maxbytes, maxbytes - 1, fmt, vargs);
+    securec_check_ss(nRet, "\0", "\0");
+    va_end(vargs);
+
+    /*
+     * Cater to portability hazards in the vsnprintf() return value just like
+     * appendPQExpBufferVA() does. Note that this requires an extra byte of
+     * slack at the end of the buffer. Since serialize_variable() ends with a
+     * do_serialize_binary() rather than a do_serialize(), we'll always have
+     * that slack; estimate_variable_size() need not add a byte for it.
+     */
+    if (nRet < 0) {
+        /* Shouldn't happen. Better show errno description. */
+        elog(ERROR, "vsnprintf failed: %s with format string \"%s\"", strerror(errno), fmt);
+    }
+    if (nRet >= static_cast<int>(maxbytes)) {
+        /* This shouldn't happen either, really. 
*/
+        elog(ERROR, "not enough space to serialize GUC state");
+    }
+
+    /* Shift the destptr ahead of the null terminator */
+    *destptr += nRet + 1;
+    maxbytes -= static_cast<Size>(nRet) + 1;
+}
+
+/* Binary copy version of DoSerialize() */
+static void DoSerializeBinary(char** destptr, Size& maxbytes, const char* val, Size valsize)
+{
+    if (valsize > maxbytes) {
+        elog(ERROR, "not enough space to serialize GUC state");
+    }
+
+    errno_t rc = memcpy_s(*destptr, maxbytes, val, valsize);
+    securec_check(rc, "\0", "\0");
+    *destptr += valsize;
+    maxbytes -= valsize;
+}
+
+/*
+ * SerializeVariable:
+ * Dumps name, value and other information of a GUC variable into destptr.
+ */
+static void SerializeVariable(char** destptr, Size& maxbytes, const struct config_generic* gconf)
+{
+    if (CanSkipGucvar(gconf)) {
+        return;
+    }
+
+    DoSerialize(destptr, maxbytes, "%s", gconf->name);
+
+    switch (gconf->vartype) {
+        case PGC_BOOL: {
+            const struct config_bool* conf = (const struct config_bool*)gconf;
+            DoSerialize(destptr, maxbytes, (*conf->variable ? "true" : "false"));
+            break;
+        }
+
+        case PGC_INT: {
+            const struct config_int* conf = (const struct config_int*)gconf;
+            DoSerialize(destptr, maxbytes, "%d", *conf->variable);
+            break;
+        }
+
+        case PGC_INT64: {
+            const struct config_int64* conf = (const struct config_int64*)gconf;
+            DoSerialize(destptr, maxbytes, "%ld", *conf->variable);
+            break;
+        }
+
+        case PGC_REAL: {
+            const struct config_real* conf = (const struct config_real*)gconf;
+            DoSerialize(destptr, maxbytes, "%.*e", REALTYPE_PRECISION, *conf->variable);
+            break;
+        }
+
+        case PGC_STRING:{
+            const struct config_string* conf = (const struct config_string*)gconf;
+            DoSerialize(destptr, maxbytes, "%s", *conf->variable ? 
*conf->variable : ""); + break; + } + + case PGC_ENUM:{ + struct config_enum* conf = (struct config_enum*)gconf; + DoSerialize(destptr, maxbytes, "%s", config_enum_lookup_by_value(conf, *conf->variable)); + break; + } + default: + break; + } + + DoSerialize(destptr, maxbytes, "%s", (gconf->sourcefile ? gconf->sourcefile : "")); + + if (gconf->sourcefile) { + DoSerializeBinary(destptr, maxbytes, reinterpret_cast<const char*>(&gconf->sourceline), + sizeof(gconf->sourceline)); + } + + DoSerializeBinary(destptr, maxbytes, reinterpret_cast<const char*>(&gconf->source), sizeof(gconf->source)); + DoSerializeBinary(destptr, maxbytes, reinterpret_cast<const char*>(&gconf->scontext), sizeof(gconf->scontext)); +} + +/* + * SerializeGUCState: + * Dumps the complete GUC state onto the memory location at startAddress. + */ +void SerializeGUCState(Size maxsize, char* startAddress) +{ + char *curptr; + Size actualSize; + Size bytesLeft; + int i; + + /* Reserve space for saving the actual size of the guc state */ + Assert(maxsize > sizeof(actualSize)); + curptr = startAddress + sizeof(actualSize); + bytesLeft = maxsize - sizeof(actualSize); + + for (i = 0; i < u_sess->num_guc_variables; i++) { + SerializeVariable(&curptr, bytesLeft, u_sess->guc_variables[i]); + } + + /* Store actual size without assuming alignment of startAddress. */ + actualSize = maxsize - bytesLeft - sizeof(actualSize); + errno_t rc = memcpy_s(startAddress, maxsize, &actualSize, sizeof(actualSize)); + securec_check(rc, "\0", "\0"); +} + +/* + * Initialize one GUC option variable to its compiled-in default. + * + * Note: the reason for calling check_hooks is not that we think the boot_val + * might fail, but that the hooks might wish to compute an "extra" struct.
+ */ +static void InitializeOneGUCOption(struct config_generic& gconf) +{ + gconf.status = 0; + gconf.source = PGC_S_DEFAULT; + gconf.reset_source = PGC_S_DEFAULT; + gconf.scontext = PGC_INTERNAL; + gconf.reset_scontext = PGC_INTERNAL; + gconf.stack = NULL; + gconf.extra = NULL; + gconf.sourcefile = NULL; + gconf.sourceline = 0; + + switch (gconf.vartype) { + case PGC_BOOL: { + struct config_bool *conf = (struct config_bool*)&gconf; + bool newval = conf->boot_val; + void* extra = NULL; + + if (!call_bool_check_hook(conf, &newval, &extra, PGC_S_DEFAULT, LOG)) { + elog(FATAL, "failed to initialize %s to %d", conf->gen.name, static_cast(newval)); + } + if (conf->assign_hook) { + (*conf->assign_hook) (newval, extra); + } + *conf->variable = conf->reset_val = newval; + conf->gen.extra = conf->reset_extra = extra; + break; + } + + case PGC_INT: { + struct config_int* conf = (struct config_int*)&gconf; + int newval = conf->boot_val; + void* extra = NULL; + + Assert(newval >= conf->min); + Assert(newval <= conf->max); + if (!call_int_check_hook(conf, &newval, &extra, PGC_S_DEFAULT, LOG)) { + elog(FATAL, "failed to initialize %s to %d", conf->gen.name, newval); + } + if (conf->assign_hook) { + (*conf->assign_hook) (newval, extra); + } + *conf->variable = conf->reset_val = newval; + conf->gen.extra = conf->reset_extra = extra; + break; + } + + case PGC_INT64: { + struct config_int64* conf = (struct config_int64*)&gconf; + int64 newval = conf->boot_val; + void* extra = NULL; + + Assert(newval >= conf->min); + Assert(newval <= conf->max); + if (!call_int64_check_hook(conf, &newval, &extra, PGC_S_DEFAULT, LOG)) { + elog(FATAL, "failed to initialize %s to %ld", conf->gen.name, newval); + } + if (conf->assign_hook) { + (*conf->assign_hook) (newval, extra); + } + *conf->variable = conf->reset_val = newval; + conf->gen.extra = conf->reset_extra = extra; + break; + } + + case PGC_REAL: { + struct config_real* conf = (struct config_real*)&gconf; + double newval = conf->boot_val; + 
void* extra = NULL; + + Assert(newval >= conf->min); + Assert(newval <= conf->max); + if (!call_real_check_hook(conf, &newval, &extra, PGC_S_DEFAULT, LOG)) { + elog(FATAL, "failed to initialize %s to %g", conf->gen.name, newval); + } + if (conf->assign_hook) { + (*conf->assign_hook) (newval, extra); + } + *conf->variable = conf->reset_val = newval; + conf->gen.extra = conf->reset_extra = extra; + break; + } + + case PGC_STRING: { + struct config_string* conf = (struct config_string*)&gconf; + char* newval; + void* extra = NULL; + + /* non-NULL boot_val must always get strdup'd */ + if (conf->boot_val != NULL) { + newval = guc_strdup(FATAL, conf->boot_val); + } else { + newval = NULL; + } + + if (!call_string_check_hook(conf, &newval, &extra, PGC_S_DEFAULT, LOG)) { + elog(FATAL, "failed to initialize %s to \"%s\"", conf->gen.name, newval ? newval : ""); + } + if (conf->assign_hook) { + (*conf->assign_hook) (newval, extra); + } + *conf->variable = conf->reset_val = newval; + conf->gen.extra = conf->reset_extra = extra; + break; + } + + case PGC_ENUM: { + struct config_enum *conf = (struct config_enum*)&gconf; + int newval = conf->boot_val; + void* extra = NULL; + + if (!call_enum_check_hook(conf, &newval, &extra, PGC_S_DEFAULT, LOG)) { + elog(FATAL, "failed to initialize %s to %d", conf->gen.name, newval); + } + if (conf->assign_hook) { + (*conf->assign_hook) (newval, extra); + } + *conf->variable = conf->reset_val = newval; + conf->gen.extra = conf->reset_extra = extra; + break; + } + default: + break; + } +} + +/* + * ReadGucstate: + * Actually it does not read anything, just returns the srcptr. But it does + * move the srcptr past the terminating zero byte, so that the caller is ready + * to read the next string. 
+ */ +static char* ReadGucstate(char** srcptr, const char* srcend) +{ + char* retptr = *srcptr; + char* ptr; + + if (*srcptr >= srcend) { + elog(ERROR, "incomplete GUC state"); + } + + /* The string variables are all null terminated */ + for (ptr = *srcptr; ptr < srcend && *ptr != '\0'; ptr++) {} + + if (ptr > srcend) { + elog(ERROR, "could not find null terminator in GUC state"); + } + + /* Set the new position to the byte following the terminating NUL */ + *srcptr = ptr + 1; + + return retptr; +} + +/* Binary read version of ReadGucstate(). Copies into dest */ +static void ReadGucstateBinary(char** srcptr, const char* srcend, char* dest, Size size) +{ + if (*srcptr + size > srcend) { + elog(ERROR, "incomplete GUC state"); + } + + errno_t rc = memcpy_s(dest, size, *srcptr, size); + securec_check(rc, "\0", "\0"); + *srcptr += size; +} + +/* + * RestoreGUCState: + * Reads the GUC state at the specified address and updates the GUCs with the + * values read from the GUC state. + */ +void RestoreGUCState(char* gucstate) +{ + char* varname; + char* varvalue; + char* varsourcefile; + int varsourceline; + GucSource varsource; + GucContext varscontext; + char* srcptr = gucstate; + char* srcend; + Size len; + int i; + + /* See comment at can_skip_gucvar(). 
*/ + for (i = 0; i < u_sess->num_guc_variables; i++) { + if (!CanSkipGucvar(u_sess->guc_variables[i])) { + InitializeOneGUCOption(*u_sess->guc_variables[i]); + } + } + /* First item is the length of the subsequent data */ + errno_t rc = memcpy_s(&len, sizeof(len), gucstate, sizeof(len)); + securec_check(rc, "\0", "\0"); + srcptr += sizeof(len); + srcend = srcptr + len; + + while (srcptr < srcend) { + int result; + varname = ReadGucstate(&srcptr, srcend); + varvalue = ReadGucstate(&srcptr, srcend); + varsourcefile = ReadGucstate(&srcptr, srcend); + + if (varsourcefile[0]) { + ReadGucstateBinary(&srcptr, srcend, + reinterpret_cast(&varsourceline), sizeof(varsourceline)); + } else { + varsourceline = 0; + } + ReadGucstateBinary(&srcptr, srcend, + reinterpret_cast(&varsource), sizeof(varsource)); + ReadGucstateBinary(&srcptr, srcend, + reinterpret_cast(&varscontext), sizeof(varscontext)); + + result = set_config_option(varname, varvalue, varscontext, varsource, + GUC_ACTION_SET, true, ERROR, true); + if (result <= 0) { + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("parameter \"%s\" could not be set", varname))); + } + if (varsourcefile[0]) { + set_config_sourcefile(varname, varsourcefile, varsourceline); + } + } +} + /* * Validate a proposed option setting for GUCArrayAdd/Delete/Reset. 
* diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h index 14640be878..d99e9e4f8d 100755 --- a/src/include/utils/guc.h +++ b/src/include/utils/guc.h @@ -266,6 +266,10 @@ extern ArrayType* GUCArrayAdd(ArrayType* array, const char* name, const char* va extern ArrayType* GUCArrayDelete(ArrayType* array, const char* name); extern ArrayType* GUCArrayReset(ArrayType* array); +extern Size EstimateGUCStateSpace(void); +extern void SerializeGUCState(Size maxsize, char *start_address); +extern void RestoreGUCState(char *gucstate); + #ifdef EXEC_BACKEND extern void write_nondefault_variables(GucContext context); extern void read_nondefault_variables(void); -- Gitee From e763a80cc5e77c8cb8c8c7308afade0625215074 Mon Sep 17 00:00:00 2001 From: jiang_jianyu Date: Tue, 25 Aug 2020 22:12:31 +0800 Subject: [PATCH 4/6] add autonomous transaction. The original patch is from: https://www.postgresql.org/message-id/attachment/45863/autonomous.patch --- src/common/backend/lib/stringinfo.cpp | 1 + src/common/backend/parser/analyze.cpp | 4 +- src/common/backend/parser/parse_param.cpp | 44 +- src/common/backend/utils/error/elog.cpp | 57 ++ src/common/backend/utils/init/miscinit.cpp | 7 +- src/common/backend/utils/misc/guc.cpp | 18 +- src/common/pl/plpgsql/src/gram.y | 16 + src/common/pl/plpgsql/src/pl_exec.cpp | 164 +++- src/common/pl/plpgsql/src/pl_scanner.cpp | 3 +- src/common/pl/plpgsql/src/plpgsql.h | 4 +- src/gausskernel/optimizer/commands/async.cpp | 3 +- .../optimizer/commands/prepare.cpp | 2 +- .../optimizer/commands/variable.cpp | 4 +- .../process/postmaster/bgworker.cpp | 5 + src/gausskernel/process/tcop/Makefile | 2 +- src/gausskernel/process/tcop/autonomous.cpp | 857 ++++++++++++++++++ src/gausskernel/process/tcop/postgres.cpp | 16 +- src/gausskernel/storage/ipc/Makefile | 4 +- src/gausskernel/storage/ipc/shm_toc.cpp | 242 +++++ src/include/commands/async.h | 1 + src/include/commands/variable.h | 1 + src/include/parser/analyze.h | 2 +- src/include/parser/parse_param.h 
| 2 +- src/include/postgres.h | 2 + src/include/storage/shm_toc.h | 59 ++ src/include/tcop/autonomous.h | 43 + src/include/tcop/tcopprot.h | 6 + src/include/utils/elog.h | 1 + src/include/utils/plpgsql.h | 4 +- .../expected/autonomous_transaction.out | 351 +++++++ src/test/regress/parallel_schedule | 3 + .../regress/sql/autonomous_transaction.sql | 189 ++++ 32 files changed, 2082 insertions(+), 35 deletions(-) create mode 100644 src/gausskernel/process/tcop/autonomous.cpp create mode 100644 src/gausskernel/storage/ipc/shm_toc.cpp create mode 100644 src/include/storage/shm_toc.h create mode 100644 src/include/tcop/autonomous.h create mode 100755 src/test/regress/expected/autonomous_transaction.out create mode 100755 src/test/regress/sql/autonomous_transaction.sql diff --git a/src/common/backend/lib/stringinfo.cpp b/src/common/backend/lib/stringinfo.cpp index 8bd086eda1..202c76fe77 100755 --- a/src/common/backend/lib/stringinfo.cpp +++ b/src/common/backend/lib/stringinfo.cpp @@ -59,6 +59,7 @@ void initStringInfo(StringInfo str) */ void resetStringInfo(StringInfo str) { + str->data[0] = '\0'; str->len = 0; str->cursor = 0; diff --git a/src/common/backend/parser/analyze.cpp b/src/common/backend/parser/analyze.cpp index 338693cfee..b10bf5744e 100644 --- a/src/common/backend/parser/analyze.cpp +++ b/src/common/backend/parser/analyze.cpp @@ -154,7 +154,7 @@ Query* parse_analyze( * symbol datatypes from context. The passed-in paramTypes[] array can * be modified or enlarged (via repalloc). 
*/ -Query* parse_analyze_varparams(Node* parseTree, const char* sourceText, Oid** paramTypes, int* numParams) +Query* parse_analyze_varparams(Node* parseTree, const char* sourceText, Oid** paramTypes, int* numParams, char** paramTypeNames) { ParseState* pstate = make_parsestate(NULL); Query* query = NULL; @@ -164,7 +164,7 @@ Query* parse_analyze_varparams(Node* parseTree, const char* sourceText, Oid** pa pstate->p_sourcetext = sourceText; - parse_variable_parameters(pstate, paramTypes, numParams); + parse_variable_parameters(pstate, paramTypes, numParams, paramTypeNames); query = transformTopLevelStmt(pstate, parseTree); diff --git a/src/common/backend/parser/parse_param.cpp b/src/common/backend/parser/parse_param.cpp index 3658ab2cce..6eeef12a06 100755 --- a/src/common/backend/parser/parse_param.cpp +++ b/src/common/backend/parser/parse_param.cpp @@ -47,6 +47,7 @@ typedef struct FixedParamState { typedef struct VarParamState { Oid** paramTypes; /* array of parameter type OIDs */ int* numParams; /* number of array entries */ + char **paramTypeNames; } VarParamState; static Node* fixed_paramref_hook(ParseState* pstate, ParamRef* pref); @@ -54,6 +55,7 @@ static Node* variable_paramref_hook(ParseState* pstate, ParamRef* pref); static Node* variable_coerce_param_hook( ParseState* pstate, Param* param, Oid targetTypeId, int32 targetTypeMod, int location); static bool check_parameter_resolution_walker(Node* node, ParseState* pstate); +static Node *variable_post_column_ref_hook(ParseState *pstate, ColumnRef *cref, Node *var); static bool query_contains_extern_params_walker(Node* node, void* context); /* @@ -73,17 +75,57 @@ void parse_fixed_parameters(ParseState* pstate, Oid* paramTypes, int numParams) /* * Set up to process a query containing references to variable parameters. 
*/ -void parse_variable_parameters(ParseState* pstate, Oid** paramTypes, int* numParams) +void parse_variable_parameters(ParseState* pstate, Oid** paramTypes, int* numParams, char** paramTypeNames) { VarParamState* parstate = (VarParamState*)palloc(sizeof(VarParamState)); parstate->paramTypes = paramTypes; parstate->numParams = numParams; + parstate->paramTypeNames = paramTypeNames; + pstate->p_post_columnref_hook = variable_post_column_ref_hook; pstate->p_ref_hook_state = (void*)parstate; pstate->p_paramref_hook = variable_paramref_hook; pstate->p_coerce_param_hook = variable_coerce_param_hook; } +static Node * variable_post_column_ref_hook(ParseState *pstate, ColumnRef *cref, Node *var) +{ + VarParamState *parstate = (VarParamState *) pstate->p_ref_hook_state; + + /* already resolved */ + if (var != NULL) + return NULL; + + /* did not supply parameter names */ + if (!parstate->paramTypeNames) + return NULL; + + if (list_length(cref->fields) == 1) + { + Node *field1 = (Node *) linitial(cref->fields); + char *name1; + int i; + Param *param; + + Assert(IsA(field1, String)); + name1 = strVal(field1); + for (i = 0; i < *parstate->numParams; i++) + if (strcmp(name1, parstate->paramTypeNames[i]) == 0) + { + param = makeNode(Param); + param->paramkind = PARAM_EXTERN; + param->paramid = i + 1; + param->paramtype = (*parstate->paramTypes)[i]; + param->paramtypmod = -1; + param->paramcollid = InvalidOid; + param->location = -1; + return (Node *) param; + } + } + + return NULL; +} + /* * Transform a ParamRef using fixed parameter types. 
*/ diff --git a/src/common/backend/utils/error/elog.cpp b/src/common/backend/utils/error/elog.cpp index 749b7ddce0..fb8b5ccf25 100644 --- a/src/common/backend/utils/error/elog.cpp +++ b/src/common/backend/utils/error/elog.cpp @@ -1638,6 +1638,63 @@ void FlushErrorStateWithoutDeleteChildrenContext(void) MemoryContextReset(ErrorContext); } +/* + * ThrowErrorData --- report an error described by an ErrorData structure + * + * This is somewhat like ReThrowError, but it allows elevels besides ERROR, + * and the boolean flags such as output_to_server are computed via the + * default rules rather than being copied from the given ErrorData. + * This is primarily used to re-report errors originally reported by + * background worker processes and then propagated (with or without + * modification) to the backend responsible for them. + */ +void +ThrowErrorData(ErrorData *edata) +{ + ErrorData *newedata; + MemoryContext oldcontext; + + if (!errstart(edata->elevel, edata->filename, edata->lineno, + edata->funcname, NULL)) + return; /* error is not to be reported at all */ + + newedata = &t_thrd.log_cxt.errordata[t_thrd.log_cxt.errordata_stack_depth]; + t_thrd.log_cxt.recursion_depth++; + oldcontext = MemoryContextSwitchTo(ErrorContext); + + /* Copy the supplied fields to the error stack entry. 
*/ + if (edata->sqlerrcode != 0) + newedata->sqlerrcode = edata->sqlerrcode; + if (edata->message) + newedata->message = pstrdup(edata->message); + if (edata->detail) + newedata->detail = pstrdup(edata->detail); + if (edata->detail_log) + newedata->detail_log = pstrdup(edata->detail_log); + if (edata->hint) + newedata->hint = pstrdup(edata->hint); + if (edata->context) + newedata->context = pstrdup(edata->context); + /* assume message_id is not available */ + if (newedata->filename) + newedata->filename = pstrdup(edata->filename); + if (newedata->funcname) + newedata->funcname = pstrdup(edata->funcname); + if (newedata->backtrace_log) + newedata->backtrace_log = pstrdup(edata->backtrace_log); + + newedata->cursorpos = edata->cursorpos; + newedata->internalpos = edata->internalpos; + if (edata->internalquery) + newedata->internalquery = pstrdup(edata->internalquery); + + MemoryContextSwitchTo(oldcontext); + t_thrd.log_cxt.recursion_depth--; + + /* Process the error. */ + errfinish(0); +} + /* * ReThrowError --- re-throw a previously copied error * diff --git a/src/common/backend/utils/init/miscinit.cpp b/src/common/backend/utils/init/miscinit.cpp index 8eb2842083..f19f3bd2ef 100755 --- a/src/common/backend/utils/init/miscinit.cpp +++ b/src/common/backend/utils/init/miscinit.cpp @@ -728,6 +728,7 @@ void InitializeSessionUserId(const char* role_name, Oid role_id) { HeapTuple role_tup; Form_pg_authid rform; + //Oid role_id; char* rname = NULL; /* Audit user login */ char details[PGAUDIT_MAXLENGTH]; @@ -763,14 +764,16 @@ void InitializeSessionUserId(const char* role_name, Oid role_id) role_tup = SearchSysCache1(AUTHOID, ObjectIdGetDatum(role_id)); if (!HeapTupleIsValid(role_tup)) { ereport(FATAL, - (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION), - errmsg("role with OID %u does not exist", role_id))); + (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION), + errmsg("role with OID %u does not exist", role_id))); } } rform = (Form_pg_authid)GETSTRUCT(role_tup); 
role_id = HeapTupleGetOid(role_tup); rname = NameStr(rform->rolname); + ereport(LOG, + (errmsg("InitializeSessionUserId role name: %s with OID %u", rname, role_id))); u_sess->misc_cxt.AuthenticatedUserId = role_id; u_sess->misc_cxt.AuthenticatedUserIsSuperuser = rform->rolsuper; diff --git a/src/common/backend/utils/misc/guc.cpp b/src/common/backend/utils/misc/guc.cpp index 000be09fb8..ed6d7c0956 100644 --- a/src/common/backend/utils/misc/guc.cpp +++ b/src/common/backend/utils/misc/guc.cpp @@ -17876,7 +17876,7 @@ static Size EstimateVariableSize(const struct config_generic* gconf) } case PGC_STRING: { - const struct config_string* conf = (const struct config_string*)gconf; + const struct config_string *conf = (const struct config_string*)gconf; /* * If the value is NULL, we transmit it as an empty string. * Although this is not physically the same value, GUC @@ -17927,7 +17927,7 @@ static Size EstimateVariableSize(const struct config_generic* gconf) Size EstimateGUCStateSpace(void) { Size size; - int i; + int i; /* Add space reqd for saving the data size of the guc state */ size = sizeof(Size); @@ -18008,25 +18008,25 @@ static void SerializeVariable(char** destptr, Size& maxbytes, const struct confi switch (gconf->vartype) { case PGC_BOOL: { - const struct config_bool* conf = (const struct config_bool*)gconf; + const struct config_bool *conf = (const struct config_bool*)gconf; DoSerialize(destptr, maxbytes, (*conf->variable ? 
"true" : "false")); break; } case PGC_INT: { - const struct config_int* conf = (const struct config_int*)gconf; + const struct config_int *conf = (const struct config_int*)gconf; DoSerialize(destptr, maxbytes, "%d", *conf->variable); break; } case PGC_INT64: { - const struct config_int64* conf = (const struct config_int64*)gconf; + const struct config_int64 *conf = (const struct config_int64*)gconf; DoSerialize(destptr, maxbytes, "%ld", *conf->variable); break; } case PGC_REAL: { - const struct config_real* conf = (const struct config_real*)gconf; + const struct config_real *conf = (const struct config_real*)gconf; DoSerialize(destptr, maxbytes, "%.*e", REALTYPE_PRECISION, *conf->variable); break; } @@ -18293,14 +18293,14 @@ void RestoreGUCState(char* gucstate) if (varsourcefile[0]) { ReadGucstateBinary(&srcptr, srcend, - reinterpret_cast(&varsourceline), sizeof(varsourceline)); + reinterpret_cast(&varsourceline), sizeof(varsourceline)); } else { varsourceline = 0; } ReadGucstateBinary(&srcptr, srcend, - reinterpret_cast(&varsource), sizeof(varsource)); + reinterpret_cast(&varsource), sizeof(varsource)); ReadGucstateBinary(&srcptr, srcend, - reinterpret_cast(&varscontext), sizeof(varscontext)); + reinterpret_cast(&varscontext), sizeof(varscontext)); result = set_config_option(varname, varvalue, varscontext, varsource, GUC_ACTION_SET, true, ERROR, true); diff --git a/src/common/pl/plpgsql/src/gram.y b/src/common/pl/plpgsql/src/gram.y index c8fb857bbb..e7a9331389 100755 --- a/src/common/pl/plpgsql/src/gram.y +++ b/src/common/pl/plpgsql/src/gram.y @@ -176,6 +176,7 @@ static void check_labels(const char *start_label, static PLpgSQL_expr *read_cursor_args(PLpgSQL_var *cursor, int until, const char *expected); static List *read_raise_options(void); +static bool last_pragma; %} @@ -213,6 +214,7 @@ static List *read_raise_options(void); char *label; int n_initvars; int *initvarnos; + bool autonomous; } declhdr; struct { @@ -399,6 +401,7 @@ static List 
*read_raise_options(void); %token K_PG_EXCEPTION_CONTEXT %token K_PG_EXCEPTION_DETAIL %token K_PG_EXCEPTION_HINT +%token K_PRAGMA %token K_PRIOR %token K_QUERY %token K_RAISE @@ -477,6 +480,7 @@ pl_block : decl_sect K_BEGIN proc_sect exception_sect K_END opt_label newp->cmd_type = PLPGSQL_STMT_BLOCK; newp->lineno = plpgsql_location_to_lineno(@2); newp->label = $1.label; + newp->autonomous = $1.autonomous; newp->n_initvars = $1.n_initvars; newp->initvarnos = $1.initvarnos; newp->body = $3; @@ -500,6 +504,7 @@ decl_sect : opt_block_label $$.label = $1; $$.n_initvars = 0; $$.initvarnos = NULL; + $$.autonomous = false; } | opt_block_label decl_start { @@ -507,6 +512,7 @@ decl_sect : opt_block_label $$.label = $1; $$.n_initvars = 0; $$.initvarnos = NULL; + $$.autonomous = false; } | opt_block_label decl_start decl_stmts { @@ -514,6 +520,8 @@ decl_sect : opt_block_label $$.label = $1; /* Remember variables declared in decl_stmts */ $$.n_initvars = plpgsql_add_initdatums(&($$.initvarnos)); + $$.autonomous = last_pragma; + last_pragma = false; } ; @@ -521,6 +529,7 @@ decl_start : K_DECLARE { /* Forget any variables created before block */ plpgsql_add_initdatums(NULL); + last_pragma = false; /* * Disable scanner lookup of identifiers while * we process the decl_stmts @@ -720,6 +729,13 @@ decl_statement : decl_varname decl_const decl_datatype decl_collate decl_notnull errmsg("build variable failed"))); pfree_ext($1.name); } + | K_PRAGMA any_identifier ';' + { + if (pg_strcasecmp($2, "autonomous_transaction") == 0) + last_pragma = true; + else + elog(ERROR, "invalid pragma"); + } ; record_attr_list : record_attr diff --git a/src/common/pl/plpgsql/src/pl_exec.cpp b/src/common/pl/plpgsql/src/pl_exec.cpp index a18b898282..15eca2aa7f 100755 --- a/src/common/pl/plpgsql/src/pl_exec.cpp +++ b/src/common/pl/plpgsql/src/pl_exec.cpp @@ -1,6 +1,6 @@ /* ------------------------------------------------------------------------- * - * pl_exec.c - Executor for the PL/pgSQL + * pl_exec.cpp - 
Executor for the PL/pgSQL * procedural language * * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * src/pl/plpgsql/src/pl_exec.c + * src/pl/plpgsql/src/pl_exec.cpp * * ------------------------------------------------------------------------- */ @@ -33,6 +33,7 @@ #include "pgstat.h" #include "optimizer/clauses.h" #include "storage/proc.h" +#include "tcop/autonomous.h" #include "tcop/tcopprot.h" #include "utils/array.h" #include "utils/builtins.h" @@ -197,6 +198,7 @@ static int check_line_validity_in_for_query(PLpgSQL_stmt_forq* stmt, int, int); static void bind_cursor_with_portal(Portal portal, PLpgSQL_execstate *estate, int varno); static char* transform_anonymous_block(char* query); static bool need_recompile_plan(SPIPlanPtr plan); +static THR_LOCAL PLpgSQL_expr* sqlstmt = NULL; /* ---------- * plpgsql_check_line_validity Called by the debugger plugin for @@ -1412,6 +1414,17 @@ static int exec_stmt_block(PLpgSQL_execstate* estate, PLpgSQL_stmt_block* block) bool savedIsStp = u_sess->SPI_cxt.is_stp; TransactionId oldTransactionId = InvalidTransactionId; + if (block->autonomous) { + if (estate->func->fn_is_trigger) { + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("Un-support feature"), + errdetail("Trigger doesnot support autonomous transaction"))); + } else { + estate->autonomous_session = AutonomousSessionStart(); + } + } + /* * First initialize all variables declared in this block */ @@ -1732,6 +1745,8 @@ static int exec_stmt_block(PLpgSQL_execstate* estate, PLpgSQL_stmt_block* block) } estate->err_text = NULL; + if (block->autonomous) + AutonomousSessionEnd(estate->autonomous_session); /* * Handle the return code. 
@@ -3664,6 +3679,7 @@ static void plpgsql_estate_setup(PLpgSQL_execstate* estate, PLpgSQL_function* fu estate->rettupdesc = NULL; estate->exitlabel = NULL; estate->cur_error = NULL; + estate->autonomous_session = NULL; estate->tuple_store = NULL; estate->cursor_return_data = NULL; @@ -3810,6 +3826,59 @@ static void exec_prepare_plan(PLpgSQL_execstate* estate, PLpgSQL_expr* expr, int exec_simple_check_plan(expr); } +static void build_symbol_table(PLpgSQL_execstate *estate, + PLpgSQL_nsitem *ns_start, + int *ret_nitems, + const char ***ret_names, + Oid **ret_types) +{ + PLpgSQL_nsitem *nsitem = NULL; + List *names = NIL; + List *types = NIL; + ListCell *lc1, *lc2; + int i, nitems; + const char **names_vector; + Oid *types_vector = NULL; + + for (nsitem = ns_start; nsitem; nsitem = nsitem->prev) { + if (nsitem->itemtype == PLPGSQL_NSTYPE_VAR) { + PLpgSQL_datum *datum; + PLpgSQL_var *var; + Oid typoid; + Value *name; + + if (strcmp(nsitem->name, "found") == 0) + continue; // XXX + elog(LOG, "namespace item variable itemno %d, name %s", + nsitem->itemno, nsitem->name); + datum = estate->datums[nsitem->itemno]; + Assert(datum->dtype == PLPGSQL_DTYPE_VAR); + var = (PLpgSQL_var *) datum; + name = makeString(nsitem->name); + typoid = var->datatype->typoid; + if (!list_member(names, name)) { + names = lappend(names, name); + types = lappend_oid(types, typoid); + } + } + } + + Assert(list_length(names) == list_length(types)); + nitems = list_length(names); + names_vector = (const char **)palloc(nitems * sizeof(char *)); + types_vector = (Oid *)palloc(nitems * sizeof(Oid)); + i = 0; + forboth(lc1, names, lc2, types) { + names_vector[i] = pstrdup(strVal(lfirst(lc1))); + types_vector[i] = lfirst_oid(lc2); + i++; + } + + *ret_nitems = nitems; + *ret_names = names_vector; + *ret_types = types_vector; +} + /* ---------- * exec_stmt_execsql Execute an SQL statement (possibly with INTO). 
* ---------- @@ -3827,6 +3896,29 @@ static int exec_stmt_execsql(PLpgSQL_execstate* estate, PLpgSQL_stmt_execsql* st oldTransactionId = GetTopTransactionId(); } + if (estate->autonomous_session) { + int nparams = 0; + int i; + const char **param_names = NULL; + Oid *param_types = NULL; + AutonomousPreparedStatement *astmt = NULL; + Datum *values = NULL; + bool *nulls = NULL; + AutonomousResult *aresult = NULL; + sqlstmt = stmt->sqlstmt; + build_symbol_table(estate, stmt->sqlstmt->ns, &nparams, &param_names, &param_types); + astmt = AutonomousSessionPrepare(estate->autonomous_session, stmt->sqlstmt->query, (int16)nparams, param_types, param_names); + + values = (Datum *)palloc(nparams * sizeof(*values)); + nulls = (bool *)palloc(nparams * sizeof(*nulls)); + for (i = 0; i < nparams; i++) { + nulls[i] = true; + } + aresult = AutonomousSessionExecutePrepared(astmt, (int16)nparams, values, nulls); + exec_set_found(estate, (list_length(aresult->tuples) != 0)); + return PLPGSQL_RC_OK; + } + /* * On the first call for this statement generate the plan, and detect * whether the statement is INSERT/UPDATE/DELETE/MERGE @@ -4240,6 +4332,12 @@ static int exec_stmt_dynexecute(PLpgSQL_execstate* estate, PLpgSQL_stmt_dynexecu exec_eval_cleanup(estate); + if (estate->autonomous_session) + { + (void *)AutonomousSessionExecute(estate->autonomous_session, querystr); + return PLPGSQL_RC_OK; + } + if (stmt->params != NULL) { stmt->ppd = (void*)exec_eval_using_params(estate, stmt->params); } @@ -4984,6 +5082,37 @@ static int exec_stmt_null(PLpgSQL_execstate* estate, PLpgSQL_stmt* stmt) */ static int exec_stmt_commit(PLpgSQL_execstate* estate, PLpgSQL_stmt_commit* stmt) { + if (estate->autonomous_session) { + if (sqlstmt) { + int nparams = 0; + int i; + const char **param_names = NULL; + Oid *param_types = NULL; + AutonomousPreparedStatement *astmt = NULL; + Datum *values = NULL; + bool *nulls = NULL; + AutonomousResult *aresult = NULL; + ereport(LOG, (errmsg("query COMMIT"))); + 
build_symbol_table(estate, sqlstmt->ns, &nparams, &param_names, &param_types); + astmt = AutonomousSessionPrepare(estate->autonomous_session, "COMMIT", (int16)nparams, param_types, param_names); + + values = (Datum *)palloc(nparams * sizeof(*values)); + nulls = (bool *)palloc(nparams * sizeof(*nulls)); + for (i = 0; i < nparams; i++) + { + nulls[i] = true; + } + aresult = AutonomousSessionExecutePrepared(astmt, (int16)nparams, values, nulls); + exec_set_found(estate, (list_length(aresult->tuples) != 0)); + sqlstmt = NULL; + return PLPGSQL_RC_OK; + } else { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("syntax error"), + errdetail("In autonomous transaction, commit/rollback must match start transaction"))); + } + } + const char* PORTAL = "Portal"; int subTransactionCount = u_sess->SPI_cxt.portal_stp_exception_counter; @@ -5046,6 +5175,37 @@ static int exec_stmt_commit(PLpgSQL_execstate* estate, PLpgSQL_stmt_commit* stmt */ static int exec_stmt_rollback(PLpgSQL_execstate* estate, PLpgSQL_stmt_rollback* stmt) { + if (estate->autonomous_session) { + if (sqlstmt) { + int nparams = 0; + int i; + const char **param_names = NULL; + Oid *param_types = NULL; + AutonomousPreparedStatement *astmt = NULL; + Datum *values = NULL; + bool *nulls = NULL; + AutonomousResult *aresult = NULL; + ereport(LOG, (errmsg("query ROLLBACK"))); + build_symbol_table(estate, sqlstmt->ns, &nparams, &param_names, &param_types); + astmt = AutonomousSessionPrepare(estate->autonomous_session, "ROLLBACK", (int16)nparams, param_types, param_names); + + values = (Datum *)palloc(nparams * sizeof(*values)); + nulls = (bool *)palloc(nparams * sizeof(*nulls)); + for (i = 0; i < nparams; i++) + { + nulls[i] = true; + } + aresult = AutonomousSessionExecutePrepared(astmt, (int16)nparams, values, nulls); + exec_set_found(estate, (list_length(aresult->tuples) != 0)); + sqlstmt = NULL; + return PLPGSQL_RC_OK; + } else { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("syntax error"), + 
errdetail("In autonomous transaction, commit/rollback must match start transaction"))); + } + } + const char* PORTAL = "Portal"; int subTransactionCount = u_sess->SPI_cxt.portal_stp_exception_counter; diff --git a/src/common/pl/plpgsql/src/pl_scanner.cpp b/src/common/pl/plpgsql/src/pl_scanner.cpp index 0e79839cab..37a10697ed 100755 --- a/src/common/pl/plpgsql/src/pl_scanner.cpp +++ b/src/common/pl/plpgsql/src/pl_scanner.cpp @@ -100,7 +100,8 @@ static const ScanKeyword unreserved_keywords[] = { UNRESERVED_KEYWORD) PG_KEYWORD("notice", K_NOTICE, UNRESERVED_KEYWORD) PG_KEYWORD("option", K_OPTION, UNRESERVED_KEYWORD) PG_KEYWORD("pg_exception_context", K_PG_EXCEPTION_CONTEXT, UNRESERVED_KEYWORD) PG_KEYWORD("pg_exception_detail", K_PG_EXCEPTION_DETAIL, UNRESERVED_KEYWORD) PG_KEYWORD("pg_exception_hint", - K_PG_EXCEPTION_HINT, UNRESERVED_KEYWORD) PG_KEYWORD("prior", K_PRIOR, UNRESERVED_KEYWORD) + K_PG_EXCEPTION_HINT, UNRESERVED_KEYWORD) PG_KEYWORD("pragma", K_PRAGMA, UNRESERVED_KEYWORD) + PG_KEYWORD("prior", K_PRIOR, UNRESERVED_KEYWORD) PG_KEYWORD("query", K_QUERY, UNRESERVED_KEYWORD) PG_KEYWORD("record", K_RECORD, UNRESERVED_KEYWORD) PG_KEYWORD("relative", K_RELATIVE, UNRESERVED_KEYWORD) PG_KEYWORD("result_oid", K_RESULT_OID, UNRESERVED_KEYWORD) PG_KEYWORD("returned_sqlstate", K_RETURNED_SQLSTATE, UNRESERVED_KEYWORD) diff --git a/src/common/pl/plpgsql/src/plpgsql.h b/src/common/pl/plpgsql/src/plpgsql.h index cc9d19f643..22c936e286 100755 --- a/src/common/pl/plpgsql/src/plpgsql.h +++ b/src/common/pl/plpgsql/src/plpgsql.h @@ -23,6 +23,7 @@ #include "catalog/namespace.h" #include "commands/trigger.h" #include "executor/spi.h" +#include "tcop/autonomous.h" /********************************************************************** * Definitions @@ -382,6 +383,7 @@ typedef struct PLpgSQL_stmt_block { /* Block of statements */ int cmd_type; int lineno; char* label; + bool autonomous; List* body; /* List of statements */ int n_initvars; int* initvarnos; @@ -775,7 +777,7 @@ typedef 
struct PLpgSQL_execstate { /* Runtime execution data */ MemoryContext tuple_store_cxt; ResourceOwner tuple_store_owner; ReturnSetInfo* rsi; - + AutonomousSession *autonomous_session; int found_varno; /* diff --git a/src/gausskernel/optimizer/commands/async.cpp b/src/gausskernel/optimizer/commands/async.cpp index e04af32a11..8dad9eb7e1 100755 --- a/src/gausskernel/optimizer/commands/async.cpp +++ b/src/gausskernel/optimizer/commands/async.cpp @@ -328,7 +328,6 @@ static void asyncQueueReadAllNotifications(void); static bool asyncQueueProcessPageEntries(QueuePosition* current, const QueuePosition &stop, char* page_buffer); static void asyncQueueAdvanceTail(void); static void ProcessIncomingNotify(void); -static void NotifyMyFrontEnd(const char* channel, const char* payload, int32 srcPid); static bool AsyncExistsPendingNotify(const char* channel, const char* payload); static void ClearPendingActionsAndNotifies(void); @@ -1835,7 +1834,7 @@ static void ProcessIncomingNotify(void) /* * Send NOTIFY message to my front end. */ -static void NotifyMyFrontEnd(const char* channel, const char* payload, int32 srcPid) +void NotifyMyFrontEnd(const char* channel, const char* payload, int32 srcPid) { if (t_thrd.postgres_cxt.whereToSendOutput == DestRemote) { StringInfoData buf; diff --git a/src/gausskernel/optimizer/commands/prepare.cpp b/src/gausskernel/optimizer/commands/prepare.cpp index 2c84b48673..d37f3c3d44 100755 --- a/src/gausskernel/optimizer/commands/prepare.cpp +++ b/src/gausskernel/optimizer/commands/prepare.cpp @@ -114,7 +114,7 @@ void PrepareQuery(PrepareStmt* stmt, const char* queryString) * Because parse analysis scribbles on the raw querytree, we must make a * copy to ensure we don't modify the passed-in tree. */ - query = parse_analyze_varparams((Node*)copyObject(stmt->query), queryString, &argtypes, &nargs); + query = parse_analyze_varparams((Node*)copyObject(stmt->query), queryString, &argtypes, &nargs, NULL); /* * Check that all parameter types were determined. 
diff --git a/src/gausskernel/optimizer/commands/variable.cpp b/src/gausskernel/optimizer/commands/variable.cpp index f07028fe04..4172da8487 100755 --- a/src/gausskernel/optimizer/commands/variable.cpp +++ b/src/gausskernel/optimizer/commands/variable.cpp @@ -686,11 +686,13 @@ bool check_mix_replication_param(bool* newval, void** extra, GucSource source) /* * SET CLIENT_ENCODING */ +void (*check_client_encoding_hook)(void); bool check_client_encoding(char** newval, void** extra, GucSource source) { int encoding; const char* canonical_name = NULL; - + if (check_client_encoding_hook) + check_client_encoding_hook(); /* Look up the encoding by name */ encoding = pg_valid_client_encoding(*newval); if (encoding < 0) { diff --git a/src/gausskernel/process/postmaster/bgworker.cpp b/src/gausskernel/process/postmaster/bgworker.cpp index 580bf35a5c..d38484b433 100644 --- a/src/gausskernel/process/postmaster/bgworker.cpp +++ b/src/gausskernel/process/postmaster/bgworker.cpp @@ -28,6 +28,7 @@ #include "storage/procsignal.h" #include "storage/shmem.h" #include "tcop/tcopprot.h" +#include "tcop/autonomous.h" #include "utils/ascii.h" #include "utils/ps_status.h" #include "utils/postinit.h" @@ -110,6 +111,10 @@ static const struct { } InternalBGWorkers[] = { + { + "autonomous_worker_main", + autonomous_worker_main + } }; /* Private functions. 
*/ diff --git a/src/gausskernel/process/tcop/Makefile b/src/gausskernel/process/tcop/Makefile index 8cd077bf23..8d68139eb3 100755 --- a/src/gausskernel/process/tcop/Makefile +++ b/src/gausskernel/process/tcop/Makefile @@ -29,7 +29,7 @@ ifneq "$(MAKECMDGOALS)" "clean" endif endif endif -OBJS= stmt_retry.o dest.o fastpath.o postgres.o pquery.o utility.o auditfuncs.o +OBJS= autonomous.o stmt_retry.o dest.o fastpath.o postgres.o pquery.o utility.o auditfuncs.o ifneq (,$(filter $(PORTNAME),cygwin win32)) override CPPFLAGS += -fPIC -DWIN32_STACK_RLIMIT=$(WIN32_STACK_RLIMIT) diff --git a/src/gausskernel/process/tcop/autonomous.cpp b/src/gausskernel/process/tcop/autonomous.cpp new file mode 100644 index 0000000000..44b25d1d5d --- /dev/null +++ b/src/gausskernel/process/tcop/autonomous.cpp @@ -0,0 +1,857 @@ +/*-------------------------------------------------------------------------- + * + * autonomous.cpp + * Run SQL commands using a background worker. + * + * Copyright (C) 2014, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/gausskernel/process/tcop/autonomous.cpp + * + * + * This implements a C API to open an autonomous session and run SQL queries + * in it. The session looks much like a normal database connection, but it is + * always to the same database, and there is no authentication needed. The + * "backend" for that connection is a background worker. The normal backend + * and the autonomous session worker communicate over the normal FE/BE + * protocol. 
+ * + * Types: + * + * AutonomousSession -- opaque connection handle + * AutonomousPreparedStatement -- opaque prepared statement handle + * AutonomousResult -- query result + * + * Functions: + * + * AutonomousSessionStart() -- start a session (launches background worker) + * and return a handle + * + * AutonomousSessionEnd() -- close session and free resources + * + * AutonomousSessionExecute() -- run SQL string and return result (rows or + * status) + * + * AutonomousSessionPrepare() -- prepare an SQL string for subsequent + * execution + * + * AutonomousSessionExecutePrepared() -- run prepared statement + * + * ------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "gs_thread.h" + +#include "access/htup.h" +#include "access/tupdesc.h" +#include "access/xact.h" +#include "commands/async.h" +#include "commands/variable.h" +#include "lib/stringinfo.h" +#include "libpq/libpq.h" +#include "libpq/pqformat.h" +#include "libpq/pqmq.h" +#include "libpq/pqsignal.h" +#include "mb/pg_wchar.h" +#include "miscadmin.h" +#include "nodes/pg_list.h" +#include "pgstat.h" +#include "postmaster/bgworker.h" +#include "storage/shm_mq.h" +#include "storage/shm_toc.h" +#include "tcop/autonomous.h" +#include "tcop/tcopprot.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/resowner.h" + +/* Table-of-contents constants for our dynamic shared memory segment. */ +#define AUTONOMOUS_MAGIC 0x50674267 + +#define AUTONOMOUS_KEY_FIXED_DATA 0 +#define AUTONOMOUS_KEY_GUC 1 +#define AUTONOMOUS_KEY_COMMAND_QUEUE 2 +#define AUTONOMOUS_KEY_RESPONSE_QUEUE 3 +#define AUTONOMOUS_NKEYS 4 + +#define AUTONOMOUS_QUEUE_SIZE 16384 + +/* Fixed-size data passed via our dynamic shared memory segment. 
*/ +struct autonomous_session_fixed_data { + Oid database_id; + Oid authenticated_user_id; + Oid current_user_id; + int sec_context; +}; + +struct AutonomousSession { + char *seg; + BackgroundWorkerHandle *worker_handle; + shm_mq_handle *command_qh; + shm_mq_handle *response_qh; + int transaction_status; +}; + +struct AutonomousPreparedStatement { + AutonomousSession *session; + Oid *argtypes; + TupleDesc tupdesc; +}; + +static void shm_mq_receive_stringinfo(shm_mq_handle *qh, StringInfoData *msg); +static void autonomous_check_client_encoding_hook(void); +static TupleDesc TupleDesc_from_RowDescription(StringInfo msg); +static HeapTuple HeapTuple_from_DataRow(TupleDesc tupdesc, StringInfo msg); +static void forward_NotifyResponse(StringInfo msg); +static void rethrow_errornotice(StringInfo msg); +static void invalid_protocol_message(char msgtype); + +AutonomousSession * AutonomousSessionStart(void) +{ + BackgroundWorker worker = {0}; + ThreadId pid; + AutonomousSession *session = NULL; + shm_toc_estimator e; + Size segsize; + Size guc_len; + char *gucstate = NULL; + char *seg = NULL; + shm_toc *toc = NULL; + autonomous_session_fixed_data *fdata = NULL; + shm_mq *command_mq = NULL; + shm_mq *response_mq = NULL; + BgwHandleStatus bgwstatus; + StringInfoData msg; + char msgtype; + errno_t rc; + + session = (AutonomousSession *)palloc(sizeof(*session)); + + shm_toc_initialize_estimator(&e); + shm_toc_estimate_chunk(&e, sizeof(autonomous_session_fixed_data)); + shm_toc_estimate_chunk(&e, AUTONOMOUS_QUEUE_SIZE); + shm_toc_estimate_chunk(&e, AUTONOMOUS_QUEUE_SIZE); + guc_len = EstimateGUCStateSpace(); + shm_toc_estimate_chunk(&e, guc_len); + shm_toc_estimate_keys(&e, AUTONOMOUS_NKEYS); + segsize = shm_toc_estimate(&e); + seg = (char *)palloc(sizeof(char) * segsize); + + session->seg = seg; + + toc = shm_toc_create(AUTONOMOUS_MAGIC, seg, segsize); + + /* Store fixed-size data in dynamic shared memory. 
*/ + fdata = (autonomous_session_fixed_data *)shm_toc_allocate(toc, sizeof(*fdata)); + fdata->database_id = u_sess->proc_cxt.MyDatabaseId; + fdata->authenticated_user_id = GetAuthenticatedUserId(); + GetUserIdAndSecContext(&fdata->current_user_id, &fdata->sec_context); + shm_toc_insert(toc, AUTONOMOUS_KEY_FIXED_DATA, fdata); + + /* Store GUC state in dynamic shared memory. */ + gucstate = (char *)shm_toc_allocate(toc, guc_len); + SerializeGUCState(guc_len, gucstate); + shm_toc_insert(toc, AUTONOMOUS_KEY_GUC, gucstate); + + command_mq = shm_mq_create(shm_toc_allocate(toc, AUTONOMOUS_QUEUE_SIZE), + AUTONOMOUS_QUEUE_SIZE); + shm_toc_insert(toc, AUTONOMOUS_KEY_COMMAND_QUEUE, command_mq); + shm_mq_set_sender(command_mq, t_thrd.proc); + + response_mq = shm_mq_create(shm_toc_allocate(toc, AUTONOMOUS_QUEUE_SIZE), + AUTONOMOUS_QUEUE_SIZE); + shm_toc_insert(toc, AUTONOMOUS_KEY_RESPONSE_QUEUE, response_mq); + shm_mq_set_receiver(response_mq, t_thrd.proc); + + session->command_qh = shm_mq_attach(command_mq, seg, NULL); + session->response_qh = shm_mq_attach(response_mq, seg, NULL); + + worker.bgw_flags = + BGWORKER_SHMEM_ACCESS | BGWORKER_BACKEND_DATABASE_CONNECTION; + worker.bgw_start_time = BgWorkerStart_ConsistentState; + worker.bgw_restart_time = BGW_NEVER_RESTART; + rc = snprintf_s(worker.bgw_library_name, BGW_MAXLEN, BGW_MAXLEN, "postgres"); + securec_check_ss(rc, "\0", "\0"); + rc = snprintf_s(worker.bgw_function_name, BGW_MAXLEN, BGW_MAXLEN, "autonomous_worker_main"); + securec_check_ss(rc, "\0", "\0"); + rc = snprintf_s(worker.bgw_name, BGW_MAXLEN, BGW_MAXLEN, "autonomous session by PID %lu", + t_thrd.proc_cxt.MyProcPid); + securec_check_ss(rc, "\0", "\0"); + worker.bgw_main_arg = PointerGetDatum(seg); + worker.bgw_notify_pid = t_thrd.proc_cxt.MyProcPid; + + if (!RegisterDynamicBackgroundWorker(&worker, &session->worker_handle)) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_RESOURCES), + errmsg("could not register background process"), + errhint("You might need to 
increase max_background_workers."))); + + shm_mq_set_handle(session->command_qh, session->worker_handle); + shm_mq_set_handle(session->response_qh, session->worker_handle); + + bgwstatus = WaitForBackgroundWorkerStartup(session->worker_handle, &pid); + if (bgwstatus != BGWH_STARTED) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_RESOURCES), + errmsg("could not start background worker"))); + + do { + ereport(LOG, (errmsg("front begin receive msg"))); + shm_mq_receive_stringinfo(session->response_qh, &msg); + ereport(LOG, (errmsg("front end receive msg"))); + ereport(LOG, (errmsg("front function AutonomousSessionStart receive msg %s", msg.data))); + msgtype = pq_getmsgbyte(&msg); + + switch (msgtype) { + case 'E': + case 'N': + rethrow_errornotice(&msg); + break; + case 'Z': + session->transaction_status = pq_getmsgbyte(&msg); + pq_getmsgend(&msg); + break; + default: + invalid_protocol_message(msgtype); + break; + } + } + while (msgtype != 'Z'); + + return session; +} + +void AutonomousSessionEnd(AutonomousSession *session) +{ + StringInfoData msg; + BgwHandleStatus bgwstatus; + if (session->transaction_status == 'T') + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("autonomous session ended with transaction block open"))); + + pq_redirect_to_shm_mq(session->command_qh); + pq_beginmessage(&msg, 'X'); + pq_endmessage(&msg); + pq_stop_redirect_to_shm_mq(); + bgwstatus = WaitForBackgroundWorkerShutdown(session->worker_handle); + if (bgwstatus != BGWH_STOPPED) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_RESOURCES), + errmsg("could not stop background worker"))); + pfree(session->worker_handle); + pfree(session->seg); + pfree(session); +} + +AutonomousResult *AutonomousSessionExecute(AutonomousSession *session, const char *sql) +{ + StringInfoData msg; + char msgtype; + AutonomousResult *result = NULL; + + pq_redirect_to_shm_mq(session->command_qh); + pq_beginmessage(&msg, 'Q'); + pq_sendstring(&msg, sql); + pq_endmessage(&msg); + 
pq_stop_redirect_to_shm_mq(); + + result = (AutonomousResult *)palloc0(sizeof(*result)); + + do { + shm_mq_receive_stringinfo(session->response_qh, &msg); + ereport(LOG, (errmsg("front function AutonomousSessionExecute receive msg %s", msg.data))); + msgtype = pq_getmsgbyte(&msg); + + switch (msgtype) { + case 'A': + forward_NotifyResponse(&msg); + break; + case 'C': + { + const char *tag = pq_getmsgstring(&msg); + result->command = pstrdup(tag); + pq_getmsgend(&msg); + break; + } + case 'D': + if (!result->tupdesc) + elog(ERROR, "no T before D"); + result->tuples = lappend(result->tuples, HeapTuple_from_DataRow(result->tupdesc, &msg)); + pq_getmsgend(&msg); + break; + case 'E': + case 'N': + rethrow_errornotice(&msg); + break; + case 'T': + if (result->tupdesc) + elog(ERROR, "already received a T message"); + result->tupdesc = TupleDesc_from_RowDescription(&msg); + pq_getmsgend(&msg); + break; + case 'Z': + session->transaction_status = pq_getmsgbyte(&msg); + pq_getmsgend(&msg); + break; + default: + invalid_protocol_message(msgtype); + break; + } + } + while (msgtype != 'Z'); + return result; +} + +AutonomousPreparedStatement *AutonomousSessionPrepare(AutonomousSession *session, const char *sql, int16 nargs, + Oid argtypes[], const char *argnames[]) +{ + AutonomousPreparedStatement *result = NULL; + StringInfoData msg; + int16 i; + char msgtype; + + pq_redirect_to_shm_mq(session->command_qh); + pq_beginmessage(&msg, 'P'); + pq_sendstring(&msg, ""); + pq_sendstring(&msg, sql); + pq_sendint16(&msg, (uint16)nargs); + for (i = 0; i < nargs; i++) + pq_sendint32(&msg, (uint32)argtypes[i]); + if (argnames) + for (i = 0; i < nargs; i++) + pq_sendstring(&msg, argnames[i]); + pq_endmessage(&msg); + pq_stop_redirect_to_shm_mq(); + + result = (AutonomousPreparedStatement *)palloc0(sizeof(*result)); + result->session = session; + result->argtypes = (Oid *)palloc(nargs * sizeof(*result->argtypes)); + errno_t rc; + rc = memcpy_s(result->argtypes, nargs * 
sizeof(*result->argtypes), argtypes, nargs * sizeof(*result->argtypes)); + securec_check(rc, "\0", "\0"); + + shm_mq_receive_stringinfo(session->response_qh, &msg); + ereport(LOG, (errmsg("front function AutonomousSessionPrepare receive msg %s", msg.data))); + msgtype = pq_getmsgbyte(&msg); + + switch (msgtype) { + case '1': + break; + case 'E': + rethrow_errornotice(&msg); + break; + default: + invalid_protocol_message(msgtype); + break; + } + + pq_redirect_to_shm_mq(session->command_qh); + pq_beginmessage(&msg, 'D'); + pq_sendbyte(&msg, 'S'); + pq_sendstring(&msg, ""); + pq_endmessage(&msg); + pq_stop_redirect_to_shm_mq(); + + do { + shm_mq_receive_stringinfo(session->response_qh, &msg); + ereport(LOG, (errmsg("front function AutonomousSessionPrepare receive msg %s", msg.data))); + msgtype = pq_getmsgbyte(&msg); + + switch (msgtype) { + case 'A': + forward_NotifyResponse(&msg); + break; + case 'E': + rethrow_errornotice(&msg); + break; + case 'n': + break; + case 't': + /* ignore for now */ + break; + case 'T': + if (result->tupdesc) + elog(ERROR, "already received a T message"); + result->tupdesc = TupleDesc_from_RowDescription(&msg); + pq_getmsgend(&msg); + break; + default: + invalid_protocol_message(msgtype); + break; + } + } + while (msgtype != 'n' && msgtype != 'T'); + + return result; +} + +AutonomousResult *AutonomousSessionExecutePrepared(AutonomousPreparedStatement *stmt, int16 nargs, + Datum *values, bool *nulls) +{ + AutonomousSession *session = NULL; + StringInfoData msg; + AutonomousResult *result = NULL; + char msgtype; + int16 i; + + session = stmt->session; + + pq_redirect_to_shm_mq(session->command_qh); + pq_beginmessage(&msg, 'B'); + pq_sendstring(&msg, ""); + pq_sendstring(&msg, ""); + pq_sendint16(&msg, 1); /* number of parameter format codes */ + pq_sendint16(&msg, 1); + pq_sendint16(&msg, (uint16)nargs); /* number of parameter values */ + for (i = 0; i < nargs; i++) { + if (nulls[i]) + pq_sendint32(&msg, -1); + else { + Oid typsend; + bool 
typisvarlena; + bytea *outputbytes = NULL; + + getTypeBinaryOutputInfo(stmt->argtypes[i], &typsend, &typisvarlena); + outputbytes = OidSendFunctionCall(typsend, values[i]); + pq_sendint32(&msg, VARSIZE(outputbytes) - VARHDRSZ); + pq_sendbytes(&msg, VARDATA(outputbytes), VARSIZE(outputbytes) - VARHDRSZ); + pfree(outputbytes); + } + } + pq_sendint16(&msg, 1); /* number of result column format codes */ + pq_sendint16(&msg, 1); + pq_endmessage(&msg); + pq_stop_redirect_to_shm_mq(); + + shm_mq_receive_stringinfo(session->response_qh, &msg); + ereport(LOG, (errmsg("front function AutonomousSessionExecutePrepared receive msg %s", msg.data))); + msgtype = pq_getmsgbyte(&msg); + + switch (msgtype) { + case '2': + break; + case 'E': + rethrow_errornotice(&msg); + break; + default: + invalid_protocol_message(msgtype); + break; + } + + pq_redirect_to_shm_mq(session->command_qh); + pq_beginmessage(&msg, 'E'); + pq_sendstring(&msg, ""); + pq_sendint32(&msg, 0); + pq_endmessage(&msg); + pq_stop_redirect_to_shm_mq(); + + result = (AutonomousResult *)palloc0(sizeof(*result)); + result->tupdesc = stmt->tupdesc; + + do { + shm_mq_receive_stringinfo(session->response_qh, &msg); + ereport(LOG, (errmsg("front function AutonomousSessionExecutePrepared receive msg %s", msg.data))); + msgtype = pq_getmsgbyte(&msg); + + switch (msgtype) { + case 'A': + forward_NotifyResponse(&msg); + break; + case 'C': + { + const char *tag = pq_getmsgstring(&msg); + result->command = pstrdup(tag); + pq_getmsgend(&msg); + break; + } + case 'D': + if (!stmt->tupdesc) + elog(ERROR, "did not expect any rows"); + result->tuples = lappend(result->tuples, HeapTuple_from_DataRow(stmt->tupdesc, &msg)); + pq_getmsgend(&msg); + break; + case 'E': + case 'N': + rethrow_errornotice(&msg); + break; + default: + invalid_protocol_message(msgtype); + break; + } + } + while (msgtype != 'C'); + + pq_redirect_to_shm_mq(session->command_qh); + pq_putemptymessage('S'); + pq_stop_redirect_to_shm_mq(); + + 
shm_mq_receive_stringinfo(session->response_qh, &msg); + ereport(LOG, (errmsg("front function AutonomousSessionExecutePrepared receive msg %s", msg.data))); + msgtype = pq_getmsgbyte(&msg); + + switch (msgtype) { + case 'A': + forward_NotifyResponse(&msg); + break; + case 'Z': + session->transaction_status = pq_getmsgbyte(&msg); + pq_getmsgend(&msg); + break; + default: + invalid_protocol_message(msgtype); + break; + } + + return result; +} + +void autonomous_worker_main(Datum main_arg) +{ + char *seg = NULL; + shm_toc *toc = NULL; + autonomous_session_fixed_data *fdata = NULL; + char *gucstate = NULL; + shm_mq *command_mq = NULL; + shm_mq *response_mq = NULL; + shm_mq_handle *command_qh = NULL; + shm_mq_handle *response_qh = NULL; + StringInfoData msg; + + char msgtype; + + (void)gspqsignal(SIGTERM, die); + BackgroundWorkerUnblockSignals(); + + /* Set up a memory context and resource owner. */ + Assert(t_thrd.utils_cxt.CurrentResourceOwner == NULL); + t_thrd.utils_cxt.CurrentResourceOwner = ResourceOwnerCreate(NULL, "autonomous"); + CurrentMemoryContext = AllocSetContextCreate(t_thrd.top_mem_cxt, + "autonomous session", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); + + initStringInfo(&(*t_thrd.postgres_cxt.row_description_buf)); + seg = (char *)DatumGetPointer(main_arg); + if (seg == NULL) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("could not map dynamic shared memory segment"))); + + toc = shm_toc_attach(AUTONOMOUS_MAGIC, seg); + if (toc == NULL) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("bad magic number in dynamic shared memory segment"))); + + /* Find data structures in dynamic shared memory. 
*/ + fdata = (autonomous_session_fixed_data *)shm_toc_lookup(toc, AUTONOMOUS_KEY_FIXED_DATA); + + gucstate = (char *)shm_toc_lookup(toc, AUTONOMOUS_KEY_GUC); + + command_mq = (shm_mq *)shm_toc_lookup(toc, AUTONOMOUS_KEY_COMMAND_QUEUE); + shm_mq_set_receiver(command_mq, t_thrd.proc); + command_qh = shm_mq_attach(command_mq, seg, NULL); + + response_mq = (shm_mq *)shm_toc_lookup(toc, AUTONOMOUS_KEY_RESPONSE_QUEUE); + shm_mq_set_sender(response_mq, t_thrd.proc); + response_qh = shm_mq_attach(response_mq, seg, NULL); + + pq_redirect_to_shm_mq(response_qh); + BackgroundWorkerInitializeConnectionByOid(fdata->database_id, + fdata->authenticated_user_id); + + (void)SetClientEncoding(GetDatabaseEncoding()); + + StartTransactionCommand(); + RestoreGUCState(gucstate); + CommitTransactionCommand(); + + process_local_preload_libraries(); + + SetUserIdAndSecContext(fdata->current_user_id, fdata->sec_context); + + t_thrd.postgres_cxt.whereToSendOutput = DestRemote; + ReadyForQuery((CommandDest)t_thrd.postgres_cxt.whereToSendOutput); + + t_thrd.mem_cxt.msg_mem_cxt = AllocSetContextCreate(t_thrd.top_mem_cxt, + "MessageContext", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); + + do { + (void)MemoryContextSwitchTo(t_thrd.mem_cxt.msg_mem_cxt); + MemoryContextResetAndDeleteChildren(t_thrd.mem_cxt.msg_mem_cxt); + + ProcessCompletedNotifies(); + pgstat_report_stat(false); + pgstat_report_activity(STATE_IDLE, NULL); + + shm_mq_receive_stringinfo(command_qh, &msg); + ereport(LOG, (errmsg("bgworker receive msg %s", msg.data))); + msgtype = pq_getmsgbyte(&msg); + + switch (msgtype) { + case 'B': + { + SetCurrentStatementStartTimestamp(); + exec_bind_message(&msg); + break; + } + case 'D': + { + int describe_type; + const char *describe_target; + + SetCurrentStatementStartTimestamp(); + + describe_type = pq_getmsgbyte(&msg); + describe_target = pq_getmsgstring(&msg); + pq_getmsgend(&msg); + + switch (describe_type) { + case 'S': + 
exec_describe_statement_message(describe_target); + break; +#ifdef TODO + case 'P': + exec_describe_portal_message(describe_target); + break; +#endif + default: + ereport(ERROR, + (errcode(ERRCODE_PROTOCOL_VIOLATION), + errmsg("invalid DESCRIBE message subtype %d", + describe_type))); + break; + } + } + break; + case 'E': + { + const char *portal_name; + int max_rows; + + SetCurrentStatementStartTimestamp(); + + portal_name = pq_getmsgstring(&msg); + max_rows = (int)pq_getmsgint(&msg, 4); + pq_getmsgend(&msg); + + exec_execute_message(portal_name, max_rows); + } + break; + + case 'P': + { + const char *stmt_name; + const char *query_string; + uint16 numParams; + Oid *paramTypes = NULL; + char **paramTypeNames = NULL; + + SetCurrentStatementStartTimestamp(); + + stmt_name = pq_getmsgstring(&msg); + query_string = pq_getmsgstring(&msg); + numParams = pq_getmsgint(&msg, 2); + if (numParams > 0) { + int i; + + paramTypes = (Oid *)palloc(numParams * sizeof(Oid)); + for (i = 0; i < numParams; i++) + paramTypes[i] = pq_getmsgint(&msg, 4); + } + /* If data left in message, read parameter names. 
*/ + if (msg.cursor != msg.len) { + int i; + + paramTypeNames = (char **)palloc(numParams * sizeof(char *)); + for (i = 0; i < numParams; i++) + paramTypeNames[i] = (char *)pq_getmsgstring(&msg); + } + pq_getmsgend(&msg); + + exec_parse_message(query_string, stmt_name, paramTypes, paramTypeNames, (int)numParams); + break; + } + case 'Q': + { + const char *sql; + int save_log_statement; + bool save_log_duration; + int save_log_min_duration_statement; + + sql = pq_getmsgstring(&msg); + pq_getmsgend(&msg); + + /* XXX room for improvement */ + save_log_statement = u_sess->attr.attr_common.log_statement; + save_log_duration = u_sess->attr.attr_sql.log_duration; + save_log_min_duration_statement = u_sess->attr.attr_storage.log_min_duration_statement; + + check_client_encoding_hook = autonomous_check_client_encoding_hook; + u_sess->attr.attr_common.log_statement = LOGSTMT_NONE; + u_sess->attr.attr_sql.log_duration = false; + u_sess->attr.attr_storage.log_min_duration_statement = -1; + + SetCurrentStatementStartTimestamp(); + exec_simple_query(sql, QUERY_MESSAGE); + + u_sess->attr.attr_common.log_statement = save_log_statement; + u_sess->attr.attr_sql.log_duration = save_log_duration; + u_sess->attr.attr_storage.log_min_duration_statement = save_log_min_duration_statement; + check_client_encoding_hook = NULL; + + ReadyForQuery((CommandDest)t_thrd.postgres_cxt.whereToSendOutput); + break; + } + case 'S': + { + pq_getmsgend(&msg); + finish_xact_command(); + ReadyForQuery((CommandDest)t_thrd.postgres_cxt.whereToSendOutput); + break; + } + case 'X': + break; + default: + ereport(ERROR, + (errcode(ERRCODE_PROTOCOL_VIOLATION), + errmsg("invalid protocol message type from autonomous session leader: %c", + msgtype))); + break; + } + } + while (msgtype != 'X'); +} + +static void shm_mq_receive_stringinfo(shm_mq_handle *qh, StringInfoData *msg) +{ + shm_mq_result res; + Size nbytes = 0; + void *data = NULL; + + res = shm_mq_receive(qh, &nbytes, &data, false); + if (res != 
SHM_MQ_SUCCESS) + elog(ERROR, "shm_mq_receive failed: %d", res); + initStringInfo(msg); + appendBinaryStringInfo(msg, (const char*)data, (int)nbytes); +} + +static void autonomous_check_client_encoding_hook(void) +{ + elog(ERROR, "cannot set client encoding in autonomous session"); +} + +static TupleDesc TupleDesc_from_RowDescription(StringInfo msg) +{ + TupleDesc tupdesc; + int16 natts = pq_getmsgint(msg, 2); + int16 i; + + tupdesc = CreateTemplateTupleDesc(natts, false); + for (i = 0; i < natts; i++) { + const char *colname; + Oid type_oid; + uint32 typmod; + uint16 format; + + colname = pq_getmsgstring(msg); + (void) pq_getmsgint(msg, 4); /* table OID */ + (void) pq_getmsgint(msg, 2); /* table attnum */ + type_oid = pq_getmsgint(msg, 4); + (void) pq_getmsgint(msg, 2); /* type length */ + typmod = pq_getmsgint(msg, 4); + format = pq_getmsgint(msg, 2); + (void) format; +#ifdef TODO + /* XXX The protocol sometimes sends 0 (text) if the format is not + * determined yet. We always use binary, so this check is probably + * not useful. 
*/ + if (format != 1) + elog(ERROR, "format must be binary"); +#endif + + TupleDescInitEntry(tupdesc, i + 1, colname, type_oid, typmod, 0); + } + return tupdesc; +} + +static HeapTuple HeapTuple_from_DataRow(TupleDesc tupdesc, StringInfo msg) +{ + int16 natts = pq_getmsgint(msg, 2); + int16 i; + Datum *values; + bool *nulls; + StringInfoData buf; + + Assert(tupdesc); + + if (natts != tupdesc->natts) + elog(ERROR, "malformed DataRow"); + + values = (Datum *)palloc(natts * sizeof(*values)); + nulls = (bool *)palloc(natts * sizeof(*nulls)); + initStringInfo(&buf); + + for (i = 0; i < natts; i++) { + int32 len = pq_getmsgint(msg, 4); + + if (len < 0) + nulls[i] = true; + else { + Oid recvid; + Oid typioparams; + + nulls[i] = false; + + getTypeBinaryInputInfo(tupdesc->attrs[i]->atttypid, + &recvid, + &typioparams); + resetStringInfo(&buf); + appendBinaryStringInfo(&buf, pq_getmsgbytes(msg, len), len); + values[i] = OidReceiveFunctionCall(recvid, &buf, typioparams, + tupdesc->attrs[i]->atttypmod); + } + } + + return heap_form_tuple(tupdesc, values, nulls); +} + +static void forward_NotifyResponse(StringInfo msg) +{ + int32 pid; + const char *channel; + const char *payload; + + pid = (int32)pq_getmsgint(msg, 4); + channel = pq_getmsgrawstring(msg); + payload = pq_getmsgrawstring(msg); + pq_endmessage(msg); + + NotifyMyFrontEnd(channel, payload, pid); +} + + +static void rethrow_errornotice(StringInfo msg) +{ + ErrorData edata; + + pq_parse_errornotice(msg, &edata); + edata.elevel = Min(edata.elevel, ERROR); + ThrowErrorData(&edata); +} + + +static void invalid_protocol_message(char msgtype) +{ + ereport(ERROR, + (errcode(ERRCODE_PROTOCOL_VIOLATION), + errmsg("invalid protocol message type from autonomous session: %c", + msgtype))); +} + diff --git a/src/gausskernel/process/tcop/postgres.cpp b/src/gausskernel/process/tcop/postgres.cpp index 3a3c8b119f..737217687d 100755 --- a/src/gausskernel/process/tcop/postgres.cpp +++ b/src/gausskernel/process/tcop/postgres.cpp @@ 
-203,7 +203,7 @@ static void get_query_result(TupleTableSlot* slot, DestReceiver* self); * @hdfs * Define different mesage type used for exec_simple_query */ -typedef enum { QUERY_MESSAGE = 0, HYBRID_MESSAGE } MessageType; +//typedef enum { QUERY_MESSAGE = 0, HYBRID_MESSAGE } MessageType; /* ---------------------------------------------------------------- * decls for routines only used in this file @@ -235,6 +235,8 @@ extern void CancelAutoAnalyze(); extern List* RevalidateCachedQuery(CachedPlanSource* plansource); static void InitRecursiveCTEGlobalVariables(const PlannedStmt* planstmt); +THR_LOCAL bool needEnd = true; + bool StreamThreadAmI() { return (t_thrd.role == STREAM_WORKER); @@ -1874,7 +1876,7 @@ void exec_init_poolhandles(void) * hybridmesage, this parameter will be set to 1 to tell us the normal query string * followed by information string. query_string = normal querystring + message. */ -static void exec_simple_query(const char* query_string, MessageType messageType, StringInfo msg = NULL) +void exec_simple_query(const char* query_string, MessageType messageType, StringInfo msg) { CommandDest dest = (CommandDest)t_thrd.postgres_cxt.whereToSendOutput; MemoryContext oldcontext; @@ -2883,7 +2885,7 @@ static void exec_plan_with_params(StringInfo input_message) * If paramTypeNames is specified, paraTypes is filled with corresponding OIDs. * The caller is expected to allocate space for the paramTypes. 
*/ -static void exec_parse_message(const char* query_string, /* string to execute */ +void exec_parse_message(const char* query_string, /* string to execute */ const char* stmt_name, /* name for prepared stmt */ Oid* paramTypes, /* parameter types */ char** paramTypeNames, /* parameter type names */ @@ -3104,7 +3106,7 @@ static void exec_parse_message(const char* query_string, /* string to execute */ if (u_sess->attr.attr_common.log_parser_stats) ResetUsage(); - query = parse_analyze_varparams(raw_parse_tree, query_string, ¶mTypes, &numParams); + query = parse_analyze_varparams(raw_parse_tree, query_string, ¶mTypes, &numParams, paramTypeNames); /* check cross engine queries */ StorageEngineType storageEngineType = SE_TYPE_UNSPECIFIED; @@ -3766,7 +3768,7 @@ static void exec_get_ddl_params(StringInfo input_message) * * Process a "Bind" message to create a portal from a prepared statement */ -static void exec_bind_message(StringInfo input_message) +void exec_bind_message(StringInfo input_message) { const char* portal_name = NULL; const char* stmt_name = NULL; @@ -4325,7 +4327,7 @@ static void exec_bind_message(StringInfo input_message) * * Process an "Execute" message for a portal */ -static void exec_execute_message(const char* portal_name, long max_rows) +void exec_execute_message(const char* portal_name, long max_rows) { CommandDest dest; DestReceiver* receiver = NULL; @@ -4790,7 +4792,7 @@ static int errdetail_recovery_conflict(void) * * Process a "Describe" message for a prepared statement */ -static void exec_describe_statement_message(const char* stmt_name) +void exec_describe_statement_message(const char* stmt_name) { CachedPlanSource* psrc = NULL; int i; diff --git a/src/gausskernel/storage/ipc/Makefile b/src/gausskernel/storage/ipc/Makefile index 010ea8de9a..4b09ca2ce3 100644 --- a/src/gausskernel/storage/ipc/Makefile +++ b/src/gausskernel/storage/ipc/Makefile @@ -17,6 +17,6 @@ ifneq "$(MAKECMDGOALS)" "clean" endif endif OBJS = ipc.o ipci.o pmsignal.o 
procarray.o procsignal.o shmem.o shmqueue.o \ - sinval.o sinvaladt.o standby.o shm_mq.o + sinval.o sinvaladt.o standby.o shm_mq.o shm_toc.o -include $(top_srcdir)/src/gausskernel/common.mk +include $(top_srcdir)/src/gausskernel/common.mk diff --git a/src/gausskernel/storage/ipc/shm_toc.cpp b/src/gausskernel/storage/ipc/shm_toc.cpp new file mode 100644 index 0000000000..dcf6cbad31 --- /dev/null +++ b/src/gausskernel/storage/ipc/shm_toc.cpp @@ -0,0 +1,242 @@ +/*------------------------------------------------------------------------- + * + * shm_toc.cpp + * shared memory segment table of contents + * + * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/gausskernel/storage/ipc/shm_toc.cpp + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "storage/barrier.h" +#include "storage/shm_toc.h" +#include "storage/spin.h" + +struct shm_toc_entry +{ + uint64 key; /* Arbitrary identifier */ + uint64 offset; /* Bytes offset */ +}; + +struct shm_toc +{ + uint64 toc_magic; /* Magic number for this TOC */ + slock_t toc_mutex; /* Spinlock for mutual exclusion */ + Size toc_total_bytes; /* Bytes managed by this TOC */ + Size toc_allocated_bytes; /* Bytes allocated of those managed */ + Size toc_nentry; /* Number of entries in TOC */ + shm_toc_entry toc_entry[FLEXIBLE_ARRAY_MEMBER]; +}; + +/* + * Initialize a region of shared memory with a table of contents. + */ +shm_toc *shm_toc_create(uint64 magic, void *address, Size nbytes) +{ + shm_toc *toc = (shm_toc *) address; + + Assert(nbytes > offsetof(shm_toc, toc_entry)); + toc->toc_magic = magic; + SpinLockInit(&toc->toc_mutex); + toc->toc_total_bytes = nbytes; + toc->toc_allocated_bytes = 0; + toc->toc_nentry = 0; + + return toc; +} + +/* + * Attach to an existing table of contents. 
If the magic number found at + * the target address doesn't match our expectations, returns NULL. + */ +extern shm_toc *shm_toc_attach(uint64 magic, void *address) +{ + shm_toc *toc = (shm_toc *) address; + + if (toc->toc_magic != magic) + return NULL; + + Assert(toc->toc_total_bytes >= toc->toc_allocated_bytes); + Assert(toc->toc_total_bytes >= offsetof(shm_toc, toc_entry)); + + return toc; +} + +/* + * Allocate shared memory from a segment managed by a table of contents. + * + * This is not a full-blown allocator; there's no way to free memory. It's + * just a way of dividing a single physical shared memory segment into logical + * chunks that may be used for different purposes. + * + * We allocated backwards from the end of the segment, so that the TOC entries + * can grow forward from the start of the segment. + */ +extern void *shm_toc_allocate(shm_toc *toc, Size nbytes) +{ + volatile shm_toc *vtoc = toc; + Size total_bytes; + Size allocated_bytes; + Size nentry; + Size toc_bytes; + + /* Make sure request is well-aligned. */ + nbytes = BUFFERALIGN(nbytes); + + SpinLockAcquire(&toc->toc_mutex); + + total_bytes = vtoc->toc_total_bytes; + allocated_bytes = vtoc->toc_allocated_bytes; + nentry = vtoc->toc_nentry; + toc_bytes = offsetof(shm_toc, toc_entry) +nentry * sizeof(shm_toc_entry) + + allocated_bytes; + + /* Check for memory exhaustion and overflow. */ + if (toc_bytes + nbytes > total_bytes || toc_bytes + nbytes < toc_bytes) + { + SpinLockRelease(&toc->toc_mutex); + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of shared memory"))); + } + vtoc->toc_allocated_bytes += nbytes; + + SpinLockRelease(&toc->toc_mutex); + + return ((char *) toc) + (total_bytes - allocated_bytes - nbytes); +} + +/* + * Return the number of bytes that can still be allocated. 
+ */ +extern Size shm_toc_freespace(shm_toc *toc) +{ + volatile shm_toc *vtoc = toc; + Size total_bytes; + Size allocated_bytes; + Size nentry; + Size toc_bytes; + + SpinLockAcquire(&toc->toc_mutex); + total_bytes = vtoc->toc_total_bytes; + allocated_bytes = vtoc->toc_allocated_bytes; + nentry = vtoc->toc_nentry; + SpinLockRelease(&toc->toc_mutex); + + toc_bytes = offsetof(shm_toc, toc_entry) +nentry * sizeof(shm_toc_entry); + Assert(allocated_bytes + BUFFERALIGN(toc_bytes) <= total_bytes); + return total_bytes - (allocated_bytes + BUFFERALIGN(toc_bytes)); +} + +/* + * Insert a TOC entry. + * + * The idea here is that process setting up the shared memory segment will + * register the addresses of data structures within the segment using this + * function. Each data structure will be identified using a 64-bit key, which + * is assumed to be a well-known or discoverable integer. Other processes + * accessing the shared memory segment can pass the same key to + * shm_toc_lookup() to discover the addresses of those data structures. + * + * Since the shared memory segment may be mapped at different addresses within + * different backends, we store relative rather than absolute pointers. + * + * This won't scale well to a large number of keys. Hopefully, that isn't + * necessary; if it proves to be, we might need to provide a more sophisticated + * data structure here. But the real idea here is just to give someone mapping + * a dynamic shared memory the ability to find the bare minimum number of + * pointers that they need to bootstrap. If you're storing a lot of stuff in + * here, you're doing it wrong. + */ +void +shm_toc_insert(shm_toc *toc, uint64 key, void *address) +{ + volatile shm_toc *vtoc = toc; + uint64 total_bytes; + uint64 allocated_bytes; + uint64 nentry; + uint64 toc_bytes; + uint64 offset; + + /* Relativize pointer. 
*/ + Assert(address > (void *) toc); + offset = ((char *) address) - (char *) toc; + + SpinLockAcquire(&toc->toc_mutex); + + total_bytes = vtoc->toc_total_bytes; + allocated_bytes = vtoc->toc_allocated_bytes; + nentry = vtoc->toc_nentry; + toc_bytes = offsetof(shm_toc, toc_entry) +nentry * sizeof(shm_toc_entry) + + allocated_bytes; + + /* Check for memory exhaustion and overflow. */ + if (toc_bytes + sizeof(shm_toc_entry) > total_bytes || + toc_bytes + sizeof(shm_toc_entry) < toc_bytes) + { + SpinLockRelease(&toc->toc_mutex); + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of shared memory"))); + } + + Assert(offset < total_bytes); + vtoc->toc_entry[nentry].key = key; + vtoc->toc_entry[nentry].offset = offset; + + /* + * By placing a write barrier after filling in the entry and before + * updating the number of entries, we make it safe to read the TOC + * unlocked. + */ + pg_write_barrier(); + + vtoc->toc_nentry++; + + SpinLockRelease(&toc->toc_mutex); +} + +/* + * Look up a TOC entry. + * + * Unlike the other functions in this file, this operation acquires no lock; + * it uses only barriers. It probably wouldn't hurt concurrency very much even + * if it did get a lock, but since it's reasonably likely that a group of + * worker processes could each read a series of entries from the same TOC + * right around the same time, there seems to be some value in avoiding it. + */ +void *shm_toc_lookup(shm_toc *toc, uint64 key) +{ + uint64 nentry; + uint64 i; + + /* Read the number of entries before we examine any entry. */ + nentry = toc->toc_nentry; + pg_read_barrier(); + + /* Now search for a matching entry. */ + for (i = 0; i < nentry; ++i) + if (toc->toc_entry[i].key == key) + return ((char *) toc) + toc->toc_entry[i].offset; + + /* No matching entry was found. */ + return NULL; +} + +/* + * Estimate how much shared memory will be required to store a TOC and its + * dependent data structures. 
+ */ +Size +shm_toc_estimate(shm_toc_estimator *e) +{ + return add_size(offsetof(shm_toc, toc_entry), + add_size(mul_size(e->number_of_keys, sizeof(shm_toc_entry)), + e->space_for_chunks)); +} + diff --git a/src/include/commands/async.h b/src/include/commands/async.h index a109973968..adb8e4b484 100755 --- a/src/include/commands/async.h +++ b/src/include/commands/async.h @@ -27,6 +27,7 @@ extern Size AsyncShmemSize(void); extern void AsyncShmemInit(void); /* notify-related SQL statements */ +extern void NotifyMyFrontEnd(const char* channel, const char* payload, int32 srcPid); extern void Async_Notify(const char* channel, const char* payload); extern void Async_Listen(const char* channel); extern void Async_Unlisten(const char* channel); diff --git a/src/include/commands/variable.h b/src/include/commands/variable.h index 06e93e73fb..8cb4b1a963 100755 --- a/src/include/commands/variable.h +++ b/src/include/commands/variable.h @@ -28,6 +28,7 @@ extern bool check_transaction_deferrable(bool* newval, void** extra, GucSource s extern bool check_random_seed(double* newval, void** extra, GucSource source); extern void assign_random_seed(double newval, void* extra); extern const char* show_random_seed(void); +extern void (*check_client_encoding_hook)(void); extern bool check_client_encoding(char** newval, void** extra, GucSource source); extern void assign_client_encoding(const char* newval, void* extra); extern bool check_mix_replication_param(bool* newval, void** extra, GucSource source); diff --git a/src/include/parser/analyze.h b/src/include/parser/analyze.h index acd6a56371..3c7ce32cb4 100755 --- a/src/include/parser/analyze.h +++ b/src/include/parser/analyze.h @@ -24,7 +24,7 @@ extern THR_LOCAL PGDLLIMPORT post_parse_analyze_hook_type post_parse_analyze_hoo extern Query* parse_analyze(Node* parseTree, const char* sourceText, Oid* paramTypes, int numParams, bool isFirstNode = true, bool isCreateView = false); -extern Query* parse_analyze_varparams(Node* parseTree, 
const char* sourceText, Oid** paramTypes, int* numParams); +extern Query* parse_analyze_varparams(Node* parseTree, const char* sourceText, Oid** paramTypes, int* numParams, char** paramTypeNames); extern Query* parse_sub_analyze(Node* parseTree, ParseState* parentParseState, CommonTableExpr* parentCTE, bool locked_from_parent, bool resolve_unknowns); diff --git a/src/include/parser/parse_param.h b/src/include/parser/parse_param.h index 2798a0ad91..5f5788d802 100644 --- a/src/include/parser/parse_param.h +++ b/src/include/parser/parse_param.h @@ -16,7 +16,7 @@ #include "parser/parse_node.h" extern void parse_fixed_parameters(ParseState* pstate, Oid* paramTypes, int numParams); -extern void parse_variable_parameters(ParseState* pstate, Oid** paramTypes, int* numParams); +extern void parse_variable_parameters(ParseState* pstate, Oid** paramTypes, int* numParams, char** paramTypeNames); extern void check_variable_parameters(ParseState* pstate, Query* query); extern bool query_contains_extern_params(Query* query); diff --git a/src/include/postgres.h b/src/include/postgres.h index f3cbff2a3c..9cf80535b7 100644 --- a/src/include/postgres.h +++ b/src/include/postgres.h @@ -241,6 +241,8 @@ typedef enum { SKEW_OPT_OFF, SKEW_OPT_NORMAL, SKEW_OPT_LAZY } SkewStrategy; typedef enum { RESOURCE_TRACK_NONE, RESOURCE_TRACK_QUERY, RESOURCE_TRACK_OPERATOR } ResourceTrackOption; +typedef enum { QUERY_MESSAGE = 0, HYBRID_MESSAGE } MessageType; + typedef enum { CODEGEN_PARTIAL, /* allow to call c-function in codegen */ CODEGEN_PURE /* do not allow to call c-function in codegen */ diff --git a/src/include/storage/shm_toc.h b/src/include/storage/shm_toc.h new file mode 100644 index 0000000000..eb7c4d7502 --- /dev/null +++ b/src/include/storage/shm_toc.h @@ -0,0 +1,59 @@ +/*------------------------------------------------------------------------- + * + * shm_toc.h + * shared memory segment table of contents + * + * This is intended to provide a simple way to divide a chunk of shared + * 
memory (probably dynamic shared memory allocated via dsm_create) into + * a number of regions and keep track of the addresses of those regions or + * key data structures within those regions. This is not intended to + * scale to a large number of keys and will perform poorly if used that + * way; if you need a large number of pointers, store them within some + * other data structure within the segment and only put the pointer to + * the data structure itself in the table of contents. + * + * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/shm_toc.h + * + *------------------------------------------------------------------------- + */ +#ifndef SHM_TOC_H +#define SHM_TOC_H + +#include "storage/shmem.h" + +struct shm_toc; +typedef struct shm_toc shm_toc; +struct shm_toc_entry; +typedef struct shm_toc_entry shm_toc_entry; + +extern shm_toc *shm_toc_create(uint64 magic, void *address, Size nbytes); +extern shm_toc *shm_toc_attach(uint64 magic, void *address); +extern void *shm_toc_allocate(shm_toc *toc, Size nbytes); +extern Size shm_toc_freespace(shm_toc *toc); +extern void shm_toc_insert(shm_toc *toc, uint64 key, void *address); +extern void *shm_toc_lookup(shm_toc *toc, uint64 key); + +/* + * Tools for estimating how large a chunk of shared memory will be needed + * to store a TOC and its dependent objects. 
+ */ +typedef struct +{ + Size space_for_chunks; + Size number_of_keys; +} shm_toc_estimator; + +#define shm_toc_initialize_estimator(e) \ + ((e)->space_for_chunks = 0, (e)->number_of_keys = 0) +#define shm_toc_estimate_chunk(e, sz) \ + ((e)->space_for_chunks = add_size((e)->space_for_chunks, \ + BUFFERALIGN((sz)))) +#define shm_toc_estimate_keys(e, cnt) \ + ((e)->number_of_keys = add_size((e)->number_of_keys, (cnt))) + +extern Size shm_toc_estimate(shm_toc_estimator *); + +#endif /* SHM_TOC_H */ diff --git a/src/include/tcop/autonomous.h b/src/include/tcop/autonomous.h new file mode 100644 index 0000000000..9a59cf416b --- /dev/null +++ b/src/include/tcop/autonomous.h @@ -0,0 +1,43 @@ +/*-------------------------------------------------------------------------- + * + * autonomous.h + * Run SQL commands using a background worker. + * + * Copyright (C) 2014, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/include/tcop/autonomous.h + * + * ------------------------------------------------------------------------- + */ +#ifndef AUTONOMOUS_H +#define AUTONOMOUS_H + +#include "access/tupdesc.h" +#include "nodes/pg_list.h" + +struct AutonomousSession; +typedef struct AutonomousSession AutonomousSession; + +struct AutonomousPreparedStatement; +typedef struct AutonomousPreparedStatement AutonomousPreparedStatement; + +struct autonomous_session_fixed_data; +typedef struct autonomous_session_fixed_data autonomous_session_fixed_data; + +typedef struct AutonomousResult +{ + TupleDesc tupdesc; + List *tuples; + char *command; +} AutonomousResult; + +AutonomousSession *AutonomousSessionStart(void); +void AutonomousSessionEnd(AutonomousSession *session); +AutonomousResult *AutonomousSessionExecute(AutonomousSession *session, const char *sql); +AutonomousPreparedStatement *AutonomousSessionPrepare(AutonomousSession *session, const char *sql, int16 nargs, + Oid argtypes[], const char *argnames[]); +AutonomousResult 
*AutonomousSessionExecutePrepared(AutonomousPreparedStatement *stmt, int16 nargs, Datum *values, bool *nulls); +extern void autonomous_worker_main(Datum main_arg); + +#endif /* AUTONOMOUS_H */ diff --git a/src/include/tcop/tcopprot.h b/src/include/tcop/tcopprot.h index e9cf57d6f0..871c6009ae 100755 --- a/src/include/tcop/tcopprot.h +++ b/src/include/tcop/tcopprot.h @@ -23,6 +23,7 @@ #include "nodes/parsenodes.h" #include "storage/procsignal.h" #include "utils/guc.h" +#include "postgres.h" /* Required daylight between max_stack_depth and the kernel limit, in bytes */ #define STACK_DEPTH_SLOP (640 * 1024L) @@ -67,5 +68,10 @@ extern int check_log_duration(char* msec_str, bool was_logged); extern void set_debug_options(int debug_flag, GucContext context, GucSource source); extern bool set_plan_disabling_options(const char* arg, GucContext context, GucSource source); extern const char* get_stats_option_name(const char* arg); +extern void exec_simple_query(const char* query_string, MessageType messageType, StringInfo msg = NULL); +extern void exec_parse_message(const char* query_string, const char* stmt_name, Oid* paramTypes, char** paramTypeNames, int numParams); +extern void exec_bind_message(StringInfo input_message); +extern void exec_execute_message(const char *portal_name, long max_rows); +extern void exec_describe_statement_message(const char *stmt_name); #endif /* TCOPPROT_H */ diff --git a/src/include/utils/elog.h b/src/include/utils/elog.h index 737c908b24..6532c407be 100755 --- a/src/include/utils/elog.h +++ b/src/include/utils/elog.h @@ -495,6 +495,7 @@ extern void UpdateErrorData(ErrorData* edata, ErrorData* newData); extern void FreeErrorData(ErrorData* edata); extern void FlushErrorState(void); extern void FlushErrorStateWithoutDeleteChildrenContext(void); +extern void ThrowErrorData(ErrorData *edata); extern void ReThrowError(ErrorData* edata) __attribute__((noreturn)); extern void pg_re_throw(void) __attribute__((noreturn)); diff --git 
a/src/include/utils/plpgsql.h b/src/include/utils/plpgsql.h index 898ad986d1..9a1b625394 100755 --- a/src/include/utils/plpgsql.h +++ b/src/include/utils/plpgsql.h @@ -21,6 +21,7 @@ #include "catalog/namespace.h" #include "commands/trigger.h" #include "executor/spi.h" +#include "tcop/autonomous.h" /********************************************************************** * Definitions @@ -380,6 +381,7 @@ typedef struct PLpgSQL_stmt_block { /* Block of statements */ int cmd_type; int lineno; char* label; + bool autonomous; List* body; /* List of statements */ int n_initvars; int* initvarnos; @@ -776,7 +778,7 @@ typedef struct PLpgSQL_execstate { /* Runtime execution data */ MemoryContext tuple_store_cxt; ResourceOwner tuple_store_owner; ReturnSetInfo* rsi; - + AutonomousSession *autonomous_session; int found_varno; /* diff --git a/src/test/regress/expected/autonomous_transaction.out b/src/test/regress/expected/autonomous_transaction.out new file mode 100755 index 0000000000..859fd3fab3 --- /dev/null +++ b/src/test/regress/expected/autonomous_transaction.out @@ -0,0 +1,351 @@ +create table at_tb2(id int, val varchar(20)); +create or replace function at_test2(i int) returns integer +LANGUAGE plpgsql +as $$ +declare +pragma autonomous_transaction; +begin +START TRANSACTION; +insert into at_tb2 values(1, 'before s1'); +if i > 10 then +rollback; +else +commit; +end if; +return i; +end; +$$; +select at_test2(15); + at_test2 +---------- + 15 +(1 row) + +select * from at_tb2; + id | val +----+----- +(0 rows) + +select at_test2(5); + at_test2 +---------- + 5 +(1 row) + +select * from at_tb2; + id | val +----+----------- + 1 | before s1 +(1 row) + +truncate table at_tb2; +create or replace procedure at_test3(i int) +AS +DECLARE + PRAGMA AUTONOMOUS_TRANSACTION; +BEGIN + START TRANSACTION; + insert into at_tb2 values(1, 'before s1'); + insert into at_tb2 values(2, 'after s1'); + if i > 10 then + rollback; + else + commit; + end if; +end; +/ +call at_test3(6); + at_test3 
+---------- + +(1 row) + +select * from at_tb2; + id | val +----+----------- + 1 | before s1 + 2 | after s1 +(2 rows) + +truncate table at_tb2; +create or replace procedure at_test4(i int) +AS +DECLARE +BEGIN + insert into at_tb2 values(3, 'klk'); + PERFORM at_test3(6); + insert into at_tb2 values(4, 'klk'); + PERFORM at_test3(15); +end; +/ +select at_test4(6); + at_test4 +---------- + +(1 row) + +select * from at_tb2; + id | val +----+----------- + 3 | klk + 1 | before s1 + 2 | after s1 + 4 | klk +(4 rows) + +truncate table at_tb2; +DECLARE +begin +insert into at_tb2 values(1, 'begin'); +PERFORM at_test3(6); +end; +/ +select * from at_tb2; + id | val +----+----------- + 1 | begin + 1 | before s1 + 2 | after s1 +(3 rows) + +truncate table at_tb2; +begin; +insert into at_tb2 values(1, 'begin'); +select * from at_tb2; + id | val +----+------- + 1 | begin +(1 row) + +call at_test3(6); + at_test3 +---------- + +(1 row) + +select * from at_tb2; + id | val +----+----------- + 1 | begin + 1 | before s1 + 2 | after s1 +(3 rows) + +rollback; +select * from at_tb2; + id | val +----+----------- + 1 | before s1 + 2 | after s1 +(2 rows) + +create table at_test1 (a int); +create or replace procedure autonomous_test() +AS +declare +PRAGMA AUTONOMOUS_TRANSACTION; +BEGIN + START TRANSACTION; + for i in 0..9 loop + if i % 2 = 0 then + execute 'insert into at_test1 values ('||i::integer||')'; + end if; + end loop; + commit; +end; +/ +truncate table at_test1; +begin; +insert into at_test1 values(1); +select * from at_test1; + a +--- + 1 +(1 row) + +call autonomous_test(); + autonomous_test +----------------- + +(1 row) + +select * from at_test1; + a +--- + 1 + 0 + 2 + 4 + 6 + 8 +(6 rows) + +rollback; +select * from at_test1; + a +--- + 0 + 2 + 4 + 6 + 8 +(5 rows) + +create or replace function autonomous_test2() returns integer +LANGUAGE plpgsql +as $$ +declare +PRAGMA AUTONOMOUS_TRANSACTION; +begin +START TRANSACTION; +for i in 0..9 loop + if i % 2 = 0 then + execute 'insert into 
at_test1 values ('||i::integer||')'; + end if; + end loop; + commit; + return 42; + end; +$$; +truncate table at_test1; +begin; +insert into at_test1 values(20); +select * from at_test1; + a +---- + 20 +(1 row) + +select autonomous_test2(); + autonomous_test2 +------------------ + 42 +(1 row) + +select * from at_test1; + a +---- + 20 + 0 + 2 + 4 + 6 + 8 +(6 rows) + +rollback; +select * from at_test1; + a +--- + 0 + 2 + 4 + 6 + 8 +(5 rows) + +create or replace function autonomous_test3() returns text +LANGUAGE plpgsql +as $$ +declare +PRAGMA AUTONOMOUS_TRANSACTION; +begin +START TRANSACTION; +for i in 0..9 loop + if i % 2 = 0 then + execute 'insert into at_test1 values ('||i::integer||')'; + end if; + end loop; + commit; + return 'autonomous_test3 end'; + end; +$$; +truncate table at_test1; +begin; +insert into at_test1 values(30); +select * from at_test1; + a +---- + 30 +(1 row) + +select autonomous_test3(); + autonomous_test3 +---------------------- + autonomous_test3 end +(1 row) + +select * from at_test1; + a +---- + 30 + 0 + 2 + 4 + 6 + 8 +(6 rows) + +rollback; +select * from at_test1; + a +--- + 0 + 2 + 4 + 6 + 8 +(5 rows) + +CREATE TABLE cp_test1 (a int, b text); +CREATE TABLE cp_test2 (a int, b text); +CREATE TABLE cp_test3 (a int, b text); +CREATE OR REPLACE FUNCTION autonomous_cp() RETURNS integer +LANGUAGE plpgsql +AS $$ +DECLARE + PRAGMA AUTONOMOUS_TRANSACTION; +BEGIN + START TRANSACTION; + insert into cp_test1 values(1,'a'),(2,'b'); + insert into cp_test2 values(1,'c'),(2,'d'); + with s1 as (select cp_test1.a, cp_test1.b from cp_test1 left join cp_test2 on cp_test1.a = cp_test2.a) insert into cp_test3 select * from s1; + COMMIT; + RETURN 42; +END; +$$; +select autonomous_cp(); + autonomous_cp +--------------- + 42 +(1 row) + +select * from cp_test3; + a | b +---+--- + 1 | a + 2 | b +(2 rows) + +CREATE TABLE tg_test1 (a int, b varchar(25), c timestamp, d int); +CREATE TABLE tg_test2 (a int, b varchar(25), c timestamp, d int); +CREATE OR REPLACE FUNCTION 
tri_insert_test2_func() RETURNS TRIGGER AS +$$ +DECLARE + PRAGMA AUTONOMOUS_TRANSACTION; +BEGIN +insert into tg_test2 values(new.a,new.b,new.c,new.d); +RETURN NEW; +commit; +END +$$ LANGUAGE PLPGSQL; +CREATE TRIGGER TG_TEST2_TEMP +before insert +ON tg_test1 +FOR EACH ROW +EXECUTE PROCEDURE tri_insert_test2_func(); +insert into tg_test1 values(1,'a','2020-08-13 09:00:00', 1); +ERROR: Un-support feature +DETAIL: Trigger doesnot support autonomous transaction +CONTEXT: PL/pgSQL function tri_insert_test2_func() line 4 at statement block + diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index 0b39cb6019..346a7a6d14 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -582,3 +582,6 @@ test: gtt_clean # procedure, Function Test test: create_procedure create_function pg_compatibility postgres_fdw + +# autonomous transaction Test +test: autonomous_transaction diff --git a/src/test/regress/sql/autonomous_transaction.sql b/src/test/regress/sql/autonomous_transaction.sql new file mode 100755 index 0000000000..e6593513a8 --- /dev/null +++ b/src/test/regress/sql/autonomous_transaction.sql @@ -0,0 +1,189 @@ +create table at_tb2(id int, val varchar(20)); +create or replace function at_test2(i int) returns integer +LANGUAGE plpgsql +as $$ +declare +pragma autonomous_transaction; +begin +START TRANSACTION; +insert into at_tb2 values(1, 'before s1'); +if i > 10 then +rollback; +else +commit; +end if; +return i; +end; +$$; +select at_test2(15); +select * from at_tb2; +select at_test2(5); +select * from at_tb2; + +truncate table at_tb2; +create or replace procedure at_test3(i int) +AS +DECLARE + PRAGMA AUTONOMOUS_TRANSACTION; +BEGIN + START TRANSACTION; + insert into at_tb2 values(1, 'before s1'); + insert into at_tb2 values(2, 'after s1'); + if i > 10 then + rollback; + else + commit; + end if; +end; +/ +call at_test3(6); +select * from at_tb2; + +truncate table at_tb2; +create or replace procedure at_test4(i 
int) +AS +DECLARE +BEGIN + insert into at_tb2 values(3, 'klk'); + PERFORM at_test3(6); + insert into at_tb2 values(4, 'klk'); + PERFORM at_test3(15); +end; +/ +select at_test4(6); +select * from at_tb2; + +truncate table at_tb2; +DECLARE +begin +insert into at_tb2 values(1, 'begin'); +PERFORM at_test3(6); +end; +/ +select * from at_tb2; + +truncate table at_tb2; +begin; +insert into at_tb2 values(1, 'begin'); +select * from at_tb2; +call at_test3(6); +select * from at_tb2; +rollback; +select * from at_tb2; + +create table at_test1 (a int); +create or replace procedure autonomous_test() +AS +declare +PRAGMA AUTONOMOUS_TRANSACTION; +BEGIN + START TRANSACTION; + for i in 0..9 loop + if i % 2 = 0 then + execute 'insert into at_test1 values ('||i::integer||')'; + end if; + end loop; + commit; +end; +/ + +truncate table at_test1; +begin; +insert into at_test1 values(1); +select * from at_test1; +call autonomous_test(); +select * from at_test1; +rollback; +select * from at_test1; + + +create or replace function autonomous_test2() returns integer +LANGUAGE plpgsql +as $$ +declare +PRAGMA AUTONOMOUS_TRANSACTION; +begin +START TRANSACTION; +for i in 0..9 loop + if i % 2 = 0 then + execute 'insert into at_test1 values ('||i::integer||')'; + end if; + end loop; + commit; + return 42; + end; +$$; +truncate table at_test1; +begin; +insert into at_test1 values(20); +select * from at_test1; +select autonomous_test2(); +select * from at_test1; +rollback; +select * from at_test1; + +create or replace function autonomous_test3() returns text +LANGUAGE plpgsql +as $$ +declare +PRAGMA AUTONOMOUS_TRANSACTION; +begin +START TRANSACTION; +for i in 0..9 loop + if i % 2 = 0 then + execute 'insert into at_test1 values ('||i::integer||')'; + end if; + end loop; + commit; + return 'autonomous_test3 end'; + end; +$$; +truncate table at_test1; +begin; +insert into at_test1 values(30); +select * from at_test1; +select autonomous_test3(); +select * from at_test1; +rollback; +select * from 
at_test1; + +CREATE TABLE cp_test1 (a int, b text); +CREATE TABLE cp_test2 (a int, b text); +CREATE TABLE cp_test3 (a int, b text); +CREATE OR REPLACE FUNCTION autonomous_cp() RETURNS integer +LANGUAGE plpgsql +AS $$ +DECLARE + PRAGMA AUTONOMOUS_TRANSACTION; +BEGIN + START TRANSACTION; + insert into cp_test1 values(1,'a'),(2,'b'); + insert into cp_test2 values(1,'c'),(2,'d'); + with s1 as (select cp_test1.a, cp_test1.b from cp_test1 left join cp_test2 on cp_test1.a = cp_test2.a) insert into cp_test3 select * from s1; + COMMIT; + RETURN 42; +END; +$$; +select autonomous_cp(); +select * from cp_test3; + +CREATE TABLE tg_test1 (a int, b varchar(25), c timestamp, d int); +CREATE TABLE tg_test2 (a int, b varchar(25), c timestamp, d int); +CREATE OR REPLACE FUNCTION tri_insert_test2_func() RETURNS TRIGGER AS +$$ +DECLARE + PRAGMA AUTONOMOUS_TRANSACTION; +BEGIN +insert into tg_test2 values(new.a,new.b,new.c,new.d); +RETURN NEW; +commit; +END +$$ LANGUAGE PLPGSQL; + +CREATE TRIGGER TG_TEST2_TEMP +before insert +ON tg_test1 +FOR EACH ROW +EXECUTE PROCEDURE tri_insert_test2_func(); +insert into tg_test1 values(1,'a','2020-08-13 09:00:00', 1); + -- Gitee From c235237a4c78b967d76869054236655722b58ed8 Mon Sep 17 00:00:00 2001 From: jiang_jianyu Date: Tue, 25 Aug 2020 22:50:30 +0800 Subject: [PATCH 5/6] remove redundant log --- src/common/backend/utils/init/miscinit.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/common/backend/utils/init/miscinit.cpp b/src/common/backend/utils/init/miscinit.cpp index f19f3bd2ef..8eb2842083 100755 --- a/src/common/backend/utils/init/miscinit.cpp +++ b/src/common/backend/utils/init/miscinit.cpp @@ -728,7 +728,6 @@ void InitializeSessionUserId(const char* role_name, Oid role_id) { HeapTuple role_tup; Form_pg_authid rform; - //Oid role_id; char* rname = NULL; /* Audit user login */ char details[PGAUDIT_MAXLENGTH]; @@ -764,16 +763,14 @@ void InitializeSessionUserId(const char* role_name, Oid role_id) role_tup = 
SearchSysCache1(AUTHOID, ObjectIdGetDatum(role_id)); if (!HeapTupleIsValid(role_tup)) { ereport(FATAL, - (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION), - errmsg("role with OID %u does not exist", role_id))); + (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION), + errmsg("role with OID %u does not exist", role_id))); } } rform = (Form_pg_authid)GETSTRUCT(role_tup); role_id = HeapTupleGetOid(role_tup); rname = NameStr(rform->rolname); - ereport(LOG, - (errmsg("InitializeSessionUserId role name: %s with OID %u", rname, role_id))); u_sess->misc_cxt.AuthenticatedUserId = role_id; u_sess->misc_cxt.AuthenticatedUserIsSuperuser = rform->rolsuper; -- Gitee From 9717117500b1e74ff2177003d8fdd9afada3a801 Mon Sep 17 00:00:00 2001 From: jiang_jianyu Date: Sat, 29 Aug 2020 10:41:31 +0800 Subject: [PATCH 6/6] forbidden nested autonomous transaction --- src/common/backend/libpq/pqmq.cpp | 7 +++++ src/common/pl/plpgsql/src/pl_exec.cpp | 26 +++++++++---------- src/gausskernel/process/tcop/autonomous.cpp | 20 +++++++++++--- .../process/threadpool/knl_thread.cpp | 7 +++++ src/include/knl/knl_thread.h | 8 ++++++ 5 files changed, 51 insertions(+), 17 deletions(-) diff --git a/src/common/backend/libpq/pqmq.cpp b/src/common/backend/libpq/pqmq.cpp index 274c285a0c..fd3197de2a 100644 --- a/src/common/backend/libpq/pqmq.cpp +++ b/src/common/backend/libpq/pqmq.cpp @@ -224,6 +224,13 @@ void pq_parse_errornotice(StringInfo msg, ErrorData *edata) case PG_DIAG_SEVERITY: /* ignore, trusting we'll get a nonlocalized version */ break; + case PG_DIAG_INTERNEL_ERRCODE: + /* ignore */ + break; + case PG_DIAG_MODULE_ID: + /* It is always MOD_MAX */ + edata->mod_id = MOD_MAX; + break; case PG_DIAG_SQLSTATE: if (strlen(value) != 5) { elog(ERROR, "invalid SQLSTATE: \"%s\"", value); diff --git a/src/common/pl/plpgsql/src/pl_exec.cpp b/src/common/pl/plpgsql/src/pl_exec.cpp index 15eca2aa7f..ac073f06d0 100755 --- a/src/common/pl/plpgsql/src/pl_exec.cpp +++ b/src/common/pl/plpgsql/src/pl_exec.cpp @@ 
-198,7 +198,6 @@ static int check_line_validity_in_for_query(PLpgSQL_stmt_forq* stmt, int, int); static void bind_cursor_with_portal(Portal portal, PLpgSQL_execstate *estate, int varno); static char* transform_anonymous_block(char* query); static bool need_recompile_plan(SPIPlanPtr plan); -static THR_LOCAL PLpgSQL_expr* sqlstmt = NULL; /* ---------- * plpgsql_check_line_validity Called by the debugger plugin for @@ -1420,6 +1419,9 @@ static int exec_stmt_block(PLpgSQL_execstate* estate, PLpgSQL_stmt_block* block) (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("Un-support feature"), errdetail("Trigger doesnot support autonomous transaction"))); + } else if (t_thrd.autonomous_cxt.isnested) { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("Un-support feature : Autonomous transaction doesnot support nesting"))); } else { estate->autonomous_session = AutonomousSessionStart(); } @@ -3905,7 +3907,7 @@ static int exec_stmt_execsql(PLpgSQL_execstate* estate, PLpgSQL_stmt_execsql* st Datum *values = NULL; bool *nulls = NULL; AutonomousResult *aresult = NULL; - sqlstmt = stmt->sqlstmt; + t_thrd.autonomous_cxt.sqlstmt = stmt->sqlstmt; build_symbol_table(estate, stmt->sqlstmt->ns, &nparams, ¶m_names, ¶m_types); astmt = AutonomousSessionPrepare(estate->autonomous_session, stmt->sqlstmt->query, (int16)nparams, param_types, param_names); @@ -5083,7 +5085,7 @@ static int exec_stmt_null(PLpgSQL_execstate* estate, PLpgSQL_stmt* stmt) static int exec_stmt_commit(PLpgSQL_execstate* estate, PLpgSQL_stmt_commit* stmt) { if (estate->autonomous_session) { - if (sqlstmt) { + if (t_thrd.autonomous_cxt.sqlstmt) { int nparams = 0; int i; const char **param_names = NULL; @@ -5093,7 +5095,7 @@ static int exec_stmt_commit(PLpgSQL_execstate* estate, PLpgSQL_stmt_commit* stmt bool *nulls = NULL; AutonomousResult *aresult = NULL; ereport(LOG, (errmsg("query COMMIT"))); - build_symbol_table(estate, sqlstmt->ns, &nparams, ¶m_names, ¶m_types); + build_symbol_table(estate, 
t_thrd.autonomous_cxt.sqlstmt->ns, &nparams, ¶m_names, ¶m_types); astmt = AutonomousSessionPrepare(estate->autonomous_session, "COMMIT", (int16)nparams, param_types, param_names); values = (Datum *)palloc(nparams * sizeof(*values)); @@ -5104,12 +5106,11 @@ static int exec_stmt_commit(PLpgSQL_execstate* estate, PLpgSQL_stmt_commit* stmt } aresult = AutonomousSessionExecutePrepared(astmt, (int16)nparams, values, nulls); exec_set_found(estate, (list_length(aresult->tuples) != 0)); - sqlstmt = NULL; + t_thrd.autonomous_cxt.sqlstmt = NULL; return PLPGSQL_RC_OK; } else { ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("syntax error"), - errdetail("In antonomous transaction, commit/rollback must match start transaction"))); + errmsg("Syntax error: In antonomous transaction, commit/rollback must match start transaction"))); } } @@ -5176,7 +5177,7 @@ static int exec_stmt_commit(PLpgSQL_execstate* estate, PLpgSQL_stmt_commit* stmt static int exec_stmt_rollback(PLpgSQL_execstate* estate, PLpgSQL_stmt_rollback* stmt) { if (estate->autonomous_session) { - if (sqlstmt) { + if (t_thrd.autonomous_cxt.sqlstmt) { int nparams = 0; int i; const char **param_names = NULL; @@ -5186,7 +5187,7 @@ static int exec_stmt_rollback(PLpgSQL_execstate* estate, PLpgSQL_stmt_rollback* bool *nulls = NULL; AutonomousResult *aresult = NULL; ereport(LOG, (errmsg("query ROLLBACK"))); - build_symbol_table(estate, sqlstmt->ns, &nparams, ¶m_names, ¶m_types); + build_symbol_table(estate, t_thrd.autonomous_cxt.sqlstmt->ns, &nparams, ¶m_names, ¶m_types); astmt = AutonomousSessionPrepare(estate->autonomous_session, "ROLLBACK", (int16)nparams, param_types, param_names); values = (Datum *)palloc(nparams * sizeof(*values)); @@ -5197,13 +5198,12 @@ static int exec_stmt_rollback(PLpgSQL_execstate* estate, PLpgSQL_stmt_rollback* } aresult = AutonomousSessionExecutePrepared(astmt, (int16)nparams, values, nulls); exec_set_found(estate, (list_length(aresult->tuples) != 0)); - sqlstmt = NULL; + 
t_thrd.autonomous_cxt.sqlstmt = NULL; return PLPGSQL_RC_OK; } else { ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("syntax error"), - errdetail("In antonomous transaction, commit/rollback must match start transaction"))); - } + errmsg("Syntax error: In antonomous transaction, commit/rollback must match start transaction"))); + } } const char* PORTAL = "Portal"; diff --git a/src/gausskernel/process/tcop/autonomous.cpp b/src/gausskernel/process/tcop/autonomous.cpp index 44b25d1d5d..e3f02bf6f1 100644 --- a/src/gausskernel/process/tcop/autonomous.cpp +++ b/src/gausskernel/process/tcop/autonomous.cpp @@ -65,6 +65,7 @@ #include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/resowner.h" +#include "utils/ps_status.h" /* Table-of-contents constants for our dynamic shared memory segment. */ #define AUTONOMOUS_MAGIC 0x50674267 @@ -524,6 +525,8 @@ void autonomous_worker_main(Datum main_arg) (void)gspqsignal(SIGTERM, die); BackgroundWorkerUnblockSignals(); + t_thrd.autonomous_cxt.isnested = true; + /* Set up a memory context and resource owner. 
*/ Assert(t_thrd.utils_cxt.CurrentResourceOwner == NULL); t_thrd.utils_cxt.CurrentResourceOwner = ResourceOwnerCreate(NULL, "autonomous"); @@ -533,7 +536,6 @@ void autonomous_worker_main(Datum main_arg) ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); - initStringInfo(&(*t_thrd.postgres_cxt.row_description_buf)); seg = (char *)DatumGetPointer(main_arg); if (seg == NULL) ereport(ERROR, @@ -586,9 +588,19 @@ void autonomous_worker_main(Datum main_arg) (void)MemoryContextSwitchTo(t_thrd.mem_cxt.msg_mem_cxt); MemoryContextResetAndDeleteChildren(t_thrd.mem_cxt.msg_mem_cxt); - ProcessCompletedNotifies(); - pgstat_report_stat(false); - pgstat_report_activity(STATE_IDLE, NULL); + if (IsAbortedTransactionBlockState()) { + set_ps_display("idle in transaction (aborted)", false); + pgstat_report_activity(STATE_IDLEINTRANSACTION_ABORTED, NULL); + } else if (IsTransactionOrTransactionBlock()) { + set_ps_display("idle in transaction", false); + pgstat_report_activity(STATE_IDLEINTRANSACTION, NULL); + } else { + ProcessCompletedNotifies(); + pgstat_report_stat(false); + + set_ps_display("idle", false); + pgstat_report_activity(STATE_IDLE, NULL); + } shm_mq_receive_stringinfo(command_qh, &msg); ereport(LOG, (errmsg("bgworker receive msg %s", msg.data))); diff --git a/src/gausskernel/process/threadpool/knl_thread.cpp b/src/gausskernel/process/threadpool/knl_thread.cpp index 1d0ca27a90..f5a026dd71 100755 --- a/src/gausskernel/process/threadpool/knl_thread.cpp +++ b/src/gausskernel/process/threadpool/knl_thread.cpp @@ -1381,6 +1381,12 @@ static void knl_t_heartbeat_init(knl_t_heartbeat_context* heartbeat_cxt) heartbeat_cxt->state = NULL; } +static void knl_t_autonomous_init(knl_t_autonomous_context* autonomous_cxt) +{ + autonomous_cxt->isnested = false; + autonomous_cxt->sqlstmt = NULL; +} + static void knl_t_mot_init(knl_t_mot_context* mot_cxt) { mot_cxt->last_error_code = 0; @@ -1498,6 +1504,7 @@ void knl_thread_init(knl_thread_role role) 
knl_t_heartbeat_init(&t_thrd.heartbeat_cxt); knl_t_poolcleaner_init(&t_thrd.poolcleaner_cxt); knl_t_mot_init(&t_thrd.mot_cxt); + knl_t_autonomous_init(&t_thrd.autonomous_cxt); } void knl_thread_set_name(const char* name) diff --git a/src/include/knl/knl_thread.h b/src/include/knl/knl_thread.h index 21a08dde19..a809352f29 100644 --- a/src/include/knl/knl_thread.h +++ b/src/include/knl/knl_thread.h @@ -2657,6 +2657,13 @@ typedef struct knl_t_heartbeat_context { struct heartbeat_state* state; } knl_t_heartbeat_context; +/* autonomous_transaction */ +struct PLpgSQL_expr; +typedef struct knl_t_autonomous_context { + PLpgSQL_expr* sqlstmt; + bool isnested; +} knl_t_autonomous_context; + /* MOT thread attributes */ #define MOT_MAX_ERROR_MESSAGE 256 #define MOT_MAX_ERROR_FRAMES 32 @@ -2731,6 +2738,7 @@ typedef struct knl_thrd_context { knl_t_arch_context arch; knl_t_async_context asy_cxt; knl_t_audit_context audit; + knl_t_autonomous_context autonomous_cxt; knl_t_autovacuum_context autovacuum_cxt; knl_t_basebackup_context basebackup_cxt; knl_t_bgwriter_context bgwriter_cxt; -- Gitee