From 01b2be179fc2fb5245bb5fff2207da1fd19c3e1a Mon Sep 17 00:00:00 2001 From: Jiachen1018 Date: Thu, 7 Mar 2024 16:34:39 +0800 Subject: [PATCH 1/2] cbo part table share mem alloc --- storage/tianchi/tse_srv_mq_stub.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/tianchi/tse_srv_mq_stub.cc b/storage/tianchi/tse_srv_mq_stub.cc index a9243ff..4c8477a 100644 --- a/storage/tianchi/tse_srv_mq_stub.cc +++ b/storage/tianchi/tse_srv_mq_stub.cc @@ -789,7 +789,7 @@ int tse_get_cbo_stats(tianchi_handler_t *tch, tianchi_cbo_stats_t *stats, uint32 return ERR_ALLOC_MEMORY; } for (uint i = 0; i < num_part_fetch; i++) { - req->stats->tse_cbo_stats_part_table[i].columns = part_columns + i; + req->stats->tse_cbo_stats_part_table[i].columns = part_columns + i * (stats->msg_len / sizeof(tse_cbo_stats_column_t)); } } -- Gitee From 2874738282627e4dd9e1669cee94d162431ed53b Mon Sep 17 00:00:00 2001 From: xicoding Date: Sun, 31 Mar 2024 18:29:35 +0800 Subject: [PATCH 2/2] add normal stats --- storage/tianchi/ha_tse.cc | 229 +++++++++++++++++----- storage/tianchi/ha_tse.h | 3 +- storage/tianchi/ha_tsepart.cc | 302 ++++++++++++++++++++--------- storage/tianchi/tse_cbo.cc | 226 +++++++++++++++++++-- storage/tianchi/tse_cbo.h | 20 +- storage/tianchi/tse_srv.h | 58 ++++++ storage/tianchi/tse_srv_mq_stub.cc | 115 +++++++---- 7 files changed, 753 insertions(+), 200 deletions(-) diff --git a/storage/tianchi/ha_tse.cc b/storage/tianchi/ha_tse.cc index 6819c62..6888432 100644 --- a/storage/tianchi/ha_tse.cc +++ b/storage/tianchi/ha_tse.cc @@ -214,6 +214,10 @@ static MYSQL_SYSVAR_UINT(update_analyze_time, ctc_update_analyze_time, PLUGIN_VA "CBO updating time by CTC.", nullptr, nullptr, CTC_ANALYZE_TIME_SEC, 0, UINT32_MAX, 0); +bool ctc_histgram_enabled = false; +static MYSQL_SYSVAR_BOOL(histgram_enabled, ctc_histgram_enabled, PLUGIN_VAR_RQCMDARG, + "If The Histgram enabled bt CTC.", nullptr, nullptr, true); + // All global and session system variables must be published to mysqld before // use. This is done by constructing a NULL-terminated array of the variables // and linking to it in the plugin public interface. @@ -234,6 +238,7 @@ static SYS_VAR *tse_system_variables[] = { MYSQL_SYSVAR(autoinc_lock_mode), MYSQL_SYSVAR(cluster_role), MYSQL_SYSVAR(update_analyze_time), + MYSQL_SYSVAR(histgram_enabled), nullptr }; @@ -3232,7 +3237,14 @@ EXTER_ATTACK int ha_tse::rnd_pos(uchar *buf, uchar *pos) { void ha_tse::info_low() { if (m_share && m_share->cbo_stats != nullptr) { - stats.records = m_share->cbo_stats->tse_cbo_stats_table.estimate_rows; + if (ctc_histgram_enabled && m_share->cbo_stats->hist_stats) { + stats.records = m_share->cbo_stats->hist_stats->tse_cbo_stats_table.estimate_rows; + return; + } + if (!ctc_histgram_enabled && m_share->cbo_stats->normal_stats){ + stats.records = m_share->cbo_stats->normal_stats->estimate_rows; + return; + } } } @@ -3847,11 +3859,28 @@ enum_alter_inplace_result ha_tse::check_if_supported_inplace_alter( return HA_ALTER_INPLACE_EXCLUSIVE_LOCK; } +void ha_tse::set_tse_normal_range_key(tse_normal_key *tse_key, key_range *mysql_range_key, tse_normal_cmp_type_t default_type) { + if (!mysql_range_key) { + tse_key->cmp_type = CMP_TYPE_UNKNOWN; + tse_key->len = 0; + return; + } + + tse_key->col_map = mysql_range_key->keypart_map; + tse_key->key = (const char *)mysql_range_key->key; + tse_key->len = mysql_range_key->length; + if (mysql_range_key->flag == HA_READ_KEY_EXACT) { + tse_key->cmp_type = CMP_TYPE_EQUAL; + } else { + tse_key->cmp_type = default_type; + } +} + /** @brief Construct tse range key based on mysql range key */ -void ha_tse::set_tse_range_key(tse_key *tse_key, key_range *mysql_range_key, bool is_min_key) { +void ha_tse::set_tse_hist_range_key(tse_key *tse_key, key_range *mysql_range_key, bool is_min_key) { if (!mysql_range_key) { tse_key->cmp_type = CMP_TYPE_NULL; tse_key->len = 0; @@ -3894,32 +3923,45 @@ void ha_tse::set_tse_range_key(tse_key *tse_key, key_range *mysql_range_key, boo ha_rows ha_tse::records_in_range(uint inx, key_range *min_key, key_range *max_key) { DBUG_TRACE; - tse_key tse_min_key; - tse_key tse_max_key; - set_tse_range_key(&tse_min_key, min_key, true); - set_tse_range_key(&tse_max_key, max_key, false); - if (tse_max_key.len < tse_min_key.len) { - tse_max_key.cmp_type = CMP_TYPE_NULL; - } else if (tse_max_key.len > tse_min_key.len) { - tse_min_key.cmp_type = CMP_TYPE_NULL; - } - tse_range_key key = {&tse_min_key, &tse_max_key}; - uint64_t n_rows = 0; double density; - - if (m_share) { - if (!m_share->cbo_stats->is_updated) { - tse_log_debug("table %s has not been analyzed", table->alias); - density = DEFAULT_RANGE_DENSITY; + if (ctc_histgram_enabled) { + tse_key tse_min_key; + tse_key tse_max_key; + set_tse_hist_range_key(&tse_min_key, min_key, true); + set_tse_hist_range_key(&tse_max_key, max_key, false); + if (tse_max_key.len < tse_min_key.len) { + tse_max_key.cmp_type = CMP_TYPE_NULL; + } else if (tse_max_key.len > tse_min_key.len) { + tse_min_key.cmp_type = CMP_TYPE_NULL; + } + tse_range_key key = {&tse_min_key, &tse_max_key}; + if (m_share) { + if (!m_share->cbo_stats->hist_stats->is_updated) { + tse_log_debug("table %s has not been analyzed", table->alias); + density = DEFAULT_RANGE_DENSITY; + } + density = calc_hist_density_one_table(inx, &key, m_share->cbo_stats->hist_stats->tse_cbo_stats_table, *table); + /* + * This is a safe-guard logic since we don't handle tse call error in this method, + * we need this to make sure that our optimizer continue to work even when we + * miscalculated the density, and it's still prefer index read + */ + n_rows += m_share->cbo_stats->hist_stats->tse_cbo_stats_table.estimate_rows * density; } - density = calc_density_one_table(inx, &key, m_share->cbo_stats->tse_cbo_stats_table, *table); + } else { + tse_normal_key tse_min_key; + tse_normal_key tse_max_key; + set_tse_normal_range_key(&tse_min_key, min_key, CMP_TYPE_GREAT); + set_tse_normal_range_key(&tse_max_key, max_key, CMP_TYPE_LESS); + part_info_t part_info = {0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF}; + double density = calc_normal_density_one_table(inx, &tse_min_key, &tse_max_key, part_info, m_share->cbo_stats->normal_stats, *table); /* * This is a safe-guard logic since we don't handle tse call error in this method, * we need this to make sure that our optimizer continue to work even when we * miscalculated the density, and it's still prefer index read */ - n_rows += m_share->cbo_stats->tse_cbo_stats_table.estimate_rows * density; + n_rows += m_share->cbo_stats->normal_stats->estimate_rows * density; } /* @@ -5220,25 +5262,74 @@ int ha_tse::initialize_cbo_stats() if (!m_share || m_share->cbo_stats != nullptr) { return CT_SUCCESS; } - m_share->cbo_stats = (tianchi_cbo_stats_t*)my_malloc(PSI_NOT_INSTRUMENTED, sizeof(tianchi_cbo_stats_t), MYF(MY_WME)); - if (m_share->cbo_stats == nullptr) { - tse_log_error("alloc shm mem failed, m_share->cbo_stats size(%lu)", sizeof(tianchi_cbo_stats_t)); - return ERR_ALLOC_MEMORY; - } - *m_share->cbo_stats = {0, 0, 0, 0, 0, 0, 0, nullptr, nullptr, nullptr}; + if (ctc_histgram_enabled) { + m_share->cbo_stats = (tianchi_cbo_stats_t*)my_malloc(PSI_NOT_INSTRUMENTED, sizeof(tianchi_cbo_stats_t), MYF(MY_WME)); + if (m_share->cbo_stats == nullptr) { + tse_log_error("alloc shm mem failed, m_share->cbo_stats size(%lu)", sizeof(tianchi_cbo_stats_t)); + return ERR_ALLOC_MEMORY; + } + m_share->cbo_stats->hist_stats = (tse_hist_cbo_stats_t*)my_malloc(PSI_NOT_INSTRUMENTED, sizeof(tse_hist_cbo_stats_t), MYF(MY_WME)); + if (m_share->cbo_stats->hist_stats == nullptr) { + tse_log_error("alloc shm mem failed, m_share->cbo_stats->hist_stats size(%lu)", sizeof(tse_hist_cbo_stats_t)); + my_free(m_share->cbo_stats); + return ERR_ALLOC_MEMORY; + } + *m_share->cbo_stats->hist_stats = {0, 0, 0, 0, 0, 0, 0, nullptr, nullptr, nullptr}; + + m_share->cbo_stats->hist_stats->tse_cbo_stats_table.columns = + (tse_cbo_stats_column_t*)my_malloc(PSI_NOT_INSTRUMENTED, table->s->fields * sizeof(tse_cbo_stats_column_t), MYF(MY_WME)); + if (m_share->cbo_stats->hist_stats->tse_cbo_stats_table.columns == nullptr) { + tse_log_error("alloc shm mem failed, m_share->cbo_stats->hist_stats->tse_cbo_stats_table.columns size(%lu)", sizeof(tse_cbo_stats_column_t)); + my_free(m_share->cbo_stats->hist_stats); + my_free(m_share->cbo_stats); + return ERR_ALLOC_MEMORY; + } + THD* thd = ha_thd(); + if (user_var_set(thd, "ctc_show_alloc_cbo_stats_mem")) { + tse_log_system("[alloc memory]normal table : %s alloc size :%lu", table->alias, calculate_size_of_cbo_stats(table)); + } + + m_share->cbo_stats->hist_stats->tse_cbo_stats_table.ndv_keys = + (uint32_t*)my_malloc(PSI_NOT_INSTRUMENTED, table->s->keys * sizeof(uint32_t), MYF(MY_WME)); + if (m_share->cbo_stats->hist_stats->tse_cbo_stats_table.ndv_keys == nullptr) { + my_free(m_share->cbo_stats->hist_stats->tse_cbo_stats_table.columns); + my_free(m_share->cbo_stats->hist_stats); + my_free(m_share->cbo_stats); + return ERR_ALLOC_MEMORY; + } + m_share->cbo_stats->hist_stats->msg_len = table->s->fields * sizeof(tse_cbo_stats_column_t); + m_share->cbo_stats->hist_stats->key_len = table->s->keys * sizeof(uint32_t); + } else { + m_share->cbo_stats = (tianchi_cbo_stats_t*)tse_alloc_buf(&m_tch, sizeof(tianchi_cbo_stats_t)); + if (m_share->cbo_stats == nullptr) { + tse_log_error("alloc shm mem failed, m_share->cbo_stats size(%lu)", sizeof(tianchi_cbo_stats_t)); + return ERR_ALLOC_MEMORY; + } + m_share->cbo_stats->normal_stats = (tse_normal_cbo_stats_t*)tse_alloc_buf(&m_tch, sizeof(tse_normal_cbo_stats_t)); + if (m_share->cbo_stats->normal_stats == nullptr) { + tse_log_error("alloc shm mem failed, m_share->cbo_stats->normal_stats size(%lu)", sizeof(tse_normal_cbo_stats_t)); + return ERR_ALLOC_MEMORY; + } + *m_share->cbo_stats->normal_stats = {0, 0, 0, 0, nullptr, nullptr, nullptr, 0, nullptr, nullptr, nullptr, 0, 0, 0, 0,{}}; + m_share->cbo_stats->normal_stats->tse_cbo_stats_table.num_distincts = + (uint32_t *)tse_alloc_buf(&m_tch, table->s->fields * sizeof(uint32_t)); - m_share->cbo_stats->tse_cbo_stats_table.columns = - (tse_cbo_stats_column_t*)my_malloc(PSI_NOT_INSTRUMENTED, table->s->fields * sizeof(tse_cbo_stats_column_t), MYF(MY_WME)); - THD* thd = ha_thd(); - if (user_var_set(thd, "ctc_show_alloc_cbo_stats_mem")) { - tse_log_system("[alloc memory]normal table : %s alloc size :%lu", table->alias, calculate_size_of_cbo_stats(table)); + m_share->cbo_stats->normal_stats->tse_cbo_stats_table.low_values = + (cache_n_variant_t *)tse_alloc_buf(&m_tch, table->s->fields * sizeof(cache_n_variant_t)); + + m_share->cbo_stats->normal_stats->tse_cbo_stats_table.high_values = + (cache_n_variant_t *)tse_alloc_buf(&m_tch, table->s->fields * sizeof(cache_n_variant_t)); + + if (m_share->cbo_stats->normal_stats->tse_cbo_stats_table.num_distincts == nullptr + || m_share->cbo_stats->normal_stats->tse_cbo_stats_table.low_values == nullptr + || m_share->cbo_stats->normal_stats->tse_cbo_stats_table.high_values == nullptr) { + tse_log_error("alloc shm mem error, size(%lu)", + table->s->fields * sizeof(uint32_t) + 2 * table->s->fields * sizeof(cache_n_variant_t)); + free_cbo_stats(); + return ERR_ALLOC_MEMORY; + } } - - m_share->cbo_stats->tse_cbo_stats_table.ndv_keys = - (uint32_t*)my_malloc(PSI_NOT_INSTRUMENTED, table->s->keys * sizeof(uint32_t), MYF(MY_WME)); - - m_share->cbo_stats->msg_len = table->s->fields * sizeof(tse_cbo_stats_column_t); - m_share->cbo_stats->key_len = table->s->keys * sizeof(uint32_t); + return CT_SUCCESS; } @@ -5260,11 +5351,23 @@ int ha_tse::get_cbo_stats_4share() } } update_member_tch(m_tch, tse_hton, thd); - ret = tse_get_cbo_stats(&m_tch, m_share->cbo_stats, 0, 0); + if (ctc_histgram_enabled) { + ret = tse_get_cbo_stats(&m_tch, m_share->cbo_stats, 0, 0); + } else { + ret = tse_get_normal_cbo_stats(&m_tch, m_share->cbo_stats); + } update_sess_ctx_by_tch(m_tch, tse_hton, thd); - if (ret == CT_SUCCESS && m_share->cbo_stats->is_updated) { - m_share->need_fetch_cbo = false; - tse_index_stats_update(table, m_share->cbo_stats); + if (ret == CT_SUCCESS) { + if (ctc_histgram_enabled) { + if (m_share->cbo_stats->hist_stats->is_updated) { + m_share->need_fetch_cbo = false; + tse_index_stats_update(table, m_share->cbo_stats->hist_stats); + } + } else { + if (m_share->cbo_stats->normal_stats->is_updated) { + m_share->need_fetch_cbo = false; + } + } } m_share->get_cbo_time = now; } @@ -5277,16 +5380,40 @@ void ha_tse::free_cbo_stats() if (!m_share || m_share->cbo_stats == nullptr) { return; } - THD* thd = ha_thd(); - if (user_var_set(thd, "ctc_show_alloc_cbo_stats_mem")) { - tse_log_system("[free memory]normal table : %s alloc size :%lu", table->alias, calculate_size_of_cbo_stats(table)); - } - my_free((m_share->cbo_stats->tse_cbo_stats_table.ndv_keys)); - m_share->cbo_stats->tse_cbo_stats_table.ndv_keys = nullptr; - my_free((m_share->cbo_stats->tse_cbo_stats_table.columns)); - m_share->cbo_stats->tse_cbo_stats_table.columns = nullptr; - my_free((uchar *)(m_share->cbo_stats)); - m_share->cbo_stats = nullptr; + if (ctc_histgram_enabled) { + THD* thd = ha_thd(); + if (user_var_set(thd, "ctc_show_alloc_cbo_stats_mem")) { + tse_log_system("[free memory]normal table : %s alloc size :%lu", table->alias, calculate_size_of_cbo_stats(table)); + } + my_free((m_share->cbo_stats->hist_stats->tse_cbo_stats_table.ndv_keys)); + m_share->cbo_stats->hist_stats->tse_cbo_stats_table.ndv_keys = nullptr; + my_free((m_share->cbo_stats->hist_stats->tse_cbo_stats_table.columns)); + m_share->cbo_stats->hist_stats->tse_cbo_stats_table.columns = nullptr; + my_free((uchar *)(m_share->cbo_stats->hist_stats)); + m_share->cbo_stats->hist_stats = nullptr; + my_free((uchar *)(m_share->cbo_stats)); + m_share->cbo_stats = nullptr; + } else { + if (m_share->cbo_stats->normal_stats->tse_cbo_stats_table.num_distincts != nullptr) { + tse_free_buf(&m_tch, (uchar *) m_share->cbo_stats->normal_stats->tse_cbo_stats_table.num_distincts); + m_share->cbo_stats->normal_stats->tse_cbo_stats_table.num_distincts = nullptr; + } + + if (m_share->cbo_stats->normal_stats->tse_cbo_stats_table.low_values != nullptr) { + tse_free_buf(&m_tch, (uchar *) m_share->cbo_stats->normal_stats->tse_cbo_stats_table.low_values); + m_share->cbo_stats->normal_stats->tse_cbo_stats_table.low_values = nullptr; + } + + if (m_share->cbo_stats->normal_stats->tse_cbo_stats_table.high_values != nullptr) { + tse_free_buf(&m_tch, (uchar *) m_share->cbo_stats->normal_stats->tse_cbo_stats_table.high_values); + m_share->cbo_stats->normal_stats->tse_cbo_stats_table.high_values = nullptr; + } + + tse_free_buf(&m_tch, (uchar *) m_share->cbo_stats->normal_stats); + m_share->cbo_stats->normal_stats = nullptr; + tse_free_buf(&m_tch, (uchar *) m_share->cbo_stats); + m_share->cbo_stats = nullptr; + } } diff --git a/storage/tianchi/ha_tse.h b/storage/tianchi/ha_tse.h index eb4aa72..b0a80c4 100644 --- a/storage/tianchi/ha_tse.h +++ b/storage/tianchi/ha_tse.h @@ -683,8 +683,9 @@ public: int records(ha_rows *num_rows) override; int records_from_index(ha_rows *num_rows, uint inx) override; - void set_tse_range_key(tse_key *tse_key, key_range *mysql_range_key, bool is_min_key); + void set_tse_hist_range_key(tse_key *tse_key, key_range *mysql_range_key, bool is_min_key); + void set_tse_normal_range_key(tse_normal_key *tse_key, key_range *mysql_range_key, tse_normal_cmp_type_t default_type); /** @brief Used to delete a table. By the time delete_table() has been called all diff --git a/storage/tianchi/ha_tsepart.cc b/storage/tianchi/ha_tsepart.cc index 0a93cb3..399dd76 100644 --- a/storage/tianchi/ha_tsepart.cc +++ b/storage/tianchi/ha_tsepart.cc @@ -58,6 +58,7 @@ extern handlerton *get_tse_hton(); extern uint32_t ctc_update_analyze_time; +extern bool ctc_histgram_enabled; constexpr uint64 INVALID_VALUE64 = 0xFFFFFFFFFFFFFFFFULL; constexpr int max_prefetch_num = MAX_PREFETCH_REC_NUM; @@ -796,62 +797,103 @@ enum row_type ha_tsepart::get_partition_row_type(const dd::Table *partition_tabl void ha_tsepart::info_low() { stats.records = 0; - if (m_part_share->cbo_stats != nullptr) { + if (m_part_share->cbo_stats && m_part_share->cbo_stats->hist_stats != nullptr) { + uint part_num = m_is_sub_partitioned ? table->part_info->num_parts * table->part_info->num_subparts : + table->part_info->num_parts; + for (uint part_id = m_part_info->get_first_used_partition(); part_id < part_num; + part_id = m_part_info->get_next_used_partition(part_id)) { + stats.records += m_part_share->cbo_stats->hist_stats->tse_cbo_stats_part_table[part_id].estimate_rows; + } + } + if (m_part_share->cbo_stats && m_part_share->cbo_stats->normal_stats != nullptr) { uint part_num = m_is_sub_partitioned ? table->part_info->num_parts * table->part_info->num_subparts : table->part_info->num_parts; for (uint part_id = m_part_info->get_first_used_partition(); part_id < part_num; part_id = m_part_info->get_next_used_partition(part_id)) { - stats.records += m_part_share->cbo_stats->tse_cbo_stats_part_table[part_id].estimate_rows; + stats.records += m_part_share->cbo_stats->normal_stats->estimate_part_rows_and_blocks[part_id]; } } } ha_rows ha_tsepart::records_in_range(uint inx, key_range *min_key, key_range *max_key) { - double density; - if (m_part_share && !m_part_share->cbo_stats->is_updated) { - tse_log_debug("table %s has not been analyzed", table->alias); - return 1; - } - - - tse_key tse_min_key; - tse_key tse_max_key; - set_tse_range_key(&tse_min_key, min_key, true); - set_tse_range_key(&tse_max_key, max_key, false); - tse_range_key key = {&tse_min_key, &tse_max_key}; - if (tse_max_key.len < tse_min_key.len) { - tse_max_key.cmp_type = CMP_TYPE_NULL; - } else if (tse_max_key.len > tse_min_key.len) { - tse_min_key.cmp_type = CMP_TYPE_NULL; - } - - uint64_t n_rows_num = 0; - uint part_num = m_is_sub_partitioned ? table->part_info->num_parts * table->part_info->num_subparts : - table->part_info->num_parts; - -for (uint part_id = m_part_info->get_first_used_partition(); part_id < part_num; - part_id = m_part_info->get_next_used_partition(part_id)) { + if (ctc_histgram_enabled) { + double density; + if (m_part_share && !m_part_share->cbo_stats->hist_stats->is_updated) { + tse_log_debug("table %s has not been analyzed", table->alias); + return 1; + } - set_tse_range_key(&tse_min_key, min_key, true); - set_tse_range_key(&tse_max_key, max_key, false); + tse_key tse_min_key; + tse_key tse_max_key; + set_tse_hist_range_key(&tse_min_key, min_key, true); + set_tse_hist_range_key(&tse_max_key, max_key, false); + tse_range_key key = {&tse_min_key, &tse_max_key}; if (tse_max_key.len < tse_min_key.len) { tse_max_key.cmp_type = CMP_TYPE_NULL; } else if (tse_max_key.len > tse_min_key.len) { tse_min_key.cmp_type = CMP_TYPE_NULL; } - density = calc_density_one_table(inx, &key, m_part_share->cbo_stats->tse_cbo_stats_part_table[part_id], *table); - n_rows_num += m_part_share->cbo_stats->tse_cbo_stats_part_table[part_id].estimate_rows * density; - } - /* - * The MySQL optimizer seems to believe an estimate of 0 rows is - * always accurate and may return the result 'Empty set' based on that - */ - if (n_rows_num == 0) { - n_rows_num = 1; + uint64_t n_rows_num = 0; + uint part_num = m_is_sub_partitioned ? table->part_info->num_parts * table->part_info->num_subparts : + table->part_info->num_parts; + + for (uint part_id = m_part_info->get_first_used_partition(); part_id < part_num; + part_id = m_part_info->get_next_used_partition(part_id)) { + set_tse_hist_range_key(&tse_min_key, min_key, true); + set_tse_hist_range_key(&tse_max_key, max_key, false); + if (tse_max_key.len < tse_min_key.len) { + tse_max_key.cmp_type = CMP_TYPE_NULL; + } else if (tse_max_key.len > tse_min_key.len) { + tse_min_key.cmp_type = CMP_TYPE_NULL; + } + density = calc_hist_density_one_table(inx, &key, m_part_share->cbo_stats->hist_stats->tse_cbo_stats_part_table[part_id], *table); + n_rows_num += m_part_share->cbo_stats->hist_stats->tse_cbo_stats_part_table[part_id].estimate_rows * density; + } + + /* + * The MySQL optimizer seems to believe an estimate of 0 rows is + * always accurate and may return the result 'Empty set' based on that + */ + if (n_rows_num == 0) { + n_rows_num = 1; + } + return (ha_rows)n_rows_num; + } else { + tse_normal_key tse_min_key; + tse_normal_key tse_max_key; + set_tse_normal_range_key(&tse_min_key, min_key, CMP_TYPE_GREAT); + set_tse_normal_range_key(&tse_max_key, max_key, CMP_TYPE_LESS); + + uint32_t used_parts; + uint32_t *part_ids = NULL; + get_used_partitions(m_part_info, &part_ids, &used_parts); + uint64_t n_rows_num = 0; + uint32_t part_num = table->part_info->num_parts; + uint32_t subpart_num = table->part_info->num_subparts; + + for (uint i = 0; i < used_parts; i++) { + uint32_t part_id = m_is_sub_partitioned ? part_ids[i] / m_part_info->num_subparts : + part_ids[i]; + uint32_t subpart_id = m_is_sub_partitioned ? part_ids[i] % m_part_info->num_subparts : + INVALID_PART_ID; + part_info_t part_info = {part_id, subpart_id, part_num, subpart_num}; + double density = calc_normal_density_one_table(inx, &tse_min_key, &tse_max_key, part_info, m_part_share->cbo_stats->normal_stats, *table); + n_rows_num += m_part_share->cbo_stats->normal_stats->estimate_part_rows_and_blocks[part_id] * density; + } + + /* + * The MySQL optimizer seems to believe an estimate of 0 rows is + * always accurate and may return the result 'Empty set' based on that + */ + if (n_rows_num == 0) { + n_rows_num = 1; + } + my_free(part_ids); + return (ha_rows)n_rows_num; } - return (ha_rows)n_rows_num; + } int ha_tsepart::records(ha_rows *num_rows) /*!< out: number of rows */ @@ -965,7 +1007,8 @@ int ha_tsepart::initialize_cbo_stats() { if (m_part_share->cbo_stats != nullptr) { return CT_SUCCESS; } - uint32_t part_num = m_is_sub_partitioned ? table->part_info->num_parts * table->part_info->num_subparts : + if (ctc_histgram_enabled) { + uint32_t part_num = m_is_sub_partitioned ? table->part_info->num_parts * table->part_info->num_subparts : table->part_info->num_parts; m_part_share->cbo_stats = (tianchi_cbo_stats_t*)my_malloc(PSI_NOT_INSTRUMENTED, sizeof(tianchi_cbo_stats_t), MYF(MY_WME)); @@ -973,26 +1016,72 @@ int ha_tsepart::initialize_cbo_stats() { tse_log_error("alloc shm mem failed, m_part_share->cbo_stats size(%lu)", sizeof(tianchi_cbo_stats_t)); return ERR_ALLOC_MEMORY; } - *m_part_share->cbo_stats = {0, 0, 0, 0, 0, 0, 0, nullptr, nullptr, nullptr}; + m_part_share->cbo_stats->hist_stats = (tse_hist_cbo_stats_t*)my_malloc(PSI_NOT_INSTRUMENTED, sizeof(tse_hist_cbo_stats_t), MYF(MY_WME)); + if (m_part_share->cbo_stats->hist_stats == nullptr) { + tse_log_error("alloc shm mem failed, m_part_share->cbo_stats size(%lu)", sizeof(tse_hist_cbo_stats_t)); + my_free(m_part_share->cbo_stats); + return ERR_ALLOC_MEMORY; + } + *m_part_share->cbo_stats->hist_stats = {0, 0, 0, 0, 0, 0, 0, nullptr, nullptr, nullptr}; + + m_part_share->cbo_stats->hist_stats->part_cnt = part_num; + + m_part_share->cbo_stats->hist_stats->tse_cbo_stats_part_table = + (tse_cbo_stats_table_t* )my_malloc(PSI_NOT_INSTRUMENTED, part_num * sizeof(tse_cbo_stats_table_t), MYF(MY_WME)); + for (uint i = 0; i < part_num; i++) { + m_part_share->cbo_stats->hist_stats->tse_cbo_stats_part_table[i].columns = + (tse_cbo_stats_column_t* )my_malloc(PSI_NOT_INSTRUMENTED, table->s->fields * sizeof(tse_cbo_stats_column_t), MYF(MY_WME)); + m_part_share->cbo_stats->hist_stats->tse_cbo_stats_part_table[i].ndv_keys = + (uint32_t*)my_malloc(PSI_NOT_INSTRUMENTED, table->s->keys * sizeof(uint32_t), MYF(MY_WME)); + if (m_part_share->cbo_stats->hist_stats->tse_cbo_stats_part_table[i].columns == nullptr || m_part_share->cbo_stats->hist_stats->tse_cbo_stats_part_table[i].ndv_keys == nullptr) { + assert(0); + } + } + m_part_share->cbo_stats->hist_stats->msg_len = table->s->fields * sizeof(tse_cbo_stats_column_t); + m_part_share->cbo_stats->hist_stats->key_len = table->s->keys * sizeof(uint32_t); - m_part_share->cbo_stats->part_cnt = part_num; + THD* thd = ha_thd(); + if (user_var_set(thd, "ctc_show_alloc_cbo_stats_mem")) { + tse_log_system("[alloc memory]part table first_partid: %s alloc size :%lu", table->alias, calculate_size_of_cbo_part_stats(table,part_num)); + } + } else { + uint32_t part_num = m_is_sub_partitioned ? table->part_info->num_parts * table->part_info->num_subparts : + table->part_info->num_parts; + uint32_t part_field = table->s->fields; + + /* estimate_part_rows_and_blocks数组包括part_rows和part_blocks,数组长度为2 * part_num */ + uint32_t rows_and_blocks_num = 2 * part_num; + m_part_share->cbo_stats = (tianchi_cbo_stats_t*)tse_alloc_buf(&m_tch, sizeof(tianchi_cbo_stats_t) + + rows_and_blocks_num * sizeof(uint32_t)); + if (m_part_share->cbo_stats == nullptr) { + tse_log_error("alloc shm mem failed, m_part_share->cbo_stats size(%lu)", sizeof(tianchi_cbo_stats_t)); + return ERR_ALLOC_MEMORY; + } + *m_part_share->cbo_stats->normal_stats = {0, 0, 0, 0, nullptr, nullptr, nullptr, 0, nullptr, nullptr, nullptr, 0, 0, 0, 0, {}}; - m_part_share->cbo_stats->tse_cbo_stats_part_table = - (tse_cbo_stats_table_t*)my_malloc(PSI_NOT_INSTRUMENTED, part_num * sizeof(tse_cbo_stats_table_t), MYF(MY_WME)); - - for (uint i = 0; i < part_num; i++) { - m_part_share->cbo_stats->tse_cbo_stats_part_table[i].columns = - (tse_cbo_stats_column_t*)my_malloc(PSI_NOT_INSTRUMENTED, table->s->fields * sizeof(tse_cbo_stats_column_t), MYF(MY_WME)); - m_part_share->cbo_stats->tse_cbo_stats_part_table[i].ndv_keys = - (uint32_t*)my_malloc(PSI_NOT_INSTRUMENTED, table->s->keys * sizeof(uint32_t), MYF(MY_WME)); - } - m_part_share->cbo_stats->msg_len = table->s->fields * sizeof(tse_cbo_stats_column_t); - m_part_share->cbo_stats->key_len = table->s->keys * sizeof(uint32_t); + m_part_share->cbo_stats->normal_stats->tse_cbo_stats_table.part_table_num_distincts = + (uint32_t *)tse_alloc_buf(&m_tch, part_num * part_field * sizeof(uint32_t)); - THD* thd = ha_thd(); - if (user_var_set(thd, "ctc_show_alloc_cbo_stats_mem")) { - tse_log_system("[alloc memory]part table first_partid: %s alloc size :%lu", table->alias, calculate_size_of_cbo_part_stats(table,part_num)); + m_part_share->cbo_stats->normal_stats->tse_cbo_stats_table.part_table_low_values = + (cache_n_variant_t *)tse_alloc_buf(&m_tch, part_num * part_field * sizeof(cache_n_variant_t)); + + m_part_share->cbo_stats->normal_stats->tse_cbo_stats_table.part_table_high_values = + (cache_n_variant_t *)tse_alloc_buf(&m_tch, part_num * part_field * sizeof(cache_n_variant_t)); + + if (m_part_share->cbo_stats->normal_stats->tse_cbo_stats_table.part_table_num_distincts == nullptr || + m_part_share->cbo_stats->normal_stats->tse_cbo_stats_table.part_table_low_values == nullptr || + m_part_share->cbo_stats->normal_stats->tse_cbo_stats_table.part_table_high_values == nullptr) { + tse_log_error("alloc mem failed, size(%lu)", part_num * part_field * sizeof(uint32_t) + + 2 * part_num * part_field * sizeof(cache_n_variant_t)); + free_cbo_stats(); + return ERR_ALLOC_MEMORY; + } + m_part_share->cbo_stats->normal_stats->part_table_info.rows_and_blocks_size = rows_and_blocks_num * sizeof(uint32_t); + m_part_share->cbo_stats->normal_stats->part_table_info.num_distinct_size = part_num * part_field * sizeof(uint32_t); + m_part_share->cbo_stats->normal_stats->part_table_info.low_value_size = part_num * part_field * sizeof(cache_n_variant_t); + m_part_share->cbo_stats->normal_stats->part_table_info.high_value_size = part_num * part_field * sizeof(cache_n_variant_t); } + return CT_SUCCESS; } @@ -1013,31 +1102,41 @@ int ha_tsepart::get_cbo_stats_4share() return ret; } } - update_member_tch(m_tch, get_tse_hton(), thd); - - uint32_t data_size = m_part_share->cbo_stats->msg_len; - uint32_t part_cnt = m_part_share->cbo_stats->part_cnt; - uint32_t num_part_fetch = MAX_MESSAGE_SIZE / data_size; - uint32_t fetch_times = part_cnt / num_part_fetch; - uint32_t first_partid = 0; + if (ctc_histgram_enabled) { + update_member_tch(m_tch, get_tse_hton(), thd); + uint32_t data_size = m_part_share->cbo_stats->hist_stats->msg_len; + uint32_t part_cnt = m_part_share->cbo_stats->hist_stats->part_cnt; + uint32_t num_part_fetch = MAX_MESSAGE_SIZE / data_size; + uint32_t fetch_times = part_cnt / num_part_fetch; + uint32_t first_partid = 0; + + + for (uint32_t i = 0; icbo_stats, first_partid, num_part_fetch); + if (ret != CT_SUCCESS) { + return ret; + } + first_partid += num_part_fetch; + } - for (uint32_t i = 0; icbo_stats, first_partid, num_part_fetch); - if (ret != CT_SUCCESS) { - return ret; + update_sess_ctx_by_tch(m_tch, get_tse_hton(), thd); + if (ret == CT_SUCCESS && m_part_share->cbo_stats->hist_stats->is_updated) { + m_part_share->need_fetch_cbo = false; + tse_index_stats_update(table, m_part_share->cbo_stats->hist_stats); } - first_partid += num_part_fetch; - } - - num_part_fetch = part_cnt - first_partid; - ret = tse_get_cbo_stats(&m_tch, m_part_share->cbo_stats, first_partid, num_part_fetch); - update_sess_ctx_by_tch(m_tch, get_tse_hton(), thd); - if (ret == CT_SUCCESS && m_part_share->cbo_stats->is_updated) { - m_part_share->need_fetch_cbo = false; - tse_index_stats_update(table, m_part_share->cbo_stats); + m_part_share->get_cbo_time = now; + } else { + update_member_tch(m_tch, get_tse_hton(), thd); + ret = tse_get_normal_cbo_stats(&m_tch, m_part_share->cbo_stats); + update_sess_ctx_by_tch(m_tch, get_tse_hton(), thd); + if (ret == CT_SUCCESS && m_part_share->cbo_stats->normal_stats->is_updated) { + m_part_share->need_fetch_cbo = false; + } + m_part_share->get_cbo_time = now; } - m_part_share->get_cbo_time = now; } return ret; @@ -1047,22 +1146,45 @@ void ha_tsepart::free_cbo_stats() { if (m_part_share->cbo_stats == nullptr) { return; } - uint32_t part_num = m_is_sub_partitioned ? table->part_info->num_parts * table->part_info->num_subparts : - table->part_info->num_parts; - THD* thd = ha_thd(); - if(user_var_set(thd, "ctc_show_alloc_cbo_stats_mem")){ - tse_log_system("[free memory]normal table : %s alloc size :%lu", table->alias, calculate_size_of_cbo_part_stats(table,part_num)); + if (ctc_histgram_enabled) { + uint32_t part_num = m_is_sub_partitioned ? table->part_info->num_parts * table->part_info->num_subparts : + table->part_info->num_parts; + THD* thd = ha_thd(); + if(user_var_set(thd, "ctc_show_alloc_cbo_stats_mem")){ + tse_log_system("[free memory]normal table : %s alloc size :%lu", table->alias, calculate_size_of_cbo_part_stats(table,part_num)); + } + for (uint i = 0; i < part_num; i++) { + my_free(m_part_share->cbo_stats->hist_stats->tse_cbo_stats_part_table[i].columns); + m_part_share->cbo_stats->hist_stats->tse_cbo_stats_part_table[i].columns = nullptr; + my_free(m_part_share->cbo_stats->hist_stats->tse_cbo_stats_part_table[i].ndv_keys); + m_part_share->cbo_stats->hist_stats->tse_cbo_stats_part_table[i].ndv_keys = nullptr; + } + my_free(m_part_share->cbo_stats->hist_stats->tse_cbo_stats_part_table); + m_part_share->cbo_stats->hist_stats->tse_cbo_stats_part_table = nullptr; + my_free(m_part_share->cbo_stats->hist_stats); + m_part_share->cbo_stats->hist_stats = nullptr; + my_free(m_part_share->cbo_stats); + m_part_share->cbo_stats = nullptr; + } else { + // 释放m_part_share->cbo_stats指向的共享内存 + if (m_part_share->cbo_stats->normal_stats->tse_cbo_stats_table.part_table_num_distincts != nullptr) { + tse_free_buf(&m_tch, (uchar *) m_part_share->cbo_stats->normal_stats->tse_cbo_stats_table.part_table_num_distincts); + m_part_share->cbo_stats->normal_stats->tse_cbo_stats_table.part_table_num_distincts = nullptr; + } + + if (m_part_share->cbo_stats->normal_stats->tse_cbo_stats_table.part_table_low_values != nullptr) { + tse_free_buf(&m_tch, (uchar *) m_part_share->cbo_stats->normal_stats->tse_cbo_stats_table.part_table_low_values); + m_part_share->cbo_stats->normal_stats->tse_cbo_stats_table.part_table_low_values = nullptr; + } + + if (m_part_share->cbo_stats->normal_stats->tse_cbo_stats_table.part_table_high_values != nullptr) { + tse_free_buf(&m_tch, (uchar *) m_part_share->cbo_stats->normal_stats->tse_cbo_stats_table.part_table_high_values); + m_part_share->cbo_stats->normal_stats->tse_cbo_stats_table.part_table_high_values = nullptr; + } + tse_free_buf(&m_tch, (uchar *) m_part_share->cbo_stats->normal_stats); + tse_free_buf(&m_tch, (uchar *) m_part_share->cbo_stats); } - for (uint i = 0; i < part_num; i++) { - my_free(m_part_share->cbo_stats->tse_cbo_stats_part_table[i].columns); - m_part_share->cbo_stats->tse_cbo_stats_part_table[i].columns = nullptr; - my_free(m_part_share->cbo_stats->tse_cbo_stats_part_table[i].ndv_keys); - m_part_share->cbo_stats->tse_cbo_stats_part_table[i].ndv_keys = nullptr; - } - my_free(m_part_share->cbo_stats->tse_cbo_stats_part_table); - m_part_share->cbo_stats->tse_cbo_stats_part_table = nullptr; - my_free(m_part_share->cbo_stats); - m_part_share->cbo_stats = nullptr; + } diff --git a/storage/tianchi/tse_cbo.cc b/storage/tianchi/tse_cbo.cc index 205ffaa..be4df31 100644 --- a/storage/tianchi/tse_cbo.cc +++ b/storage/tianchi/tse_cbo.cc @@ -19,44 +19,77 @@ #include "tse_log.h" #include "sql/field.h" #include "tse_srv_mq_module.h" +extern bool ctc_histgram_enabled; void r_key2variant(tse_key *rKey, KEY_PART_INFO *cur_index_part, cache_variant_t *ret_val, cache_variant_t * value, uint32_t key_offset) { - if (rKey->cmp_type == CMP_TYPE_NULL) { + if (rKey->cmp_type == CMP_TYPE_NULL) { *ret_val = *value; rKey->cmp_type = CMP_TYPE_CLOSE_INTERNAL; return; - } + } + + enum_field_types field_type = cur_index_part->field->real_type(); + uint32_t offset = 0; + if (cur_index_part->field->is_nullable()) { + /* The first byte in the field tells if this is an SQL NULL value */ + if(*(rKey->key + key_offset) == 1) { + *ret_val = *value; + rKey->cmp_type = CMP_TYPE_CLOSE_INTERNAL; + return; + } + offset = 1; + } + const uchar *key = rKey->key + key_offset + offset; + switch(field_type) { + case MYSQL_TYPE_TINY: + case MYSQL_TYPE_SHORT: + case MYSQL_TYPE_LONG: + ret_val->v_int = *(int32_t *)const_cast(key); + break; + case MYSQL_TYPE_FLOAT: + ret_val->v_real = *(float *)const_cast(key); + break; + case MYSQL_TYPE_DOUBLE: + ret_val->v_real = *(double *)const_cast(key); + break; + case MYSQL_TYPE_LONGLONG: + ret_val->v_bigint = *(int64_t *)const_cast(key); + break; + default: + break; + } +} + +void r_n_key2variant(tse_normal_key *rKey, KEY_PART_INFO *cur_index_part, cache_n_st_variant *ret_val) +{ enum_field_types field_type = cur_index_part->field->real_type(); - uint32_t offset = 0; + uint16_t offset = 0; if (cur_index_part->field->is_nullable()) { - /* The first byte in the field tells if this is an SQL NULL value */ - if(*(rKey->key + key_offset) == 1) { - *ret_val = *value; - rKey->cmp_type = CMP_TYPE_CLOSE_INTERNAL; - return; - } offset = 1; } - const uchar *key = rKey->key + key_offset + offset; + + ret_val->is_null = 0; + ret_val->type = field_type; switch(field_type) { case MYSQL_TYPE_TINY: case MYSQL_TYPE_SHORT: case MYSQL_TYPE_LONG: - ret_val->v_int = *(int32_t *)const_cast(key); - break; + ret_val->v_int = *(int32_t *)const_cast(rKey->key + offset); + break; case MYSQL_TYPE_FLOAT: - ret_val->v_real = *(float *)const_cast(key); + ret_val->v_real = *(float *)const_cast(rKey->key + offset); break; case MYSQL_TYPE_DOUBLE: - ret_val->v_real = *(double *)const_cast(key); + ret_val->v_real = *(double *)const_cast(rKey->key + offset); break; case MYSQL_TYPE_LONGLONG: - ret_val->v_bigint = *(int64_t *)const_cast(key); + ret_val->v_bigint = *(int64_t *)const_cast(rKey->key + offset); break; default: + ret_val->is_null = 1; break; } } @@ -107,6 +140,165 @@ double eval_density_result(double density) return density; } +int calc_normal_density_low(KEY_PART_INFO *cur_index_part, cache_n_variant_t *high_val, cache_n_variant_t *low_val, + cache_n_variant_t *left_val, cache_n_variant_t *right_val, double *result) +{ + double density = DEFAULT_RANGE_DENSITY; + if (low_val->is_null == 1 || right_val->is_null == 1) { + return ERR_SQL_SYNTAX_ERROR; + } + + enum_field_types field_type = cur_index_part->field->real_type(); + + double numerator, denominator; + switch(field_type) { + case MYSQL_TYPE_TINY: + case MYSQL_TYPE_SHORT: + case MYSQL_TYPE_LONG: + numerator = (int64_t)left_val->v_int - (int64_t)right_val->v_int; + denominator = (int64_t)high_val->v_int - (int64_t)low_val->v_int; + density = (double)numerator/(double)denominator; + break; + case MYSQL_TYPE_FLOAT: + case MYSQL_TYPE_DOUBLE: + numerator = left_val->v_real - right_val->v_real; + denominator = high_val->v_real - low_val->v_real; + density = (double)numerator/(double)denominator; + break; + case MYSQL_TYPE_LONGLONG: + numerator = (int64_t)left_val->v_bigint - (int64_t)right_val->v_bigint; + denominator = (int64_t)high_val->v_bigint - (int64_t)low_val->v_bigint; + density = (double)numerator/(double)denominator; + break; + default: + break; + } + + *result = density; + + return CT_SUCCESS; +} + +double calc_normal_density_by_cond(tse_normal_cbo_stats_t *cbo_stats, KEY *cur_index, + tse_normal_key *tse_min_key, tse_normal_key *tse_max_key, part_info_t part_info) { + double default_density = DEFAULT_RANGE_DENSITY; + KEY_PART_INFO cur_index_part = cur_index->key_part[0]; + uint32 col_id = cur_index_part.field->field_index(); + uint32_t part_id = part_info.part_id; + uint32_t subpart_id = part_info.subpart_id; + uint32_t total_part_count = IS_TSE_PART(subpart_id) ? part_info.part_num * part_info.subpart_num : part_info.part_num; + + if (!IS_TSE_PART(part_id) && cbo_stats->tse_cbo_stats_table.num_distincts[col_id] == 0) { + return DEFAULT_RANGE_DENSITY; + } + + uint32 index_no = (IS_TSE_PART(part_id) & IS_TSE_PART(subpart_id)) ? + (total_part_count * col_id) + (part_id * part_info.subpart_num + subpart_id) : + total_part_count * col_id + part_id; + + if (IS_TSE_PART(part_id) && cbo_stats->tse_cbo_stats_table.part_table_num_distincts[index_no] == 0) { + return DEFAULT_RANGE_DENSITY; + } + + cache_n_variant_t *low_val, *high_val; + double result; + + if (IS_TSE_PART(part_id)) { + low_val = &(cbo_stats->tse_cbo_stats_table.part_table_low_values[index_no]); + high_val = &(cbo_stats->tse_cbo_stats_table.part_table_high_values[index_no]); + } else { + low_val = cbo_stats->tse_cbo_stats_table.low_values + col_id; + high_val = cbo_stats->tse_cbo_stats_table.high_values + col_id; + } + + if (tse_min_key->cmp_type == CMP_TYPE_GREAT && tse_max_key->cmp_type == CMP_TYPE_LESS) { + cache_n_variant_t min_key_val, max_key_val; + r_n_key2variant(tse_min_key, &cur_index_part, &min_key_val); + r_n_key2variant(tse_max_key, &cur_index_part, &max_key_val); + + if (calc_normal_density_low(&cur_index_part, high_val, low_val, &max_key_val, &min_key_val, &result) != CT_SUCCESS) { + return default_density; + } + return eval_density_result(result); + } + + if (tse_min_key->cmp_type == CMP_TYPE_GREAT) { + cache_n_variant_t min_key_val; + r_n_key2variant(tse_min_key, &cur_index_part, &min_key_val); + + if (calc_normal_density_low(&cur_index_part, high_val, low_val, high_val, &min_key_val, &result) != CT_SUCCESS) { + return default_density; + } + return eval_density_result(result); + } + + if (tse_max_key->cmp_type == CMP_TYPE_LESS) { + cache_n_variant_t max_key_val; + r_n_key2variant(tse_max_key, &cur_index_part, &max_key_val); + + if (calc_normal_density_low(&cur_index_part, high_val, low_val, &max_key_val, low_val, &result) != CT_SUCCESS) { + return default_density; + } + return eval_density_result(result); + } + + return default_density; +} + +double calc_normal_density_one_table(uint16_t idx_id, tse_normal_key *min_key, tse_normal_key *max_key, + part_info_t part_info, tse_normal_cbo_stats_t *cbo_stats, const TABLE &table) +{ + double density = DEFAULT_RANGE_DENSITY; + if (!cbo_stats->is_updated) { + tse_log_debug("table %s has not been analyzed", table.alias); + return density; + } + uint32_t part_id = part_info.part_id; + uint32_t subpart_id = part_info.subpart_id; + uint32_t total_part_count = IS_TSE_PART(subpart_id) ? part_info.part_num * part_info.subpart_num : part_info.part_num; + + uint32 col_id; + if (min_key->cmp_type == CMP_TYPE_EQUAL) { + double col_product = 1.0; + uint64_t col_map = min_key->col_map; + KEY cur_index = table.key_info[idx_id]; + /* + * For all columns in used index, + * density = 1.0 / (column[0]->num_distinct * ... * column[n]->num_distinct) + */ + for (uint32_t idx_col_num = 0; idx_col_num < cur_index.actual_key_parts; idx_col_num++) { + if (col_map & ((uint64_t)1 << idx_col_num)) { + KEY_PART_INFO cur_index_part = cur_index.key_part[idx_col_num]; + col_id = cur_index_part.field->field_index(); + if (!IS_TSE_PART(part_id) && cbo_stats->tse_cbo_stats_table.num_distincts[col_id] != 0) { + col_product = col_product * cbo_stats->tse_cbo_stats_table.num_distincts[col_id]; + } + + uint32 index_no = (IS_TSE_PART(part_id) & IS_TSE_PART(subpart_id)) ? + (total_part_count * col_id) + (part_id * part_info.subpart_num + subpart_id) : + total_part_count * col_id + part_id; + + if (IS_TSE_PART(part_id) && cbo_stats->tse_cbo_stats_table.part_table_num_distincts[index_no] != 0) { + col_product = col_product * cbo_stats->tse_cbo_stats_table.part_table_num_distincts[index_no]; + } + } + } + density = 1.0 / col_product; + } else { + KEY cur_index = table.key_info[idx_id]; + density = calc_normal_density_by_cond(cbo_stats, &cur_index, min_key, max_key, part_info); + } + /* + * This is a safe-guard logic since we don't handle tse call error in this method, + * we need this to make sure that our optimizer continue to work even when we + * miscalculated the density, and it's still prefer index read + */ + if (density < 0.0 || density > 1.0) { + density = PREFER_RANGE_DENSITY; + } + return density; +} + static double calc_frequency_hist_equal_density(tse_cbo_stats_column_t *col_stat, cache_variant_t *val, enum_field_types field_type) { @@ -387,7 +579,7 @@ double calc_density_by_cond(tse_cbo_stats_table_t cbo_stats, KEY_PART_INFO cur_i return density; } -double calc_density_one_table(uint16_t idx_id, tse_range_key *key, +double calc_hist_density_one_table(uint16_t idx_id, tse_range_key *key, tse_cbo_stats_table_t cbo_stats, const TABLE &table) { double density = 1.0; @@ -436,7 +628,7 @@ double calc_density_one_table(uint16_t idx_id, tse_range_key *key, return density; } -void tse_index_stats_update(TABLE *table, tianchi_cbo_stats_t *cbo_stats) +void tse_index_stats_update(TABLE *table, tse_hist_cbo_stats_t *cbo_stats) { rec_per_key_t rec_per_key; bool is_part_table = (cbo_stats->tse_cbo_stats_part_table != nullptr); diff --git a/storage/tianchi/tse_cbo.h b/storage/tianchi/tse_cbo.h index 7516bf7..c49a444 100644 --- a/storage/tianchi/tse_cbo.h +++ b/storage/tianchi/tse_cbo.h @@ -44,6 +44,13 @@ typedef enum en_tse_cmp_type { CMP_TYPE_CLOSE_INTERNAL } tse_cmp_type_t; +typedef enum en_tse_normal_cmp_type{ + CMP_TYPE_UNKNOWN = 0, + CMP_TYPE_EQUAL, + CMP_TYPE_GREAT, + CMP_TYPE_LESS +} tse_normal_cmp_type_t; + typedef struct { const uchar *key; uint len; @@ -51,6 +58,13 @@ typedef struct { uint64_t col_map; } tse_key; +typedef struct { + const char *key; + uint len; + tse_normal_cmp_type_t cmp_type; + uint64_t col_map; +} tse_normal_key; + typedef struct { tse_key *min_key; tse_key *max_key; @@ -70,8 +84,10 @@ typedef struct { uint32_t subpart_num; } part_info_t; -double calc_density_one_table(uint16_t idx_id, tse_range_key *key, +double calc_hist_density_one_table(uint16_t idx_id, tse_range_key *key, tse_cbo_stats_table_t cbo_stats, const TABLE &table); +double calc_normal_density_one_table(uint16_t idx_id, tse_normal_key *min_key, tse_normal_key *max_key, + part_info_t part_info, tse_normal_cbo_stats_t *cbo_stats, const TABLE &table); -void tse_index_stats_update(TABLE *table, tianchi_cbo_stats_t *cbo_stats); +void tse_index_stats_update(TABLE *table, tse_hist_cbo_stats_t *cbo_stats); #endif diff --git a/storage/tianchi/tse_srv.h b/storage/tianchi/tse_srv.h index 1922adb..e2a458c 100644 --- a/storage/tianchi/tse_srv.h +++ b/storage/tianchi/tse_srv.h @@ -114,6 +114,27 @@ typedef struct cache_st_variant { }; } cache_variant_t; +typedef struct cache_n_st_variant { + union { + int v_int; + unsigned int v_uint32; + unsigned int v_bool; + long long v_bigint; + unsigned long long v_ubigint; + double v_real; + }; + + union { + struct { + short type; + unsigned char is_null; + unsigned char is_hex; + }; + + unsigned int ctrl; + }; +} cache_n_variant_t; + typedef enum { FREQUENCY_HIST = 0, HEIGHT_BALANCED_HIST = 1, @@ -160,6 +181,42 @@ typedef struct { bool is_updated; tse_cbo_stats_table_t tse_cbo_stats_table; tse_cbo_stats_table_t *tse_cbo_stats_part_table; +} tse_hist_cbo_stats_t; + +typedef struct { + uint32_t max_col_id; + uint32_t *num_distincts; + cache_n_variant_t *low_values; + cache_n_variant_t *high_values; + uint32_t max_part_no; // the part no of max rows num part + uint32_t *part_table_num_distincts; + cache_n_variant_t *part_table_low_values; + cache_n_variant_t *part_table_high_values; +} tse_normal_cbo_stats_table_t; + +typedef struct { + uint32_t rows_and_blocks_size; + uint32_t num_distinct_size; + uint32_t low_value_size; + uint32_t high_value_size; +} tse_normal_part_table_cbo_info_t; + +/* + * statistics information that mysql optimizer need + * expand this struct if need more cbo stats + */ +typedef struct { + uint32_t estimate_rows; + uint32_t estimate_blocks; + bool is_updated; + tse_normal_cbo_stats_table_t tse_cbo_stats_table; + tse_normal_part_table_cbo_info_t part_table_info; + uint32_t estimate_part_rows_and_blocks[0]; +} tse_normal_cbo_stats_t; + +typedef struct { + tse_hist_cbo_stats_t *hist_stats; + tse_normal_cbo_stats_t *normal_stats; } tianchi_cbo_stats_t; #pragma pack() @@ -555,6 +612,7 @@ int tse_srv_release_savepoint(tianchi_handler_t *tch, const char *name); /* Optimizer Related Interface */ int tse_analyze_table(tianchi_handler_t *tch, const char *db_name, const char *table_name, double sampling_ratio); int tse_get_cbo_stats(tianchi_handler_t *tch, tianchi_cbo_stats_t *stats, uint32_t first_partid, uint32_t num_part_fetch); +int tse_get_normal_cbo_stats(tianchi_handler_t *tch, tianchi_cbo_stats_t *stats); int tse_get_index_name(tianchi_handler_t *tch, char *index_name); /* Datatype Related Interface */ diff --git a/storage/tianchi/tse_srv_mq_stub.cc b/storage/tianchi/tse_srv_mq_stub.cc index 4d2d98d..9102fd0 100644 --- a/storage/tianchi/tse_srv_mq_stub.cc +++ b/storage/tianchi/tse_srv_mq_stub.cc @@ -740,6 +740,29 @@ int tse_analyze_table(tianchi_handler_t *tch, const char *db_name, const char *t return result; } +int tse_get_normal_cbo_stats(tianchi_handler_t *tch, tianchi_cbo_stats_t *stats) { + void *shm_inst = get_one_shm_inst(tch); + get_cbo_stats_request *req = (get_cbo_stats_request*)alloc_share_mem(shm_inst, sizeof(get_cbo_stats_request)); + if (req == NULL) { + tse_log_error("alloc shm mem error, shm_inst(%p), size(%lu)", shm_inst, sizeof(get_cbo_stats_request)); + return ERR_ALLOC_MEMORY; + } + req->tch = *tch; + req->stats = stats; + + int result = ERR_CONNECTION_FAILED; + int ret = tse_mq_deal_func(shm_inst, TSE_FUNC_TYPE_GET_CBO_STATS, req, tch->msg_buf); + if (ret == CT_SUCCESS) { + if (req->result == CT_SUCCESS) { + *tch = req->tch; + stats = req->stats; + } + result = req->result; + } + free_share_mem(shm_inst, req); + return result; +} + int tse_get_cbo_stats(tianchi_handler_t *tch, tianchi_cbo_stats_t *stats, uint32_t first_partid, uint32_t num_part_fetch) { void *shm_inst_4_req = get_one_shm_inst(tch); get_cbo_stats_request *req = (get_cbo_stats_request*)alloc_share_mem(shm_inst_4_req, sizeof(get_cbo_stats_request)); @@ -751,66 +774,79 @@ int tse_get_cbo_stats(tianchi_handler_t *tch, tianchi_cbo_stats_t *stats, uint32 void *shm_inst_4_stats = get_one_shm_inst(tch); req->stats = (tianchi_cbo_stats_t *)alloc_share_mem(shm_inst_4_stats, sizeof(tianchi_cbo_stats_t)); if (req->stats == NULL) { + tse_log_error("alloc shm mem error, shm_inst(%p), size(%lu)", shm_inst_4_req, sizeof(get_cbo_stats_request)); + free_share_mem(shm_inst_4_req, req); + return ERR_ALLOC_MEMORY; + } + + req->stats->hist_stats = (tse_hist_cbo_stats_t *)alloc_share_mem(shm_inst_4_stats, sizeof(tse_hist_cbo_stats_t)); + if (req->stats->hist_stats == NULL) { tse_log_error("alloc shm mem error, shm_inst(%p), size(%lu)", shm_inst_4_stats, sizeof(tianchi_cbo_stats_t)); free_share_mem(shm_inst_4_req, req); + free_share_mem(shm_inst_4_req, req->stats); return ERR_ALLOC_MEMORY; } - bool is_part_table = stats->tse_cbo_stats_part_table != nullptr ? true : false; - req->stats->msg_len = stats->msg_len; - req->stats->part_cnt = stats->part_cnt; + bool is_part_table = stats->hist_stats->tse_cbo_stats_part_table != nullptr ? true : false; + req->stats->hist_stats->msg_len = stats->hist_stats->msg_len; + req->stats->hist_stats->part_cnt = stats->hist_stats->part_cnt; void *shm_inst_4_columns = get_one_shm_inst(tch); void *shm_inst_4_keys = get_one_shm_inst(tch); void *shm_inst_4_part_table = get_one_shm_inst(tch); tse_cbo_stats_column_t* part_columns; uint32_t *part_ndv_keys; if (!is_part_table) { - req->stats->tse_cbo_stats_table.columns = (tse_cbo_stats_column_t*)alloc_share_mem(shm_inst_4_columns, req->stats->msg_len); - if (req->stats->tse_cbo_stats_table.columns == NULL) { - tse_log_error("alloc shm mem error, shm_inst(%p), size(%u)", shm_inst_4_columns, req->stats->msg_len); - free_share_mem(shm_inst_4_stats, req->stats); + req->stats->hist_stats->tse_cbo_stats_table.columns = (tse_cbo_stats_column_t*)alloc_share_mem(shm_inst_4_columns, req->stats->hist_stats->msg_len); + if (req->stats->hist_stats->tse_cbo_stats_table.columns == NULL) { + tse_log_error("alloc shm mem error, shm_inst(%p), size(%u)", shm_inst_4_columns, req->stats->hist_stats->msg_len); + free_share_mem(shm_inst_4_stats, req->stats->hist_stats); + free_share_mem(shm_inst_4_req, req->stats); free_share_mem(shm_inst_4_req, req); return ERR_ALLOC_MEMORY; } - req->stats->tse_cbo_stats_table.ndv_keys = (uint32_t*)alloc_share_mem(shm_inst_4_keys, stats->key_len); - if (req->stats->tse_cbo_stats_table.ndv_keys == NULL) { - tse_log_error("alloc shm mem error, shm_inst(%p), size(%u)", shm_inst_4_keys, stats->key_len); - free_share_mem(shm_inst_4_columns, req->stats->tse_cbo_stats_table.columns); - free_share_mem(shm_inst_4_stats, req->stats); + req->stats->hist_stats->tse_cbo_stats_table.ndv_keys = (uint32_t*)alloc_share_mem(shm_inst_4_keys, stats->hist_stats->key_len); + if (req->stats->hist_stats->tse_cbo_stats_table.ndv_keys == NULL) { + tse_log_error("alloc shm mem error, shm_inst(%p), size(%u)", shm_inst_4_keys, stats->hist_stats->key_len); + free_share_mem(shm_inst_4_columns, req->stats->hist_stats->tse_cbo_stats_table.columns); + free_share_mem(shm_inst_4_stats, req->stats->hist_stats); + free_share_mem(shm_inst_4_req, req->stats); free_share_mem(shm_inst_4_req, req); return ERR_ALLOC_MEMORY; } } else { - req->stats->first_partid = first_partid; - req->stats->num_part_fetch = num_part_fetch; - req->stats->tse_cbo_stats_part_table = + req->stats->hist_stats->first_partid = first_partid; + req->stats->hist_stats->num_part_fetch = num_part_fetch; + req->stats->hist_stats->tse_cbo_stats_part_table = (tse_cbo_stats_table_t*)alloc_share_mem(shm_inst_4_part_table, num_part_fetch * sizeof(tse_cbo_stats_table_t)); - if (req->stats->tse_cbo_stats_part_table == NULL) { + if (req->stats->hist_stats->tse_cbo_stats_part_table == NULL) { tse_log_error("alloc shm mem error, shm_inst(%p), size(%lu)", shm_inst_4_part_table, num_part_fetch * sizeof(tse_cbo_stats_table_t)); - free_share_mem(shm_inst_4_stats, req->stats); + free_share_mem(shm_inst_4_stats, req->stats->hist_stats); + free_share_mem(shm_inst_4_req, req->stats); free_share_mem(shm_inst_4_req, req); return ERR_ALLOC_MEMORY; } - part_columns = (tse_cbo_stats_column_t*)alloc_share_mem(shm_inst_4_columns, stats->msg_len * num_part_fetch); + part_columns = (tse_cbo_stats_column_t*)alloc_share_mem(shm_inst_4_columns, stats->hist_stats->msg_len * num_part_fetch); if (part_columns == NULL) { - tse_log_error("alloc shm mem error, shm_inst(%p), size(%u)", shm_inst_4_columns, stats->msg_len * num_part_fetch); - free_share_mem(shm_inst_4_part_table, req->stats->tse_cbo_stats_part_table); - free_share_mem(shm_inst_4_stats, req->stats); + tse_log_error("alloc shm mem error, shm_inst(%p), size(%u)", shm_inst_4_columns, stats->hist_stats->msg_len * num_part_fetch); + free_share_mem(shm_inst_4_part_table, req->stats->hist_stats->tse_cbo_stats_part_table); + free_share_mem(shm_inst_4_stats, req->stats->hist_stats); + free_share_mem(shm_inst_4_req, req->stats); free_share_mem(shm_inst_4_req, req); return ERR_ALLOC_MEMORY; } - part_ndv_keys = (uint32_t*)alloc_share_mem(shm_inst_4_keys, stats->key_len * num_part_fetch); + part_ndv_keys = (uint32_t*)alloc_share_mem(shm_inst_4_keys, stats->hist_stats->key_len * num_part_fetch); if (part_ndv_keys == NULL) { - tse_log_error("alloc shm mem error, shm_inst(%p), size(%u)", shm_inst_4_keys, stats->key_len * num_part_fetch); + tse_log_error("alloc shm mem error, shm_inst(%p), size(%u)", shm_inst_4_keys, stats->hist_stats->key_len * num_part_fetch); free_share_mem(shm_inst_4_columns, part_columns); - free_share_mem(shm_inst_4_part_table, req->stats->tse_cbo_stats_part_table); - free_share_mem(shm_inst_4_stats, req->stats); + free_share_mem(shm_inst_4_part_table, req->stats->hist_stats->tse_cbo_stats_part_table); + free_share_mem(shm_inst_4_stats, req->stats->hist_stats); + free_share_mem(shm_inst_4_req, req->stats); free_share_mem(shm_inst_4_req, req); return ERR_ALLOC_MEMORY; } for (uint i = 0; i < num_part_fetch; i++) { - req->stats->tse_cbo_stats_part_table[i].columns = part_columns + i * (stats->msg_len / sizeof(tse_cbo_stats_column_t)); - req->stats->tse_cbo_stats_part_table[i].ndv_keys = part_ndv_keys + i * (stats->key_len / sizeof(uint32_t)); + req->stats->hist_stats->tse_cbo_stats_part_table[i].columns = part_columns + i * (stats->hist_stats->msg_len / sizeof(tse_cbo_stats_column_t)); + req->stats->hist_stats->tse_cbo_stats_part_table[i].ndv_keys = part_ndv_keys + i * (stats->hist_stats->key_len / sizeof(uint32_t)); } } @@ -821,30 +857,31 @@ int tse_get_cbo_stats(tianchi_handler_t *tch, tianchi_cbo_stats_t *stats, uint32 if (req->result == CT_SUCCESS) { if (!is_part_table) { *tch = req->tch; - memcpy(stats->tse_cbo_stats_table.columns, req->stats->tse_cbo_stats_table.columns, stats->msg_len); - memcpy(stats->tse_cbo_stats_table.ndv_keys, req->stats->tse_cbo_stats_table.ndv_keys, stats->key_len); - stats->is_updated = req->stats->is_updated; - stats->tse_cbo_stats_table.estimate_rows = req->stats->tse_cbo_stats_table.estimate_rows; + memcpy(stats->hist_stats->tse_cbo_stats_table.columns, req->stats->hist_stats->tse_cbo_stats_table.columns, stats->hist_stats->msg_len); + memcpy(stats->hist_stats->tse_cbo_stats_table.ndv_keys, req->stats->hist_stats->tse_cbo_stats_table.ndv_keys, stats->hist_stats->key_len); + stats->hist_stats->is_updated = req->stats->hist_stats->is_updated; + stats->hist_stats->tse_cbo_stats_table.estimate_rows = req->stats->hist_stats->tse_cbo_stats_table.estimate_rows; } else { - stats->is_updated = req->stats->is_updated; + stats->hist_stats->is_updated = req->stats->hist_stats->is_updated; for (uint i = 0; i < num_part_fetch; i++) { - stats->tse_cbo_stats_part_table[i+first_partid].estimate_rows = req->stats->tse_cbo_stats_part_table[i].estimate_rows; - memcpy(stats->tse_cbo_stats_part_table[i+first_partid].columns, req->stats->tse_cbo_stats_part_table[i].columns, stats->msg_len); - memcpy(stats->tse_cbo_stats_part_table[i+first_partid].ndv_keys, req->stats->tse_cbo_stats_part_table[i].ndv_keys, stats->key_len); + stats->hist_stats->tse_cbo_stats_part_table[i+first_partid].estimate_rows = req->stats->hist_stats->tse_cbo_stats_part_table[i].estimate_rows; + memcpy(stats->hist_stats->tse_cbo_stats_part_table[i+first_partid].columns, req->stats->hist_stats->tse_cbo_stats_part_table[i].columns, stats->hist_stats->msg_len); + memcpy(stats->hist_stats->tse_cbo_stats_part_table[i+first_partid].ndv_keys, req->stats->hist_stats->tse_cbo_stats_part_table[i].ndv_keys, stats->hist_stats->key_len); } } } result = req->result; } if (!is_part_table) { - free_share_mem(shm_inst_4_columns, req->stats->tse_cbo_stats_table.columns); - free_share_mem(shm_inst_4_keys, req->stats->tse_cbo_stats_table.ndv_keys); + free_share_mem(shm_inst_4_columns, req->stats->hist_stats->tse_cbo_stats_table.columns); + free_share_mem(shm_inst_4_keys, req->stats->hist_stats->tse_cbo_stats_table.ndv_keys); } else { free_share_mem(shm_inst_4_columns, part_columns); free_share_mem(shm_inst_4_keys, part_ndv_keys); - free_share_mem(shm_inst_4_part_table, req->stats->tse_cbo_stats_part_table); + free_share_mem(shm_inst_4_part_table, req->stats->hist_stats->tse_cbo_stats_part_table); } - free_share_mem(shm_inst_4_stats, req->stats); + free_share_mem(shm_inst_4_stats, req->stats->hist_stats); + free_share_mem(shm_inst_4_req, req->stats); free_share_mem(shm_inst_4_req, req); return result; } -- Gitee