diff --git a/storage/tianchi/ha_tse.cc b/storage/tianchi/ha_tse.cc index 7f1db061e2219484d473f39a3a0543e109213274..cb5fcd22a624cacc14da29b6155aa0a2d1d62b91 100644 --- a/storage/tianchi/ha_tse.cc +++ b/storage/tianchi/ha_tse.cc @@ -206,6 +206,11 @@ static MYSQL_SYSVAR_UINT(autoinc_lock_mode, ctc_autoinc_lock_mode, PLUGIN_VAR_RQ "The AUTOINC lock modes supported by CTC.", nullptr, nullptr, CTC_AUTOINC_NO_LOCKING, CTC_AUTOINC_OLD_STYLE_LOCKING, CTC_AUTOINC_NO_LOCKING, 0); +uint32_t ctc_update_analyze_time = CTC_ANALYZE_TIME_SEC; +static MYSQL_SYSVAR_UINT(update_analyze_time, ctc_update_analyze_time, PLUGIN_VAR_RQCMDARG, + "CBO updating time by CTC.", nullptr, nullptr, CTC_ANALYZE_TIME_SEC, + 0, UINT32_MAX, 0); + // All global and session system variables must be published to mysqld before // use. This is done by constructing a NULL-terminated array of the variables // and linking to it in the plugin public interface. @@ -225,6 +230,7 @@ static SYS_VAR *tse_system_variables[] = { MYSQL_SYSVAR(stats_enabled), MYSQL_SYSVAR(autoinc_lock_mode), MYSQL_SYSVAR(disaster_cluster_role), + MYSQL_SYSVAR(update_analyze_time), nullptr }; @@ -3213,7 +3219,7 @@ EXTER_ATTACK int ha_tse::rnd_pos(uchar *buf, uchar *pos) { void ha_tse::info_low() { if (m_share && m_share->cbo_stats != nullptr) { - stats.records = m_share->cbo_stats->estimate_rows; + stats.records = m_share->cbo_stats->tse_cbo_stats_table.estimate_rows; } } @@ -3828,20 +3834,30 @@ enum_alter_inplace_result ha_tse::check_if_supported_inplace_alter( @brief Construct tse range key based on mysql range key */ -void ha_tse::set_tse_range_key(tse_range_key *tse_range_key, key_range *mysql_range_key, tse_cmp_type_t default_type) { +void ha_tse::set_tse_range_key(tse_key *tse_key, key_range *mysql_range_key, bool is_min_key) { if (!mysql_range_key) { - tse_range_key->cmp_type = CMP_TYPE_UNKNOWN; - tse_range_key->len = 0; + tse_key->cmp_type = CMP_TYPE_NULL; + tse_key->len = 0; + tse_key->col_map = 0; return; } - tse_range_key->col_map = mysql_range_key->keypart_map; - tse_range_key->key = (const char *)mysql_range_key->key; - tse_range_key->len = mysql_range_key->length; - if (mysql_range_key->flag == HA_READ_KEY_EXACT) { - tse_range_key->cmp_type = CMP_TYPE_EQUAL; - } else { - tse_range_key->cmp_type = default_type; + tse_key->col_map = mysql_range_key->keypart_map; + tse_key->key = mysql_range_key->key; + tse_key->len = mysql_range_key->length; + + switch(mysql_range_key->flag) { + case HA_READ_KEY_EXACT: + tse_key->cmp_type = CMP_TYPE_CLOSE_INTERNAL; + break; + case HA_READ_BEFORE_KEY: + tse_key->cmp_type = CMP_TYPE_OPEN_INTERNAL; + break; + case HA_READ_AFTER_KEY: + tse_key->cmp_type = is_min_key ? CMP_TYPE_OPEN_INTERNAL : CMP_TYPE_CLOSE_INTERNAL; + break; + default: + tse_key->cmp_type = CMP_TYPE_NULL; } } @@ -3861,22 +3877,32 @@ void ha_tse::set_tse_range_key(tse_range_key *tse_range_key, key_range *mysql_ra ha_rows ha_tse::records_in_range(uint inx, key_range *min_key, key_range *max_key) { DBUG_TRACE; - tse_range_key tse_min_key; - tse_range_key tse_max_key; - set_tse_range_key(&tse_min_key, min_key, CMP_TYPE_GREAT); - set_tse_range_key(&tse_max_key, max_key, CMP_TYPE_LESS); + tse_key tse_min_key; + tse_key tse_max_key; + set_tse_range_key(&tse_min_key, min_key, true); + set_tse_range_key(&tse_max_key, max_key, false); + if (tse_max_key.len < tse_min_key.len) { + tse_max_key.cmp_type = CMP_TYPE_NULL; + } else if (tse_max_key.len > tse_min_key.len) { + tse_min_key.cmp_type = CMP_TYPE_NULL; + } + tse_range_key key = {&tse_min_key, &tse_max_key}; uint64_t n_rows = 0; - part_info_t part_info = {0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF}; + double density; if (m_share) { - double density = calc_density_one_table(inx, &tse_min_key, &tse_max_key, part_info, m_share->cbo_stats, *table); + if (!m_share->cbo_stats->is_updated) { + tse_log_debug("table %s has not been analyzed", table->alias); + density = DEFAULT_RANGE_DENSITY; + } + density = calc_density_one_table(inx, &key, m_share->cbo_stats->tse_cbo_stats_table, *table); /* * This is a safe-guard logic since we don't handle tse call error in this method, * we need this to make sure that our optimizer continue to work even when we * miscalculated the density, and it's still prefer index read */ - n_rows += m_share->cbo_stats->estimate_rows * density; + n_rows += m_share->cbo_stats->tse_cbo_stats_table.estimate_rows * density; } /* @@ -5175,30 +5201,17 @@ int ha_tse::initialize_cbo_stats() if (!m_share || m_share->cbo_stats != nullptr) { return CT_SUCCESS; } - m_share->cbo_stats = (tianchi_cbo_stats_t*)tse_alloc_buf(&m_tch, sizeof(tianchi_cbo_stats_t)); + m_share->cbo_stats = (tianchi_cbo_stats_t*)my_malloc(PSI_NOT_INSTRUMENTED, sizeof(tianchi_cbo_stats_t), MYF(MY_WME)); if (m_share->cbo_stats == nullptr) { tse_log_error("alloc shm mem failed, m_share->cbo_stats size(%lu)", sizeof(tianchi_cbo_stats_t)); return ERR_ALLOC_MEMORY; } - *m_share->cbo_stats = {0, 0, 0, 0, nullptr, nullptr, nullptr, 0, nullptr, nullptr, nullptr, 0, 0, 0, 0,{}}; - m_share->cbo_stats->tse_cbo_stats_table.num_distincts = - (uint32_t *)tse_alloc_buf(&m_tch, table->s->fields * sizeof(uint32_t)); + *m_share->cbo_stats = {0, 0, 0, 0, 0, 0, nullptr, nullptr}; - m_share->cbo_stats->tse_cbo_stats_table.low_values = - (cache_variant_t *)tse_alloc_buf(&m_tch, table->s->fields * sizeof(cache_variant_t)); + m_share->cbo_stats->tse_cbo_stats_table.columns = + (tse_cbo_stats_column_t*)my_malloc(PSI_NOT_INSTRUMENTED, table->s->fields * sizeof(tse_cbo_stats_column_t), MYF(MY_WME)); - m_share->cbo_stats->tse_cbo_stats_table.high_values = - (cache_variant_t *)tse_alloc_buf(&m_tch, table->s->fields * sizeof(cache_variant_t)); - - if (m_share->cbo_stats->tse_cbo_stats_table.num_distincts == nullptr - || m_share->cbo_stats->tse_cbo_stats_table.low_values == nullptr - || m_share->cbo_stats->tse_cbo_stats_table.high_values == nullptr) { - tse_log_error("alloc shm mem error, size(%lu)", - table->s->fields * sizeof(uint32_t) + 2 * table->s->fields * sizeof(cache_variant_t)); - free_cbo_stats(); - return ERR_ALLOC_MEMORY; - } - + m_share->cbo_stats->msg_len = table->s->fields * sizeof(tse_cbo_stats_column_t); return CT_SUCCESS; } @@ -5207,7 +5220,7 @@ int ha_tse::get_cbo_stats_4share() THD *thd = ha_thd(); int ret = CT_SUCCESS; time_t now = time(nullptr); - if (m_share && (m_share->need_fetch_cbo || now - m_share->get_cbo_time > 60)) { + if (m_share && (m_share->need_fetch_cbo || now - m_share->get_cbo_time > ctc_update_analyze_time)) { if (m_tch.ctx_addr == INVALID_VALUE64) { char user_name[SMALL_RECORD_SIZE]; tse_split_normalized_name(table->s->normalized_path.str, user_name, SMALL_RECORD_SIZE, nullptr, 0, nullptr); @@ -5237,22 +5250,9 @@ void ha_tse::free_cbo_stats() return; } - if (m_share->cbo_stats->tse_cbo_stats_table.num_distincts != nullptr) { - tse_free_buf(&m_tch, (uchar *) m_share->cbo_stats->tse_cbo_stats_table.num_distincts); - m_share->cbo_stats->tse_cbo_stats_table.num_distincts = nullptr; - } - - if (m_share->cbo_stats->tse_cbo_stats_table.low_values != nullptr) { - tse_free_buf(&m_tch, (uchar *) m_share->cbo_stats->tse_cbo_stats_table.low_values); - m_share->cbo_stats->tse_cbo_stats_table.low_values = nullptr; - } - - if (m_share->cbo_stats->tse_cbo_stats_table.high_values != nullptr) { - tse_free_buf(&m_tch, (uchar *) m_share->cbo_stats->tse_cbo_stats_table.high_values); - m_share->cbo_stats->tse_cbo_stats_table.high_values = nullptr; - } - - tse_free_buf(&m_tch, (uchar *) m_share->cbo_stats); + my_free((m_share->cbo_stats->tse_cbo_stats_table.columns)); + m_share->cbo_stats->tse_cbo_stats_table.columns = nullptr; + my_free((uchar *)(m_share->cbo_stats)); m_share->cbo_stats = nullptr; } diff --git a/storage/tianchi/ha_tse.h b/storage/tianchi/ha_tse.h index d349c205f14a3f8ded6dff03a7a560b7d43ad6b3..f7597274256ffb5f80f0a9ccc7e02602d35c5224 100644 --- a/storage/tianchi/ha_tse.h +++ b/storage/tianchi/ha_tse.h @@ -132,6 +132,8 @@ again. */ #define TSE_INTERNAL_TMP_TABLE 2 #define TSE_TABLE_CONTAINS_VIRCOL 4 +#define CTC_ANALYZE_TIME_SEC 60 + /* cond pushdown */ #define INVALID_MAX_COLUMN (uint16_t)0xFFFF @@ -681,7 +683,7 @@ public: int records(ha_rows *num_rows) override; int records_from_index(ha_rows *num_rows, uint inx) override; - void set_tse_range_key(tse_range_key *tse_range_key, key_range *mysql_range_key, tse_cmp_type_t default_type); + void set_tse_range_key(tse_key *tse_key, key_range *mysql_range_key, bool is_min_key); /** @brief diff --git a/storage/tianchi/ha_tsepart.cc b/storage/tianchi/ha_tsepart.cc index 94ccef635a932806baacf43d9060ef7bd71cfc42..f687505fb62867a326b3c52bd25f640d86631cb7 100644 --- a/storage/tianchi/ha_tsepart.cc +++ b/storage/tianchi/ha_tsepart.cc @@ -57,6 +57,7 @@ #define INVALID_PART_ID (uint32)0xFFFFFFFF; extern handlerton *get_tse_hton(); +extern uint32_t ctc_update_analyze_time; constexpr uint64 INVALID_VALUE64 = 0xFFFFFFFFFFFFFFFFULL; constexpr int max_prefetch_num = MAX_PREFETCH_REC_NUM; @@ -793,32 +794,47 @@ void ha_tsepart::info_low() { table->part_info->num_parts; for (uint part_id = m_part_info->get_first_used_partition(); part_id < part_num; part_id = m_part_info->get_next_used_partition(part_id)) { - stats.records += m_part_share->cbo_stats->estimate_part_rows_and_blocks[part_id]; + stats.records += m_part_share->cbo_stats->tse_cbo_stats_part_table[part_id].estimate_rows; } } } ha_rows ha_tsepart::records_in_range(uint inx, key_range *min_key, key_range *max_key) { - tse_range_key tse_min_key; - tse_range_key tse_max_key; - set_tse_range_key(&tse_min_key, min_key, CMP_TYPE_GREAT); - set_tse_range_key(&tse_max_key, max_key, CMP_TYPE_LESS); - uint32_t used_parts; - uint32_t *part_ids = NULL; - get_used_partitions(m_part_info, &part_ids, &used_parts); + double density; + if (m_part_share && !m_part_share->cbo_stats->is_updated) { + tse_log_debug("table %s has not been analyzed", table->alias); + return 1; + } + + + tse_key tse_min_key; + tse_key tse_max_key; + set_tse_range_key(&tse_min_key, min_key, true); + set_tse_range_key(&tse_max_key, max_key, false); + tse_range_key key = {&tse_min_key, &tse_max_key}; + if (tse_max_key.len < tse_min_key.len) { + tse_max_key.cmp_type = CMP_TYPE_NULL; + } else if (tse_max_key.len > tse_min_key.len) { + tse_min_key.cmp_type = CMP_TYPE_NULL; + } + uint64_t n_rows_num = 0; - uint32_t part_num = table->part_info->num_parts; - uint32_t subpart_num = table->part_info->num_subparts; + uint part_num = m_is_sub_partitioned ? table->part_info->num_parts * table->part_info->num_subparts : + table->part_info->num_parts; - for (uint i = 0; i < used_parts; i++) { - uint32_t part_id = m_is_sub_partitioned ? part_ids[i] / m_part_info->num_subparts : - part_ids[i]; - uint32_t subpart_id = m_is_sub_partitioned ? part_ids[i] % m_part_info->num_subparts : - INVALID_PART_ID; - part_info_t part_info = {part_id, subpart_id, part_num, subpart_num}; - double density = calc_density_one_table(inx, &tse_min_key, &tse_max_key, part_info, m_part_share->cbo_stats, *table); - n_rows_num += m_part_share->cbo_stats->estimate_part_rows_and_blocks[part_id] * density; +for (uint part_id = m_part_info->get_first_used_partition(); part_id < part_num; + part_id = m_part_info->get_next_used_partition(part_id)) { + + set_tse_range_key(&tse_min_key, min_key, true); + set_tse_range_key(&tse_max_key, max_key, false); + if (tse_max_key.len < tse_min_key.len) { + tse_max_key.cmp_type = CMP_TYPE_NULL; + } else if (tse_max_key.len > tse_min_key.len) { + tse_min_key.cmp_type = CMP_TYPE_NULL; + } + density = calc_density_one_table(inx, &key, m_part_share->cbo_stats->tse_cbo_stats_part_table[part_id], *table); + n_rows_num += m_part_share->cbo_stats->tse_cbo_stats_part_table[part_id].estimate_rows * density; } /* @@ -828,7 +844,6 @@ ha_rows ha_tsepart::records_in_range(uint inx, key_range *min_key, key_range *ma if (n_rows_num == 0) { n_rows_num = 1; } - my_free(part_ids); return (ha_rows)n_rows_num; } @@ -945,72 +960,33 @@ int ha_tsepart::initialize_cbo_stats() { } uint32_t part_num = m_is_sub_partitioned ? table->part_info->num_parts * table->part_info->num_subparts : table->part_info->num_parts; - uint32_t part_field = table->s->fields; - - /* estimate_part_rows_and_blocks数组包括part_rows和part_blocks,数组长度为2 * part_num */ - uint32_t rows_and_blocks_num = 2 * part_num; - - if (part_num * part_field * sizeof(cache_variant_t) > MAX_MESSAGE_SIZE) { - /* 申请共享内存超限时申请一片连续空间,包括tianchi_cbo_stats_t结构体、part_rows及part_blocks数组 - * 以及num_distincts、low_values、high_values三块数据区域 - */ - uint32_t data_size = sizeof(tianchi_cbo_stats_t) + rows_and_blocks_num * sizeof(uint32_t) + - part_num * part_field * (sizeof(uint32_t) + sizeof(cache_variant_t) + sizeof(cache_variant_t)); - m_part_share->cbo_stats = (tianchi_cbo_stats_t *)malloc(data_size); - if (m_part_share->cbo_stats == nullptr) { - tse_log_error("alloc mem failed, m_part_share->cbo_stats size(%lu)", sizeof(data_size)); - return ERR_ALLOC_MEMORY; - } - *m_part_share->cbo_stats = {0, 0, 0, 0, nullptr, nullptr, nullptr, 0, nullptr, nullptr, nullptr, 0, 0, 0, 0, {}}; - - uint8_t *offset = (uint8_t *)m_part_share->cbo_stats + sizeof(tianchi_cbo_stats_t) + rows_and_blocks_num * sizeof(uint32_t); - m_part_share->cbo_stats->tse_cbo_stats_table.part_table_num_distincts = (uint32_t *)offset; - offset += part_num * part_field * sizeof(uint32_t); - m_part_share->cbo_stats->tse_cbo_stats_table.part_table_low_values = (cache_variant_t *)offset; - offset += part_num * part_field * sizeof(cache_variant_t); - m_part_share->cbo_stats->tse_cbo_stats_table.part_table_high_values = (cache_variant_t *)offset; - } else { - m_part_share->cbo_stats = (tianchi_cbo_stats_t*)tse_alloc_buf(&m_tch, sizeof(tianchi_cbo_stats_t) + - rows_and_blocks_num * sizeof(uint32_t)); + + m_part_share->cbo_stats = (tianchi_cbo_stats_t*)my_malloc(PSI_NOT_INSTRUMENTED, sizeof(tianchi_cbo_stats_t), MYF(MY_WME)); if (m_part_share->cbo_stats == nullptr) { tse_log_error("alloc shm mem failed, m_part_share->cbo_stats size(%lu)", sizeof(tianchi_cbo_stats_t)); return ERR_ALLOC_MEMORY; } - *m_part_share->cbo_stats = {0, 0, 0, 0, nullptr, nullptr, nullptr, 0, nullptr, nullptr, nullptr, 0, 0, 0, 0, {}}; - - m_part_share->cbo_stats->tse_cbo_stats_table.part_table_num_distincts = - (uint32_t *)tse_alloc_buf(&m_tch, part_num * part_field * sizeof(uint32_t)); - - m_part_share->cbo_stats->tse_cbo_stats_table.part_table_low_values = - (cache_variant_t *)tse_alloc_buf(&m_tch, part_num * part_field * sizeof(cache_variant_t)); - - m_part_share->cbo_stats->tse_cbo_stats_table.part_table_high_values = - (cache_variant_t *)tse_alloc_buf(&m_tch, part_num * part_field * sizeof(cache_variant_t)); - - if (m_part_share->cbo_stats->tse_cbo_stats_table.part_table_num_distincts == nullptr || - m_part_share->cbo_stats->tse_cbo_stats_table.part_table_low_values == nullptr || - m_part_share->cbo_stats->tse_cbo_stats_table.part_table_high_values == nullptr) { - tse_log_error("alloc mem failed, size(%lu)", part_num * part_field * sizeof(uint32_t) - + 2 * part_num * part_field * sizeof(cache_variant_t)); - free_cbo_stats(); - return ERR_ALLOC_MEMORY; - } - } - m_part_share->cbo_stats->part_table_info.rows_and_blocks_size = rows_and_blocks_num * sizeof(uint32_t); - m_part_share->cbo_stats->part_table_info.num_distinct_size = part_num * part_field * sizeof(uint32_t); - m_part_share->cbo_stats->part_table_info.low_value_size = part_num * part_field * sizeof(cache_variant_t); - m_part_share->cbo_stats->part_table_info.high_value_size = part_num * part_field * sizeof(cache_variant_t); + *m_part_share->cbo_stats = {0, 0, 0, 0, 0, 0, nullptr, nullptr}; + + m_part_share->cbo_stats->part_cnt = part_num; + m_part_share->cbo_stats->tse_cbo_stats_part_table = + (tse_cbo_stats_table_t*)my_malloc(PSI_NOT_INSTRUMENTED, part_num * sizeof(tse_cbo_stats_table_t), MYF(MY_WME)); + + for (uint i = 0; i < part_num; i++) { + m_part_share->cbo_stats->tse_cbo_stats_part_table[i].columns = + (tse_cbo_stats_column_t*)my_malloc(PSI_NOT_INSTRUMENTED, table->s->fields * sizeof(tse_cbo_stats_column_t), MYF(MY_WME)); + } + m_part_share->cbo_stats->msg_len = table->s->fields * sizeof(tse_cbo_stats_column_t); return CT_SUCCESS; } - int ha_tsepart::get_cbo_stats_4share() { THD *thd = ha_thd(); int ret = CT_SUCCESS; time_t now = time(nullptr); - if (m_part_share->need_fetch_cbo || now - m_part_share->get_cbo_time > 60) { + if (m_part_share->need_fetch_cbo || now - m_part_share->get_cbo_time > ctc_update_analyze_time) { if (m_tch.ctx_addr == INVALID_VALUE64) { char user_name[SMALL_RECORD_SIZE]; tse_split_normalized_name(table->s->normalized_path.str, user_name, SMALL_RECORD_SIZE, nullptr, 0, nullptr); @@ -1023,6 +999,22 @@ int ha_tsepart::get_cbo_stats_4share() } } update_member_tch(m_tch, get_tse_hton(), thd); + + uint32_t part_per_cnt = MAX_MESSAGE_SIZE / (table->s->fields * sizeof(tse_cbo_stats_column_t) + CBO_PART_MEM_RESIDUAL); + uint32_t part_cnt = m_part_share->cbo_stats->part_cnt; + uint32_t fetch_times = part_cnt / part_per_cnt; + m_part_share->cbo_stats->first_partid = 0; + m_part_share->cbo_stats->num_part_fetch = part_per_cnt; + + for (uint32_t i = 0; icbo_stats); + if (ret != CT_SUCCESS) { + return ret; + } + m_part_share->cbo_stats->first_partid += part_per_cnt; + } + + m_part_share->cbo_stats->num_part_fetch = part_cnt - m_part_share->cbo_stats->first_partid; ret = tse_get_cbo_stats(&m_tch, m_part_share->cbo_stats); update_sess_ctx_by_tch(m_tch, get_tse_hton(), thd); if (ret == CT_SUCCESS && m_part_share->cbo_stats->is_updated) { @@ -1039,41 +1031,17 @@ void ha_tsepart::free_cbo_stats() { return; } - if (m_part_share->cbo_stats->part_table_info.low_value_size <= MAX_MESSAGE_SIZE) { - // 释放m_part_share->cbo_stats指向的共享内存 - if (m_part_share->cbo_stats->tse_cbo_stats_table.part_table_num_distincts != nullptr) { - tse_free_buf(&m_tch, (uchar *) m_part_share->cbo_stats->tse_cbo_stats_table.part_table_num_distincts); - m_part_share->cbo_stats->tse_cbo_stats_table.part_table_num_distincts = nullptr; - } - - if (m_part_share->cbo_stats->tse_cbo_stats_table.part_table_low_values != nullptr) { - tse_free_buf(&m_tch, (uchar *) m_part_share->cbo_stats->tse_cbo_stats_table.part_table_low_values); - m_part_share->cbo_stats->tse_cbo_stats_table.part_table_low_values = nullptr; - } - - if (m_part_share->cbo_stats->tse_cbo_stats_table.part_table_high_values != nullptr) { - tse_free_buf(&m_tch, (uchar *) m_part_share->cbo_stats->tse_cbo_stats_table.part_table_high_values); - m_part_share->cbo_stats->tse_cbo_stats_table.part_table_high_values = nullptr; - } - - tse_free_buf(&m_tch, (uchar *) m_part_share->cbo_stats); - } else { - // 释放m_part_share->cbo_stats指向的普通内存 - if (m_part_share->cbo_stats->tse_cbo_stats_table.part_table_num_distincts != nullptr) { - m_part_share->cbo_stats->tse_cbo_stats_table.part_table_num_distincts = nullptr; - } - - if (m_part_share->cbo_stats->tse_cbo_stats_table.part_table_low_values != nullptr) { - m_part_share->cbo_stats->tse_cbo_stats_table.part_table_low_values = nullptr; - } - - if (m_part_share->cbo_stats->tse_cbo_stats_table.part_table_high_values != nullptr) { - m_part_share->cbo_stats->tse_cbo_stats_table.part_table_high_values = nullptr; - } - - free(m_part_share->cbo_stats); + uint32_t part_num = m_is_sub_partitioned ? table->part_info->num_parts * table->part_info->num_subparts : + table->part_info->num_parts; + for (uint i = 0; i < part_num; i++) { + my_free(m_part_share->cbo_stats->tse_cbo_stats_part_table[i].columns); + m_part_share->cbo_stats->tse_cbo_stats_part_table[i].columns = nullptr; } + my_free(m_part_share->cbo_stats->tse_cbo_stats_part_table); + m_part_share->cbo_stats->tse_cbo_stats_part_table = nullptr; + my_free(m_part_share->cbo_stats); m_part_share->cbo_stats = nullptr; + } int ha_tsepart::check(THD *, HA_CHECK_OPT *) diff --git a/storage/tianchi/srv_mq_msg.h b/storage/tianchi/srv_mq_msg.h index 06d771e645a5d6756d60e65d666b2b5f371e4112..95d5e0e92769597140ddf226d103c32373001d58 100644 --- a/storage/tianchi/srv_mq_msg.h +++ b/storage/tianchi/srv_mq_msg.h @@ -32,7 +32,6 @@ extern "C" { #define TSE_MQ_MESSAGE_SLICE_LEN 102400 #define MAX_LOB_LOCATOR_SIZE 4000 // 存储引擎存储blob对象结构体最大长度 -#define MAX_MESSAGE_SIZE 491520 // 共享内存最大可申请空间大小 #define REG_MISMATCH_CTC_VERSION 501 #define REG_ALLOC_INST_ID_FAILED 502 diff --git a/storage/tianchi/tse_cbo.cc b/storage/tianchi/tse_cbo.cc index 95bf3fe48bf81a9edf89d8c2b9c11426fdc3bf1c..b472748a1871e1ca1a30d162c44b61bd106c0b84 100644 --- a/storage/tianchi/tse_cbo.cc +++ b/storage/tianchi/tse_cbo.cc @@ -20,31 +20,43 @@ #include "sql/field.h" #include "tse_srv_mq_module.h" -void r_key2variant(tse_range_key *rKey, KEY_PART_INFO *cur_index_part, cache_variant_t *ret_val) { - enum_field_types field_type = cur_index_part->field->real_type(); +void r_key2variant(tse_key *rKey, KEY_PART_INFO *cur_index_part, cache_variant_t *ret_val, cache_variant_t * value, uint32_t key_offset) +{ + ret_val->is_null = 0; + if (rKey->cmp_type == CMP_TYPE_NULL) { + *ret_val = *value; + rKey->cmp_type = CMP_TYPE_CLOSE_INTERNAL; + return; + } - uint16_t offset = 0; + enum_field_types field_type = cur_index_part->field->real_type(); + ret_val->type = field_type; + uint32_t offset = 0; if (cur_index_part->field->is_nullable()) { + /* The first byte in the field tells if this is an SQL NULL value */ + if(*(rKey->key + key_offset) == 1) { + *ret_val = *value; + rKey->cmp_type = CMP_TYPE_CLOSE_INTERNAL; + return; + } offset = 1; } - - ret_val->is_null = 0; - ret_val->type = field_type; + const uchar *key = rKey->key + key_offset + offset; switch(field_type) { case MYSQL_TYPE_TINY: case MYSQL_TYPE_SHORT: case MYSQL_TYPE_LONG: - ret_val->v_int = *(int32_t *)const_cast(rKey->key + offset); - break; + ret_val->v_int = *(int32_t *)const_cast(key); + break; case MYSQL_TYPE_FLOAT: - ret_val->v_real = *(float *)const_cast(rKey->key + offset); + ret_val->v_real = *(float *)const_cast(key); break; case MYSQL_TYPE_DOUBLE: - ret_val->v_real = *(double *)const_cast(rKey->key + offset); + ret_val->v_real = *(double *)const_cast(key); break; case MYSQL_TYPE_LONGLONG: - ret_val->v_bigint = *(int64_t *)const_cast(rKey->key + offset); + ret_val->v_bigint = *(int64_t *)const_cast(key); break; default: ret_val->is_null = 1; @@ -52,180 +64,342 @@ void r_key2variant(tse_range_key *rKey, KEY_PART_INFO *cur_index_part, cache_var } } -double eval_density_result(cache_variant_t *result) +en_tse_compare_type compare(cache_variant_t *right, cache_variant_t *left, enum_field_types field_type) +{ + double compare_value = 0; + switch(field_type) { + case MYSQL_TYPE_TINY: + case MYSQL_TYPE_SHORT: + case MYSQL_TYPE_LONG: + compare_value = right->v_int - left->v_int; + break; + case MYSQL_TYPE_FLOAT: + case MYSQL_TYPE_DOUBLE: + compare_value = (right->v_real - left->v_real); + break; + case MYSQL_TYPE_LONGLONG: + compare_value = (right->v_bigint - left->v_bigint); + break; + default: + return UNCOMPARABLE; + } + if (abs(compare_value) < REAL_EPSINON) { + return EQUAL; + } else if (compare_value > 0) { + return GREAT; + } + return LESS; +} + +double eval_density_result(double density) { /* * key range is beyond the actual index range, * don't have any records in this range */ - if (result->v_real < 0) { - return 0; + if (density < 0) { + return 0; } /* * key range is larger than the actual index range, * any key with this range shoule be deemed as not selective */ - if (result->v_real > 1) { - return 1; + if (density > 1) { + return 1; } - return result->v_real; + return density; } -int calc_density_low(KEY_PART_INFO *cur_index_part, cache_variant_t *high_val, cache_variant_t *low_val, - cache_variant_t *left_val, cache_variant_t *right_val, cache_variant_t *result) +static double calc_frequency_hist_equal_density(tse_cbo_stats_column_t *col_stat, cache_variant_t *val, + enum_field_types field_type) { - double density = DEFAULT_RANGE_DENSITY; - if (low_val->is_null == 1 || right_val->is_null == 1) { - return ERR_SQL_SYNTAX_ERROR; - } - - enum_field_types field_type = cur_index_part->field->real_type(); + en_tse_compare_type cmp_result; + int64 result = 0; + double density = col_stat->density; + tse_cbo_column_hist_t *hist_infos = col_stat->column_hist; + for (uint32 i = 0; i < col_stat->hist_count; i++) { + cmp_result = compare(&hist_infos[i].ep_value, val, field_type); - double numerator, denominator; - switch(field_type) { - case MYSQL_TYPE_TINY: - case MYSQL_TYPE_SHORT: - case MYSQL_TYPE_LONG: - numerator = (int64_t)left_val->v_int - (int64_t)right_val->v_int; - denominator = (int64_t)high_val->v_int - (int64_t)low_val->v_int; - density = (double)numerator/(double)denominator; + if (cmp_result == EQUAL) { + result = (i == 0) ? hist_infos[i].ep_number : hist_infos[i].ep_number - hist_infos[i - 1].ep_number; break; - case MYSQL_TYPE_FLOAT: - case MYSQL_TYPE_DOUBLE: - numerator = left_val->v_real - right_val->v_real; - denominator = high_val->v_real - low_val->v_real; - density = (double)numerator/(double)denominator; - break; - case MYSQL_TYPE_LONGLONG: - numerator = (int64_t)left_val->v_bigint - (int64_t)right_val->v_bigint; - denominator = (int64_t)high_val->v_bigint - (int64_t)low_val->v_bigint; - density = (double)numerator/(double)denominator; - break; - default: + } else if (cmp_result == GREAT) { break; + } } - result->v_real = density; + uint32 end_pos = col_stat->hist_count - 1; + if (result > 0 && hist_infos[end_pos].ep_number > 0) { + density = (double)result / hist_infos[end_pos].ep_number; + } else if (result == 0) { + density = 0; + //calc_density_by_sample_rate(stmt, entity, &density); + } + return density; +} + +static double calc_balance_hist_equal_density(tse_cbo_stats_column_t *col_stat, cache_variant_t *val, + enum_field_types field_type) +{ + uint32 popular_count = 0; + en_tse_compare_type cmp_result; + tse_cbo_column_hist_t *hist_infos = col_stat->column_hist; + for (uint32 i = 0; i < col_stat->hist_count; i++) { + cmp_result = compare(&hist_infos[i].ep_value, val, field_type); - return CT_SUCCESS; + if (cmp_result == EQUAL) { + // ep_number is different from oracle, when compress balance histogram, need to change this + popular_count++; + } else if (cmp_result == GREAT) { + break; + } + } + if (popular_count > 1) { + return (double)popular_count / col_stat->num_buckets; + } + return col_stat->density; } -double calc_density_by_cond(tianchi_cbo_stats_t *cbo_stats, KEY *cur_index, - tse_range_key *tse_min_key, tse_range_key *tse_max_key, part_info_t part_info) { - double default_density = DEFAULT_RANGE_DENSITY; - KEY_PART_INFO cur_index_part = cur_index->key_part[0]; - uint32 col_id = cur_index_part.field->field_index(); - uint32_t part_id = part_info.part_id; - uint32_t subpart_id = part_info.subpart_id; - uint32_t total_part_count = IS_TSE_PART(subpart_id) ? part_info.part_num * part_info.subpart_num : part_info.part_num; +static double calc_equal_null_density(tse_cbo_stats_table_t cbo_stats, uint32 col_id, bool is_null) +{ + tse_cbo_stats_column_t *col_stat = &cbo_stats.columns[col_id]; + double density = (double)col_stat->num_null / cbo_stats.estimate_rows; + return is_null ? density : (double)1 - density; +} - if (!IS_TSE_PART(part_id) && cbo_stats->tse_cbo_stats_table.num_distincts[col_id] == 0) { - return DEFAULT_RANGE_DENSITY; +double calc_hist_equal_density(tse_cbo_stats_table_t cbo_stats, cache_variant_t *val, + uint32 col_id, enum_field_types field_type) +{ + tse_cbo_stats_column_t *col_stat = &cbo_stats.columns[col_id]; + double density = col_stat->density; + uint32 hist_count = col_stat->hist_count; + if (hist_count == 0) { + return density; + } + if (col_stat->hist_type == FREQUENCY_HIST) { + // HISTOGRAM_FREQUENCY + density = calc_frequency_hist_equal_density(col_stat, val, field_type); + } else { + // HISTOGRAM_BALANCE + density = calc_balance_hist_equal_density(col_stat, val, field_type); } + return density; +} - uint32 index_no = (IS_TSE_PART(part_id) & IS_TSE_PART(subpart_id)) ? - (total_part_count * col_id) + (part_id * part_info.subpart_num + subpart_id) : - total_part_count * col_id + part_id; +static double calc_hist_between_frequency(tse_cbo_stats_table_t cbo_stats, field_stats_val stats_val, enum_field_types field_type, uint32 col_id) +{ + tse_cbo_stats_column_t *col_stat = &cbo_stats.columns[col_id]; + double density = col_stat->density; + uint32 hist_count = col_stat->hist_count; + if (hist_count == 0) { + return density; + } - if (IS_TSE_PART(part_id) && cbo_stats->tse_cbo_stats_table.part_table_num_distincts[index_no] == 0) { - return DEFAULT_RANGE_DENSITY; + tse_cbo_column_hist_t *hist_infos = col_stat->column_hist; + uint32 end_pos = hist_count - 1; + int64 total_nums = hist_infos[end_pos].ep_number; + int64 low_nums = 0; + int64 high_nums = total_nums; + en_tse_compare_type cmp_result; + + // HISTOGRAM_FREQUNCEY + for (uint32 i = 0; i < hist_count; i++) { + + cmp_result = compare(&hist_infos[i].ep_value, stats_val.min_key_val, field_type); + if ((stats_val.min_type == CMP_TYPE_CLOSE_INTERNAL && (cmp_result == GREAT || cmp_result == EQUAL)) + || (stats_val.min_type == CMP_TYPE_OPEN_INTERNAL && cmp_result == GREAT)) { + if (i > 0) { + low_nums = hist_infos[i - 1].ep_number; + } + low_nums = total_nums - low_nums; + break; + } } - cache_variant_t *low_val, *high_val; - cache_variant_t result; + for (uint32 i = 0; i < hist_count; i++) { - if (IS_TSE_PART(part_id)) { - low_val = &(cbo_stats->tse_cbo_stats_table.part_table_low_values[index_no]); - high_val = &(cbo_stats->tse_cbo_stats_table.part_table_high_values[index_no]); - } else { - low_val = cbo_stats->tse_cbo_stats_table.low_values + col_id; - high_val = cbo_stats->tse_cbo_stats_table.high_values + col_id; + cmp_result = compare(&hist_infos[i].ep_value, stats_val.max_key_val, field_type); + + if ((stats_val.max_type == CMP_TYPE_OPEN_INTERNAL && (cmp_result == GREAT || cmp_result == EQUAL)) + || (stats_val.max_type == CMP_TYPE_CLOSE_INTERNAL && cmp_result == GREAT)) { + high_nums = (i == 0) ? 0 : hist_infos[i - 1].ep_number; + break; + } } - if (tse_min_key->cmp_type == CMP_TYPE_GREAT && tse_max_key->cmp_type == CMP_TYPE_LESS) { - cache_variant_t min_key_val, max_key_val; - r_key2variant(tse_min_key, &cur_index_part, &min_key_val); - r_key2variant(tse_max_key, &cur_index_part, &max_key_val); + if (total_nums > 0) { + return ((double)(low_nums + high_nums - total_nums) / total_nums) ; + } else { + return density; + } + +} - if (calc_density_low(&cur_index_part, high_val, low_val, &max_key_val, &min_key_val, &result) != CT_SUCCESS) { - return default_density; - } - return eval_density_result(&result); +static int calc_hist_range_boundary(field_stats_val stats_val, enum_field_types field_type, tse_cbo_stats_column_t *col_stat, +double *percent) +{ + en_tse_compare_type cmp_result; + uint32 i, lo_pos, hi_pos; + uint32 hist_count = col_stat->hist_count; + tse_cbo_column_hist_t *hist_infos = col_stat->column_hist; + + + lo_pos = hi_pos = hist_count - 1; + + for (i = 0; i < hist_count; i++) { + cmp_result = compare(&hist_infos[i].ep_value, stats_val.min_key_val, field_type); + + if (cmp_result == GREAT) { + lo_pos = i; + break; + } } - if (tse_min_key->cmp_type == CMP_TYPE_GREAT) { - cache_variant_t min_key_val; - r_key2variant(tse_min_key, &cur_index_part, &min_key_val); + if (stats_val.min_type == CMP_TYPE_CLOSE_INTERNAL) { + *percent += calc_balance_hist_equal_density(col_stat, stats_val.min_key_val, field_type); + } - if (calc_density_low(&cur_index_part, high_val, low_val, high_val, &min_key_val, &result) != CT_SUCCESS) { - return default_density; - } - return eval_density_result(&result); + for (i = lo_pos; i < hist_count; i++) { + cmp_result = compare(&hist_infos[i].ep_value, stats_val.max_key_val, field_type); + + if (cmp_result == GREAT || cmp_result == EQUAL) { + hi_pos = i; + break; + } + } + + if (stats_val.max_type == CMP_TYPE_CLOSE_INTERNAL) { + *percent += calc_balance_hist_equal_density(col_stat, stats_val.max_key_val, field_type); } - if (tse_max_key->cmp_type == CMP_TYPE_LESS) { - cache_variant_t max_key_val; - r_key2variant(tse_max_key, &cur_index_part, &max_key_val); + return hi_pos - lo_pos; +} - if (calc_density_low(&cur_index_part, high_val, low_val, &max_key_val, low_val, &result) != CT_SUCCESS) { - return default_density; - } - return eval_density_result(&result); +static double calc_hist_between_balance(tse_cbo_stats_table_t cbo_stats, field_stats_val stats_val, enum_field_types field_type, uint32 col_id) +{ + tse_cbo_stats_column_t *col_stat = &cbo_stats.columns[col_id]; + double density = col_stat->density; + uint32 hist_count = col_stat->hist_count; + if (hist_count == 0) { + return density; } + double percent = 0; - return default_density; + int bucket_range = calc_hist_range_boundary(stats_val, field_type, col_stat, &percent); + + density = (double)bucket_range / col_stat->num_buckets + percent; + return density; } -double calc_density_one_table(uint16_t idx_id, tse_range_key *min_key, tse_range_key *max_key, - part_info_t part_info, tianchi_cbo_stats_t *cbo_stats, const TABLE &table) +static double calc_hist_between_density(tse_cbo_stats_table_t cbo_stats, + uint32 col_id, enum_field_types field_type, field_stats_val stats_val) { - double density = DEFAULT_RANGE_DENSITY; - if (!cbo_stats->is_updated) { - tse_log_debug("table %s has not been analyzed", table.alias); - return density; + double density; + tse_cbo_stats_column_t *col_stat = &cbo_stats.columns[col_id]; + if (col_stat->hist_type == FREQUENCY_HIST) { + // HISTOGRAM_FREQUENCY + density = calc_hist_between_frequency(cbo_stats, stats_val, field_type, col_id); + } else { + // HISTOGRAM_BALANCE + density = calc_hist_between_balance(cbo_stats, stats_val, field_type, col_id); + } + return density; +} + +double calc_density_by_cond(tse_cbo_stats_table_t cbo_stats, KEY_PART_INFO cur_index_part, tse_range_key *key, + uint32_t key_offset) +{ + double density = DEFAULT_RANGE_DENSITY; + uint32 col_id = cur_index_part.field->field_index(); + tse_key *min_key = key->min_key; + tse_key *max_key = key->max_key; + + if (cur_index_part.field->is_nullable()) { + if (*(min_key->key + key_offset) == 1 && max_key->cmp_type == CMP_TYPE_NULL) { + return calc_equal_null_density(cbo_stats, col_id, false); } - uint32_t part_id = part_info.part_id; - uint32_t subpart_id = part_info.subpart_id; - uint32_t total_part_count = IS_TSE_PART(subpart_id) ? part_info.part_num * part_info.subpart_num : part_info.part_num; - - uint32 col_id; - if (min_key->cmp_type == CMP_TYPE_EQUAL) { - double col_product = 1.0; - uint64_t col_map = min_key->col_map; - KEY cur_index = table.key_info[idx_id]; - /* - * For all columns in used index, - * density = 1.0 / (column[0]->num_distinct * ... * column[n]->num_distinct) - */ - for (uint32_t idx_col_num = 0; idx_col_num < cur_index.actual_key_parts; idx_col_num++) { - if (col_map & ((uint64_t)1 << idx_col_num)) { - KEY_PART_INFO cur_index_part = cur_index.key_part[idx_col_num]; - col_id = cur_index_part.field->field_index(); - if (!IS_TSE_PART(part_id) && cbo_stats->tse_cbo_stats_table.num_distincts[col_id] != 0) { - col_product = col_product * cbo_stats->tse_cbo_stats_table.num_distincts[col_id]; - } - - uint32 index_no = (IS_TSE_PART(part_id) & IS_TSE_PART(subpart_id)) ? - (total_part_count * col_id) + (part_id * part_info.subpart_num + subpart_id) : - total_part_count * col_id + part_id; - - if (IS_TSE_PART(part_id) && cbo_stats->tse_cbo_stats_table.part_table_num_distincts[index_no] != 0) { - col_product = col_product * cbo_stats->tse_cbo_stats_table.part_table_num_distincts[index_no]; - } - } - } - density = 1.0 / col_product; - } else { - KEY cur_index = table.key_info[idx_id]; - density = calc_density_by_cond(cbo_stats, &cur_index, min_key, max_key, part_info); + + if (*(min_key->key + key_offset) == 1 && *(max_key->key + key_offset) == 1) { + return calc_equal_null_density(cbo_stats, col_id, true); } - /* - * This is a safe-guard logic since we don't handle tse call error in this method, - * we need this to make sure that our optimizer continue to work even when we - * miscalculated the density, and it's still prefer index read - */ - if (density < 0.0 || density > 1.0) { - density = PREFER_RANGE_DENSITY; + } + + cache_variant_t *low_val; + cache_variant_t *high_val; + + low_val = &cbo_stats.columns[col_id].low_value; + high_val = &cbo_stats.columns[col_id].high_value; + + cache_variant_t min_key_val; + cache_variant_t max_key_val; + r_key2variant(min_key, &cur_index_part, &min_key_val, low_val, key_offset); + r_key2variant(max_key, &cur_index_part, &max_key_val, high_val, key_offset); + enum_field_types field_type = cur_index_part.field->real_type(); + if (compare(&max_key_val, low_val, field_type) == LESS || compare(&min_key_val, high_val, field_type) == GREAT) { + return 0; + } + en_tse_compare_type comapare_value = compare(&max_key_val, &min_key_val, field_type); + if (comapare_value == EQUAL && min_key->cmp_type == CMP_TYPE_CLOSE_INTERNAL && + max_key->cmp_type == CMP_TYPE_CLOSE_INTERNAL) { + return calc_hist_equal_density(cbo_stats, &max_key_val, col_id, field_type); + } else if (comapare_value == UNCOMPARABLE) { + return DEFAULT_RANGE_DENSITY; + } else if (comapare_value == LESS) { + return 0; + } + + field_stats_val stats_val = {min_key->cmp_type, max_key->cmp_type, &max_key_val, &min_key_val}; + density = calc_hist_between_density(cbo_stats, col_id, field_type, stats_val); + + return density; +} + +double calc_density_one_table(uint16_t idx_id, tse_range_key *key, + tse_cbo_stats_table_t cbo_stats, const TABLE &table) +{ + double density = 1.0; + uint32 col_id; + uint32_t key_offset = 0;//列在索引中的偏移量 + uint64_t col_map = max(key->min_key->col_map, key->max_key->col_map); + uint32_t key_len = max(key->min_key->len, key->max_key->len); + KEY cur_index = table.key_info[idx_id]; + + /* + * For all columns in used index, + * density = 1.0 / (column[0]->num_distinct * ... * column[n]->num_distinct) + */ + for (uint32_t idx_col_num = 0; idx_col_num < cur_index.actual_key_parts; idx_col_num++) { + double col_product = 1.0; + if (col_map & ((uint64_t)1 << idx_col_num)) { + KEY_PART_INFO cur_index_part = cur_index.key_part[idx_col_num]; + col_id = cur_index_part.field->field_index(); + uint32_t offset = cur_index_part.field->is_nullable() ? 1 : 0;//null值标记位 + + if (cbo_stats.columns[col_id].total_rows == 0) { //空表 + col_product = 0; + } else if (key_offset + offset + cur_index_part.field->key_length() == key_len) {// + col_product = calc_density_by_cond(cbo_stats, cur_index_part, key, key_offset); + } else if ((offset == 1) && *(key->min_key->key + key_offset) == 1) { //null值 + col_product = calc_equal_null_density(cbo_stats, col_id, true); + } else { + col_product = calc_density_by_cond(cbo_stats, cur_index_part, key, key_offset);//联合索引 + // col_product = calc_equal_density(part_info, QUERY_TYPE_EQUAL, cbo_stats, col_id); + // col_product = calc_hist_equal_density(cbo_stats, &max_key_val, col_id, field_type); + } + col_product = eval_density_result(col_product); + key_offset += (offset + cur_index_part.field->key_length()); + density = density * col_product; } - return density; -} \ No newline at end of file + } + + /* + * This is a safe-guard logic since we don't handle tse call error in this method, + * we need this to make sure that our optimizer continue to work even when we + * miscalculated the density, and it's still prefer index read + */ + if (density < 0.0 || density > 1.0) { + density = PREFER_RANGE_DENSITY; + } + return density; +} + diff --git a/storage/tianchi/tse_cbo.h b/storage/tianchi/tse_cbo.h index 4fee771cc21de10fdc1e0cfca1d6d679dee0d01d..ee2452c444dfd8a0a2cd8b2579bcab22ad4425b5 100644 --- a/storage/tianchi/tse_cbo.h +++ b/storage/tianchi/tse_cbo.h @@ -23,22 +23,46 @@ #include "sql/dd/types/table.h" #include "srv_mq_msg.h" +#define REAL_EPSINON 0.00001 + +typedef enum en_tse_compare_type { + GREAT = 0, + EQUAL, + LESS, + UNCOMPARABLE +} compare_type; + +typedef enum en_tse_query_type { + QUERY_TYPE_NULL = 0, + QUERY_TYPE_NOT_NULL, + QUERY_TYPE_EQUAL +} query_type; typedef enum en_tse_cmp_type { - CMP_TYPE_UNKNOWN = 0, - CMP_TYPE_EQUAL, - CMP_TYPE_GREAT, - CMP_TYPE_LESS + CMP_TYPE_NULL = 0, + CMP_TYPE_OPEN_INTERNAL, + CMP_TYPE_CLOSE_INTERNAL } tse_cmp_type_t; - -/* range key type */ + typedef struct { - const char *key; + const uchar *key; uint len; tse_cmp_type_t cmp_type; uint64_t col_map; +} tse_key; + +typedef struct { + tse_key *min_key; + tse_key *max_key; } tse_range_key; +typedef struct { + tse_cmp_type_t min_type; + tse_cmp_type_t max_type; + cache_variant_t *max_key_val; + cache_variant_t *min_key_val; +} field_stats_val; + typedef struct { uint32_t part_id; uint32_t subpart_id; @@ -46,7 +70,7 @@ typedef struct { uint32_t subpart_num; } part_info_t; -double calc_density_one_table(uint16_t idx_id, tse_range_key *min_key, tse_range_key *max_key, - part_info_t part_info, tianchi_cbo_stats_t *cbo_stats, const TABLE &table); +double calc_density_one_table(uint16_t idx_id, tse_range_key *key, + tse_cbo_stats_table_t cbo_stats, const TABLE &table); #endif diff --git a/storage/tianchi/tse_srv.h b/storage/tianchi/tse_srv.h index c0edada345d6cd879aebbc05fff56ef7698de33d..eb8ec85f0ab43a2427a9314922503851a188a7c1 100644 --- a/storage/tianchi/tse_srv.h +++ b/storage/tianchi/tse_srv.h @@ -33,6 +33,8 @@ extern "C" { #define SENSI_INFO #endif +#define CBO_PART_MEM_RESIDUAL 1000 +#define STATS_HISTGRAM_MAX_SIZE 254 #define SMALL_RECORD_SIZE 128 // 表名、库名等长度不会特别大,取128 #define ERROR_MESSAGE_LEN 512 #define MAX_DDL_SQL_LEN_CONTEXT (129024) // 126kb, 预留2kb @@ -55,6 +57,7 @@ extern "C" { #define IS_TSE_PART(part_id) ((part_id) < (PART_CURSOR_NUM)) #define MAX_BULK_INSERT_PART_ROWS 128 #define SESSION_CURSOR_NUM (8192 * 2) +#define MAX_MESSAGE_SIZE 52428800 // 共享内存最大可申请空间大小 // for broadcast_req.options #define TSE_SET_VARIABLE_PERSIST (0x1 << 8) @@ -122,39 +125,50 @@ typedef struct cache_st_variant { }; } cache_variant_t; +typedef enum { + FREQUENCY_HIST = 0, + HEIGHT_BALANCED_HIST = 1, +} tse_cbo_hist_type_t; + +typedef struct { + cache_variant_t ep_value; + int ep_number; +} tse_cbo_column_hist_t; + +typedef struct { + uint32_t total_rows; + uint32_t num_buckets; + uint32_t num_distinct; + uint32_t num_null; + double density; + tse_cbo_hist_type_t hist_type; + uint32_t hist_count; + tse_cbo_column_hist_t column_hist[STATS_HISTGRAM_MAX_SIZE]; // Column histogram statistics (array) + cache_variant_t low_value; + cache_variant_t high_value; +} tse_cbo_stats_column_t; + /** * cache info that can expand this struct * if need more cbo stats cache */ typedef struct { - uint32_t max_col_id; - uint32_t *num_distincts; - cache_variant_t *low_values; - cache_variant_t *high_values; - uint32_t max_part_no; // the part no of max rows num part - uint32_t *part_table_num_distincts; - cache_variant_t *part_table_low_values; - cache_variant_t *part_table_high_values; + uint32_t estimate_rows; + tse_cbo_stats_column_t *columns; } tse_cbo_stats_table_t; -typedef struct { - uint32_t rows_and_blocks_size; - uint32_t num_distinct_size; - uint32_t low_value_size; - uint32_t high_value_size; -} tianchi_part_table_cbo_info_t; - /* * statistics information that mysql optimizer need * expand this struct if need more cbo stats */ typedef struct { - uint32_t estimate_rows; - uint32_t estimate_blocks; + uint16_t first_partid; + uint16_t num_part_fetch; + uint32_t part_cnt; + uint32_t msg_len; bool is_updated; tse_cbo_stats_table_t tse_cbo_stats_table; - tianchi_part_table_cbo_info_t part_table_info; - uint32_t estimate_part_rows_and_blocks[0]; + tse_cbo_stats_table_t *tse_cbo_stats_part_table; } tianchi_cbo_stats_t; #pragma pack() @@ -289,7 +303,6 @@ enum TSE_FUNC_TYPE { TSE_FUNC_TYPE_POSITION, TSE_FUNC_TYPE_DELETE_ALL_ROWS, TSE_FUNC_TYPE_GET_CBO_STATS, - TSE_FUNC_TYPE_GET_HUGE_PART_TABLE_CBO_STATS, TSE_FUNC_TYPE_WRITE_LOB, TSE_FUNC_TYPE_READ_LOB, TSE_FUNC_TYPE_CREATE_TABLE, diff --git a/storage/tianchi/tse_srv_mq_stub.cc b/storage/tianchi/tse_srv_mq_stub.cc index 6aaaa24740e4d14a2bfac8a5dc487001e420689d..35b9ba640e21dc32da92fd7220e07b4224d889ae 100644 --- a/storage/tianchi/tse_srv_mq_stub.cc +++ b/storage/tianchi/tse_srv_mq_stub.cc @@ -203,7 +203,6 @@ int tse_update_row(tianchi_handler_t *tch, uint16_t new_record_len, const uint8_ req->col_num = col_num; req->new_record = const_cast(new_record); req->flag = flag; - memcpy(req->upd_cols, upd_cols, sizeof(uint16_t) * col_num); int result = ERR_CONNECTION_FAILED; int ret = tse_mq_deal_func(shm_inst, TSE_FUNC_TYPE_UPDATE_ROW, req, tch->msg_buf); @@ -740,64 +739,65 @@ int tse_analyze_table(tianchi_handler_t *tch, const char *db_name, const char *t return result; } -int tse_get_huge_part_table_cbo_stats(tianchi_handler_t *tch, tianchi_cbo_stats_t *stats) { - void *shm_inst = get_one_shm_inst(tch); - uint32_t request_size = sizeof(get_cbo_stats_request) + sizeof(tianchi_cbo_stats_t) - + stats->part_table_info.rows_and_blocks_size; - uint32_t req_size = request_size + stats->part_table_info.high_value_size - + stats->part_table_info.num_distinct_size - + stats->part_table_info.low_value_size; - uint8_t *req_buf = new uint8_t[req_size]; - get_cbo_stats_request *req = (get_cbo_stats_request *)req_buf; - req->tch = *tch; - memcpy(req_buf + sizeof(get_cbo_stats_request), stats, - sizeof(tianchi_cbo_stats_t) + stats->part_table_info.rows_and_blocks_size); - uint8_t *stats_offset = req_buf + sizeof(get_cbo_stats_request); - req->stats = (tianchi_cbo_stats_t *)stats_offset; - - int result = ERR_CONNECTION_FAILED; - int ret = tse_mq_batch_send_message(shm_inst, TSE_FUNC_TYPE_GET_HUGE_PART_TABLE_CBO_STATS, req_buf, - request_size, req_size); - if (ret != CT_SUCCESS) { - result = ret; - tse_log_error("tse_mq_batch_send_message failed in get_huge_part_table_cbo_stats: %d", ret); - } else if (req->result == CT_SUCCESS) { - // 此时req指向的参天区域,需要将其指向mysql数据区 - req->stats = (tianchi_cbo_stats_t *)stats_offset; - req->stats->tse_cbo_stats_table.part_table_num_distincts = stats->tse_cbo_stats_table.part_table_num_distincts; - req->stats->tse_cbo_stats_table.part_table_low_values = stats->tse_cbo_stats_table.part_table_low_values; - req->stats->tse_cbo_stats_table.part_table_high_values = stats->tse_cbo_stats_table.part_table_high_values; - - *tch = req->tch; - memcpy(stats, req_buf + sizeof(get_cbo_stats_request), req_size - sizeof(get_cbo_stats_request)); - result = req->result; - } - delete[] req_buf; - return result; -} - int tse_get_cbo_stats(tianchi_handler_t *tch, tianchi_cbo_stats_t *stats) { - if (stats->part_table_info.low_value_size > MAX_MESSAGE_SIZE) { - return tse_get_huge_part_table_cbo_stats(tch, stats); - } void *shm_inst = get_one_shm_inst(tch); get_cbo_stats_request *req = (get_cbo_stats_request*)alloc_share_mem(shm_inst, sizeof(get_cbo_stats_request)); + if (req == NULL) { tse_log_error("alloc shm mem error, shm_inst(%p), size(%lu)", shm_inst, sizeof(get_cbo_stats_request)); return ERR_ALLOC_MEMORY; } - req->tch = *tch; - req->stats = stats; + req->stats = (tianchi_cbo_stats_t *)alloc_share_mem(shm_inst, sizeof(tianchi_cbo_stats_t)); + if (req->stats == NULL) { + tse_log_error("alloc shm mem error, shm_inst(%p), size(%lu)", shm_inst, sizeof(get_cbo_stats_request)); + return ERR_ALLOC_MEMORY; + } + + bool is_part_table = stats->tse_cbo_stats_part_table != nullptr ? true : false; + req->stats->msg_len = stats->msg_len; + if (!is_part_table) { + req->stats->tse_cbo_stats_table.columns = (tse_cbo_stats_column_t*)alloc_share_mem(shm_inst, req->stats->msg_len); + } else { + req->stats->first_partid = stats->first_partid; + req->stats->num_part_fetch = stats->num_part_fetch; + req->stats->tse_cbo_stats_part_table = + (tse_cbo_stats_table_t*)alloc_share_mem(shm_inst, stats->num_part_fetch * sizeof(tse_cbo_stats_table_t)); + for (uint i = 0; i < stats->num_part_fetch; i++) { + req->stats->tse_cbo_stats_part_table[i].columns = + (tse_cbo_stats_column_t*)alloc_share_mem(shm_inst, stats->msg_len); + } + } + + req->tch = *tch; int result = ERR_CONNECTION_FAILED; int ret = tse_mq_deal_func(shm_inst, TSE_FUNC_TYPE_GET_CBO_STATS, req, tch->msg_buf); if (ret == CT_SUCCESS) { if (req->result == CT_SUCCESS) { - *tch = req->tch; - stats = req->stats; + if (!is_part_table) { + *tch = req->tch; + memcpy(stats->tse_cbo_stats_table.columns, req->stats->tse_cbo_stats_table.columns, stats->msg_len); + stats->is_updated = req->stats->is_updated; + stats->tse_cbo_stats_table.estimate_rows = req->stats->tse_cbo_stats_table.estimate_rows; + } else { + stats->is_updated = req->stats->is_updated; + for (uint i = 0; i < stats->num_part_fetch; i++) { + stats->tse_cbo_stats_part_table[i+stats->first_partid].estimate_rows = req->stats->tse_cbo_stats_part_table[i].estimate_rows; + memcpy(stats->tse_cbo_stats_part_table[i+stats->first_partid].columns, req->stats->tse_cbo_stats_part_table[i].columns, stats->msg_len); + } + } } result = req->result; } + if (!is_part_table) { + free_share_mem(shm_inst, req->stats->tse_cbo_stats_table.columns); + } else { + for (uint i = 0; i < stats->num_part_fetch; i++) { + free_share_mem(shm_inst, req->stats->tse_cbo_stats_part_table[i].columns); + } + free_share_mem(shm_inst, req->stats->tse_cbo_stats_part_table); + } + free_share_mem(shm_inst, req->stats); free_share_mem(shm_inst, req); return result; } @@ -1457,25 +1457,3 @@ int ctc_record_sql_for_cantian(tianchi_handler_t *tch, tse_ddl_broadcast_request free_share_mem(shm_inst, req); return result; } - -int tse_query_cluster_role(bool *is_slave, bool *cantian_cluster_ready) { - void *shm_inst = get_one_shm_inst(NULL); - query_cluster_role_request *req = (query_cluster_role_request*) alloc_share_mem(shm_inst, sizeof(query_cluster_role_request)); - DBUG_EXECUTE_IF("check_init_shm_oom", { req = NULL; }); - if (req == NULL) { - tse_log_error("alloc shm mem error, shm_inst(%p), size(%lu)", shm_inst, sizeof(query_cluster_role_request)); - return ERR_ALLOC_MEMORY; - } - - int result = ERR_CONNECTION_FAILED; - int ret = tse_mq_deal_func(shm_inst, TSE_FUNC_QUERY_CLUSTER_ROLE, req, nullptr); - if (ret == CT_SUCCESS) { - result = req->result; - *is_slave = req->is_slave; - *cantian_cluster_ready = req->cluster_ready; - } - free_share_mem(shm_inst, req); - tse_log_system("[Disaster Recovery] is_slave: %d", *is_slave); - - return result; -} diff --git a/storage/tianchi/tse_stats.cc b/storage/tianchi/tse_stats.cc index 90827113109bd7770a95c18dea39af7e9302a314..5c1560cdc78bd43aad73784e657ddf6d7b222ba7 100644 --- a/storage/tianchi/tse_stats.cc +++ b/storage/tianchi/tse_stats.cc @@ -49,7 +49,6 @@ const char *ctc_interface_strs[] = { "TSE_FUNC_TYPE_POSITION", "TSE_FUNC_TYPE_DELETE_ALL_ROWS", "TSE_FUNC_TYPE_GET_CBO_STATS", - "TSE_FUNC_TYPE_GET_HUGE_PART_TABLE_CBO_STATS", "TSE_FUNC_TYPE_WRITE_LOB", "TSE_FUNC_TYPE_READ_LOB", "TSE_FUNC_TYPE_CREATE_TABLE",