From c18b5566594718e5526f2f369879263c8afd7df8 Mon Sep 17 00:00:00 2001
From: Jiachen1018
Date: Thu, 21 Mar 2024 15:59:27 +0800
Subject: [PATCH] cbo range

---
 storage/tianchi/tse_cbo.cc | 47 ++++++++++++++++++++++++++++++++------
 1 file changed, 40 insertions(+), 7 deletions(-)

diff --git a/storage/tianchi/tse_cbo.cc b/storage/tianchi/tse_cbo.cc
index 7bb6391..02a5dcb 100644
--- a/storage/tianchi/tse_cbo.cc
+++ b/storage/tianchi/tse_cbo.cc
@@ -235,15 +235,40 @@ static double calc_hist_between_frequency(tse_cbo_stats_table_t cbo_stats, field
 }


+double percent_in_bucket(cache_variant_t *max_val, cache_variant_t *val, cache_variant_t *min_val, enum_field_types field_type)
+{   // Computes (val - min_val) / (max_val - min_val) for the numeric field types handled in the switch below.
+    if (compare(max_val, min_val, field_type) == EQUAL) {   // presumably max_val == min_val; returning early also avoids a zero denominator below
+        return 1;
+    }
+    double percent = 0;
+    switch(field_type) {
+        case MYSQL_TYPE_TINY:
+        case MYSQL_TYPE_SHORT:
+        case MYSQL_TYPE_LONG:
+            percent = (double)(val->v_int - min_val->v_int) / (max_val->v_int - min_val->v_int);   // NOTE(review): both subtractions run in v_int's own type before the cast/division; confirm they cannot overflow
+            break;
+        case MYSQL_TYPE_FLOAT:
+        case MYSQL_TYPE_DOUBLE:
+            percent = (double)(val->v_real - min_val->v_real) / (max_val->v_real - min_val->v_real);
+            break;
+        case MYSQL_TYPE_LONGLONG:
+            percent = (double)(val->v_bigint - min_val->v_bigint) / (max_val->v_bigint - min_val->v_bigint);   // NOTE(review): same pre-cast subtraction as the v_int case; confirm the range of v_bigint
+            break;
+        default:
+            return DEFAULT_RANGE_DENSITY;   // any other field type: no interpolation, use the fixed default
+    }
+
+    return percent;   // NOTE(review): not clamped; lies outside [0, 1] whenever val is outside [min_val, max_val]
+}
+
 static int calc_hist_range_boundary(field_stats_val stats_val, enum_field_types field_type, tse_cbo_stats_column_t *col_stat,
-double *percent)
+                                    double *percent, cache_variant_t *low_val)   // low_val: lower bound paired with the first histogram endpoint; reassigned as the loops advance
 {
     en_tse_compare_type cmp_result;
     uint32 i, lo_pos, hi_pos;
     uint32 hist_count = col_stat->hist_count;
     tse_cbo_column_hist_t *hist_infos = col_stat->column_hist;

-
     lo_pos = hi_pos = hist_count - 1;

     for (i = 0; i < hist_count; i++) {
@@ -251,12 +276,10 @@ double *percent)

         if (cmp_result == GREAT) {
             lo_pos = i;
+            *percent -= percent_in_bucket(&hist_infos[i].ep_value, stats_val.min_key_val, low_val, field_type);   // subtract the fraction of [low_val, ep_value] that lies below min_key_val
             break;
         }
-    }
-
-    if (stats_val.min_type == CMP_TYPE_CLOSE_INTERNAL) {
-        *percent += calc_balance_hist_equal_density(col_stat, stats_val.min_key_val, field_type);
+        low_val = &hist_infos[i].ep_value;   // this endpoint becomes the lower bound for the next iteration
     }

     for (i = lo_pos; i < hist_count; i++) {
@@ -264,8 +287,18 @@ double *percent)

         if (cmp_result == GREAT || cmp_result == EQUAL) {
             hi_pos = i;
+            *percent += percent_in_bucket(&hist_infos[i].ep_value, stats_val.max_key_val, low_val, field_type);   // add the fraction of [low_val, ep_value] that lies below max_key_val
             break;
         }
+        low_val = &hist_infos[i].ep_value;   // this endpoint becomes the lower bound for the next iteration
+    }
+
+    if (col_stat->num_buckets > 0) {   // skip the division when the bucket count is zero
+        *percent = *percent / col_stat->num_buckets;   // divide the accumulated in-bucket fractions by the bucket count
+    }
+
+    if (stats_val.min_type == CMP_TYPE_CLOSE_INTERNAL) {   // moved below the division above, so this term is not divided by num_buckets
+        *percent += calc_balance_hist_equal_density(col_stat, stats_val.min_key_val, field_type);
     }

     if (stats_val.max_type == CMP_TYPE_CLOSE_INTERNAL) {
@@ -285,7 +318,7 @@ static double calc_hist_between_balance(tse_cbo_stats_table_t cbo_stats, field_s
     }

     double percent = 0;
-    int bucket_range = calc_hist_range_boundary(stats_val, field_type, col_stat, &percent);
+    int bucket_range = calc_hist_range_boundary(stats_val, field_type, col_stat, &percent, &cbo_stats.columns[col_id].low_value);   // pass the col_id column's low_value as the initial low_val
     density = (double)bucket_range / col_stat->num_buckets + percent;

     return density;
-- 
Gitee