diff --git a/src/common/backend/catalog/builtin_funcs.ini b/src/common/backend/catalog/builtin_funcs.ini index 37e57f5f0bf6941cdd0bc9559e1ad484aeaa9857..67596e1cf1a125064102f6a1f0a3c880b8b163ed 100644 --- a/src/common/backend/catalog/builtin_funcs.ini +++ b/src/common/backend/catalog/builtin_funcs.ini @@ -5254,6 +5254,23 @@ AddBuiltinFunc(_0(2311), _1("md5"), _2(1), _3(true), _4(false), _5(md5_text), _6(25), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 25), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("md5_text"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false)), AddBuiltinFunc(_0(2321), _1("md5"), _2(1), _3(true), _4(false), _5(md5_bytea), _6(25), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 17), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("md5_bytea"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false)) ), + AddFuncGroup( + "median", 2, + AddBuiltinFunc(_0(5555), _1("median"), _2(1), _3(false), _4(false), _5(aggregate_dummy), _6(701), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(true), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 701), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("aggregate_dummy"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false)), + AddBuiltinFunc(_0(5556), _1("median"), _2(1), _3(false), _4(false), _5(aggregate_dummy), _6(1186), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(true), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 1186), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("aggregate_dummy"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false)) + ), + AddFuncGroup( + "median_float8_finalfn", 1, + AddBuiltinFunc(_0(5557), _1("median_float8_finalfn"), _2(1), _3(false), _4(false), _5(median_float8_finalfn), _6(701), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 2281), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("median_float8_finalfn"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false)) + ), + AddFuncGroup( + "median_interval_finalfn", 1, + AddBuiltinFunc(_0(5558), _1("median_interval_finalfn"), _2(1), _3(false), _4(false), _5(median_interval_finalfn), _6(1186), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 2281), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("median_interval_finalfn"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false)) + ), + AddFuncGroup( + "median_transfn", 1, + AddBuiltinFunc(_0(5559), _1("median_transfn"), _2(2), _3(false), _4(false), _5(median_transfn), _6(2281), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(2, 2281, 2276), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("median_transfn"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false)), + ), AddFuncGroup( "min", 21, AddBuiltinFunc(_0(2051), _1("min"), _2(1), _3(false), _4(false), _5(aggregate_dummy), _6(2277), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(true), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 2277), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("aggregate_dummy"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false)), diff --git a/src/common/backend/utils/adt/Makefile b/src/common/backend/utils/adt/Makefile index 21c850f61979b94e19fdc3b22f0fd4f38107559c..a4eb54dd667bc5e1154d84fb9bc2405ec556c304 100755 --- a/src/common/backend/utils/adt/Makefile +++ b/src/common/backend/utils/adt/Makefile @@ -37,7 +37,8 @@ OBJS = acl.o arrayfuncs.o array_selfuncs.o array_typanalyze.o \ tsginidx.o tsgistidx.o tsquery.o tsquery_cleanup.o tsquery_gist.o \ tsquery_op.o tsquery_rewrite.o tsquery_util.o tsrank.o \ tsvector.o tsvector_op.o tsvector_parser.o \ - txid.o uuid.o windowfuncs.o xml.o extended_statistics.o + txid.o uuid.o windowfuncs.o xml.o extended_statistics.o \ + median_aggs.o like.o: like.cpp like_match.cpp diff --git a/src/common/backend/utils/adt/float.cpp b/src/common/backend/utils/adt/float.cpp index c66aa67fb86f724e3279efc7079007b4f54c455e..94ddfac98a23d11c0511472b8b24f6593791dc2f 100755 --- a/src/common/backend/utils/adt/float.cpp +++ b/src/common/backend/utils/adt/float.cpp @@ -61,7 +61,7 @@ static const uint32 nan[2] = {0xffffffff, 0x7fffffff}; /* ========== USER I/O ROUTINES ========== */ static int float4_cmp_internal(float4 a, float4 b); -static int float8_cmp_internal(float8 a, float8 b); +int float8_cmp_internal(float8 a, float8 b); #ifndef HAVE_CBRT /* @@ -972,7 +972,7 @@ Datum btfloat4sortsupport(PG_FUNCTION_ARGS) /* * float8{eq,ne,lt,le,gt,ge} - float8/float8 comparison operations */ -static int float8_cmp_internal(float8 a, float8 b) +int float8_cmp_internal(float8 a, float8 b) { /* * We consider all NANs to be equal and larger than any non-NAN. This is diff --git a/src/common/backend/utils/adt/median_aggs.cpp b/src/common/backend/utils/adt/median_aggs.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1ecf5029cd769494099379bdfcacfa023dbd421b --- /dev/null +++ b/src/common/backend/utils/adt/median_aggs.cpp @@ -0,0 +1,241 @@ +/* + * Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + * ------------------------------------------------------------------------- + * + * median_aggs.cpp + * Aggregate for computing the statistical median + * + * IDENTIFICATION + * src/common/backend/utils/adt/median_aggs.cpp + * + * ------------------------------------------------------------------------- + */ +#include "postgres.h" +#include "knl/knl_variable.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/median_aggs.h" +#include "utils/memutils.h" +#include "utils/timestamp.h" + +/* + * working state for median aggregation + */ +typedef struct MedianBuildState { + MemoryContext mcontext; /* where all the temp stuff is kept */ + Datum* dvalues; /* array of accumulated Datums */ + uint32 maxlen; /* allocated length of above arrays */ + uint32 nelems; /* number of valid entries in above arrays */ + Oid dtype; /* data type of the Datums */ + int16 typlen; /* needed info about datatype */ + bool typbyval; + char typalign; +} MedianBuildState; + +static MedianBuildState* CreateMedianBuildState(Oid elemType, MemoryContext aggCtx) +{ + MemoryContext medianCtx, oldCtx; + + /* Make a temporary context to hold all the junk */ + medianCtx = AllocSetContextCreate(aggCtx, "AccumMedianSet", ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); + oldCtx = MemoryContextSwitchTo(medianCtx); + + MedianBuildState* mstate = (MedianBuildState*)palloc(sizeof(MedianBuildState)); + mstate->mcontext = medianCtx; + mstate->maxlen = 64; /* starting size */ + mstate->dvalues = (Datum*)palloc(mstate->maxlen * sizeof(Datum)); + mstate->nelems = 0; + mstate->dtype = elemType; + get_typlenbyvalalign(elemType, &mstate->typlen, &mstate->typbyval, &mstate->typalign); + + (void)MemoryContextSwitchTo(oldCtx); + + return mstate; +} + +/* + * Putting an element into working state + */ +static void MedianPutDatum(MedianBuildState* mstate, Datum dvalue) +{ + MemoryContext oldCtx = MemoryContextSwitchTo(mstate->mcontext); + + /* enlarge dvalues[] if needed */ + if (mstate->nelems >= mstate->maxlen) { + mstate->maxlen *= 2; + mstate->dvalues = (Datum*)repalloc(mstate->dvalues, mstate->maxlen * sizeof(Datum)); + } + + /* + * Ensure pass-by-ref stuff is copied into mcontext; and detoast it too if it's varlena. + */ + if (!mstate->typbyval) { + if (mstate->typlen == -1) { + dvalue = PointerGetDatum(PG_DETOAST_DATUM_COPY(dvalue)); + } else { + dvalue = datumCopy(dvalue, mstate->typbyval, mstate->typlen); + } + } + + mstate->dvalues[mstate->nelems] = dvalue; + mstate->nelems++; + + (void)MemoryContextSwitchTo(oldCtx); +} + + +/* + * MEDIAN aggregate function + */ +Datum median_transfn(PG_FUNCTION_ARGS) +{ + Oid arg1Typeid = get_fn_expr_argtype(fcinfo->flinfo, 1); + if (arg1Typeid == InvalidOid) { + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("could not determine input data type"))); + } + + /* Get the MemoryContext to keep the working state */ + MemoryContext aggCtx; + if (!AggCheckCallContext(fcinfo, &aggCtx)) { + /* cannot be called directly because of internal-type argument */ + ereport(ERROR, + (errcode(ERRCODE_SQL_ROUTINE_EXCEPTION), errmsg("median_transfn called in non-aggregate context"))); + } + + MedianBuildState* mstate = NULL; + if (PG_ARGISNULL(0)) { + /* Create the transition state workspace */ + mstate = CreateMedianBuildState(arg1Typeid, aggCtx); + } else { + mstate = (MedianBuildState*)PG_GETARG_POINTER(0); + } + + /* skip the null values */ + if (PG_ARGISNULL(1)) { + PG_RETURN_POINTER(mstate); + } + + /* the datatype must be matched */ + Assert(mstate->dtype == arg1Typeid); + + Datum elem = PG_GETARG_DATUM(1); + MedianPutDatum(mstate, elem); + + PG_RETURN_POINTER(mstate); +} + +extern int float8_cmp_internal(float8 a, float8 b); + +/* The comparison function for sorting an array of FLOAT8 datums */ +static int datum_float8_cmp(const void* arg1, const void* arg2) +{ + return float8_cmp_internal(*(const float8*)arg1, *(const float8*)arg2); +} + +static const double HALF_FACTOR = 0.5; + +/* + * the final function for median(float8) + */ +Datum median_float8_finalfn(PG_FUNCTION_ARGS) +{ + Datum result; + + if (PG_ARGISNULL(0)) { + PG_RETURN_NULL(); /* returns null if no input values */ + } + + /* cannot be called directly because of internal-type argument */ + Assert(AggCheckCallContext(fcinfo, NULL)); + + MedianBuildState* mstate = (MedianBuildState*)PG_GETARG_POINTER(0); + /* If no element had been found, the result is NULL */ + if (mstate->nelems == 0) { + PG_RETURN_NULL(); + } + + Assert(mstate->dtype == FLOAT8OID); + Assert(mstate->typlen == sizeof(float8)); + + /* sort all elements and find the median */ + qsort(mstate->dvalues, mstate->nelems, sizeof(Datum), datum_float8_cmp); + + uint32 i = mstate->nelems / 2; + if (mstate->nelems % 2 == 1) { + result = mstate->dvalues[i]; + } else { + double low = DatumGetFloat8(mstate->dvalues[i - 1]); + double high = DatumGetFloat8(mstate->dvalues[i]); + result = Float8GetDatum(low + (high - low) * HALF_FACTOR); + } + + PG_RETURN_DATUM(result); +} + + +/* The comparison function for sorting an array of INTERVAL datums. Since the + * INTERVAL type is pass-by-ref, thus the arguments passing to this function + * actually are Interval** */ +static int datum_interval_cmp(const void* arg1, const void* arg2) +{ + Interval** itvl1 = (Interval**)arg1; + Interval** itvl2 = (Interval**)arg2; + return interval_cmp_internal(*itvl1, *itvl2); +} + +/* + * the final function for median(interval) + */ +Datum median_interval_finalfn(PG_FUNCTION_ARGS) +{ + /* returns null if no input values */ + if (PG_ARGISNULL(0)) { + PG_RETURN_NULL(); + } + + /* cannot be called directly because of internal-type argument */ + Assert(AggCheckCallContext(fcinfo, NULL) != 0); + + MedianBuildState* mstate = (MedianBuildState*)PG_GETARG_POINTER(0); + /* If no element had been found, the result is NULL */ + if (mstate->nelems == 0) { + PG_RETURN_NULL(); + } + + Assert(mstate->dtype == INTERVALOID); + Assert(mstate->typlen == sizeof(Interval)); + Assert(!mstate->typbyval); /* INTERVAL is passed by reference */ + + /* sort all elements and find the median */ + qsort(mstate->dvalues, mstate->nelems, sizeof(Datum), datum_interval_cmp); + + Datum result; + uint32 i = mstate->nelems / 2; + if (mstate->nelems % 2 == 1) { + result = mstate->dvalues[i]; + } else { + Datum low = mstate->dvalues[i - 1]; + Datum high = mstate->dvalues[i]; + + /* compute the result by LOW + (HIGH - LOW) * 0.5 */ + Datum diff = DirectFunctionCall2(interval_mi, high, low); + Datum halfdiff = DirectFunctionCall2(interval_mul, diff, Float8GetDatumFast(HALF_FACTOR)); + + result = DirectFunctionCall2(interval_pl, halfdiff, low); + } + + PG_RETURN_DATUM(result); +} diff --git a/src/common/backend/utils/adt/regproc.cpp b/src/common/backend/utils/adt/regproc.cpp index 90df21817cfe3e0a9c756399f0a3fbbbcd8d21d7..cda277c5ae8fbdde66345246a872abcd56db3b65 100755 --- a/src/common/backend/utils/adt/regproc.cpp +++ b/src/common/backend/utils/adt/regproc.cpp @@ -75,7 +75,7 @@ static Datum regprocin_booststrap(char* procname) } if (matches == 0) { - ereport(ERROR, (errcode(ERRCODE_UNDEFINED_FUNCTION), errmsg("function \"%s\" does not exist xxxx", procname))); + ereport(ERROR, (errcode(ERRCODE_UNDEFINED_FUNCTION), errmsg("function \"%s\" does not exist", procname))); } else if (matches > 1) { ereport(ERROR, (errcode(ERRCODE_AMBIGUOUS_FUNCTION), diff --git a/src/include/catalog/pg_aggregate.h b/src/include/catalog/pg_aggregate.h index 5b95eee0418536bee857c3b90312613db574ff22..74febbd9988942c724dc5b5594576f114d91a400 100755 --- a/src/include/catalog/pg_aggregate.h +++ b/src/include/catalog/pg_aggregate.h @@ -438,10 +438,12 @@ DATA(insert ( 4508 interval_list_agg_noarg2_transfn - list_agg_finalfn 0 228 #endif /* ordered-set aggregates XXX shall we add collect funcs? */ -#ifdef PGXC DATA(insert ( 4452 ordered_set_transition - percentile_cont_float8_final 0 2281 _null_ _null_ o 1)); DATA(insert ( 4454 ordered_set_transition - percentile_cont_interval_final 0 2281 _null_ _null_ o 1)); -#endif + +DATA(insert (5555 median_transfn - median_float8_finalfn 0 2281 _null_ _null_ n 0)); +DATA(insert (5556 median_transfn - median_interval_finalfn 0 2281 _null_ _null_ n 0)); + /* * prototypes for functions in pg_aggregate.c diff --git a/src/include/utils/median_aggs.h b/src/include/utils/median_aggs.h new file mode 100644 index 0000000000000000000000000000000000000000..95ce285fa58f1fcaf7f38065ea32b37a45881d58 --- /dev/null +++ b/src/include/utils/median_aggs.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + * ------------------------------------------------------------------------- + * + * median_aggs.h + * Aggregate for computing the statistical median + * + * IDENTIFICATION + * src/include/utils/median_aggs.h + * + * ------------------------------------------------------------------------- + */ + +#ifndef MEDIAN_AGGS_H +#define MEDIAN_AGGS_H + +#include "fmgr.h" + +extern Datum median_transfn(PG_FUNCTION_ARGS); +extern Datum median_float8_finalfn(PG_FUNCTION_ARGS); +extern Datum median_interval_finalfn(PG_FUNCTION_ARGS); + +#endif /* MEDIAN_AGGS_H */ diff --git a/src/test/regress/expected/median.out b/src/test/regress/expected/median.out new file mode 100644 index 0000000000000000000000000000000000000000..fca8081df19ca861834527779a282c6a94f2f790 --- /dev/null +++ b/src/test/regress/expected/median.out @@ -0,0 +1,216 @@ +-- +-- The test cases for numerical types +CREATE TABLE MEDTEST(ID INT, NUMVAL NUMERIC, IVAL INT, BVAL BIGINT); +INSERT INTO MEDTEST VALUES(1, 100, 12, 99999), (1, 200, 13, NULL); +INSERT INTO MEDTEST VALUES(2, 100, 7777, NULL), (2, 300, 3333, 11111111), (2, 330, 5555, 11111110); +INSERT INTO MEDTEST VALUES(3, 110, 9, 909), (3, 111, 11, 808), (3, 111, 33, 505), (3, 112, 66, 606), (3, 102, 22, 202); +INSERT INTO MEDTEST VALUES(4, 91, 3, NULL), (4, 90, 3, 888); +INSERT INTO MEDTEST VALUES(5, 'NAN', NULL, 3), (5, 30, NULL, 3); +INSERT INTO MEDTEST VALUES(6, 'NAN', 100, 100), (6, 'NAN', 100, 200); +INSERT INTO MEDTEST VALUES(7, 'NAN', 123, 6666), (7, 'NAN', NULL, NULL), (7, 'NAN', NULL, 3333); +INSERT INTO MEDTEST VALUES(8, 'NAN', 99, 9999), (8, 888, 32, NULL), (8, 889, 123, NULL); + +SELECT ID, MEDIAN(NUMVAL), MEDIAN(IVAL), MEDIAN(BVAL) FROM MEDTEST GROUP BY ID ORDER BY 1; + id | median | median | median +----+--------+--------+------------ + 1 | 150 | 12.5 | 99999 + 2 | 300 | 5555 | 11111110.5 + 3 | 111 | 22 | 606 + 4 | 90.5 | 3 | 888 + 5 | NaN | | 3 + 6 | NaN | 100 | 150 + 7 | NaN | 123 | 4999.5 + 8 | 889 | 99 | 9999 +(8 rows) + + +-- the median function is equivalent to percentile_cont(0.5) within group (order by column) +SELECT ID, PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY NUMVAL) = MEDIAN(NUMVAL) AS MUST_TRUE1, +PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY IVAL) = MEDIAN(IVAL) AS MUST_TRUE2, +PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY BVAL) = MEDIAN(BVAL) AS MUST_TRUE3 +FROM MEDTEST GROUP BY ID ORDER BY 1; + id | must_true1 | must_true2 | must_true3 +----+------------+------------+------------ + 1 | t | t | t + 2 | t | t | t + 3 | t | t | t + 4 | t | t | t + 5 | t | | t + 6 | t | t | t + 7 | t | t | t + 8 | t | t | t +(8 rows) + + +-- the test case for median with window function +SELECT ID, NUMVAL, MEDIAN(NUMVAL) OVER(PARTITION BY ID) FROM MEDTEST GROUP BY ID, NUMVAL ORDER BY 1, 2; + id | numval | median +----+--------+-------- + 1 | 100 | 150 + 1 | 200 | 150 + 2 | 100 | 300 + 2 | 300 | 300 + 2 | 330 | 300 + 3 | 102 | 110.5 + 3 | 110 | 110.5 + 3 | 111 | 110.5 + 3 | 112 | 110.5 + 4 | 90 | 90.5 + 4 | 91 | 90.5 + 5 | 30 | NaN + 5 | NaN | NaN + 6 | NaN | NaN + 7 | NaN | NaN + 8 | 888 | 889 + 8 | 889 | 889 + 8 | NaN | 889 +(18 rows) + +SELECT ID, IVAL, MEDIAN(IVAL) OVER(PARTITION BY ID) FROM MEDTEST GROUP BY ID, IVAL ORDER BY 1, 2; + id | ival | median +----+------+-------- + 1 | 12 | 12.5 + 1 | 13 | 12.5 + 2 | 3333 | 5555 + 2 | 5555 | 5555 + 2 | 7777 | 5555 + 3 | 9 | 22 + 3 | 11 | 22 + 3 | 22 | 22 + 3 | 33 | 22 + 3 | 66 | 22 + 4 | 3 | 3 + 5 | | + 6 | 100 | 100 + 7 | 123 | 123 + 7 | | 123 + 8 | 32 | 99 + 8 | 99 | 99 + 8 | 123 | 99 +(18 rows) + +SELECT ID, BVAL, MEDIAN(BVAL) OVER(PARTITION BY ID) FROM MEDTEST GROUP BY ID, BVAL ORDER BY 1, 2; + id | bval | median +----+----------+------------ + 1 | 99999 | 99999 + 1 | | 99999 + 2 | 11111110 | 11111110.5 + 2 | 11111111 | 11111110.5 + 2 | | 11111110.5 + 3 | 202 | 606 + 3 | 505 | 606 + 3 | 606 | 606 + 3 | 808 | 606 + 3 | 909 | 606 + 4 | 888 | 888 + 4 | | 888 + 5 | 3 | 3 + 6 | 100 | 150 + 6 | 200 | 150 + 7 | 3333 | 4999.5 + 7 | 6666 | 4999.5 + 7 | | 4999.5 + 8 | 9999 | 9999 + 8 | | 9999 +(20 rows) + + +-- the test cases for median(extension) +SELECT ID, MEDIAN(NUMVAL + IVAL) FROM MEDTEST GROUP BY ID ORDER BY 1; + id | median +----+-------- + 1 | 162.5 + 2 | 5885 + 3 | 124 + 4 | 93.5 + 5 | + 6 | NaN + 7 | NaN + 8 | 1012 +(8 rows) + +SELECT ID, BVAL + IVAL, MEDIAN(BVAL + IVAL) OVER(PARTITION BY ID) FROM MEDTEST GROUP BY ID, BVAL + IVAL ORDER BY 1, 2; + id | ?column? | median +----+----------+------------ + 1 | 100011 | 100011 + 1 | | 100011 + 2 | 11114444 | 11115554.5 + 2 | 11116665 | 11115554.5 + 2 | | 11115554.5 + 3 | 224 | 672 + 3 | 538 | 672 + 3 | 672 | 672 + 3 | 819 | 672 + 3 | 918 | 672 + 4 | 891 | 891 + 4 | | 891 + 5 | | + 6 | 200 | 250 + 6 | 300 | 250 + 7 | 6789 | 6789 + 7 | | 6789 + 8 | 10098 | 10098 + 8 | | 10098 +(19 rows) + + +-- +-- The test cases for INTERVAL type +CREATE TABLE MEDTEST2(ID INT, ITVL INTERVAL, TS TIMESTAMP); +INSERT INTO MEDTEST2 VALUES(1, INTERVAL '1 YEAR 2 MONTHS 3 DAYS', NOW()); +INSERT INTO MEDTEST2 VALUES(1, INTERVAL '1 YEAR 2 MONTHS 4 DAYS', NOW()); +INSERT INTO MEDTEST2 VALUES(2, INTERVAL '11 YEAR 2 MONTHS 4 DAYS', NOW()); +INSERT INTO MEDTEST2 VALUES(3, INTERVAL '7 YEAR 1 MONTHS 0 DAYS', NOW()); +INSERT INTO MEDTEST2 VALUES(3, INTERVAL '7 YEAR 2 MONTHS 0 DAYS', NOW()); +INSERT INTO MEDTEST2 VALUES(3, INTERVAL '7 YEAR 2 MONTHS 1 DAYS', NOW()); +INSERT INTO MEDTEST2 VALUES(4, NULL, NULL); +INSERT INTO MEDTEST2 VALUES(5, NULL, NULL); +INSERT INTO MEDTEST2 VALUES(5, INTERVAL '7 YEAR 2 MONTHS 1 DAYS', NULL); + +SELECT ID, MEDIAN(ITVL) FROM MEDTEST2 GROUP BY ID; + id | median +----+--------------------------------- + 1 | @ 1 year 2 mons 3 days 12 hours + 4 | + 3 | @ 7 years 2 mons + 5 | @ 7 years 2 mons 1 day + 2 | @ 11 years 2 mons 4 days +(5 rows) + + +-- unsupported datatype +SELECT ID, MEDIAN(TS) FROM MEDTEST2 GROUP BY ID; +ERROR: function median(timestamp without time zone) does not exist +LINE 1: SELECT ID, MEDIAN(TS) FROM MEDTEST2 GROUP BY ID; + ^ +HINT: No function matches the given name and argument types. You might need to add explicit type casts. +CONTEXT: referenced column: median + +-- median & percentile_cont(0.5) +SELECT ID, PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY ITVL) = MEDIAN(ITVL) AS MUST_TRUE +FROM MEDTEST2 GROUP BY ID ORDER BY 1; + id | must_true +----+----------- + 1 | t + 2 | t + 3 | t + 4 | + 5 | t +(5 rows) + + +-- the window function +SELECT ID, ITVL, MEDIAN(ITVL) OVER(PARTITION BY ID) FROM MEDTEST2 GROUP BY ID, ITVL ORDER BY 1, 2; + id | itvl | median +----+--------------------------+--------------------------------- + 1 | @ 1 year 2 mons 3 days | @ 1 year 2 mons 3 days 12 hours + 1 | @ 1 year 2 mons 4 days | @ 1 year 2 mons 3 days 12 hours + 2 | @ 11 years 2 mons 4 days | @ 11 years 2 mons 4 days + 3 | @ 7 years 1 mon | @ 7 years 2 mons + 3 | @ 7 years 2 mons | @ 7 years 2 mons + 3 | @ 7 years 2 mons 1 day | @ 7 years 2 mons + 4 | | + 5 | @ 7 years 2 mons 1 day | @ 7 years 2 mons 1 day + 5 | | @ 7 years 2 mons 1 day +(9 rows) + diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out index 9b7de808387c565053c1fd0c957018e699653c7d..69d8470b331b435bdb0822a43a29e8cb0673ac02 100644 --- a/src/test/regress/expected/opr_sanity.out +++ b/src/test/regress/expected/opr_sanity.out @@ -2568,6 +2568,11 @@ WHERE d.classoid IS NULL AND p1.oid <= 9999 order by 1; 5544 | int1send 5548 | int1_avg_accum 5549 | pg_stat_get_activity_ng + 5555 | median + 5556 | median + 5557 | median_float8_finalfn + 5558 | median_interval_finalfn + 5559 | median_transfn 5560 | to_date 5586 | smalldatetime_cmp 5587 | smalldatetime_hash @@ -2627,7 +2632,7 @@ WHERE d.classoid IS NULL AND p1.oid <= 9999 order by 1; 9016 | pg_advisory_lock 9017 | pgxc_unlock_for_sp_database 9999 | pg_test_err_contain_err -(2265 rows) +(2270 rows) -- **************** pg_cast **************** -- Catch bogus values in pg_cast columns (other than cases detected by diff --git a/src/test/regress/expected/single_node_opr_sanity.out b/src/test/regress/expected/single_node_opr_sanity.out index d930bdd54443e90b3be394ed9c4dd37b1c32a8c5..11afef7d43f3dd11f7a8e951d8f17dab13c0ab9c 100755 --- a/src/test/regress/expected/single_node_opr_sanity.out +++ b/src/test/regress/expected/single_node_opr_sanity.out @@ -2607,6 +2607,11 @@ WHERE d.classoid IS NULL AND p1.oid <= 9999 order by 1; 5544 | int1send 5548 | int1_avg_accum 5549 | pg_stat_get_activity_ng + 5555 | median + 5556 | median + 5557 | median_float8_finalfn + 5558 | median_interval_finalfn + 5559 | median_transfn 5560 | to_date 5586 | smalldatetime_cmp 5587 | smalldatetime_hash @@ -2666,7 +2671,7 @@ WHERE d.classoid IS NULL AND p1.oid <= 9999 order by 1; 9016 | pg_advisory_lock 9017 | pgxc_unlock_for_sp_database 9999 | pg_test_err_contain_err -(2265 rows) +(2270 rows) -- **************** pg_cast **************** -- Catch bogus values in pg_cast columns (other than cases detected by diff --git a/src/test/regress/parallel_schedule0 b/src/test/regress/parallel_schedule0 index 639f4f5c1713f30c27d6928c1aa3b5a866e190f0..d1b7b31673c64f3644f0e67ee774908484f9b29c 100644 --- a/src/test/regress/parallel_schedule0 +++ b/src/test/regress/parallel_schedule0 @@ -8,7 +8,7 @@ test: analyze_commands #test: single_node_job test: single_node_ddl -test: single_node_sqlbypass +test: single_node_sqlbypass median # run tablespace by itself, and first, because it forces a checkpoint; # we'd prefer not to have checkpoints later in the tests because that diff --git a/src/test/regress/sql/median.sql b/src/test/regress/sql/median.sql new file mode 100644 index 0000000000000000000000000000000000000000..a656892593449a209c0e93ada136c8aea624adc4 --- /dev/null +++ b/src/test/regress/sql/median.sql @@ -0,0 +1,53 @@ +-- +-- The test cases for numerical types +CREATE TABLE MEDTEST(ID INT, NUMVAL NUMERIC, IVAL INT, BVAL BIGINT); +INSERT INTO MEDTEST VALUES(1, 100, 12, 99999), (1, 200, 13, NULL); +INSERT INTO MEDTEST VALUES(2, 100, 7777, NULL), (2, 300, 3333, 11111111), (2, 330, 5555, 11111110); +INSERT INTO MEDTEST VALUES(3, 110, 9, 909), (3, 111, 11, 808), (3, 111, 33, 505), (3, 112, 66, 606), (3, 102, 22, 202); +INSERT INTO MEDTEST VALUES(4, 91, 3, NULL), (4, 90, 3, 888); +INSERT INTO MEDTEST VALUES(5, 'NAN', NULL, 3), (5, 30, NULL, 3); +INSERT INTO MEDTEST VALUES(6, 'NAN', 100, 100), (6, 'NAN', 100, 200); +INSERT INTO MEDTEST VALUES(7, 'NAN', 123, 6666), (7, 'NAN', NULL, NULL), (7, 'NAN', NULL, 3333); +INSERT INTO MEDTEST VALUES(8, 'NAN', 99, 9999), (8, 888, 32, NULL), (8, 889, 123, NULL); + +SELECT ID, MEDIAN(NUMVAL), MEDIAN(IVAL), MEDIAN(BVAL) FROM MEDTEST GROUP BY ID ORDER BY 1; + +-- the median function is equivalent to percentile_cont(0.5) within group (order by column) +SELECT ID, PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY NUMVAL) = MEDIAN(NUMVAL) AS MUST_TRUE1, +PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY IVAL) = MEDIAN(IVAL) AS MUST_TRUE2, +PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY BVAL) = MEDIAN(BVAL) AS MUST_TRUE3 +FROM MEDTEST GROUP BY ID ORDER BY 1; + +-- the test case for median with window function +SELECT ID, NUMVAL, MEDIAN(NUMVAL) OVER(PARTITION BY ID) FROM MEDTEST GROUP BY ID, NUMVAL ORDER BY 1, 2; +SELECT ID, IVAL, MEDIAN(IVAL) OVER(PARTITION BY ID) FROM MEDTEST GROUP BY ID, IVAL ORDER BY 1, 2; +SELECT ID, BVAL, MEDIAN(BVAL) OVER(PARTITION BY ID) FROM MEDTEST GROUP BY ID, BVAL ORDER BY 1, 2; + +-- the test cases for median(extension) +SELECT ID, MEDIAN(NUMVAL + IVAL) FROM MEDTEST GROUP BY ID ORDER BY 1; +SELECT ID, BVAL + IVAL, MEDIAN(BVAL + IVAL) OVER(PARTITION BY ID) FROM MEDTEST GROUP BY ID, BVAL + IVAL ORDER BY 1, 2; + +-- +-- The test cases for INTERVAL type +CREATE TABLE MEDTEST2(ID INT, ITVL INTERVAL, TS TIMESTAMP); +INSERT INTO MEDTEST2 VALUES(1, INTERVAL '1 YEAR 2 MONTHS 3 DAYS', NOW()); +INSERT INTO MEDTEST2 VALUES(1, INTERVAL '1 YEAR 2 MONTHS 4 DAYS', NOW()); +INSERT INTO MEDTEST2 VALUES(2, INTERVAL '11 YEAR 2 MONTHS 4 DAYS', NOW()); +INSERT INTO MEDTEST2 VALUES(3, INTERVAL '7 YEAR 1 MONTHS 0 DAYS', NOW()); +INSERT INTO MEDTEST2 VALUES(3, INTERVAL '7 YEAR 2 MONTHS 0 DAYS', NOW()); +INSERT INTO MEDTEST2 VALUES(3, INTERVAL '7 YEAR 2 MONTHS 1 DAYS', NOW()); +INSERT INTO MEDTEST2 VALUES(4, NULL, NULL); +INSERT INTO MEDTEST2 VALUES(5, NULL, NULL); +INSERT INTO MEDTEST2 VALUES(5, INTERVAL '7 YEAR 2 MONTHS 1 DAYS', NULL); + +SELECT ID, MEDIAN(ITVL) FROM MEDTEST2 GROUP BY ID; + +-- unsupported datatype +SELECT ID, MEDIAN(TS) FROM MEDTEST2 GROUP BY ID; + +-- median & percentile_cont(0.5) +SELECT ID, PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY ITVL) = MEDIAN(ITVL) AS MUST_TRUE +FROM MEDTEST2 GROUP BY ID ORDER BY 1; + +-- the window function +SELECT ID, ITVL, MEDIAN(ITVL) OVER(PARTITION BY ID) FROM MEDTEST2 GROUP BY ID, ITVL ORDER BY 1, 2;