From 7415618f07e352022ac1170df40b9efc89449b72 Mon Sep 17 00:00:00 2001 From: h30054849 Date: Tue, 5 Nov 2024 14:23:49 +0800 Subject: [PATCH] =?UTF-8?q?=E6=94=AF=E6=8C=81DataVec=E5=90=91=E9=87=8F?= =?UTF-8?q?=E6=95=B0=E6=8D=AE=E5=BA=93?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/common/backend/catalog/builtin_funcs.ini | 353 ++++- src/common/backend/catalog/index.cpp | 2 +- src/common/backend/utils/adt/Makefile | 2 +- src/common/backend/utils/adt/bitvec.cpp | 76 + src/common/backend/utils/adt/f2s.cpp | 732 +++++++++ src/common/backend/utils/adt/halfutils.cpp | 275 ++++ src/common/backend/utils/adt/halfvec.cpp | 1123 +++++++++++++ src/common/backend/utils/adt/sparsevec.cpp | 1053 +++++++++++++ src/common/backend/utils/adt/vector.cpp | 1379 ++++++++++++++++ src/common/backend/utils/misc/guc/guc_sql.cpp | 28 + src/gausskernel/CMakeLists.txt | 1 + .../process/threadpool/knl_session.cpp | 6 + src/gausskernel/storage/access/CMakeLists.txt | 4 +- src/gausskernel/storage/access/Makefile | 2 +- .../storage/access/datavec/CMakeLists.txt | 16 + .../storage/access/datavec/Makefile | 16 + .../storage/access/datavec/bitutils.cpp | 215 +++ .../storage/access/datavec/hnsw.cpp | 337 ++++ .../storage/access/datavec/hnswbuild.cpp | 1051 +++++++++++++ .../storage/access/datavec/hnswdelete.cpp | 201 +++ .../storage/access/datavec/hnswinsert.cpp | 652 ++++++++ .../storage/access/datavec/hnswscan.cpp | 211 +++ .../storage/access/datavec/hnswutils.cpp | 1393 +++++++++++++++++ .../storage/access/datavec/hnswvacuum.cpp | 606 +++++++ .../storage/access/datavec/ivfbuild.cpp | 881 +++++++++++ .../storage/access/datavec/ivfflat.cpp | 340 ++++ .../storage/access/datavec/ivfinsert.cpp | 198 +++ .../storage/access/datavec/ivfkmeans.cpp | 558 +++++++ .../storage/access/datavec/ivfscan.cpp | 347 ++++ .../storage/access/datavec/ivfutils.cpp | 332 ++++ .../storage/access/datavec/ivfvacuum.cpp | 145 ++ .../storage/access/datavec/vecindex.cpp | 292 
++++ src/include/access/datavec/bitvec.h | 18 + src/include/access/datavec/halfutils.h | 229 +++ src/include/access/datavec/halfvec.h | 111 ++ src/include/access/datavec/hnsw.h | 630 ++++++++ src/include/access/datavec/ivfflat.h | 318 ++++ src/include/access/datavec/pg_prng.h | 58 + src/include/access/datavec/ryu_common.h | 146 ++ src/include/access/datavec/sampling.h | 34 + src/include/access/datavec/shortest_dec.h | 9 + src/include/access/datavec/sparsevec.h | 64 + src/include/access/datavec/vecindex.h | 23 + src/include/access/datavec/vector.h | 71 + src/include/catalog/pg_aggregate.h | 4 +- src/include/catalog/pg_am.h | 8 +- src/include/catalog/pg_amop.data | 24 + src/include/catalog/pg_amproc.h | 42 + src/include/catalog/pg_cast.h | 12 + src/include/catalog/pg_opclass.h | 23 + src/include/catalog/pg_operator.data | 57 + src/include/catalog/pg_opfamily.h | 22 + src/include/catalog/pg_type.h | 13 + src/include/knl/knl_session.h | 7 + src/test/regress/expected/opr_sanity_2.out | 16 +- .../single_node_test_null_operator.out | 8 +- src/test/regress/pg_regress.cpp | 2 +- src/test/regress/sql/opr_sanity_2.sql | 4 +- 58 files changed, 14762 insertions(+), 18 deletions(-) create mode 100644 src/common/backend/utils/adt/bitvec.cpp create mode 100644 src/common/backend/utils/adt/f2s.cpp create mode 100644 src/common/backend/utils/adt/halfutils.cpp create mode 100644 src/common/backend/utils/adt/halfvec.cpp create mode 100644 src/common/backend/utils/adt/sparsevec.cpp create mode 100644 src/common/backend/utils/adt/vector.cpp create mode 100644 src/gausskernel/storage/access/datavec/CMakeLists.txt create mode 100644 src/gausskernel/storage/access/datavec/Makefile create mode 100644 src/gausskernel/storage/access/datavec/bitutils.cpp create mode 100644 src/gausskernel/storage/access/datavec/hnsw.cpp create mode 100644 src/gausskernel/storage/access/datavec/hnswbuild.cpp create mode 100644 src/gausskernel/storage/access/datavec/hnswdelete.cpp create mode 100644 
src/gausskernel/storage/access/datavec/hnswinsert.cpp create mode 100644 src/gausskernel/storage/access/datavec/hnswscan.cpp create mode 100644 src/gausskernel/storage/access/datavec/hnswutils.cpp create mode 100644 src/gausskernel/storage/access/datavec/hnswvacuum.cpp create mode 100644 src/gausskernel/storage/access/datavec/ivfbuild.cpp create mode 100644 src/gausskernel/storage/access/datavec/ivfflat.cpp create mode 100644 src/gausskernel/storage/access/datavec/ivfinsert.cpp create mode 100644 src/gausskernel/storage/access/datavec/ivfkmeans.cpp create mode 100644 src/gausskernel/storage/access/datavec/ivfscan.cpp create mode 100644 src/gausskernel/storage/access/datavec/ivfutils.cpp create mode 100644 src/gausskernel/storage/access/datavec/ivfvacuum.cpp create mode 100644 src/gausskernel/storage/access/datavec/vecindex.cpp create mode 100644 src/include/access/datavec/bitvec.h create mode 100644 src/include/access/datavec/halfutils.h create mode 100644 src/include/access/datavec/halfvec.h create mode 100644 src/include/access/datavec/hnsw.h create mode 100644 src/include/access/datavec/ivfflat.h create mode 100644 src/include/access/datavec/pg_prng.h create mode 100644 src/include/access/datavec/ryu_common.h create mode 100644 src/include/access/datavec/sampling.h create mode 100644 src/include/access/datavec/shortest_dec.h create mode 100644 src/include/access/datavec/sparsevec.h create mode 100644 src/include/access/datavec/vecindex.h create mode 100644 src/include/access/datavec/vector.h diff --git a/src/common/backend/catalog/builtin_funcs.ini b/src/common/backend/catalog/builtin_funcs.ini index d45c97453f..961cf6b70e 100644 --- a/src/common/backend/catalog/builtin_funcs.ini +++ b/src/common/backend/catalog/builtin_funcs.ini @@ -494,7 +494,7 @@ AddBuiltinFunc(_0(1504), _1("attach"), _2(2), _3(true), _4(true), _5(debug_client_attatch), _6(2249), _7(PG_PLDEBUG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(1), _12(0), _13(0), 
_14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(2, 25, 23), _21(6, 25, 23, 26, 25, 23, 25), _22(6, 'i', 'i', 'o', 'o', 'o', 'o'), _23(6, "nodename", "port", "funcoid", "funcname", "lineno", "query"), _24(NULL), _25("debug_client_attatch"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33(NULL), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)), ), AddFuncGroup( - "avg", 8, + "avg", 9, AddBuiltinFunc(_0(2100), _1("avg"), _2(1), _3(false), _4(false), _5(aggregate_dummy), _6(1700), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(true), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 20), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("aggregate_dummy"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("concatenate aggregate input into an array"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)), AddBuiltinFunc(_0(2101), _1("avg"), _2(1), _3(false), _4(false), _5(aggregate_dummy), _6(1700), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(true), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 23), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("aggregate_dummy"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("concatenate aggregate input into an array"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)), AddBuiltinFunc(_0(2102), _1("avg"), _2(1), _3(false), _4(false), _5(aggregate_dummy), _6(1700), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(true), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 21), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("aggregate_dummy"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), 
_33("concatenate aggregate input into an array"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)), @@ -502,7 +502,8 @@ AddBuiltinFunc(_0(2104), _1("avg"), _2(1), _3(false), _4(false), _5(aggregate_dummy), _6(701), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(true), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 700), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("aggregate_dummy"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("concatenate aggregate input into an array"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)), AddBuiltinFunc(_0(2105), _1("avg"), _2(1), _3(false), _4(false), _5(aggregate_dummy), _6(701), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(true), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 701), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("aggregate_dummy"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("concatenate aggregate input into an array"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)), AddBuiltinFunc(_0(2106), _1("avg"), _2(1), _3(false), _4(false), _5(aggregate_dummy), _6(1186), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(true), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 1186), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("aggregate_dummy"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("concatenate aggregate input into an array"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)), - AddBuiltinFunc(_0(5537), _1("avg"), _2(1), _3(false), _4(false), _5(aggregate_dummy), _6(1700), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), 
_14(true), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 5545), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("aggregate_dummy"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("concatenate aggregate input into an array"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + AddBuiltinFunc(_0(5537), _1("avg"), _2(1), _3(false), _4(false), _5(aggregate_dummy), _6(1700), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(true), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 5545), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("aggregate_dummy"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("concatenate aggregate input into an array"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)), + AddBuiltinFunc(_0(8241), _1("avg"), _2(1), _3(false), _4(false), _5(aggregate_dummy), _6(8305), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(true), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("aggregate_dummy"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("concatenate aggregate input into an array"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) ), AddFuncGroup( "backtrace", 1, @@ -11238,7 +11239,7 @@ AddFuncGroup( AddBuiltinFunc(_0(9463), _1("subtype_recv"), _2(3), _3(false), _4(false), _5(subtype_recv), _6(2276), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(3, 2281, 26, 23), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("subtype_recv"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("I/O"), _34('f'), _35(NULL), _36(0), 
_37(false), _38(NULL), _39(NULL), _40(0), _41(NULL)) ), AddFuncGroup( - "sum", 8, + "sum", 9, AddBuiltinFunc(_0(2107), _1("sum"), _2(1), _3(false), _4(false), _5(aggregate_dummy), _6(1700), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(true), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 20), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("aggregate_dummy"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("the average (arithmetic mean) as numeric of all bigint values"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)), AddBuiltinFunc(_0(2108), _1("sum"), _2(1), _3(false), _4(false), _5(aggregate_dummy), _6(20), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(true), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 23), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("aggregate_dummy"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("the average (arithmetic mean) as numeric of all bigint values"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)), AddBuiltinFunc(_0(2109), _1("sum"), _2(1), _3(false), _4(false), _5(aggregate_dummy), _6(20), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(true), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 21), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("aggregate_dummy"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("the average (arithmetic mean) as numeric of all bigint values"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)), @@ -11246,7 +11247,8 @@ AddFuncGroup( AddBuiltinFunc(_0(2111), _1("sum"), _2(1), _3(false), _4(false), _5(aggregate_dummy), _6(701), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), 
_9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(true), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 701), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("aggregate_dummy"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("the average (arithmetic mean) as numeric of all bigint values"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)), AddBuiltinFunc(_0(2112), _1("sum"), _2(1), _3(false), _4(false), _5(aggregate_dummy), _6(790), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(true), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 790), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("aggregate_dummy"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("the average (arithmetic mean) as numeric of all bigint values"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)), AddBuiltinFunc(_0(2113), _1("sum"), _2(1), _3(false), _4(false), _5(aggregate_dummy), _6(1186), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(true), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 1186), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("aggregate_dummy"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("the average (arithmetic mean) as numeric of all bigint values"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)), - AddBuiltinFunc(_0(2114), _1("sum"), _2(1), _3(false), _4(false), _5(aggregate_dummy), _6(1700), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(true), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 1700), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("aggregate_dummy"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("the 
average (arithmetic mean) as numeric of all bigint values"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + AddBuiltinFunc(_0(2114), _1("sum"), _2(1), _3(false), _4(false), _5(aggregate_dummy), _6(1700), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(true), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 1700), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("aggregate_dummy"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("the average (arithmetic mean) as numeric of all bigint values"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)), + AddBuiltinFunc(_0(8242), _1("sum"), _2(1), _3(false), _4(false), _5(aggregate_dummy), _6(8305), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(true), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("aggregate_dummy"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("the average (arithmetic mean) as numeric of all bigint values"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) ), AddFuncGroup( "suppress_redundant_updates_trigger", 1, @@ -13124,4 +13126,347 @@ AddFuncGroup( AddFuncGroup( "query_imcstore_views", 1, AddBuiltinFunc(_0(6808), _1("query_imcstore_views"), _2(1), _3(true), _4(true), _5(query_imcstore_views), _6(2249), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(1000), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(0), _21(11, 26, 19, 22, 21, 19, 16, 26, 20, 20, 20, 20), _22(11, 'o', 'o', 'o', 'o','o', 'o', 'o', 'o', 'o','o', 'o'), _23(11, "reloid", "relname", "imcs_attrs", "imcs_nattrs", "imcs_status", "is_partition", "parent_oid", "cu_size_in_mem", "cu_num_in_mem", "cu_size_in_disk", 
"cu_num_in_disk"), _24(NULL), _25("query_imcstore_views"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("query_imcstore_views"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "hnswinsert", 1, + AddBuiltinFunc(_0(8401), _1("hnswinsert"), _2(6), _3(true), _4(false), _5(hnswinsert), _6(16), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(6, 2281, 2281, 2281, 2281, 2281, 2281), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("hnswinsert"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "hnswbeginscan", 1, + AddBuiltinFunc(_0(8402), _1("hnswbeginscan"), _2(3), _3(true), _4(false), _5(hnswbeginscan), _6(2281), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(3, 2281, 2281, 2281), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("hnswbeginscan"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "hnswgettuple", 1, + AddBuiltinFunc(_0(8403), _1("hnswgettuple"), _2(2), _3(true), _4(false), _5(hnswgettuple), _6(16), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(2, 2281, 2281), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("hnswgettuple"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "hnswrescan", 1, + 
AddBuiltinFunc(_0(8404), _1("hnswrescan"), _2(5), _3(true), _4(false), _5(hnswrescan), _6(2278), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(5, 2281, 2281, 2281, 2281, 2281), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("hnswrescan"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "hnswendscan", 1, + AddBuiltinFunc(_0(8405), _1("hnswendscan"), _2(1), _3(true), _4(false), _5(hnswendscan), _6(2278), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(1, 2281), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("hnswendscan"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "hnswbuild", 1, + AddBuiltinFunc(_0(8406), _1("hnswbuild"), _2(3), _3(true), _4(false), _5(hnswbuild), _6(2281), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(3, 2281, 2281, 2281), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("hnswbuild"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "hnswbuildempty", 1, + AddBuiltinFunc(_0(8407), _1("hnswbuildempty"), _2(1), _3(true), _4(false), _5(hnswbuildempty), _6(2278), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(1, 2281), _21(NULL), 
_22(NULL), _23(NULL), _24(NULL), _25("hnswbuildempty"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "hnswbulkdelete", 1, + AddBuiltinFunc(_0(8408), _1("hnswbulkdelete"), _2(4), _3(true), _4(false), _5(hnswbulkdelete), _6(2281), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(4, 2281, 2281, 2281, 2281), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("hnswbulkdelete"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "hnswvacuumcleanup", 1, + AddBuiltinFunc(_0(8409), _1("hnswvacuumcleanup"), _2(2), _3(true), _4(false), _5(hnswvacuumcleanup), _6(2281), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(2, 2281, 2281), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("hnswvacuumcleanup"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "hnswcostestimate", 1, + AddBuiltinFunc(_0(8410), _1("hnswcostestimate"), _2(7), _3(true), _4(false), _5(hnswcostestimate), _6(2278), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(7, 2281, 2281, 2281, 2281, 2281, 2281, 2281), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("hnswcostestimate"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + 
), + AddFuncGroup( + "hnswoptions", 1, + AddBuiltinFunc(_0(8411), _1("hnswoptions"), _2(2), _3(true), _4(false), _5(hnswoptions), _6(17), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(2, 1009, 16), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("hnswoptions"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "ivfflatinsert", 1, + AddBuiltinFunc(_0(8412), _1("ivfflatinsert"), _2(6), _3(true), _4(false), _5(ivfflatinsert), _6(16), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(6, 2281, 2281, 2281, 2281, 2281, 2281), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("ivfflatinsert"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "ivfflatbeginscan", 1, + AddBuiltinFunc(_0(8413), _1("ivfflatbeginscan"), _2(3), _3(true), _4(false), _5(ivfflatbeginscan), _6(2281), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(3, 2281, 2281, 2281), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("ivfflatbeginscan"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "ivfflatgettuple", 1, + AddBuiltinFunc(_0(8414), _1("ivfflatgettuple"), _2(2), _3(true), _4(false), _5(ivfflatgettuple), _6(16), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), 
_14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(2, 2281, 2281), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("ivfflatgettuple"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "ivfflatrescan", 1, + AddBuiltinFunc(_0(8415), _1("ivfflatrescan"), _2(5), _3(true), _4(false), _5(ivfflatrescan), _6(2278), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(5, 2281, 2281, 2281, 2281, 2281), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("ivfflatrescan"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "ivfflatendscan", 1, + AddBuiltinFunc(_0(8416), _1("ivfflatendscan"), _2(1), _3(true), _4(false), _5(ivfflatendscan), _6(2278), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(1, 2281), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("ivfflatendscan"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "ivfflatbuild", 1, + AddBuiltinFunc(_0(8417), _1("ivfflatbuild"), _2(3), _3(true), _4(false), _5(ivfflatbuild), _6(2281), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(3, 2281, 2281, 2281), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("ivfflatbuild"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), 
_37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "ivfflatbuildempty", 1, + AddBuiltinFunc(_0(8418), _1("ivfflatbuildempty"), _2(1), _3(true), _4(false), _5(ivfflatbuildempty), _6(2278), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(1, 2281), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("ivfflatbuildempty"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "ivfflatbulkdelete", 1, + AddBuiltinFunc( _0(8419), _1("ivfflatbulkdelete"), _2(4), _3(true), _4(false), _5(ivfflatbulkdelete), _6(2281), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(4, 2281, 2281, 2281, 2281), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("ivfflatbulkdelete"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "ivfflatvacuumcleanup", 1, + AddBuiltinFunc(_0(8420), _1("ivfflatvacuumcleanup"), _2(2), _3(true), _4(false), _5(ivfflatvacuumcleanup), _6(2281), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(2, 2281, 2281), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("ivfflatvacuumcleanup"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "ivfflatcostestimate", 1, + AddBuiltinFunc(_0(8421), _1("ivfflatcostestimate"), _2(7), _3(true), _4(false), _5(ivfflatcostestimate), _6(2278), 
_7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(7, 2281, 2281, 2281, 2281, 2281, 2281, 2281), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("ivfflatcostestimate"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "ivfflatoptions", 1, + AddBuiltinFunc(_0(8422), _1("ivfflatoptions"), _2(2), _3(true), _4(false), _5(ivfflatoptions), _6(17), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(2, 1009, 16), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("ivfflatoptions"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_in", 1, + AddBuiltinFunc(_0(8423), _1("vector_in"), _2(3), _3(true), _4(false), _5(vector_in), _6(8305), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(3, 2275, 26, 23), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_in"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("I/O"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_out", 1, + AddBuiltinFunc(_0(8424), _1("vector_out"), _2(1), _3(true), _4(false), _5(vector_out), _6(2275), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(1, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_out"), _26(NULL), _27(NULL), 
_28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("I/O"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_typmod_in", 1, + AddBuiltinFunc(_0(8425), _1("vector_typmod_in"), _2(1), _3(true), _4(false), _5(vector_typmod_in), _6(23), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 1263), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_typmod_in"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false),_33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_recv", 1, + AddBuiltinFunc(_0(8426), _1("vector_recv"), _2(3), _3(true), _4(false), _5(vector_recv), _6(8305), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(3, 2281, 26, 23), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_recv"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("I/O"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_send", 1, + AddBuiltinFunc(_0(8427), _1("vector_send"), _2(1), _3(true), _4(false), _5(vector_send), _6(17), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(1, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_send"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("I/O"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_dims", 1, + AddBuiltinFunc(_0(8428), _1("vector_dims"), _2(1), _3(true), _4(false), _5(vector_dims), _6(23), _7(PG_CATALOG_NAMESPACE), 
_8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(1, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_dims"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + + AddFuncGroup( /* _2 is 2 on the rows below so it agrees with the two argument types listed in _20 */ + "vector_l2_squared_distance", 1, + AddBuiltinFunc(_0(8431), _1("vector_l2_squared_distance"), _2(2), _3(true), _4(false), _5(vector_l2_squared_distance), _6(701), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(2, 8305, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_l2_squared_distance"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_spherical_distance", 1, + AddBuiltinFunc(_0(8432), _1("vector_spherical_distance"), _2(2), _3(true), _4(false), _5(vector_spherical_distance), _6(701), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(2, 8305, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_spherical_distance"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "l2_distance", 2, + AddBuiltinFunc(_0(8433), _1("l2_distance"), _2(2), _3(true), _4(false), _5(l2_distance), _6(701), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(2, 8305, 8305), _21(NULL), _22(NULL), _23(NULL), 
_24(NULL), _25("l2_distance"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)), + AddBuiltinFunc(_0(8465), _1("l2_distance"), _2(1), _3(true), _4(false), _5(sparsevec_l2_distance), _6(701), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(2, 8307, 8307), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("sparsevec_l2_distance"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_negative_inner_product", 1, + AddBuiltinFunc(_0(8434), _1("vector_negative_inner_product"), _2(1), _3(true), _4(false), _5(vector_negative_inner_product), _6(701), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(2, 8305, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_negative_inner_product"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "cosine_distance", 2, + AddBuiltinFunc(_0(8435), _1("cosine_distance"), _2(1), _3(true), _4(false), _5(cosine_distance), _6(701), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(2, 8305, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("cosine_distance"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)), + AddBuiltinFunc(_0(8466), _1("cosine_distance"), _2(1), 
_3(true), _4(false), _5(sparsevec_cosine_distance), _6(701), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(2, 8307, 8307), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("sparsevec_cosine_distance"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "l1_distance", 2, + AddBuiltinFunc(_0(8436), _1("l1_distance"), _2(1), _3(true), _4(false), _5(l1_distance), _6(701), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(2, 8305, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("l1_distance"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)), + AddBuiltinFunc(_0(8467), _1("l1_distance"), _2(1), _3(true), _4(false), _5(sparsevec_l1_distance), _6(701), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(2, 8307, 8307), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("sparsevec_l1_distance"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "inner_product", 2, + AddBuiltinFunc(_0(8437), _1("inner_product"), _2(1), _3(true), _4(false), _5(inner_product), _6(701), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(2, 8305, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), 
_25("inner_product"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)), + AddBuiltinFunc(_0(8471), _1("inner_product"), _2(1), _3(true), _4(false), _5(sparsevec_inner_product), _6(701), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(2, 8307, 8307), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("sparsevec_inner_product"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_norm", 1, + AddBuiltinFunc(_0(8438), _1("vector_norm"), _2(1), _3(true), _4(false), _5(vector_norm), _6(701), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_norm"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_add", 1, + AddBuiltinFunc(_0(8439), _1("vector_add"), _2(1), _3(true), _4(false), _5(vector_add), _6(8305), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(2, 8305, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_add"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_sub", 1, + AddBuiltinFunc(_0(8440), _1("vector_sub"), _2(1), _3(true), _4(false), _5(vector_sub), _6(8305), 
_7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(2, 8305, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_sub"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_lt", 1, + AddBuiltinFunc(_0(8441), _1("vector_lt"), _2(1), _3(true), _4(false), _5(vector_lt), _6(16), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(2, 8305, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_lt"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_le", 1, + AddBuiltinFunc(_0(8442), _1("vector_le"), _2(1), _3(true), _4(false), _5(vector_le), _6(16), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(2, 8305, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_le"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_eq", 1, + AddBuiltinFunc(_0(8443), _1("vector_eq"), _2(1), _3(true), _4(false), _5(vector_eq), _6(16), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(2, 8305, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_eq"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), 
_34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_ne", 1, + AddBuiltinFunc(_0(8444), _1("vector_ne"), _2(1), _3(true), _4(false), _5(vector_ne), _6(16), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(2, 8305, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_ne"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_ge", 1, + AddBuiltinFunc(_0(8445), _1("vector_ge"), _2(1), _3(true), _4(false), _5(vector_ge), _6(16), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(2, 8305, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_ge"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_gt", 1, + AddBuiltinFunc(_0(8446), _1("vector_gt"), _2(1), _3(true), _4(false), _5(vector_gt), _6(16), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(2, 8305, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_gt"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_accum", 1, + AddBuiltinFunc(_0(8447), _1("vector_accum"), _2(1), _3(true), _4(false), _5(vector_accum), _6(2277), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), 
_15(false), _16(false), _17(false), _18('s'), _19(0), _20(2, 2277, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_accum"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_combine", 1, + AddBuiltinFunc(_0(8448), _1("vector_combine"), _2(1), _3(true), _4(false), _5(vector_combine), _6(2277), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(2, 2277, 2277), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_combine"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_avg", 1, + AddBuiltinFunc(_0(8449), _1("vector_avg"), _2(1), _3(true), _4(false), _5(vector_avg), _6(8305), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(1, 2277), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_avg"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_cmp", 1, + AddBuiltinFunc(_0(8450), _1("vector_cmp"), _2(1), _3(true), _4(false), _5(vector_cmp), _6(23), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(2, 8305, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_cmp"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( 
+ "sparsevec_in", 1, + AddBuiltinFunc(_0(8458), _1("sparsevec_in"), _2(3), _3(true), _4(false), _5(sparsevec_in), _6(8307), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(3, 2275, 26, 23), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("sparsevec_in"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("I/O"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "sparsevec_out", 1, + AddBuiltinFunc(_0(8459), _1("sparsevec_out"), _2(1), _3(true), _4(false), _5(sparsevec_out), _6(2275), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(1, 8307), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("sparsevec_out"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("I/O"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "sparsevec_typmod_in", 1, + AddBuiltinFunc(_0(8460), _1("sparsevec_typmod_in"), _2(1), _3(true), _4(false), _5(sparsevec_typmod_in), _6(23), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 1263), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("sparsevec_typmod_in"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false),_33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "sparsevec_recv", 1, + AddBuiltinFunc(_0(8461), _1("sparsevec_recv"), _2(3), _3(true), _4(false), _5(sparsevec_recv), _6(8307), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), 
_18('s'), _19(0), _20(3, 2281, 26, 23), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("sparsevec_recv"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("I/O"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "sparsevec_send", 1, + AddBuiltinFunc(_0(8462), _1("sparsevec_send"), _2(1), _3(true), _4(false), _5(sparsevec_send), _6(17), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(1, 8307), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("sparsevec_send"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("I/O"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "sparsevec_negative_inner_product", 1, + AddBuiltinFunc(_0(8463), _1("sparsevec_negative_inner_product"), _2(1), _3(true), _4(false), _5(sparsevec_negative_inner_product), _6(701), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(2, 8307, 8307), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("sparsevec_negative_inner_product"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "sparsevec_cmp", 1, + AddBuiltinFunc(_0(8464), _1("sparsevec_cmp"), _2(1), _3(true), _4(false), _5(sparsevec_cmp), _6(23), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(2, 8307, 8307), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("sparsevec_cmp"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), 
_37(false), _38(NULL), _39(NULL), _40(0)) + ), + + AddFuncGroup( + "jaccard_distance", 1, + AddBuiltinFunc(_0(8468), _1("jaccard_distance"), _2(2), _3(true), _4(false), _5(jaccard_distance), _6(701), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(2, 1560, 1560), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("jaccard_distance"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "hamming_distance", 1, + AddBuiltinFunc(_0(8469), _1("hamming_distance"), _2(2), _3(true), _4(false), _5(hamming_distance), _6(701), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(2, 1560, 1560), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("hamming_distance"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "sparsevec_l2_squared_distance", 1, + AddBuiltinFunc(_0(8470), _1("sparsevec_l2_squared_distance"), _2(1), _3(true), _4(false), _5(sparsevec_l2_squared_distance), _6(701), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(2, 8307, 8307), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("sparsevec_l2_squared_distance"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "sparsevec_lt", 1, + AddBuiltinFunc(_0(8472), _1("sparsevec_lt"), _2(1), _3(true), _4(false), _5(sparsevec_lt), _6(16), 
_7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(2, 8307, 8307), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("sparsevec_lt"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "sparsevec_le", 1, + AddBuiltinFunc(_0(8473), _1("sparsevec_le"), _2(1), _3(true), _4(false), _5(sparsevec_le), _6(16), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(2, 8307, 8307), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("sparsevec_le"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "sparsevec_eq", 1, + AddBuiltinFunc(_0(8474), _1("sparsevec_eq"), _2(1), _3(true), _4(false), _5(sparsevec_eq), _6(16), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(2, 8307, 8307), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("sparsevec_eq"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "sparsevec_ne", 1, + AddBuiltinFunc(_0(8475), _1("sparsevec_ne"), _2(1), _3(true), _4(false), _5(sparsevec_ne), _6(16), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(2, 8307, 8307), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("sparsevec_ne"), _26(NULL), _27(NULL), _28(NULL), _29(0), 
_30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "sparsevec_ge", 1, + AddBuiltinFunc(_0(8476), _1("sparsevec_ge"), _2(1), _3(true), _4(false), _5(sparsevec_ge), _6(16), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(2, 8307, 8307), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("sparsevec_ge"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "sparsevec_gt", 1, + AddBuiltinFunc(_0(8477), _1("sparsevec_gt"), _2(1), _3(true), _4(false), _5(sparsevec_gt), _6(16), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(2, 8307, 8307), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("sparsevec_gt"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + + AddFuncGroup( + "hnsw_sparsevec_support", 1, + AddBuiltinFunc(_0(8479), _1("hnsw_sparsevec_support"), _2(0), _3(true), _4(false), _5(hnsw_sparsevec_support), _6(2281), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 2281), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("hnsw_sparsevec_support"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "l2_norm", 1, + AddBuiltinFunc(_0(8478), _1("l2_norm"), _2(1), _3(true), _4(false), _5(sparsevec_l2_norm), _6(701), 
_7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 8307), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("sparsevec_l2_norm"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "l2_normalize", 2, + AddBuiltinFunc(_0(8200), _1("l2_normalize"), _2(1), _3(true), _4(false), _5(l2_normalize), _6(8305), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("l2_normalize"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)), + AddBuiltinFunc(_0(8211), _1("l2_normalize"), _2(1), _3(true), _4(false), _5(sparsevec_l2_normalize), _6(8307), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 8307), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("sparsevec_l2_normalize"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "binary_quantize", 1, + AddBuiltinFunc(_0(8201), _1("binary_quantize"), _2(1), _3(true), _4(false), _5(binary_quantize), _6(1562), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("binary_quantize"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), 
_32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( /* _2 on the rows below agrees with the argument count that leads _20 */ + "subvector", 1, + AddBuiltinFunc(_0(8202), _1("subvector"), _2(3), _3(true), _4(false), _5(subvector), _6(8305), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(3, 8305, 23, 23), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("subvector"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_mul", 1, + AddBuiltinFunc(_0(8203), _1("vector_mul"), _2(2), _3(true), _4(false), _5(vector_mul), _6(8305), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(2, 8305, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_mul"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_concat", 1, + AddBuiltinFunc(_0(8204), _1("vector_concat"), _2(2), _3(true), _4(false), _5(vector_concat), _6(8305), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(2, 8305, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_concat"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "ivfflatvalidate", 1, + AddBuiltinFunc(_0(8205), _1("ivfflatvalidate"), _2(1), _3(true), _4(false), _5(ivfflatvalidate), _6(16), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), 
_9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(1, 26), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("ivfflatvalidate"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "ivfflathandler", 1, + AddBuiltinFunc(_0(8206), _1("ivfflathandler"), _2(0), _3(true), _4(false), _5(ivfflathandler), _6(2281), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(0), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("ivfflathandler"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "hnswvalidate", 1, + AddBuiltinFunc(_0(8207), _1("hnswvalidate"), _2(1), _3(true), _4(false), _5(hnswvalidate), _6(16), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(1, 26), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("hnswvalidate"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "hnswhandler", 1, + AddBuiltinFunc(_0(8208), _1("hnswhandler"), _2(0), _3(true), _4(false), _5(hnswhandler), _6(2281), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(0), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("hnswhandler"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), 
_38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "hnsw_bit_support", 1, + AddBuiltinFunc(_0(8209), _1("hnsw_bit_support"), _2(0), _3(true), _4(false), _5(hnsw_bit_support), _6(2281), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 2281), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("hnsw_bit_support"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "ivfflat_bit_support", 1, + AddBuiltinFunc(_0(8210), _1("ivfflat_bit_support"), _2(0), _3(true), _4(false), _5(ivfflat_bit_support), _6(2281), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 2281), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("ivfflat_bit_support"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector", 1, + AddBuiltinFunc(_0(8214), _1("vector"), _2(3), _3(true), _4(false), _5(vector), _6(8305), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(3, 8305, 23, 16), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "array_to_vector", 4, + AddBuiltinFunc(_0(8215), _1("array_to_vector"), _2(3), _3(true), _4(false), _5(array_to_vector), _6(8305), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), 
_14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(3, 1007, 23, 16), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("array_to_vector"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)), + AddBuiltinFunc(_0(8216), _1("array_to_vector"), _2(3), _3(true), _4(false), _5(array_to_vector), _6(8305), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(3, 1021, 23, 16), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("array_to_vector"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)), + AddBuiltinFunc(_0(8217), _1("array_to_vector"), _2(3), _3(true), _4(false), _5(array_to_vector), _6(8305), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(3, 1022, 23, 16), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("array_to_vector"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)), + AddBuiltinFunc(_0(8218), _1("array_to_vector"), _2(3), _3(true), _4(false), _5(array_to_vector), _6(8305), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(3, 1231, 23, 16), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("array_to_vector"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_to_float4", 1, + AddBuiltinFunc(_0(8219), 
_1("vector_to_float4"), _2(3), _3(true), _4(false), _5(vector_to_float4), _6(1021), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(3, 8305, 23, 16), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_to_float4"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "sparsevec", 1, + AddBuiltinFunc(_0(8228), _1("sparsevec"), _2(3), _3(true), _4(false), _5(sparsevec), _6(8307), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(3, 8307, 23, 16), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("sparsevec"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_to_sparsevec", 1, + AddBuiltinFunc(_0(8229), _1("vector_to_sparsevec"), _2(3), _3(true), _4(false), _5(vector_to_sparsevec), _6(8307), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(3, 8305, 23, 16), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_to_sparsevec"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "sparsevec_to_vector", 1, + AddBuiltinFunc(_0(8230), _1("sparsevec_to_vector"), _2(3), _3(true), _4(false), _5(sparsevec_to_vector), _6(8305), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), 
_19(0), _20(3, 8307, 23, 16), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("sparsevec_to_vector"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) ), \ No newline at end of file diff --git a/src/common/backend/catalog/index.cpp b/src/common/backend/catalog/index.cpp index b7ebcdb92f..2dda128ace 100644 --- a/src/common/backend/catalog/index.cpp +++ b/src/common/backend/catalog/index.cpp @@ -416,7 +416,7 @@ static TupleDesc ConstructTupleDescriptor(Relation heapRelation, IndexInfo* inde to->attalign = typeTup->typalign; to->attstattarget = -1; to->attcacheoff = -1; - to->atttypmod = -1; + to->atttypmod = exprTypmod(indexkey); to->attislocal = true; to->attcollation = (i < numkeyatts) ? collationObjectId[i] : InvalidOid; diff --git a/src/common/backend/utils/adt/Makefile b/src/common/backend/utils/adt/Makefile index 8875214f15..58a9f99dc9 100644 --- a/src/common/backend/utils/adt/Makefile +++ b/src/common/backend/utils/adt/Makefile @@ -40,7 +40,7 @@ OBJS = acl.o arrayfuncs.o array_selfuncs.o array_typanalyze.o \ tsvector.o tsvector_op.o tsvector_parser.o \ txid.o uuid.o windowfuncs.o xml.o extended_statistics.o clientlogic_bytea.o clientlogicsettings.o \ median_aggs.o expr_distinct.o nlssort.o memory_func.o first_last_agg.o encrypt_decrypt.o expandeddatum.o \ - subtype.o + subtype.o bitvec.o f2s.o halfutils.o halfvec.o sparsevec.o vector.o like.o: like.cpp like_match.cpp diff --git a/src/common/backend/utils/adt/bitvec.cpp b/src/common/backend/utils/adt/bitvec.cpp new file mode 100644 index 0000000000..46f86ad9b1 --- /dev/null +++ b/src/common/backend/utils/adt/bitvec.cpp @@ -0,0 +1,76 @@ +#include "postgres.h" + +#include "access/datavec/bitvec.h" +#include "utils/varbit.h" + +#if PG_VERSION_NUM >= 160000 +#include "varatt.h" +#endif + +uint64 (*BitHammingDistance)(uint32 bytes, unsigned char *ax, unsigned char *bx, uint64 distance); +double 
(*BitJaccardDistance)(uint32 bytes, unsigned char *ax, unsigned char *bx, uint64 ab, uint64 aa, uint64 bb); +static THR_LOCAL bool BitvecNeedInitialization = true; + +/* + * Allocate and initialize a new bit vector + */ +VarBit *InitBitVector(int dim) +{ + VarBit *result; + int size; + + size = VARBITTOTALLEN(dim); + result = (VarBit *)palloc0(size); + SET_VARSIZE(result, size); + VARBITLEN(result) = dim; + + return result; +} + +/* + * Ensure same dimensions + */ +static inline void CheckDims(VarBit *a, VarBit *b) +{ + if (VARBITLEN(a) != VARBITLEN(b)) + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), + errmsg("different bit lengths %u and %u", VARBITLEN(a), VARBITLEN(b)))); +} + +/* + * Get the Hamming distance between two bit vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(hamming_distance); +Datum hamming_distance(PG_FUNCTION_ARGS) +{ + VarBit *a = PG_GETARG_VARBIT_P(0); + VarBit *b = PG_GETARG_VARBIT_P(1); + + if (BitvecNeedInitialization) { + BitvecInit(); + BitvecNeedInitialization = false; + } + + CheckDims(a, b); + + PG_RETURN_FLOAT8((double)BitHammingDistance(VARBITBYTES(a), VARBITS(a), VARBITS(b), 0)); +} + +/* + * Get the Jaccard distance between two bit vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(jaccard_distance); +Datum jaccard_distance(PG_FUNCTION_ARGS) +{ + VarBit *a = PG_GETARG_VARBIT_P(0); + VarBit *b = PG_GETARG_VARBIT_P(1); + + if (BitvecNeedInitialization) { + BitvecInit(); + BitvecNeedInitialization = false; + } + + CheckDims(a, b); + + PG_RETURN_FLOAT8(BitJaccardDistance(VARBITBYTES(a), VARBITS(a), VARBITS(b), 0, 0, 0)); +} diff --git a/src/common/backend/utils/adt/f2s.cpp b/src/common/backend/utils/adt/f2s.cpp new file mode 100644 index 0000000000..9ade516383 --- /dev/null +++ b/src/common/backend/utils/adt/f2s.cpp @@ -0,0 +1,732 @@ +/*--------------------------------------------------------------------------- + * + * Ryu floating-point output for single precision. 
+ * + * Portions Copyright (c) 2018-2019, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/f2s.c + * + * This is a modification of code taken from github.com/ulfjack/ryu under the + * terms of the Boost license (not the Apache license). The original copyright + * notice follows: + * + * Copyright 2018 Ulf Adams + * + * The contents of this file may be used under the terms of the Apache + * License, Version 2.0. + * + * (See accompanying file LICENSE-Apache or copy at + * http://www.apache.org/licenses/LICENSE-2.0) + * + * Alternatively, the contents of this file may be used under the terms of the + * Boost Software License, Version 1.0. + * + * (See accompanying file LICENSE-Boost or copy at + * https://www.boost.org/LICENSE_1_0.txt) + * + * Unless required by applicable law or agreed to in writing, this software is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. + * + *--------------------------------------------------------------------------- + */ + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include "access/datavec/shortest_dec.h" + +#include "access/datavec/ryu_common.h" + +#define FLOAT_MANTISSA_BITS 23 +#define FLOAT_EXPONENT_BITS 8 +#define FLOAT_BIAS 127 + +/* + * This table is generated (by the upstream) by PrintFloatLookupTable, + * and modified (by us) to add UINT64CONST. 
+ */ +#define FLOAT_POW5_INV_BITCOUNT 59 +static const uint64 FLOAT_POW5_INV_SPLIT[31] = { + UINT64CONST(576460752303423489), UINT64CONST(461168601842738791), UINT64CONST(368934881474191033), + UINT64CONST(295147905179352826), UINT64CONST(472236648286964522), UINT64CONST(377789318629571618), + UINT64CONST(302231454903657294), UINT64CONST(483570327845851670), UINT64CONST(386856262276681336), + UINT64CONST(309485009821345069), UINT64CONST(495176015714152110), UINT64CONST(396140812571321688), + UINT64CONST(316912650057057351), UINT64CONST(507060240091291761), UINT64CONST(405648192073033409), + UINT64CONST(324518553658426727), UINT64CONST(519229685853482763), UINT64CONST(415383748682786211), + UINT64CONST(332306998946228969), UINT64CONST(531691198313966350), UINT64CONST(425352958651173080), + UINT64CONST(340282366920938464), UINT64CONST(544451787073501542), UINT64CONST(435561429658801234), + UINT64CONST(348449143727040987), UINT64CONST(557518629963265579), UINT64CONST(446014903970612463), + UINT64CONST(356811923176489971), UINT64CONST(570899077082383953), UINT64CONST(456719261665907162), + UINT64CONST(365375409332725730)}; +#define FLOAT_POW5_BITCOUNT 61 +static const uint64 FLOAT_POW5_SPLIT[47] = { + UINT64CONST(1152921504606846976), UINT64CONST(1441151880758558720), UINT64CONST(1801439850948198400), + UINT64CONST(2251799813685248000), UINT64CONST(1407374883553280000), UINT64CONST(1759218604441600000), + UINT64CONST(2199023255552000000), UINT64CONST(1374389534720000000), UINT64CONST(1717986918400000000), + UINT64CONST(2147483648000000000), UINT64CONST(1342177280000000000), UINT64CONST(1677721600000000000), + UINT64CONST(2097152000000000000), UINT64CONST(1310720000000000000), UINT64CONST(1638400000000000000), + UINT64CONST(2048000000000000000), UINT64CONST(1280000000000000000), UINT64CONST(1600000000000000000), + UINT64CONST(2000000000000000000), UINT64CONST(1250000000000000000), UINT64CONST(1562500000000000000), + UINT64CONST(1953125000000000000), 
UINT64CONST(1220703125000000000), UINT64CONST(1525878906250000000), + UINT64CONST(1907348632812500000), UINT64CONST(1192092895507812500), UINT64CONST(1490116119384765625), + UINT64CONST(1862645149230957031), UINT64CONST(1164153218269348144), UINT64CONST(1455191522836685180), + UINT64CONST(1818989403545856475), UINT64CONST(2273736754432320594), UINT64CONST(1421085471520200371), + UINT64CONST(1776356839400250464), UINT64CONST(2220446049250313080), UINT64CONST(1387778780781445675), + UINT64CONST(1734723475976807094), UINT64CONST(2168404344971008868), UINT64CONST(1355252715606880542), + UINT64CONST(1694065894508600678), UINT64CONST(2117582368135750847), UINT64CONST(1323488980084844279), + UINT64CONST(1654361225106055349), UINT64CONST(2067951531382569187), UINT64CONST(1292469707114105741), + UINT64CONST(1615587133892632177), UINT64CONST(2019483917365790221)}; + +static inline uint32 pow5Factor(uint32 value) +{ + uint32 count = 0; + + for (;;) { + Assert(value != 0); + const uint32 q = value / 5; + const uint32 r = value % 5; + + if (r != 0) { + break; + } + + value = q; + ++count; + } + return count; +} + +/* Returns true if value is divisible by 5^p. */ +static inline bool multipleOfPowerOf5(const uint32 value, const uint32 p) +{ + return pow5Factor(value) >= p; +} + +/* Returns true if value is divisible by 2^p. */ +static inline bool multipleOfPowerOf2(const uint32 value, const uint32 p) +{ + return (value & ((1u << p) - 1)) == 0; +} + +/* + * It seems to be slightly faster to avoid uint128_t here, although the + * generated code for uint128_t looks slightly nicer. + */ +static inline uint32 mulShift(const uint32 m, const uint64 factor, const int32 shift) +{ + /* + * The casts here help MSVC to avoid calls to the __allmul library + * function. 
+ */ + const uint32 factorLo = (uint32)(factor); + const uint32 factorHi = (uint32)(factor >> 32); + const uint64 bits0 = (uint64)m * factorLo; + const uint64 bits1 = (uint64)m * factorHi; + + Assert(shift > 32); + +#ifdef RYU_32_BIT_PLATFORM + + /* + * On 32-bit platforms we can avoid a 64-bit shift-right since we only + * need the upper 32 bits of the result and the shift value is > 32. + */ + const uint32 bits0Hi = (uint32)(bits0 >> 32); + uint32 bits1Lo = (uint32)(bits1); + uint32 bits1Hi = (uint32)(bits1 >> 32); + + bits1Lo += bits0Hi; + bits1Hi += (bits1Lo < bits0Hi); + + const int32 s = shift - 32; + + return (bits1Hi << (32 - s)) | (bits1Lo >> s); + +#else /* RYU_32_BIT_PLATFORM */ + + const uint64 sum = (bits0 >> 32) + bits1; + const uint64 shiftedSum = sum >> (shift - 32); + + Assert(shiftedSum <= UINT32_MAX); + return (uint32)shiftedSum; + +#endif /* RYU_32_BIT_PLATFORM */ +} + +static inline uint32 mulPow5InvDivPow2(const uint32 m, const uint32 q, const int32 j) +{ + return mulShift(m, FLOAT_POW5_INV_SPLIT[q], j); +} + +static inline uint32 mulPow5divPow2(const uint32 m, const uint32 i, const int32 j) +{ + return mulShift(m, FLOAT_POW5_SPLIT[i], j); +} + +static inline uint32 decimalLength(const uint32 v) +{ + /* Function precondition: v is not a 10-digit number. */ + /* (9 digits are sufficient for round-tripping.) */ + Assert(v < 1000000000); + if (v >= 100000000) { + return 9; + } + if (v >= 10000000) { + return 8; + } + if (v >= 1000000) { + return 7; + } + if (v >= 100000) { + return 6; + } + if (v >= 10000) { + return 5; + } + if (v >= 1000) { + return 4; + } + if (v >= 100) { + return 3; + } + if (v >= 10) { + return 2; + } + return 1; +} + +/* A floating decimal representing m * 10^e. 
 */
typedef struct floating_decimal_32 {
    uint32 mantissa; /* decimal digits m */
    int32 exponent;  /* decimal exponent e */
} floating_decimal_32;

/*
 * Convert the raw IEEE-754 single-precision fields into a decimal
 * mantissa/exponent pair.
 *
 * ieeeMantissa is the FLOAT_MANTISSA_BITS-wide fraction field and
 * ieeeExponent the FLOAT_EXPONENT_BITS-wide biased exponent field, as
 * extracted by FloatToShortestDecimalBufn.  That caller filters out zero and
 * the all-ones exponent before getting here.
 *
 * NOTE(review): log10Pow2, log10Pow5 and pow5bits are not defined in this
 * file; presumably they come from ryu_common.h -- confirm.
 */
static inline floating_decimal_32 f2d(const uint32 ieeeMantissa, const uint32 ieeeExponent)
{
    int32 e2;
    uint32 m2;

    if (ieeeExponent == 0) {
        /* We subtract 2 so that the bounds computation has 2 additional bits. */
        e2 = 1 - FLOAT_BIAS - FLOAT_MANTISSA_BITS - 2;
        m2 = ieeeMantissa;
    } else {
        /* Non-zero exponent field: OR the implicit leading 1 bit into m2. */
        e2 = ieeeExponent - FLOAT_BIAS - FLOAT_MANTISSA_BITS - 2;
        m2 = (1u << FLOAT_MANTISSA_BITS) | ieeeMantissa;
    }

#if STRICTLY_SHORTEST
    const bool even = (m2 & 1) == 0;
    const bool acceptBounds = even;
#else
    const bool acceptBounds = false;
#endif

    /* Step 2: Determine the interval of legal decimal representations. */
    const uint32 mv = 4 * m2;
    const uint32 mp = 4 * m2 + 2;

    /* Implicit bool -> int conversion. True is 1, false is 0. */
    const uint32 mmShift = ieeeMantissa != 0 || ieeeExponent <= 1;
    const uint32 mm = 4 * m2 - 1 - mmShift;

    /* Step 3: Convert to a decimal power base using 64-bit arithmetic. */
    uint32 vr, vp, vm;
    int32 e10;
    bool vmIsTrailingZeros = false;
    bool vrIsTrailingZeros = false;
    uint8 lastRemovedDigit = 0;

    if (e2 >= 0) {
        const uint32 q = log10Pow2(e2);

        e10 = q;

        const int32 k = FLOAT_POW5_INV_BITCOUNT + pow5bits(q) - 1;
        const int32 i = -e2 + q + k;

        vr = mulPow5InvDivPow2(mv, q, i);
        vp = mulPow5InvDivPow2(mp, q, i);
        vm = mulPow5InvDivPow2(mm, q, i);
        if (q != 0 && (vp - 1) / 10 <= vm / 10) {
            /*
             * We need to know one removed digit even if we are not going to
             * loop below. We could use q = X - 1 above, except that would
             * require 33 bits for the result, and we've found that 32-bit
             * arithmetic is faster even on 64-bit machines.
             */
            const int32 l = FLOAT_POW5_INV_BITCOUNT + pow5bits(q - 1) - 1;

            lastRemovedDigit = (uint8)(mulPow5InvDivPow2(mv, q - 1, -e2 + q - 1 + l) % 10);
        }
        if (q <= 9) {
            /*
             * The largest power of 5 that fits in 24 bits is 5^10, but q <= 9
             * seems to be safe as well.
             *
             * Only one of mp, mv, and mm can be a multiple of 5, if any.
             */
            if (mv % 5 == 0) {
                vrIsTrailingZeros = multipleOfPowerOf5(mv, q);
            } else if (acceptBounds) {
                vmIsTrailingZeros = multipleOfPowerOf5(mm, q);
            } else {
                /* bool result is used as 0/1: shrink the upper bound by one */
                vp -= multipleOfPowerOf5(mp, q);
            }
        }
    } else {
        const uint32 q = log10Pow5(-e2);

        e10 = q + e2;

        const int32 i = -e2 - q;
        const int32 k = pow5bits(i) - FLOAT_POW5_BITCOUNT;
        int32 j = q - k;

        vr = mulPow5divPow2(mv, i, j);
        vp = mulPow5divPow2(mp, i, j);
        vm = mulPow5divPow2(mm, i, j);
        if (q != 0 && (vp - 1) / 10 <= vm / 10) {
            /* Same "one removed digit" look-ahead as in the e2 >= 0 branch. */
            j = q - 1 - (pow5bits(i + 1) - FLOAT_POW5_BITCOUNT);
            lastRemovedDigit = (uint8)(mulPow5divPow2(mv, i + 1, j) % 10);
        }
        if (q <= 1) {
            /*
             * {vr,vp,vm} is trailing zeros if {mv,mp,mm} has at least q
             * trailing 0 bits.
             */
            /* mv = 4 * m2, so it always has at least two trailing 0 bits. */
            vrIsTrailingZeros = true;
            if (acceptBounds) {
                /*
                 * mm = mv - 1 - mmShift, so it has 1 trailing 0 bit iff
                 * mmShift == 1.
                 */
                vmIsTrailingZeros = mmShift == 1;
            } else {
                /*
                 * mp = mv + 2, so it always has at least one trailing 0 bit.
                 */
                --vp;
            }
        } else if (q < 31) {
            vrIsTrailingZeros = multipleOfPowerOf2(mv, q - 1);
        }
    }

    /*
     * Step 4: Find the shortest decimal representation in the interval of
     * legal representations.
     */
    uint32 removed = 0;
    uint32 output;

    if (vmIsTrailingZeros || vrIsTrailingZeros) {
        /* General case, which happens rarely (~4.0%). */
        while (vp / 10 > vm / 10) {
            vmIsTrailingZeros &= vm - (vm / 10) * 10 == 0;
            vrIsTrailingZeros &= lastRemovedDigit == 0;
            lastRemovedDigit = (uint8)(vr % 10);
            vr /= 10;
            vp /= 10;
            vm /= 10;
            ++removed;
        }
        if (vmIsTrailingZeros) {
            while (vm % 10 == 0) {
                vrIsTrailingZeros &= lastRemovedDigit == 0;
                lastRemovedDigit = (uint8)(vr % 10);
                vr /= 10;
                vp /= 10;
                vm /= 10;
                ++removed;
            }
        }

        if (vrIsTrailingZeros && lastRemovedDigit == 5 && vr % 2 == 0) {
            /* Round even if the exact number is .....50..0. */
            lastRemovedDigit = 4;
        }

        /*
         * We need to take vr + 1 if vr is outside bounds or we need to round
         * up.
         */
        output = vr + ((vr == vm && (!acceptBounds || !vmIsTrailingZeros)) || lastRemovedDigit >= 5);
    } else {
        /*
         * Specialized for the common case (~96.0%). Percentages below are
         * relative to this.
         *
         * Loop iterations below (approximately): 0: 13.6%, 1: 70.7%, 2:
         * 14.1%, 3: 1.39%, 4: 0.14%, 5+: 0.01%
         */
        while (vp / 10 > vm / 10) {
            lastRemovedDigit = (uint8)(vr % 10);
            vr /= 10;
            vp /= 10;
            vm /= 10;
            ++removed;
        }

        /*
         * We need to take vr + 1 if vr is outside bounds or we need to round
         * up.
         */
        output = vr + (vr == vm || lastRemovedDigit >= 5);
    }

    /* Every digit dropped in step 4 moves one power of ten into the exponent. */
    const int32 exp = e10 + removed;

    floating_decimal_32 fd;

    fd.exponent = exp;
    fd.mantissa = output;
    return fd;
}

/*
 * Write v in fixed-point notation (no exponent suffix) into result.
 *
 * olength is the number of decimal digits in v.mantissa, as computed by the
 * caller (ToChars).  Any '-' sign has already been emitted; result points
 * just past it.  Returns the number of bytes that make up the output; no
 * terminator is written.  Two of the three layouts pre-fill 8 bytes of
 * result, so the buffer may be touched beyond the returned length.
 */
static inline int to_chars_f(const floating_decimal_32 v, const uint32 olength, char *const result)
{
    /* Step 5: Print the decimal representation. */
    int index = 0;

    uint32 output = v.mantissa;
    int32 exp = v.exponent;
    errno_t rc = EOK;

    /*----
     * On entry, mantissa * 10^exp is the result to be output.
     * Caller has already done the - sign if needed.
     *
     * We want to insert the point somewhere depending on the output length
     * and exponent, which might mean adding zeros:
     *
     *            exp  | format
     *            1+   | ddddddddd000000
     *            0    | ddddddddd
     *  -1 .. -len+1   | dddddddd.d to d.ddddddddd
     *  -len ...       | 0.ddddddddd to 0.000dddddd
     */
    uint32 i = 0;
    int32 nexp = exp + olength;

    if (nexp <= 0) {
        /* -nexp is number of 0s to add after '.' */
        Assert(nexp >= -3);
        /* 0.000ddddd */
        index = 2 - nexp;
        /* copy 8 bytes rather than 5 to let compiler optimize */
        rc = memcpy_s(result, 8, "0.000000", 8);
        securec_check(rc, "\0", "\0");
    } else if (exp < 0) {
        /*
         * dddd.dddd; leave space at the start and move the '.' in after
         */
        index = 1;
    } else {
        /*
         * We can save some code later by pre-filling with zeros. We know
         * that there can be no more than 6 output digits in this form,
         * otherwise we would not choose fixed-point output. memset 8
         * rather than 6 bytes to let the compiler optimize it.
         */
        Assert(exp < 6 && exp + olength <= 6);
        rc = memset_s(result, 8, '0', 8);
        securec_check(rc, "\0", "\0");
    }

    /*
     * Emit the digits right-to-left, four and then two at a time, copying
     * character pairs out of DIGIT_TABLE (defined outside this file).
     */
    while (output >= 10000) {
        const uint32 c = output - 10000 * (output / 10000);
        const uint32 c0 = (c % 100) << 1;
        const uint32 c1 = (c / 100) << 1;

        output /= 10000;

        rc = memcpy_s(result + index + olength - i - 2, 2, DIGIT_TABLE + c0, 2);
        securec_check(rc, "\0", "\0");
        rc = memcpy_s(result + index + olength - i - 4, 2, DIGIT_TABLE + c1, 2);
        securec_check(rc, "\0", "\0");
        i += 4;
    }
    if (output >= 100) {
        const uint32 c = (output % 100) << 1;

        output /= 100;
        rc = memcpy_s(result + index + olength - i - 2, 2, DIGIT_TABLE + c, 2);
        securec_check(rc, "\0", "\0");
        i += 2;
    }
    if (output >= 10) {
        const uint32 c = output << 1;

        rc = memcpy_s(result + index + olength - i - 2, 2, DIGIT_TABLE + c, 2);
        securec_check(rc, "\0", "\0");
    } else {
        result[index] = (char)('0' + output);
    }

    if (index == 1) {
        /*
         * nexp is 1..6 here, representing the number of digits before the
         * point. A value of 7+ is not possible because we switch to
         * scientific notation when the display exponent reaches 6.
         */
        Assert(nexp < 7);
        /* gcc only seems to want to optimize memmove for small 2^n */
        if (nexp & 4) {
            rc = memmove_s(result + index - 1, 4, result + index, 4);
            securec_check(rc, "\0", "\0");
            index += 4;
        }
        if (nexp & 2) {
            rc = memmove_s(result + index - 1, 2, result + index, 2);
            securec_check(rc, "\0", "\0");
            index += 2;
        }
        if (nexp & 1) {
            result[index - 1] = result[index];
        }
        /* The integer digits now sit in result[0..nexp-1]; drop in the point. */
        result[nexp] = '.';
        index = olength + 1;
    } else if (exp >= 0) {
        /* we supplied the trailing zeros earlier, now just set the length. */
        index = olength + exp;
    } else {
        /* "0." plus -nexp leading zeros, followed by the digits */
        index = olength + (2 - nexp);
    }

    return index;
}

/*
 * Write the decimal value v into result, preceded by '-' when sign is true,
 * and return the number of bytes written.  No terminator is added.
 *
 * When the display exponent is in [-4, 6) the work is delegated to
 * to_chars_f (fixed-point).  Otherwise the value is written in scientific
 * notation: first digit, optional ".digits", then 'e', an explicit sign and
 * two exponent characters.
 */
static inline int ToChars(const floating_decimal_32 v, const bool sign, char *const result)
{
    /* Step 5: Print the decimal representation. */
    int index = 0;

    uint32 output = v.mantissa;
    uint32 olength = decimalLength(output);
    int32 exp = v.exponent + olength - 1;
    errno_t rc = EOK;

    if (sign) {
        result[index++] = '-';
    }

    /*
     * The thresholds for fixed-point output are chosen to match printf
     * defaults. Beware that both the code of to_chars_f and the value
     * of FLOAT_SHORTEST_DECIMAL_LEN are sensitive to these thresholds.
     */
    if (exp >= -4 && exp < 6) {
        /* to_chars_f does not count the sign byte, so add it back here. */
        return to_chars_f(v, olength, result + index) + sign;
    }

    /*
     * If v.exponent is exactly 0, we might have reached here via the small
     * integer fast path, in which case v.mantissa might contain trailing
     * (decimal) zeros. For scientific notation we need to move these zeros
     * into the exponent. (For fixed point this doesn't matter, which is why
     * we do this here rather than above.)
     *
     * Since we already calculated the display exponent (exp) above based on
     * the old decimal length, that value does not change here. Instead, we
     * just reduce the display length for each digit removed.
     *
     * If we didn't get here via the fast path, the raw exponent will not
     * usually be 0, and there will be no trailing zeros, so we pay no more
     * than one div10/multiply extra cost. We claw back half of that by
     * checking for divisibility by 2 before dividing by 10.
     */
    if (v.exponent == 0) {
        while ((output & 1) == 0) {
            const uint32 q = output / 10;
            const uint32 r = output - 10 * q;

            if (r != 0) {
                break;
            }
            output = q;
            --olength;
        }
    }

    /*----
     * Print the decimal digits.
     * The following code is equivalent to:
     *
     * for (uint32 i = 0; i < olength - 1; ++i) {
     *   const uint32 c = output % 10; output /= 10;
     *   result[index + olength - i] = (char) ('0' + c);
     * }
     * result[index] = '0' + output % 10;
     */
    uint32 i = 0;

    /*
     * Offsets here are one larger than in to_chars_f: result[index + 1] is
     * reserved for the decimal point written further down.
     */
    while (output >= 10000) {
        const uint32 c = output - 10000 * (output / 10000);
        const uint32 c0 = (c % 100) << 1;
        const uint32 c1 = (c / 100) << 1;

        output /= 10000;

        rc = memcpy_s(result + index + olength - i - 1, 2, DIGIT_TABLE + c0, 2);
        securec_check(rc, "\0", "\0");
        rc = memcpy_s(result + index + olength - i - 3, 2, DIGIT_TABLE + c1, 2);
        securec_check(rc, "\0", "\0");
        i += 4;
    }
    if (output >= 100) {
        const uint32 c = (output % 100) << 1;

        output /= 100;
        rc = memcpy_s(result + index + olength - i - 1, 2, DIGIT_TABLE + c, 2);
        securec_check(rc, "\0", "\0");
        i += 2;
    }
    if (output >= 10) {
        const uint32 c = output << 1;

        /*
         * We can't use memcpy here: the decimal dot goes between these two
         * digits.
         */
        result[index + olength - i] = DIGIT_TABLE[c + 1];
        result[index] = DIGIT_TABLE[c];
    } else {
        result[index] = (char)('0' + output);
    }

    /* Print decimal point if needed. */
    if (olength > 1) {
        result[index + 1] = '.';
        index += olength + 1;
    } else {
        ++index;
    }

    /* Print the exponent. */
    result[index++] = 'e';
    if (exp < 0) {
        result[index++] = '-';
        exp = -exp;
    } else {
        result[index++] = '+';
    }

    /* Exactly two exponent characters are copied from DIGIT_TABLE. */
    rc = memcpy_s(result + index, 2, DIGIT_TABLE + 2 * exp, 2);
    securec_check(rc, "\0", "\0");
    index += 2;

    return index;
}

/*
 * Fast path for floats that are exact integers in [1, 2^24).
 *
 * On success stores the integer in v->mantissa with v->exponent = 0 and
 * returns true.  Otherwise returns false and leaves *v untouched.
 */
static inline bool f2d_small_int(const uint32 ieeeMantissa, const uint32 ieeeExponent, floating_decimal_32 *v)
{
    const int32 e2 = (int32)ieeeExponent - FLOAT_BIAS - FLOAT_MANTISSA_BITS;

    /*
     * Avoid using multiple "return false;" here since it tends to provoke the
     * compiler into inlining multiple copies of f2d, which is undesirable.
     */

    if (e2 >= -FLOAT_MANTISSA_BITS && e2 <= 0) {
        /*----
         * Since 2^23 <= m2 < 2^24 and 0 <= -e2 <= 23:
         *   1 <= f = m2 / 2^-e2 < 2^24.
         *
         * Test if the lower -e2 bits of the significand are 0, i.e. whether
         * the fraction is 0. We can use ieeeMantissa here, since the implied
         * 1 bit can never be tested by this; the implied 1 can only be part
         * of a fraction if e2 < -FLOAT_MANTISSA_BITS which we already
         * checked. (e.g. 0.5 gives ieeeMantissa == 0 and e2 == -24)
         */
        const uint32 mask = (1U << -e2) - 1;
        const uint32 fraction = ieeeMantissa & mask;

        if (fraction == 0) {
            /*----
             * f is an integer in the range [1, 2^24).
             * Note: mantissa might contain trailing (decimal) 0's.
             * Note: since 2^24 < 10^9, there is no need to adjust
             * decimalLength().
             */
            const uint32 m2 = (1U << FLOAT_MANTISSA_BITS) | ieeeMantissa;

            v->mantissa = m2 >> -e2;
            v->exponent = 0;
            return true;
        }
    }

    return false;
}

/*
 * Store the shortest decimal representation of the given float as an
 * UNTERMINATED string in the caller's supplied buffer (which must be at least
 * FLOAT_SHORTEST_DECIMAL_LEN-1 bytes long).
 *
 * Returns the number of bytes stored.
 */
int FloatToShortestDecimalBufn(float f, char *result)
{
    /*
     * Step 1: Decode the floating-point number, and unify normalized and
     * subnormal cases.
     */
    const uint32 bits = float_to_bits(f);

    /* Decode bits into sign, mantissa, and exponent. */
    const bool ieeeSign = ((bits >> (FLOAT_MANTISSA_BITS + FLOAT_EXPONENT_BITS)) & 1) != 0;
    const uint32 ieeeMantissa = bits & ((1u << FLOAT_MANTISSA_BITS) - 1);
    const uint32 ieeeExponent = (bits >> FLOAT_MANTISSA_BITS) & ((1u << FLOAT_EXPONENT_BITS) - 1);

    /*
     * Case distinction; exit early for the easy cases: an all-ones exponent
     * field, or exponent and mantissa both zero.
     */
    if (ieeeExponent == ((1u << FLOAT_EXPONENT_BITS) - 1u) || (ieeeExponent == 0 && ieeeMantissa == 0)) {
        return copy_special_str(result, ieeeSign, ieeeExponent, ieeeMantissa);
    }

    /* Try the exact-integer fast path first; fall back to the full f2d. */
    floating_decimal_32 v;
    const bool isSmallInt = f2d_small_int(ieeeMantissa, ieeeExponent, &v);
    if (!isSmallInt) {
        v = f2d(ieeeMantissa, ieeeExponent);
    }

    return ToChars(v, ieeeSign, result);
}

/*
 * Store the shortest decimal representation of the given float as a
 * null-terminated string in the caller's supplied buffer (which must be at
 * least FLOAT_SHORTEST_DECIMAL_LEN bytes long).
 *
 * Returns the string length.
 */
int FloatToShortestDecimalBuf(float f, char *result)
{
    const int index = FloatToShortestDecimalBufn(f, result);

    /* Terminate the string. */
    Assert(index < FLOAT_SHORTEST_DECIMAL_LEN);
    result[index] = '\0';
    return index;
}

/*
 * Return the shortest decimal representation as a null-terminated palloc'd
 * string (outside the backend, uses malloc() instead).
 *
 * Caller is responsible for freeing the result.
+ */ +char *FloatToShortestDecimal(float f) +{ + char *const result = (char *)palloc(FLOAT_SHORTEST_DECIMAL_LEN); + + FloatToShortestDecimalBuf(f, result); + return result; +} diff --git a/src/common/backend/utils/adt/halfutils.cpp b/src/common/backend/utils/adt/halfutils.cpp new file mode 100644 index 0000000000..cb62c054c0 --- /dev/null +++ b/src/common/backend/utils/adt/halfutils.cpp @@ -0,0 +1,275 @@ +#include "postgres.h" + +#include "access/datavec/halfutils.h" +#include "access/datavec/halfvec.h" + +#ifdef HALFVEC_DISPATCH +#include + +#if defined(USE__GET_CPUID) +#include +#else +#include +#endif + +#define TARGET_F16C +#endif + +float (*HalfvecL2SquaredDistance)(int dim, half *ax, half *bx); +float (*HalfvecInnerProduct)(int dim, half *ax, half *bx); +double (*HalfvecCosineSimilarity)(int dim, half *ax, half *bx); +float (*HalfvecL1Distance)(int dim, half *ax, half *bx); + +static float HalfvecL2SquaredDistanceDefault(int dim, half *ax, half *bx) +{ + float distance = 0.0; + + /* Auto-vectorized */ + for (int i = 0; i < dim; i++) { + float diff = HalfToFloat4(ax[i]) - HalfToFloat4(bx[i]); + + distance += diff * diff; + } + + return distance; +} + +#ifdef HALFVEC_DISPATCH +TARGET_F16C static float HalfvecL2SquaredDistanceF16c(int dim, half *ax, half *bx) +{ + float distance; + int i; + float s[8]; + int count = (dim / 8) * 8; + __m256 dist = _mm256_setzero_ps(); + + for (i = 0; i < count; i += 8) { + __m128i axi = _mm_loadu_si128((__m128i *)(ax + i)); + __m128i bxi = _mm_loadu_si128((__m128i *)(bx + i)); + __m256 axs = _mm256_cvtph_ps(axi); + __m256 bxs = _mm256_cvtph_ps(bxi); + __m256 diff = _mm256_sub_ps(axs, bxs); + + dist = _mm256_fmadd_ps(diff, diff, dist); + } + + _mm256_storeu_ps(s, dist); + + distance = s[0] + s[1] + s[2] + s[3] + s[4] + s[5] + s[6] + s[7]; + + for (; i < dim; i++) { + float diff = HalfToFloat4(ax[i]) - HalfToFloat4(bx[i]); + + distance += diff * diff; + } + + return distance; +} +#endif + +static float 
HalfvecInnerProductDefault(int dim, half *ax, half *bx) +{ + float distance = 0.0; + + /* Auto-vectorized */ + for (int i = 0; i < dim; i++) + distance += HalfToFloat4(ax[i]) * HalfToFloat4(bx[i]); + + return distance; +} + +#ifdef HALFVEC_DISPATCH +TARGET_F16C static float HalfvecInnerProductF16c(int dim, half *ax, half *bx) +{ + float distance; + int i; + float s[8]; + int count = (dim / 8) * 8; + __m256 dist = _mm256_setzero_ps(); + + for (i = 0; i < count; i += 8) { + __m128i axi = _mm_loadu_si128((__m128i *)(ax + i)); + __m128i bxi = _mm_loadu_si128((__m128i *)(bx + i)); + __m256 axs = _mm256_cvtph_ps(axi); + __m256 bxs = _mm256_cvtph_ps(bxi); + + dist = _mm256_fmadd_ps(axs, bxs, dist); + } + + _mm256_storeu_ps(s, dist); + + distance = s[0] + s[1] + s[2] + s[3] + s[4] + s[5] + s[6] + s[7]; + + for (; i < dim; i++) + distance += HalfToFloat4(ax[i]) * HalfToFloat4(bx[i]); + + return distance; +} +#endif + +static double HalfvecCosineSimilarityDefault(int dim, half *ax, half *bx) +{ + float similarity = 0.0; + float norma = 0.0; + float normb = 0.0; + + /* Auto-vectorized */ + for (int i = 0; i < dim; i++) { + float axi = HalfToFloat4(ax[i]); + float bxi = HalfToFloat4(bx[i]); + + similarity += axi * bxi; + norma += axi * axi; + normb += bxi * bxi; + } + + /* Use sqrt(a * b) over sqrt(a) * sqrt(b) */ + return static_cast(similarity) / sqrt(static_cast(norma) * static_cast(normb)); +} + +#ifdef HALFVEC_DISPATCH +TARGET_F16C static double HalfvecCosineSimilarityF16c(int dim, half *ax, half *bx) +{ + float similarity; + float norma; + float normb; + int i; + float s[8]; + int count = (dim / 8) * 8; + __m256 sim = _mm256_setzero_ps(); + __m256 na = _mm256_setzero_ps(); + __m256 nb = _mm256_setzero_ps(); + + for (i = 0; i < count; i += 8) { + __m128i axi = _mm_loadu_si128((__m128i *)(ax + i)); + __m128i bxi = _mm_loadu_si128((__m128i *)(bx + i)); + __m256 axs = _mm256_cvtph_ps(axi); + __m256 bxs = _mm256_cvtph_ps(bxi); + + sim = _mm256_fmadd_ps(axs, bxs, sim); + na = 
_mm256_fmadd_ps(axs, axs, na); + nb = _mm256_fmadd_ps(bxs, bxs, nb); + } + + _mm256_storeu_ps(s, sim); + similarity = s[0] + s[1] + s[2] + s[3] + s[4] + s[5] + s[6] + s[7]; + + _mm256_storeu_ps(s, na); + norma = s[0] + s[1] + s[2] + s[3] + s[4] + s[5] + s[6] + s[7]; + + _mm256_storeu_ps(s, nb); + normb = s[0] + s[1] + s[2] + s[3] + s[4] + s[5] + s[6] + s[7]; + + /* Auto-vectorized */ + for (; i < dim; i++) { + float axi = HalfToFloat4(ax[i]); + float bxi = HalfToFloat4(bx[i]); + + similarity += axi * bxi; + norma += axi * axi; + normb += bxi * bxi; + } + + /* Use sqrt(a * b) over sqrt(a) * sqrt(b) */ + return (double)similarity / sqrt((double)norma * (double)normb); +} +#endif + +static float HalfvecL1DistanceDefault(int dim, half *ax, half *bx) +{ + float distance = 0.0; + + /* Auto-vectorized */ + for (int i = 0; i < dim; i++) + distance += fabsf(HalfToFloat4(ax[i]) - HalfToFloat4(bx[i])); + + return distance; +} + +#ifdef HALFVEC_DISPATCH +/* Does not require FMA, but keep logic simple */ +TARGET_F16C static float HalfvecL1DistanceF16c(int dim, half *ax, half *bx) +{ + float distance; + int i; + float s[8]; + int count = (dim / 8) * 8; + __m256 dist = _mm256_setzero_ps(); + __m256 sign = _mm256_set1_ps(-0.0); + + for (i = 0; i < count; i += 8) { + __m128i axi = _mm_loadu_si128((__m128i *)(ax + i)); + __m128i bxi = _mm_loadu_si128((__m128i *)(bx + i)); + __m256 axs = _mm256_cvtph_ps(axi); + __m256 bxs = _mm256_cvtph_ps(bxi); + + dist = _mm256_add_ps(dist, _mm256_andnot_ps(sign, _mm256_sub_ps(axs, bxs))); + } + + _mm256_storeu_ps(s, dist); + + distance = s[0] + s[1] + s[2] + s[3] + s[4] + s[5] + s[6] + s[7]; + + for (; i < dim; i++) + distance += fabsf(HalfToFloat4(ax[i]) - HalfToFloat4(bx[i])); + + return distance; +} +#endif + +#ifdef HALFVEC_DISPATCH +#define CPU_FEATURE_FMA (1 << 12) +#define CPU_FEATURE_OSXSAVE (1 << 27) +#define CPU_FEATURE_AVX (1 << 28) +#define CPU_FEATURE_F16C (1 << 29) + +#ifdef _MSC_VER +#define TARGET_XSAVE +#else +#define TARGET_XSAVE 
__attribute__((target("xsave"))) +#endif + +TARGET_XSAVE static bool SupportsCpuFeature(unsigned int feature) /* true only if the OS reports XSAVE, XMM/YMM state is enabled and every bit in feature is set */ +{ + unsigned int exx[4] = {0, 0, 0, 0}; /* output registers of CPUID leaf 1 */ + +#if defined(USE__GET_CPUID) + __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]); +#else + __cpuid(exx, 1); +#endif + + /* Check OS supports XSAVE */ + if ((exx[2] & CPU_FEATURE_OSXSAVE) != CPU_FEATURE_OSXSAVE) + return false; + + /* Check XMM and YMM registers are enabled */ + if ((_xgetbv(0) & 6) != 6) + return false; + + /* Now check that every requested feature bit is set */ + return (exx[2] & feature) == feature; +} +#endif + +void HalfvecInit(void) +{ + /* + * Install the portable implementations first. Could skip pointer when single function, but no difference in + * performance + */ + HalfvecL2SquaredDistance = HalfvecL2SquaredDistanceDefault; + HalfvecInnerProduct = HalfvecInnerProductDefault; + HalfvecCosineSimilarity = HalfvecCosineSimilarityDefault; + HalfvecL1Distance = HalfvecL1DistanceDefault; + +#ifdef HALFVEC_DISPATCH + if (SupportsCpuFeature(CPU_FEATURE_AVX | CPU_FEATURE_F16C | CPU_FEATURE_FMA)) { /* switch to the F16C variants only when AVX, F16C and FMA are all reported */ + HalfvecL2SquaredDistance = HalfvecL2SquaredDistanceF16c; + HalfvecInnerProduct = HalfvecInnerProductF16c; + HalfvecCosineSimilarity = HalfvecCosineSimilarityF16c; + /* Does not require FMA, but keep logic simple */ + HalfvecL1Distance = HalfvecL1DistanceF16c; + } +#endif +} diff --git a/src/common/backend/utils/adt/halfvec.cpp b/src/common/backend/utils/adt/halfvec.cpp new file mode 100644 index 0000000000..3ee57c1cf7 --- /dev/null +++ b/src/common/backend/utils/adt/halfvec.cpp @@ -0,0 +1,1123 @@ +#include "postgres.h" + +#include <math.h> + +#include "access/datavec/bitvec.h" +#include "catalog/pg_type.h" +#include "fmgr.h" +#include "access/datavec/halfutils.h" +#include "access/datavec/halfvec.h" +#include "lib/stringinfo.h" +#include "libpq/pqformat.h" +#include "port.h" /* for strtof() */ +#include "access/datavec/shortest_dec.h" +#include "access/datavec/sparsevec.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/numeric.h" 
+#include "access/datavec/vector.h" + +#if PG_VERSION_NUM < 130000 +#define TYPALIGN_DOUBLE 'd' +#define TYPALIGN_INT 'i' +#endif + +#define STATE_DIMS(x) (ARR_DIMS(x)[0] - 1) /* aggregate state is [count, sum_1..sum_dim], so dims = length - 1 */ +#define CreateStateDatums(dim) palloc(sizeof(Datum) * ((dim) + 1)) /* room for the count plus dim sums */ + +/* + * Read a 2-byte integer from the message buffer and reinterpret its bits as a half + */ +static half pq_getmsghalf(StringInfo msg) +{ + union { + half h; + uint16 i; + } swap; + + swap.i = pq_getmsgint(msg, 2); + return swap.h; +} + +/* + * Append the bit pattern of a half to a StringInfo buffer as a 16-bit integer + */ +static void pq_sendhalf(StringInfo buf, half h) +{ + union { + half h; + uint16 i; + } swap; + + swap.h = h; + pq_sendint16(buf, swap.i); +} + +/* + * Raise ERRCODE_DATA_EXCEPTION unless both half vectors have the same dim + */ +static inline void CheckDims(HalfVector *a, HalfVector *b) +{ + if (a->dim != b->dim) { + ereport(ERROR, + (errcode(ERRCODE_DATA_EXCEPTION), errmsg("different halfvec dimensions %d and %d", a->dim, b->dim))); + } +} + +/* + * Raise ERRCODE_DATA_EXCEPTION when typmod is set (not -1) and differs from dim + */ +static inline void CheckExpectedDim(int32 typmod, int dim) +{ + if (typmod != -1 && typmod != dim) { + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("expected %d dimensions, not %d", typmod, dim))); + } +} + +/* + * Raise an error unless 1 <= dim <= HALFVEC_MAX_DIM + */ +static inline void CheckDim(int dim) +{ + if (dim < 1) + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("halfvec must have at least 1 dimension"))); + + if (dim > HALFVEC_MAX_DIM) + ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("halfvec cannot have more than %d dimensions", HALFVEC_MAX_DIM))); +} + +/* + * Raise ERRCODE_DATA_EXCEPTION if the element is NaN or infinite + */ +static inline void CheckElement(half value) +{ + if (HalfIsNan(value)) + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("NaN not allowed in halfvec"))); + + if (HalfIsInf(value)) + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("infinite value not allowed in halfvec"))); +} + +/* + * Allocate a zero-filled half vector of HALFVEC_SIZE(dim) bytes and set its varlena size and dim + */ +HalfVector *InitHalfVector(int dim) +{ + HalfVector *result; + int size; + + size = 
HALFVEC_SIZE(dim); + result = (HalfVector *)palloc0(size); + SET_VARSIZE(result, size); + result->dim = dim; + + return result; +} + +/* + * Check for whitespace, since array_isspace() is static + */ +static inline bool HalfvecIsspace(char ch) +{ + if (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' || ch == '\v' || ch == '\f') { + return true; + } + return false; +} + +/* + * Check state array + */ +static float8 *CheckStateArray(ArrayType *statearray, const char *caller) +{ + if (ARR_NDIM(statearray) != 1 || ARR_DIMS(statearray)[0] < 1 || ARR_HASNULL(statearray)) + elog(ERROR, "%s: expected state array", caller); + return (float8 *)ARR_DATA_PTR(statearray); +} + +#if PG_VERSION_NUM < 120003 +static pg_noinline void float_overflow_error(void) +{ + ereport(ERROR, (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), errmsg("value out of range: overflow"))); +} + +static pg_noinline void float_underflow_error(void) +{ + ereport(ERROR, (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), errmsg("value out of range: underflow"))); +} +#endif + +/* + * Convert textual representation to internal representation + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_in); +Datum halfvec_in(PG_FUNCTION_ARGS) +{ + char *lit = PG_GETARG_CSTRING(0); + int32 typmod = PG_GETARG_INT32(2); + half x[HALFVEC_MAX_DIM]; + int dim = 0; + char *pt = lit; + HalfVector *result; + + while (HalfvecIsspace(*pt)) { + pt++; + } + + if (*pt != '[') + ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type halfvec: \"%s\"", lit), + errdetail("Vector contents must start with \"[\"."))); + + pt++; + + while (HalfvecIsspace(*pt)) { + pt++; + } + + if (*pt == ']') + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("halfvec must have at least 1 dimension"))); + + for (;;) { + float val; + char *stringEnd; + + if (dim == HALFVEC_MAX_DIM) + ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("halfvec cannot have more than %d dimensions", 
HALFVEC_MAX_DIM))); + + while (HalfvecIsspace(*pt)) { + pt++; + } + + /* Check for empty string like float4in */ + if (*pt == '\0') + ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type halfvec: \"%s\"", lit))); + + errno = 0; + + /* Postgres sets LC_NUMERIC to C on startup */ + val = strtof(pt, &stringEnd); + + if (stringEnd == pt) { + ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type halfvec: \"%s\"", lit))); + } + + x[dim] = Float4ToHalfUnchecked(val); + + /* Check for range error like float4in: strtof overflowed, or a finite float became infinite as a half */ + if ((errno == ERANGE && isinf(val)) || (HalfIsInf(x[dim]) && !isinf(val))) { + ereport(ERROR, (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("\"%s\" is out of range for type halfvec", pnstrdup(pt, stringEnd - pt)))); + } + + CheckElement(x[dim]); + dim++; + + pt = stringEnd; + + while (HalfvecIsspace(*pt)) { + pt++; + } + + if (*pt == ',') { + pt++; + } else if (*pt == ']') { + pt++; + break; + } else { + ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type halfvec: \"%s\"", lit))); + } + } + + /* Only whitespace is allowed after the closing "]" */ + while (HalfvecIsspace(*pt)) { + pt++; + } + + if (*pt != '\0') { + ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type halfvec: \"%s\"", lit), + errdetail("Junk after closing right brace."))); + } + + CheckDim(dim); + CheckExpectedDim(typmod, dim); + + result = InitHalfVector(dim); + for (int i = 0; i < dim; i++) { + result->x[i] = x[i]; + } + + PG_RETURN_POINTER(result); +} + +#define AppendChar(ptr, c) (*(ptr)++ = (c)) /* store c at ptr, then advance ptr by one */ +#define AppendFloat(ptr, f) ((ptr) += FloatToShortestDecimalBufn((f), (ptr))) /* write f at ptr, then advance ptr by the value FloatToShortestDecimalBufn returns */ + +/* + * Format a halfvec as "[v1,v2,...]", printing each element via its float4 value + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_out); +Datum halfvec_out(PG_FUNCTION_ARGS) +{ + HalfVector *vector = PG_GETARG_HALFVEC_P(0); + int dim = 
vector->dim; + char *buf; + char *ptr; + + /* + * Need: + * + * dim * (FLOAT_SHORTEST_DECIMAL_LEN - 1) bytes for + * FloatToShortestDecimalBufn + * + * dim - 1 bytes for separator + * + * 3 bytes for [, ], and \0 (total: FLOAT_SHORTEST_DECIMAL_LEN * dim + 2) + */ + buf = (char *)palloc(FLOAT_SHORTEST_DECIMAL_LEN * dim + 2); + ptr = buf; + + AppendChar(ptr, '['); + + for (int i = 0; i < dim; i++) { + if (i > 0) { + AppendChar(ptr, ','); + } + + /* + * Use shortest decimal representation of single-precision float for + * simplicity + */ + AppendFloat(ptr, HalfToFloat4(vector->x[i])); + } + + AppendChar(ptr, ']'); + *ptr = '\0'; + + PG_FREE_IF_COPY(vector, 0); + PG_RETURN_CSTRING(buf); +} + +/* + * Validate the type modifier list (exactly one value in 1..HALFVEC_MAX_DIM) and return that value + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_typmod_in); +Datum halfvec_typmod_in(PG_FUNCTION_ARGS) +{ + ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0); + int32 *tl; + int n; + + tl = ArrayGetIntegerTypmods(ta, &n); + + if (n != 1) { + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid type modifier"))); + } + + if (*tl < 1) { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("dimensions for type halfvec must be at least 1"))); + } + + if (*tl > HALFVEC_MAX_DIM) { + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("dimensions for type halfvec cannot exceed %d", HALFVEC_MAX_DIM))); + } + + PG_RETURN_INT32(*tl); +} + +/* + * Decode the external binary representation: int16 dim, int16 unused (must be 0), then dim 2-byte halves + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_recv); +Datum halfvec_recv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo)PG_GETARG_POINTER(0); + int32 typmod = PG_GETARG_INT32(2); + HalfVector *result; + int16 dim; + int16 unused; + + dim = pq_getmsgint(buf, sizeof(int16)); + unused = pq_getmsgint(buf, sizeof(int16)); + + CheckDim(dim); + CheckExpectedDim(typmod, dim); + + if (unused != 0) { + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("expected unused to be 0, not %d", unused))); + } + + result = InitHalfVector(dim); + for (int i = 0; i < 
dim; i++) { + result->x[i] = pq_getmsghalf(buf); + CheckElement(result->x[i]); + } + + PG_RETURN_POINTER(result); +} + +/* + * Encode the external binary representation: int16 dim, int16 unused, then each element via pq_sendhalf + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_send); +Datum halfvec_send(PG_FUNCTION_ARGS) +{ + HalfVector *vec = PG_GETARG_HALFVEC_P(0); + StringInfoData buf; + + pq_begintypsend(&buf); + pq_sendint(&buf, vec->dim, sizeof(int16)); + pq_sendint(&buf, vec->unused, sizeof(int16)); + for (int i = 0; i < vec->dim; i++) { + pq_sendhalf(&buf, vec->x[i]); + } + + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + +/* + * Convert half vector to half vector (returns the input unchanged) + * This is needed to check the type modifier + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec); +Datum halfvec(PG_FUNCTION_ARGS) +{ + HalfVector *vec = PG_GETARG_HALFVEC_P(0); + int32 typmod = PG_GETARG_INT32(1); + + CheckExpectedDim(typmod, vec->dim); + + PG_RETURN_POINTER(vec); +} + +/* + * Convert an int4, float8, float4 or numeric array (at most 1-D, no NULLs) to a half vector + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(array_to_halfvec); +Datum array_to_halfvec(PG_FUNCTION_ARGS) +{ + ArrayType *array = PG_GETARG_ARRAYTYPE_P(0); + int32 typmod = PG_GETARG_INT32(1); + HalfVector *result; + int16 typlen; + bool typbyval; + char typalign; + Datum *elemsp; + int nelemsp; + + if (ARR_NDIM(array) > 1) { + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("array must be 1-D"))); + } + + if (ARR_HASNULL(array) && array_contains_nulls(array)) { + ereport(ERROR, (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), errmsg("array must not contain nulls"))); + } + + get_typlenbyvalalign(ARR_ELEMTYPE(array), &typlen, &typbyval, &typalign); + deconstruct_array(array, ARR_ELEMTYPE(array), typlen, typbyval, typalign, &elemsp, NULL, &nelemsp); + + CheckDim(nelemsp); + CheckExpectedDim(typmod, nelemsp); + + result = InitHalfVector(nelemsp); + + if (ARR_ELEMTYPE(array) == INT4OID) { + for (int i = 0; i < nelemsp; i++) + result->x[i] = Float4ToHalf(DatumGetInt32(elemsp[i])); + } else if (ARR_ELEMTYPE(array) == FLOAT8OID) { 
+ for (int i = 0; i < nelemsp; i++) + result->x[i] = Float4ToHalf(DatumGetFloat8(elemsp[i])); + } else if (ARR_ELEMTYPE(array) == FLOAT4OID) { + for (int i = 0; i < nelemsp; i++) + result->x[i] = Float4ToHalf(DatumGetFloat4(elemsp[i])); + } else if (ARR_ELEMTYPE(array) == NUMERICOID) { + for (int i = 0; i < nelemsp; i++) + result->x[i] = Float4ToHalf(DatumGetFloat4(DirectFunctionCall1(numeric_float4, elemsp[i]))); + } else { + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("unsupported array type"))); + } + + /* + * Free allocation from deconstruct_array. Do not free individual elements + * when pass-by-reference since they point to original array. + */ + pfree(elemsp); + + /* Reject NaN and infinite elements in the converted result */ + for (int i = 0; i < result->dim; i++) { + CheckElement(result->x[i]); + } + + PG_RETURN_POINTER(result); +} + +/* + * Convert half vector to float4[], widening each element with HalfToFloat4 + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_to_float4); +Datum halfvec_to_float4(PG_FUNCTION_ARGS) +{ + HalfVector *vec = PG_GETARG_HALFVEC_P(0); + Datum *datums; + ArrayType *result; + + datums = (Datum *)palloc(sizeof(Datum) * vec->dim); + + for (int i = 0; i < vec->dim; i++) { + datums[i] = Float4GetDatum(HalfToFloat4(vec->x[i])); + } + + /* Use TYPALIGN_INT for float4 */ + result = construct_array(datums, vec->dim, FLOAT4OID, sizeof(float4), true, TYPALIGN_INT); + + pfree(datums); + + PG_RETURN_POINTER(result); +} + +/* + * Convert vector to half vec element by element via Float4ToHalf, after checking dim against the limit and typmod + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_to_halfvec); +Datum vector_to_halfvec(PG_FUNCTION_ARGS) +{ + Vector *vec = PG_GETARG_VECTOR_P(0); + int32 typmod = PG_GETARG_INT32(1); + HalfVector *result; + + CheckDim(vec->dim); + CheckExpectedDim(typmod, vec->dim); + + result = InitHalfVector(vec->dim); + + for (int i = 0; i < vec->dim; i++) + result->x[i] = Float4ToHalf(vec->x[i]); + + PG_RETURN_POINTER(result); +} + +/* + * Get the L2 distance between half vectors (square root of the squared distance) + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_l2_distance); +Datum 
halfvec_l2_distance(PG_FUNCTION_ARGS) +{ + HalfVector *a = PG_GETARG_HALFVEC_P(0); + HalfVector *b = PG_GETARG_HALFVEC_P(1); + + CheckDims(a, b); + + PG_RETURN_FLOAT8(sqrt((double)HalfvecL2SquaredDistance(a->dim, a->x, b->x))); +} + +/* + * Get the L2 squared distance between half vectors (errors if their dimensions differ) + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_l2_squared_distance); +Datum halfvec_l2_squared_distance(PG_FUNCTION_ARGS) +{ + HalfVector *a = PG_GETARG_HALFVEC_P(0); + HalfVector *b = PG_GETARG_HALFVEC_P(1); + + CheckDims(a, b); + + PG_RETURN_FLOAT8((double)HalfvecL2SquaredDistance(a->dim, a->x, b->x)); +} + +/* + * Get the inner product of two half vectors (errors if their dimensions differ) + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_inner_product); +Datum halfvec_inner_product(PG_FUNCTION_ARGS) +{ + HalfVector *a = PG_GETARG_HALFVEC_P(0); + HalfVector *b = PG_GETARG_HALFVEC_P(1); + + CheckDims(a, b); + + PG_RETURN_FLOAT8((double)HalfvecInnerProduct(a->dim, a->x, b->x)); +} + +/* + * Get the negative inner product of two half vectors (errors if their dimensions differ) + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_negative_inner_product); +Datum halfvec_negative_inner_product(PG_FUNCTION_ARGS) +{ + HalfVector *a = PG_GETARG_HALFVEC_P(0); + HalfVector *b = PG_GETARG_HALFVEC_P(1); + + CheckDims(a, b); + + PG_RETURN_FLOAT8((double)-HalfvecInnerProduct(a->dim, a->x, b->x)); +} + +/* + * Get the cosine distance between two half vectors: 1 - similarity, with similarity clamped to [-1, 1] + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_cosine_distance); +Datum halfvec_cosine_distance(PG_FUNCTION_ARGS) +{ + HalfVector *a = PG_GETARG_HALFVEC_P(0); + HalfVector *b = PG_GETARG_HALFVEC_P(1); + double similarity; + + CheckDims(a, b); + + similarity = HalfvecCosineSimilarity(a->dim, a->x, b->x); +#ifdef _MSC_VER + /* /fp:fast may not propagate NaN */ + if (isnan(similarity)) { + PG_RETURN_FLOAT8(NAN); + } +#endif + + /* Keep in range: clamping to [-1, 1] bounds the returned distance to [0, 2] */ + if (similarity > 1) { + similarity = 1; + } else if (similarity < -1) { + similarity = -1; + } + + PG_RETURN_FLOAT8(1 - similarity); +} + +/* + * Get the distance for spherical 
k-means + * Currently uses angular distance since needs to satisfy triangle inequality + * Assumes inputs are unit vectors (skips norm); returns acos(inner product) / M_PI + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_spherical_distance); +Datum halfvec_spherical_distance(PG_FUNCTION_ARGS) +{ + HalfVector *a = PG_GETARG_HALFVEC_P(0); + HalfVector *b = PG_GETARG_HALFVEC_P(1); + double distance; + + CheckDims(a, b); + + distance = (double)HalfvecInnerProduct(a->dim, a->x, b->x); + /* Prevent NaN with acos with loss of precision: clamp the product to acos's domain [-1, 1] */ + if (distance > 1) { + distance = 1; + } else if (distance < -1) { + distance = -1; + } + + PG_RETURN_FLOAT8(acos(distance) / M_PI); +} + +/* + * Get the L1 distance between two half vectors (errors if their dimensions differ) + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_l1_distance); +Datum halfvec_l1_distance(PG_FUNCTION_ARGS) +{ + HalfVector *a = PG_GETARG_HALFVEC_P(0); + HalfVector *b = PG_GETARG_HALFVEC_P(1); + + CheckDims(a, b); + + PG_RETURN_FLOAT8((double)HalfvecL1Distance(a->dim, a->x, b->x)); +} + +/* + * Get the dimensions of a half vector (its dim field) + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_vector_dims); +Datum halfvec_vector_dims(PG_FUNCTION_ARGS) +{ + HalfVector *a = PG_GETARG_HALFVEC_P(0); + + PG_RETURN_INT32(a->dim); +} + +/* + * Get the L2 norm of a half vector, accumulating the squares in double precision + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_l2_norm); +Datum halfvec_l2_norm(PG_FUNCTION_ARGS) +{ + HalfVector *a = PG_GETARG_HALFVEC_P(0); + half *ax = a->x; + double norm = 0.0; + + /* Auto-vectorized */ + for (int i = 0; i < a->dim; i++) { + double axi = (double)HalfToFloat4(ax[i]); + + norm += axi * axi; + } + + PG_RETURN_FLOAT8(sqrt(norm)); +} + +/* + * Normalize a half vector with the L2 norm; a zero-norm input yields the zero vector + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_l2_normalize); +Datum halfvec_l2_normalize(PG_FUNCTION_ARGS) +{ + HalfVector *a = PG_GETARG_HALFVEC_P(0); + half *ax = a->x; + double norm = 0; + HalfVector *result; + half *rx; + + result = InitHalfVector(a->dim); + rx = result->x; + + /* Auto-vectorized */ + for (int i = 0; i < a->dim; i++) + norm 
+= (double)HalfToFloat4(ax[i]) * (double)HalfToFloat4(ax[i]); + + norm = sqrt(norm); + /* Return zero vector for zero norm (result was zero-filled when allocated) */ + if (norm > 0) { + for (int i = 0; i < a->dim; i++) + rx[i] = Float4ToHalfUnchecked(HalfToFloat4(ax[i]) / norm); + + /* Check for overflow */ + for (int i = 0; i < a->dim; i++) { + if (HalfIsInf(rx[i])) + float_overflow_error(); + } + } + + PG_RETURN_POINTER(result); +} + +/* + * Add half vectors element-wise; errors if dimensions differ or any sum becomes infinite + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_add); +Datum halfvec_add(PG_FUNCTION_ARGS) +{ + HalfVector *a = PG_GETARG_HALFVEC_P(0); + HalfVector *b = PG_GETARG_HALFVEC_P(1); + half *ax = a->x; + half *bx = b->x; + HalfVector *result; + half *rx; + + CheckDims(a, b); + + result = InitHalfVector(a->dim); + rx = result->x; + + /* Auto-vectorized */ + for (int i = 0, imax = a->dim; i < imax; i++) { +#ifdef FLT16_SUPPORT + rx[i] = ax[i] + bx[i]; +#else + rx[i] = Float4ToHalfUnchecked(HalfToFloat4(ax[i]) + HalfToFloat4(bx[i])); +#endif + } + + /* Check for overflow */ + for (int i = 0, imax = a->dim; i < imax; i++) { + if (HalfIsInf(rx[i])) { + float_overflow_error(); + } + } + + PG_RETURN_POINTER(result); +} + +/* + * Subtract half vectors element-wise (a - b); errors if dimensions differ or any difference becomes infinite + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_sub); +Datum halfvec_sub(PG_FUNCTION_ARGS) +{ + HalfVector *a = PG_GETARG_HALFVEC_P(0); + HalfVector *b = PG_GETARG_HALFVEC_P(1); + half *ax = a->x; + half *bx = b->x; + HalfVector *result; + half *rx; + + CheckDims(a, b); + + result = InitHalfVector(a->dim); + rx = result->x; + + /* Auto-vectorized */ + for (int i = 0, imax = a->dim; i < imax; i++) { +#ifdef FLT16_SUPPORT + rx[i] = ax[i] - bx[i]; +#else + rx[i] = Float4ToHalfUnchecked(HalfToFloat4(ax[i]) - HalfToFloat4(bx[i])); +#endif + } + + /* Check for overflow */ + for (int i = 0, imax = a->dim; i < imax; i++) { + if (HalfIsInf(rx[i])) { + float_overflow_error(); + } + } + + PG_RETURN_POINTER(result); +} + +/* + * Multiply half vectors element-wise; errors on dimension mismatch, overflow or underflow + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_mul); +Datum 
halfvec_mul(PG_FUNCTION_ARGS) +{ + HalfVector *a = PG_GETARG_HALFVEC_P(0); + HalfVector *b = PG_GETARG_HALFVEC_P(1); + half *ax = a->x; + half *bx = b->x; + HalfVector *result; + half *rx; + + CheckDims(a, b); + + result = InitHalfVector(a->dim); + rx = result->x; + + /* Auto-vectorized */ + for (int i = 0, imax = a->dim; i < imax; i++) { +#ifdef FLT16_SUPPORT + rx[i] = ax[i] * bx[i]; +#else + rx[i] = Float4ToHalfUnchecked(HalfToFloat4(ax[i]) * HalfToFloat4(bx[i])); +#endif + } + + /* Check for overflow and underflow (a zero product from two non-zero factors) */ + for (int i = 0, imax = a->dim; i < imax; i++) { + if (HalfIsInf(rx[i])) { + float_overflow_error(); + } + + if (HalfIsZero(rx[i]) && !(HalfIsZero(ax[i]) || HalfIsZero(bx[i]))) { + float_underflow_error(); + } + } + + PG_RETURN_POINTER(result); +} + +/* + * Concatenate half vectors: elements of a followed by elements of b; the combined dim must pass CheckDim + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_concat); +Datum halfvec_concat(PG_FUNCTION_ARGS) +{ + HalfVector *a = PG_GETARG_HALFVEC_P(0); + HalfVector *b = PG_GETARG_HALFVEC_P(1); + HalfVector *result; + int dim = a->dim + b->dim; + + CheckDim(dim); + result = InitHalfVector(dim); + + for (int i = 0; i < a->dim; i++) { + result->x[i] = a->x[i]; + } + + for (int i = 0; i < b->dim; i++) { + result->x[i + a->dim] = b->x[i]; + } + + PG_RETURN_POINTER(result); +} + +/* + * Quantize a half vector to bits: bit i is set when element i is greater than zero, highest bit of each byte first + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_binary_quantize); +Datum halfvec_binary_quantize(PG_FUNCTION_ARGS) +{ + HalfVector *a = PG_GETARG_HALFVEC_P(0); + half *ax = a->x; + VarBit *result = InitBitVector(a->dim); + unsigned char *rx = VARBITS(result); + + for (int i = 0; i < a->dim; i++) { + rx[i / 8] |= (HalfToFloat4(ax[i]) > 0) << (7 - (i % 8)); + } + + PG_RETURN_VARBIT_P(result); +} + +/* + * Get a subvector: the elements at 1-based positions [start, start + count), clipped to the bounds of the vector + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_subvector); +Datum halfvec_subvector(PG_FUNCTION_ARGS) +{ + HalfVector *a = PG_GETARG_HALFVEC_P(0); + int32 start = PG_GETARG_INT32(1); + int32 count = PG_GETARG_INT32(2); + int32 end; + half *ax = a->x; + HalfVector 
*result; + int32 dim; + + if (count < 1) { + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("halfvec must have at least 1 dimension"))); + } + + /* + * Check if (start + count > a->dim), avoiding integer overflow. a->dim + * and count are both positive, so a->dim - count won't overflow. + */ + if (start > a->dim - count) { + end = a->dim + 1; + } else { + end = start + count; + } + + /* Indexing starts at 1, like substring */ + if (start < 1) { + start = 1; + } else if (start > a->dim) { + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("halfvec must have at least 1 dimension"))); + } + + dim = end - start; /* end is exclusive; a non-positive length is rejected by CheckDim */ + CheckDim(dim); + result = InitHalfVector(dim); + + for (int i = 0; i < dim; i++) { + result->x[i] = ax[start - 1 + i]; + } + + PG_RETURN_POINTER(result); +} + +/* + * Internal helper to compare half vectors: element-wise over the common prefix, then by dim; returns -1, 0 or 1 + */ +static int halfvec_cmp_internal(HalfVector *a, HalfVector *b) +{ + int dim = Min(a->dim, b->dim); + + /* Check values before dimensions to be consistent with Postgres arrays */ + for (int i = 0; i < dim; i++) { + if (HalfToFloat4(a->x[i]) < HalfToFloat4(b->x[i])) { + return -1; + } + + if (HalfToFloat4(a->x[i]) > HalfToFloat4(b->x[i])) { + return 1; + } + } + + if (a->dim < b->dim) { + return -1; + } + + if (a->dim > b->dim) { + return 1; + } + + return 0; +} + +/* + * Less than, using the ordering of halfvec_cmp_internal + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_lt); +Datum halfvec_lt(PG_FUNCTION_ARGS) +{ + HalfVector *a = PG_GETARG_HALFVEC_P(0); + HalfVector *b = PG_GETARG_HALFVEC_P(1); + + PG_RETURN_BOOL(halfvec_cmp_internal(a, b) < 0); +} + +/* + * Less than or equal, using the ordering of halfvec_cmp_internal + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_le); +Datum halfvec_le(PG_FUNCTION_ARGS) +{ + HalfVector *a = PG_GETARG_HALFVEC_P(0); + HalfVector *b = PG_GETARG_HALFVEC_P(1); + + PG_RETURN_BOOL(halfvec_cmp_internal(a, b) <= 0); +} + +/* + * Equal + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_eq); +Datum halfvec_eq(PG_FUNCTION_ARGS) +{ + HalfVector *a = PG_GETARG_HALFVEC_P(0); + HalfVector *b = 
PG_GETARG_HALFVEC_P(1); + + PG_RETURN_BOOL(halfvec_cmp_internal(a, b) == 0); +} + +/* + * Not equal, using the ordering of halfvec_cmp_internal + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_ne); +Datum halfvec_ne(PG_FUNCTION_ARGS) +{ + HalfVector *a = PG_GETARG_HALFVEC_P(0); + HalfVector *b = PG_GETARG_HALFVEC_P(1); + + PG_RETURN_BOOL(halfvec_cmp_internal(a, b) != 0); +} + +/* + * Greater than or equal, using the ordering of halfvec_cmp_internal + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_ge); +Datum halfvec_ge(PG_FUNCTION_ARGS) +{ + HalfVector *a = PG_GETARG_HALFVEC_P(0); + HalfVector *b = PG_GETARG_HALFVEC_P(1); + + PG_RETURN_BOOL(halfvec_cmp_internal(a, b) >= 0); +} + +/* + * Greater than, using the ordering of halfvec_cmp_internal + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_gt); +Datum halfvec_gt(PG_FUNCTION_ARGS) +{ + HalfVector *a = PG_GETARG_HALFVEC_P(0); + HalfVector *b = PG_GETARG_HALFVEC_P(1); + + PG_RETURN_BOOL(halfvec_cmp_internal(a, b) > 0); +} + +/* + * Compare half vectors, returning the -1, 0 or 1 produced by halfvec_cmp_internal + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_cmp); +Datum halfvec_cmp(PG_FUNCTION_ARGS) +{ + HalfVector *a = PG_GETARG_HALFVEC_P(0); + HalfVector *b = PG_GETARG_HALFVEC_P(1); + + PG_RETURN_INT32(halfvec_cmp_internal(a, b)); +} + +/* + * Accumulate half vectors: state is a float8 array [count, sum_1..sum_dim]; returns a new state with newval added + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_accum); +Datum halfvec_accum(PG_FUNCTION_ARGS) +{ + ArrayType *statearray = PG_GETARG_ARRAYTYPE_P(0); + HalfVector *newval = PG_GETARG_HALFVEC_P(1); + float8 *statevalues; + int16 dim; + bool newarr; + float8 n; + Datum *statedatums; + half *x = newval->x; + ArrayType *result; + + /* Check array before using */ + statevalues = CheckStateArray(statearray, "halfvec_accum"); + dim = STATE_DIMS(statearray); + newarr = dim == 0; /* the state holds only the count so far, so take dim from newval */ + + if (newarr) + dim = newval->dim; + else + CheckExpectedDim(dim, newval->dim); + + n = statevalues[0] + 1.0; + + statedatums = (Datum *)CreateStateDatums(dim); + statedatums[0] = Float8GetDatum(n); + + if (newarr) { + for (int i = 0; i < dim; i++) + statedatums[i + 1] = Float8GetDatum((double)HalfToFloat4(x[i])); + } else { + for (int i = 0; i < dim; i++) { + double v = 
statevalues[i + 1] + (double)HalfToFloat4(x[i]); + /* Check for overflow */ + if (isinf(v)) + float_overflow_error(); + + statedatums[i + 1] = Float8GetDatum(v); + } + } + + /* Use float8 array like float4_accum */ + result = construct_array(statedatums, dim + 1, FLOAT8OID, sizeof(float8), FLOAT8PASSBYVAL, TYPALIGN_DOUBLE); + + pfree(statedatums); + + PG_RETURN_ARRAYTYPE_P(result); +} + +/* + * Average half vectors: divide each accumulated sum by the count; returns NULL when the count is zero + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_avg); +Datum halfvec_avg(PG_FUNCTION_ARGS) +{ + ArrayType *statearray = PG_GETARG_ARRAYTYPE_P(0); + float8 *statevalues; + float8 n; + uint16 dim; + HalfVector *result; + + /* Check array before using */ + statevalues = CheckStateArray(statearray, "halfvec_avg"); + n = statevalues[0]; + + /* SQL defines AVG of no values to be NULL */ + if (n == 0.0) { + PG_RETURN_NULL(); + } + + /* Create half vector from the per-dimension sums stored after the count */ + dim = STATE_DIMS(statearray); + CheckDim(dim); + result = InitHalfVector(dim); + for (int i = 0; i < dim; i++) { + result->x[i] = Float4ToHalf(statevalues[i + 1] / n); + CheckElement(result->x[i]); + } + + PG_RETURN_POINTER(result); +} + +/* + * Convert sparse vector to half vector: stored values are written at their indices, all other elements stay zero + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_to_halfvec); +Datum sparsevec_to_halfvec(PG_FUNCTION_ARGS) +{ + SparseVector *svec = PG_GETARG_SPARSEVEC_P(0); + int32 typmod = PG_GETARG_INT32(1); + HalfVector *result; + int dim = svec->dim; + float *values = SPARSEVEC_VALUES(svec); + + CheckDim(dim); + CheckExpectedDim(typmod, dim); + + result = InitHalfVector(dim); + for (int i = 0; i < svec->nnz; i++) { + result->x[svec->indices[i]] = Float4ToHalf(values[i]); + } + + PG_RETURN_POINTER(result); +} diff --git a/src/common/backend/utils/adt/sparsevec.cpp b/src/common/backend/utils/adt/sparsevec.cpp new file mode 100644 index 0000000000..c0d4d0db52 --- /dev/null +++ b/src/common/backend/utils/adt/sparsevec.cpp @@ -0,0 +1,1053 @@ +#include "postgres.h" + +#include <limits.h> +#include <math.h> + +#include "fmgr.h" +#include "access/datavec/halfutils.h" 
+#include "access/datavec/halfvec.h" +#include "libpq/pqformat.h" +#include "access/datavec/shortest_dec.h" +#include "access/datavec/sparsevec.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "access/datavec/vector.h" + +#if PG_VERSION_NUM >= 120000 +#include "common/shortest_dec.h" +#include "utils/float.h" +#else +#include +#include "utils/builtins.h" +#endif + +typedef struct SparseInputElement { + int32 index; + float value; +} SparseInputElement; + +/* + * Ensure same dimensions + */ +static inline void CheckDims(SparseVector *a, SparseVector *b) +{ + if (a->dim != b->dim) { + ereport(ERROR, + (errcode(ERRCODE_DATA_EXCEPTION), errmsg("different sparsevec dimensions %d and %d", a->dim, b->dim))); + } +} + +/* + * Ensure expected dimensions + */ +static inline void CheckExpectedDim(int32 typmod, int dim) +{ + if (typmod != -1 && typmod != dim) { + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("expected %d dimensions, not %d", typmod, dim))); + } +} + +/* + * Ensure valid dimensions + */ +static inline void CheckDim(int dim) +{ + if (dim < 1) { + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("sparsevec must have at least 1 dimension"))); + } + + if (dim > SPARSEVEC_MAX_DIM) { + ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("sparsevec cannot have more than %d dimensions", SPARSEVEC_MAX_DIM))); + } +} + +/* + * Ensure valid nnz + */ +static inline void CheckNnz(int nnz, int dim) +{ + if (nnz < 0) + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("sparsevec cannot have negative number of elements"))); + + if (nnz > SPARSEVEC_MAX_NNZ) + ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("sparsevec cannot have more than %d non-zero elements", SPARSEVEC_MAX_NNZ))); + + if (nnz > dim) + ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("sparsevec cannot have more elements than dimensions"))); +} + +/* + * Ensure valid index + */ +static inline void CheckIndex(int32 
*indices, int i, int dim) +{ + int32 index = indices[i]; + + if (index < 0 || index >= dim) { + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("sparsevec index out of bounds"))); + } + + if (i > 0) { /* compare with the previous entry: indices must be strictly ascending */ + if (index < indices[i - 1]) + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("sparsevec indices must be in ascending order"))); + + if (index == indices[i - 1]) + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("sparsevec indices must not contain duplicates"))); + } +} + +/* + * Raise ERRCODE_DATA_EXCEPTION if the value is NaN or infinite + */ +static inline void CheckElement(float value) +{ + if (isnan(value)) + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("NaN not allowed in sparsevec"))); + + if (isinf(value)) + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("infinite value not allowed in sparsevec"))); +} + +/* + * Allocate a zero-filled sparse vector of SPARSEVEC_SIZE(nnz) bytes and set its varlena size, dim and nnz + */ +SparseVector *InitSparseVector(int dim, int nnz) +{ + SparseVector *result; + int size; + + size = SPARSEVEC_SIZE(nnz); + result = (SparseVector *)palloc0(size); + SET_VARSIZE(result, size); + result->dim = dim; + result->nnz = nnz; + + return result; +} + +/* + * Check for whitespace, since array_isspace() is static + */ +static inline bool SparsevecIsspace(char ch) +{ + if (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' || ch == '\v' || ch == '\f') { + return true; + } + return false; +} + +/* + * Compare two SparseInputElement entries by index; returns -1, 1 or 0 for ascending order + */ +static int CompareIndices(const void *a, const void *b) +{ + if (((SparseInputElement *)a)->index < ((SparseInputElement *)b)->index) { + return -1; + } + + if (((SparseInputElement *)a)->index > ((SparseInputElement *)b)->index) { + return 1; + } + + return 0; +} + +/* + * Convert textual representation to internal representation + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_in); +Datum sparsevec_in(PG_FUNCTION_ARGS) +{ + char *lit = PG_GETARG_CSTRING(0); + int32 typmod = PG_GETARG_INT32(2); + long dim; + char *pt = lit; + char *stringEnd; + SparseVector *result; + float 
*rvalues; + SparseInputElement *elements; + int maxNnz; + int nnz = 0; + + maxNnz = 1; + while (*pt != '\0') { + if (*pt == ',') { + maxNnz++; + } + + pt++; + } + + if (maxNnz > SPARSEVEC_MAX_NNZ) + ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("sparsevec cannot have more than %d non-zero elements", SPARSEVEC_MAX_NNZ))); + + elements = (SparseInputElement *)palloc(maxNnz * sizeof(SparseInputElement)); + + pt = lit; + + while (SparsevecIsspace(*pt)) { + pt++; + } + + if (*pt != '{') + ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type sparsevec: \"%s\"", lit), + errdetail("Vector contents must start with \"{\"."))); + + pt++; + + while (SparsevecIsspace(*pt)) { + pt++; + } + + if (*pt == '}') { + pt++; + } else { + for (;;) { + long index; + float value; + + if (nnz == maxNnz) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("ran out of buffer: \"%s\"", lit))); + + while (SparsevecIsspace(*pt)) { + pt++; + } + + /* Check for empty string like float4in */ + if (*pt == '\0') + ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type sparsevec: \"%s\"", lit))); + + /* Use similar logic as int2vectorin */ + index = strtol(pt, &stringEnd, 10); + + if (stringEnd == pt) + ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type sparsevec: \"%s\"", lit))); + + /* Keep in int range for correct error message later */ + if (index > INT_MAX) { + index = INT_MAX; + } else if (index < INT_MIN + 1) { + index = INT_MIN + 1; + } + + pt = stringEnd; + + while (SparsevecIsspace(*pt)) { + pt++; + } + + if (*pt != ':') + ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type sparsevec: \"%s\"", lit))); + + pt++; + + while (SparsevecIsspace(*pt)) { + pt++; + } + + errno = 0; + + /* Use strtof like float4in to avoid a double-rounding problem */ + /* Postgres 
sets LC_NUMERIC to C on startup */ + value = strtof(pt, &stringEnd); + + if (stringEnd == pt) + ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type sparsevec: \"%s\"", lit))); + + /* Check for range error like float4in */ + if (errno == ERANGE && (value == 0 || isinf(value))) + ereport(ERROR, (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("\"%s\" is out of range for type sparsevec", pnstrdup(pt, stringEnd - pt)))); + + CheckElement(value); + + /* Do not store zero values */ + if (value != 0) { + /* Convert 1-based numbering (SQL) to 0-based (C) */ + elements[nnz].index = index - 1; + elements[nnz].value = value; + nnz++; + } + + pt = stringEnd; + + while (SparsevecIsspace(*pt)) { + pt++; + } + + if (*pt == ',') { + pt++; + } else if (*pt == '}') { + pt++; + break; + } else { + ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type sparsevec: \"%s\"", lit))); + } + } + } + + while (SparsevecIsspace(*pt)) { + pt++; + } + + if (*pt != '/') { + ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type sparsevec: \"%s\"", lit), + errdetail("Unexpected end of input."))); + } + + pt++; + + while (SparsevecIsspace(*pt)) { + pt++; + } + + /* Use similar logic as int2vectorin */ + dim = strtol(pt, &stringEnd, 10); + + if (stringEnd == pt) + ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type sparsevec: \"%s\"", lit))); + + /* Keep in int range for correct error message later */ + if (dim > INT_MAX) { + dim = INT_MAX; + } else if (dim < INT_MIN) { + dim = INT_MIN; + } + + pt = stringEnd; + + /* Only whitespace is allowed after the closing brace */ + while (SparsevecIsspace(*pt)) { + pt++; + } + + if (*pt != '\0') + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type sparsevec: \"%s\"", lit), errdetail("Junk after closing."))); + 
+ CheckDim(dim); + CheckExpectedDim(typmod, dim); + + qsort(elements, nnz, sizeof(SparseInputElement), CompareIndices); + + result = InitSparseVector(dim, nnz); + rvalues = SPARSEVEC_VALUES(result); + for (int i = 0; i < nnz; i++) { + result->indices[i] = elements[i].index; + rvalues[i] = elements[i].value; + + CheckIndex(result->indices, i, dim); + } + + PG_RETURN_POINTER(result); +} + +#define AppendChar(ptr, c) (*(ptr)++ = (c)) +#define AppendFloat(ptr, f) ((ptr) += FloatToShortestDecimalBufn((f), (ptr))) + +#if PG_VERSION_NUM >= 140000 +#define AppendInt(ptr, i) ((ptr) += pg_ltoa((i), (ptr))) +#else +#define AppendInt(ptr, i) \ + do { \ + pg_ltoa(i, ptr); \ + while (*ptr != '\0') \ + ptr++; \ + } while (0) +#endif + +/* + * Convert internal representation to textual representation + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_out); +Datum sparsevec_out(PG_FUNCTION_ARGS) +{ + SparseVector *sparsevec = PG_GETARG_SPARSEVEC_P(0); + float *values = SPARSEVEC_VALUES(sparsevec); + char *buf; + char *ptr; + + /* + * Need: + * + * nnz * 10 bytes for index (positive integer) + * + * nnz bytes for : + * + * nnz * (FLOAT_SHORTEST_DECIMAL_LEN - 1) bytes for + * FloatToShortestDecimalBufn + * + * nnz - 1 bytes for , + * + * 10 bytes for dimensions + * + * 4 bytes for {, }, /, and \0 + */ + buf = (char *)palloc((11 + FLOAT_SHORTEST_DECIMAL_LEN) * sparsevec->nnz + 13); + ptr = buf; + + AppendChar(ptr, '{'); + + for (int i = 0; i < sparsevec->nnz; i++) { + if (i > 0) + AppendChar(ptr, ','); + + /* Convert 0-based numbering (C) to 1-based (SQL) */ + AppendInt(ptr, sparsevec->indices[i] + 1); + AppendChar(ptr, ':'); + AppendFloat(ptr, values[i]); + } + + AppendChar(ptr, '}'); + AppendChar(ptr, '/'); + AppendInt(ptr, sparsevec->dim); + *ptr = '\0'; + + PG_FREE_IF_COPY(sparsevec, 0); + PG_RETURN_CSTRING(buf); +} + +/* + * Convert type modifier + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_typmod_in); +Datum sparsevec_typmod_in(PG_FUNCTION_ARGS) +{ + ArrayType *ta = 
PG_GETARG_ARRAYTYPE_P(0); + int32 *tl; + int n; + + tl = ArrayGetIntegerTypmods(ta, &n); + + if (n != 1) + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid type modifier"))); + + if (*tl < 1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("dimensions for type sparsevec must be at least 1"))); + + if (*tl > SPARSEVEC_MAX_DIM) + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("dimensions for type sparsevec cannot exceed %d", SPARSEVEC_MAX_DIM))); + + PG_RETURN_INT32(*tl); +} + +/* + * Convert external binary representation to internal representation + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_recv); +Datum sparsevec_recv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo)PG_GETARG_POINTER(0); + int32 typmod = PG_GETARG_INT32(2); + SparseVector *result; + int32 dim; + int32 nnz; + int32 unused; + float *values; + + dim = pq_getmsgint(buf, sizeof(int32)); + nnz = pq_getmsgint(buf, sizeof(int32)); + unused = pq_getmsgint(buf, sizeof(int32)); + + CheckDim(dim); + CheckNnz(nnz, dim); + CheckExpectedDim(typmod, dim); + + if (unused != 0) + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("expected unused to be 0, not %d", unused))); + + result = InitSparseVector(dim, nnz); + values = SPARSEVEC_VALUES(result); + + /* Binary representation uses zero-based numbering for indices */ + for (int i = 0; i < nnz; i++) { + result->indices[i] = pq_getmsgint(buf, sizeof(int32)); + CheckIndex(result->indices, i, dim); + } + + for (int i = 0; i < nnz; i++) { + values[i] = pq_getmsgfloat4(buf); + CheckElement(values[i]); + + if (values[i] == 0) + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), + errmsg("binary representation of sparsevec cannot contain zero values"))); + } + + PG_RETURN_POINTER(result); +} + +/* + * Convert internal representation to the external binary representation + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_send); +Datum sparsevec_send(PG_FUNCTION_ARGS) +{ + SparseVector *svec = 
PG_GETARG_SPARSEVEC_P(0); + float *values = SPARSEVEC_VALUES(svec); + StringInfoData buf; + + pq_begintypsend(&buf); + pq_sendint(&buf, svec->dim, sizeof(int32)); + pq_sendint(&buf, svec->nnz, sizeof(int32)); + pq_sendint(&buf, svec->unused, sizeof(int32)); + + /* Binary representation uses zero-based numbering for indices */ + for (int i = 0; i < svec->nnz; i++) + pq_sendint(&buf, svec->indices[i], sizeof(int32)); + + for (int i = 0; i < svec->nnz; i++) + pq_sendfloat4(&buf, values[i]); + + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + +/* + * Convert sparse vector to sparse vector + * This is needed to check the type modifier + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec); +Datum sparsevec(PG_FUNCTION_ARGS) +{ + SparseVector *svec = PG_GETARG_SPARSEVEC_P(0); + int32 typmod = PG_GETARG_INT32(1); + + CheckExpectedDim(typmod, svec->dim); + + PG_RETURN_POINTER(svec); +} + +/* + * Convert dense vector to sparse vector + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_to_sparsevec); +Datum vector_to_sparsevec(PG_FUNCTION_ARGS) +{ + Vector *vec = PG_GETARG_VECTOR_P(0); + int32 typmod = PG_GETARG_INT32(1); + SparseVector *result; + int dim = vec->dim; + int nnz = 0; + float *values; + int j = 0; + + CheckDim(dim); + CheckExpectedDim(typmod, dim); + + for (int i = 0; i < dim; i++) { + if (vec->x[i] != 0) { + nnz++; + } + } + + result = InitSparseVector(dim, nnz); + values = SPARSEVEC_VALUES(result); + for (int i = 0; i < dim; i++) { + if (vec->x[i] != 0) { + /* Safety check */ + if (j >= result->nnz) + elog(ERROR, "safety check failed"); + + result->indices[j] = i; + values[j] = vec->x[i]; + j++; + } + } + + PG_RETURN_POINTER(result); +} + +/* + * Convert half vector to sparse vector + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_to_sparsevec); +Datum halfvec_to_sparsevec(PG_FUNCTION_ARGS) +{ + HalfVector *vec = PG_GETARG_HALFVEC_P(0); + int32 typmod = PG_GETARG_INT32(1); + SparseVector *result; + int dim = vec->dim; + int nnz = 0; + float *values; + int j = 0; + + 
CheckDim(dim); + CheckExpectedDim(typmod, dim); + + for (int i = 0; i < dim; i++) { + if (!HalfIsZero(vec->x[i])) { + nnz++; + } + } + + result = InitSparseVector(dim, nnz); + values = SPARSEVEC_VALUES(result); + for (int i = 0; i < dim; i++) { + if (!HalfIsZero(vec->x[i])) { + /* Safety check */ + if (j >= result->nnz) + elog(ERROR, "safety check failed"); + + result->indices[j] = i; + values[j] = HalfToFloat4(vec->x[i]); + j++; + } + } + + PG_RETURN_POINTER(result); +} + +/* + * Get the L2 squared distance between sparse vectors + */ +static float SparsevecL2SquaredDistance(SparseVector *a, SparseVector *b) +{ + float *ax = SPARSEVEC_VALUES(a); + float *bx = SPARSEVEC_VALUES(b); + float distance = 0.0; + int bpos = 0; + + for (int i = 0; i < a->nnz; i++) { + int ai = a->indices[i]; + int bi = -1; + + for (int j = bpos; j < b->nnz; j++) { + bi = b->indices[j]; + + if (ai == bi) { + float diff = ax[i] - bx[j]; + + distance += diff * diff; + } else if (ai > bi) + distance += bx[j] * bx[j]; + + /* Update start for next iteration */ + if (ai >= bi) + bpos = j + 1; + + /* Found or passed it */ + if (bi >= ai) + break; + } + + if (ai != bi) + distance += ax[i] * ax[i]; + } + + for (int j = bpos; j < b->nnz; j++) + distance += bx[j] * bx[j]; + + return distance; +} + +/* + * Get the L2 distance between sparse vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_l2_distance); +Datum sparsevec_l2_distance(PG_FUNCTION_ARGS) +{ + SparseVector *a = PG_GETARG_SPARSEVEC_P(0); + SparseVector *b = PG_GETARG_SPARSEVEC_P(1); + + CheckDims(a, b); + + PG_RETURN_FLOAT8(sqrt((double)SparsevecL2SquaredDistance(a, b))); +} + +/* + * Get the L2 squared distance between sparse vectors + * This saves a sqrt calculation + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_l2_squared_distance); +Datum sparsevec_l2_squared_distance(PG_FUNCTION_ARGS) +{ + SparseVector *a = PG_GETARG_SPARSEVEC_P(0); + SparseVector *b = PG_GETARG_SPARSEVEC_P(1); + + CheckDims(a, b); + + 
PG_RETURN_FLOAT8((double)SparsevecL2SquaredDistance(a, b)); +} + +/* + * Get the inner product of two sparse vectors + */ +static float SparsevecInnerProduct(SparseVector *a, SparseVector *b) +{ + float *ax = SPARSEVEC_VALUES(a); + float *bx = SPARSEVEC_VALUES(b); + float distance = 0.0; + int bpos = 0; + + for (int i = 0; i < a->nnz; i++) { + int ai = a->indices[i]; + + for (int j = bpos; j < b->nnz; j++) { + int bi = b->indices[j]; + + /* Only update when the same index */ + if (ai == bi) + distance += ax[i] * bx[j]; + + /* Update start for next iteration */ + if (ai >= bi) + bpos = j + 1; + + /* Found or passed it */ + if (bi >= ai) + break; + } + } + + return distance; +} + +/* + * Get the inner product of two sparse vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_inner_product); +Datum sparsevec_inner_product(PG_FUNCTION_ARGS) +{ + SparseVector *a = PG_GETARG_SPARSEVEC_P(0); + SparseVector *b = PG_GETARG_SPARSEVEC_P(1); + + CheckDims(a, b); + + PG_RETURN_FLOAT8((double)SparsevecInnerProduct(a, b)); +} + +/* + * Get the negative inner product of two sparse vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_negative_inner_product); +Datum sparsevec_negative_inner_product(PG_FUNCTION_ARGS) +{ + SparseVector *a = PG_GETARG_SPARSEVEC_P(0); + SparseVector *b = PG_GETARG_SPARSEVEC_P(1); + + CheckDims(a, b); + + PG_RETURN_FLOAT8((double)-SparsevecInnerProduct(a, b)); +} + +/* + * Get the cosine distance between two sparse vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_cosine_distance); +Datum sparsevec_cosine_distance(PG_FUNCTION_ARGS) +{ + SparseVector *a = PG_GETARG_SPARSEVEC_P(0); + SparseVector *b = PG_GETARG_SPARSEVEC_P(1); + float *ax = SPARSEVEC_VALUES(a); + float *bx = SPARSEVEC_VALUES(b); + float norma = 0.0; + float normb = 0.0; + double similarity; + + CheckDims(a, b); + + similarity = SparsevecInnerProduct(a, b); + + /* Auto-vectorized */ + for (int i = 0; i < a->nnz; i++) + norma += ax[i] * ax[i]; + + /* Auto-vectorized */ + for (int 
i = 0; i < b->nnz; i++) + normb += bx[i] * bx[i]; + + /* Use sqrt(a * b) over sqrt(a) * sqrt(b) */ + similarity /= sqrt(static_cast(norma) * static_cast(normb)); +#ifdef _MSC_VER + /* /fp:fast may not propagate NaN */ + if (isnan(similarity)) + PG_RETURN_FLOAT8(NAN); +#endif + + /* Keep in range */ + if (similarity > 1) { + similarity = 1.0; + } else if (similarity < -1) { + similarity = -1.0; + } + + PG_RETURN_FLOAT8(1.0 - similarity); +} + +/* + * Get the L1 distance between two sparse vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_l1_distance); +Datum sparsevec_l1_distance(PG_FUNCTION_ARGS) +{ + SparseVector *a = PG_GETARG_SPARSEVEC_P(0); + SparseVector *b = PG_GETARG_SPARSEVEC_P(1); + float *ax = SPARSEVEC_VALUES(a); + float *bx = SPARSEVEC_VALUES(b); + float distance = 0.0; + int bpos = 0; + + CheckDims(a, b); + + for (int i = 0; i < a->nnz; i++) { + int ai = a->indices[i]; + int bi = -1; + + for (int j = bpos; j < b->nnz; j++) { + bi = b->indices[j]; + + if (ai == bi) + distance += fabsf(ax[i] - bx[j]); + else if (ai > bi) + distance += fabsf(bx[j]); + + /* Update start for next iteration */ + if (ai >= bi) + bpos = j + 1; + + /* Found or passed it */ + if (bi >= ai) + break; + } + + if (ai != bi) + distance += fabsf(ax[i]); + } + + for (int j = bpos; j < b->nnz; j++) + distance += fabsf(bx[j]); + + PG_RETURN_FLOAT8(static_cast(distance)); +} + +/* + * Get the L2 norm of a sparse vector + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_l2_norm); +Datum sparsevec_l2_norm(PG_FUNCTION_ARGS) +{ + SparseVector *a = PG_GETARG_SPARSEVEC_P(0); + float *ax = SPARSEVEC_VALUES(a); + double norm = 0.0; + + /* Auto-vectorized */ + for (int i = 0; i < a->nnz; i++) + norm += (double)ax[i] * (double)ax[i]; + + PG_RETURN_FLOAT8(sqrt(norm)); +} + +static pg_noinline void float_overflow_error(void) +{ + ereport(ERROR, (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), errmsg("value out of range: overflow"))); +} + +/* + * Normalize a sparse vector with the L2 norm + */ 
+PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_l2_normalize); +Datum sparsevec_l2_normalize(PG_FUNCTION_ARGS) +{ + SparseVector *a = PG_GETARG_SPARSEVEC_P(0); + float *ax = SPARSEVEC_VALUES(a); + double norm = 0; + SparseVector *result; + float *rx; + + result = InitSparseVector(a->dim, a->nnz); + rx = SPARSEVEC_VALUES(result); + + /* Auto-vectorized */ + for (int i = 0; i < a->nnz; i++) + norm += (double)ax[i] * (double)ax[i]; + + norm = sqrt(norm); + /* Return zero vector for zero norm */ + if (norm > 0) { + int zeros = 0; + + for (int i = 0; i < a->nnz; i++) { + result->indices[i] = a->indices[i]; + rx[i] = ax[i] / norm; + + if (isinf(rx[i])) + float_overflow_error(); + + if (rx[i] == 0) + zeros++; + } + + /* Allocate a new vector in the unlikely event there are zeros */ + if (zeros > 0) { + SparseVector *newResult = InitSparseVector(result->dim, result->nnz - zeros); + float *nx = SPARSEVEC_VALUES(newResult); + int j = 0; + + for (int i = 0; i < result->nnz; i++) { + if (rx[i] == 0) + continue; + + /* Safety check */ + if (j >= newResult->nnz) + elog(ERROR, "safety check failed"); + + newResult->indices[j] = result->indices[i]; + nx[j] = rx[i]; + j++; + } + + pfree(result); + + PG_RETURN_POINTER(newResult); + } + } + + PG_RETURN_POINTER(result); +} + +/* + * Internal helper to compare sparse vectors + */ +static int sparsevec_cmp_internal(SparseVector *a, SparseVector *b) +{ + float *ax = SPARSEVEC_VALUES(a); + float *bx = SPARSEVEC_VALUES(b); + int nnz = Min(a->nnz, b->nnz); + + /* Check values before dimensions to be consistent with Postgres arrays */ + for (int i = 0; i < nnz; i++) { + if (a->indices[i] < b->indices[i]) { + return ax[i] < 0 ? -1 : 1; + } + + if (a->indices[i] > b->indices[i]) { + return bx[i] < 0 ? 1 : -1; + } + + if (ax[i] < bx[i]) { + return -1; + } + + if (ax[i] > bx[i]) { + return 1; + } + } + + if (a->nnz < b->nnz && b->indices[nnz] < a->dim) { + return bx[nnz] < 0 ? 
1 : -1; + } + + if (a->nnz > b->nnz && a->indices[nnz] < b->dim) { + return ax[nnz] < 0 ? -1 : 1; + } + + if (a->dim < b->dim) { + return -1; + } + + if (a->dim > b->dim) { + return 1; + } + + return 0; +} + +/* + * Less than + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_lt); +Datum sparsevec_lt(PG_FUNCTION_ARGS) +{ + SparseVector *a = PG_GETARG_SPARSEVEC_P(0); + SparseVector *b = PG_GETARG_SPARSEVEC_P(1); + + PG_RETURN_BOOL(sparsevec_cmp_internal(a, b) < 0); +} + +/* + * Less than or equal + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_le); +Datum sparsevec_le(PG_FUNCTION_ARGS) +{ + SparseVector *a = PG_GETARG_SPARSEVEC_P(0); + SparseVector *b = PG_GETARG_SPARSEVEC_P(1); + + PG_RETURN_BOOL(sparsevec_cmp_internal(a, b) <= 0); +} + +/* + * Equal + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_eq); +Datum sparsevec_eq(PG_FUNCTION_ARGS) +{ + SparseVector *a = PG_GETARG_SPARSEVEC_P(0); + SparseVector *b = PG_GETARG_SPARSEVEC_P(1); + + PG_RETURN_BOOL(sparsevec_cmp_internal(a, b) == 0); +} + +/* + * Not equal + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_ne); +Datum sparsevec_ne(PG_FUNCTION_ARGS) +{ + SparseVector *a = PG_GETARG_SPARSEVEC_P(0); + SparseVector *b = PG_GETARG_SPARSEVEC_P(1); + + PG_RETURN_BOOL(sparsevec_cmp_internal(a, b) != 0); +} + +/* + * Greater than or equal + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_ge); +Datum sparsevec_ge(PG_FUNCTION_ARGS) +{ + SparseVector *a = PG_GETARG_SPARSEVEC_P(0); + SparseVector *b = PG_GETARG_SPARSEVEC_P(1); + + PG_RETURN_BOOL(sparsevec_cmp_internal(a, b) >= 0); +} + +/* + * Greater than + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_gt); +Datum sparsevec_gt(PG_FUNCTION_ARGS) +{ + SparseVector *a = PG_GETARG_SPARSEVEC_P(0); + SparseVector *b = PG_GETARG_SPARSEVEC_P(1); + + PG_RETURN_BOOL(sparsevec_cmp_internal(a, b) > 0); +} + +/* + * Compare sparse vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_cmp); +Datum sparsevec_cmp(PG_FUNCTION_ARGS) +{ + SparseVector *a = PG_GETARG_SPARSEVEC_P(0); + SparseVector 
*b = PG_GETARG_SPARSEVEC_P(1); + + PG_RETURN_INT32(sparsevec_cmp_internal(a, b)); +} diff --git a/src/common/backend/utils/adt/vector.cpp b/src/common/backend/utils/adt/vector.cpp new file mode 100644 index 0000000000..0daff7aee6 --- /dev/null +++ b/src/common/backend/utils/adt/vector.cpp @@ -0,0 +1,1379 @@ +#include "postgres.h" + +#include <cmath> /* for sqrt(), fabsf(), acos(), isnan(), isinf() */ + +#include "access/datavec/bitvec.h" +#include "catalog/pg_type.h" +#include "fmgr.h" +#include "access/datavec/halfutils.h" +#include "access/datavec/halfvec.h" +#include "access/datavec/hnsw.h" +#include "access/datavec/ivfflat.h" +#include "lib/stringinfo.h" +#include "libpq/pqformat.h" +#include "port.h" /* for strtof() */ +#include "access/datavec/shortest_dec.h" +#include "access/datavec/sparsevec.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/numeric.h" +#include "commands/extension.h" +#include "knl/knl_session.h" +#include "access/datavec/vector.h" + +#if PG_VERSION_NUM >= 160000 +#include "varatt.h" +#endif + +#if PG_VERSION_NUM < 130000 +#define TYPALIGN_DOUBLE 'd' +#define TYPALIGN_INT 'i' +#endif + +#define STATE_DIMS(x) (ARR_DIMS(x)[0] - 1) +#define CreateStateDatums(dim) palloc(sizeof(Datum) * ((dim) + 1)) + +#if defined(USE_TARGET_CLONES) && !defined(__FMA__) +#define VECTOR_TARGET_CLONES __attribute__((target_clones("default", "fma"))) +#else +#define VECTOR_TARGET_CLONES +#endif + +#if PG_VERSION_NUM < 150000 +#define MarkGUCPrefixReserved(x) EmitWarningsOnPlaceholders(x) +#endif + +uint32 datavec_index; + +void set_extension_index(uint32 index) +{ + datavec_index = index; +} + +datavec_session_context *get_session_context() +{ + if (u_sess->attr.attr_common.extension_session_vars_array[datavec_index] == NULL) { + init_session_vars(); + } + return (datavec_session_context *)u_sess->attr.attr_common.extension_session_vars_array[datavec_index]; +} + +void init_session_vars(void) +{ + RepallocSessionVarsArrayIfNecessary(); + datavec_session_context *ctx =
+ (datavec_session_context *)MemoryContextAllocZero(u_sess->self_mem_cxt, sizeof(datavec_session_context)); + u_sess->attr.attr_common.extension_session_vars_array[datavec_index] = ctx; + + ctx->hnsw_ef_search = 0; + ctx->ivfflat_probes = 0; + + DefineCustomIntVariable("hnsw.ef_search", "Sets the size of the dynamic candidate list for search", + "Valid range is 1..1000.", &(get_session_context()->hnsw_ef_search), HNSW_DEFAULT_EF_SEARCH, + HNSW_MIN_EF_SEARCH, HNSW_MAX_EF_SEARCH, PGC_USERSET, 0, NULL, NULL, NULL); + + MarkGUCPrefixReserved("hnsw"); + + DefineCustomIntVariable("ivfflat.probes", "Sets the number of probes", "Valid range is 1..lists.", + &(get_session_context()->ivfflat_probes), IVFFLAT_DEFAULT_PROBES, IVFFLAT_MIN_LISTS, + IVFFLAT_MAX_LISTS, PGC_USERSET, 0, NULL, NULL, NULL); + + MarkGUCPrefixReserved("ivfflat"); +} + +/* + * Ensure same dimensions + */ +static inline void CheckDims(Vector *a, Vector *b) +{ + if (a->dim != b->dim) + ereport(ERROR, + (errcode(ERRCODE_DATA_EXCEPTION), errmsg("different vector dimensions %d and %d", a->dim, b->dim))); +} + +/* + * Ensure expected dimensions + */ +static inline void CheckExpectedDim(int32 typmod, int dim) +{ + if (typmod != -1 && typmod != dim) + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("expected %d dimensions, not %d", typmod, dim))); +} + +/* + * Ensure valid dimensions + */ +static inline void CheckDim(int dim) +{ + if (dim < 1) + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("vector must have at least 1 dimension"))); + + if (dim > VECTOR_MAX_DIM) + ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("vector cannot have more than %d dimensions", VECTOR_MAX_DIM))); +} + +/* + * Ensure finite element + */ +static inline void CheckElement(float value) +{ + if (isnan(value)) + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("NaN not allowed in vector"))); + + if (isinf(value)) + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("infinite value not allowed 
in vector"))); +} + +/* + * Allocate and initialize a new vector + */ +Vector *InitVector(int dim) +{ + Vector *result; + int size; + + size = VECTOR_SIZE(dim); + result = (Vector *)palloc0(size); + SET_VARSIZE(result, size); + result->dim = dim; + + return result; +} + +/* + * Check for whitespace, since array_isspace() is static + */ +static inline bool VectorIsspace(char ch) +{ + if (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' || ch == '\v' || ch == '\f') { + return true; + } + return false; +} + +/* + * Check state array + */ +static float8 *CheckStateArray(ArrayType *statearray, const char *caller) +{ + if (ARR_NDIM(statearray) != 1 || ARR_DIMS(statearray)[0] < 1 || ARR_HASNULL(statearray)) + elog(ERROR, "%s: expected state array", caller); + return (float8 *)ARR_DATA_PTR(statearray); +} + +#if PG_VERSION_NUM < 120003 +static pg_noinline void float_overflow_error(void) +{ + ereport(ERROR, (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), errmsg("value out of range: overflow"))); +} + +static pg_noinline void float_underflow_error(void) +{ + ereport(ERROR, (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), errmsg("value out of range: underflow"))); +} +#endif + +/* + * Convert textual representation to internal representation + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_in); +Datum vector_in(PG_FUNCTION_ARGS) +{ + char *lit = PG_GETARG_CSTRING(0); + int32 typmod = PG_GETARG_INT32(2); + float x[VECTOR_MAX_DIM]; + int dim = 0; + char *pt = lit; + Vector *result; + + while (VectorIsspace(*pt)) { + pt++; + } + + if (*pt != '[') + ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type vector: \"%s\"", lit), + errdetail("Vector contents must start with \"[\"."))); + + pt++; + + while (VectorIsspace(*pt)) { + pt++; + } + + if (*pt == ']') { + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("vector must have at least 1 dimension"))); + } + + for (;;) { + float val; + char *stringEnd; + + if (dim == VECTOR_MAX_DIM) + 
ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("vector cannot have more than %d dimensions", VECTOR_MAX_DIM))); + + while (VectorIsspace(*pt)) { + pt++; + } + + /* Check for empty string like float4in */ + if (*pt == '\0') + ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type vector: \"%s\"", lit))); + + errno = 0; + + /* Use strtof like float4in to avoid a double-rounding problem */ + /* Postgres sets LC_NUMERIC to C on startup */ + val = strtof(pt, &stringEnd); + + if (stringEnd == pt) { + ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type vector: \"%s\"", lit))); + } + + /* Check for range error like float4in */ + if (errno == ERANGE && isinf(val)) + ereport(ERROR, (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("\"%s\" is out of range for type vector", pnstrdup(pt, stringEnd - pt)))); + + CheckElement(val); + x[dim++] = val; + + pt = stringEnd; + + while (VectorIsspace(*pt)) { + pt++; + } + + if (*pt == ',') { + pt++; + } else if (*pt == ']') { + pt++; + break; + } else { + ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type vector: \"%s\"", lit))); + } + } + + /* Only whitespace is allowed after the closing brace */ + while (VectorIsspace(*pt)) { + pt++; + } + + if (*pt != '\0') + ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type vector: \"%s\"", lit), + errdetail("Junk after closing right brace."))); + + CheckDim(dim); + CheckExpectedDim(typmod, dim); + + result = InitVector(dim); + for (int i = 0; i < dim; i++) { + result->x[i] = x[i]; + } + + PG_RETURN_POINTER(result); +} + +#define AppendChar(ptr, c) (*(ptr)++ = (c)) +#define AppendFloat(ptr, f) ((ptr) += FloatToShortestDecimalBufn((f), (ptr))) + +/* + * Convert internal representation to textual representation + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_out); +Datum 
vector_out(PG_FUNCTION_ARGS) +{ + Vector *vector = PG_GETARG_VECTOR_P(0); + int dim = vector->dim; + char *buf; + char *ptr; + + /* + * Need: + * + * dim * (FLOAT_SHORTEST_DECIMAL_LEN - 1) bytes for + * FloatToShortestDecimalBufn + * + * dim - 1 bytes for separator + * + * 3 bytes for [, ], and \0 + */ + buf = (char *)palloc(FLOAT_SHORTEST_DECIMAL_LEN * dim + 2); + ptr = buf; + + AppendChar(ptr, '['); + + for (int i = 0; i < dim; i++) { + if (i > 0) { + AppendChar(ptr, ','); + } + + AppendFloat(ptr, vector->x[i]); + } + + AppendChar(ptr, ']'); + *ptr = '\0'; + + PG_FREE_IF_COPY(vector, 0); + PG_RETURN_CSTRING(buf); +} + +/* + * Print vector - useful for debugging + */ +void PrintVector(char *msg, Vector *vector) +{ + char *out = DatumGetPointer(DirectFunctionCall1(vector_out, PointerGetDatum(vector))); + + elog(INFO, "%s = %s", msg, out); + pfree(out); +} + +/* + * Convert type modifier + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_typmod_in); +Datum vector_typmod_in(PG_FUNCTION_ARGS) +{ + ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0); + int32 *tl; + int n; + + tl = ArrayGetIntegerTypmods(ta, &n); + + if (n != 1) { + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid type modifier"))); + } + + if (*tl < 1) { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("dimensions for type vector must be at least 1"))); + } + + if (*tl > VECTOR_MAX_DIM) { + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("dimensions for type vector cannot exceed %d", VECTOR_MAX_DIM))); + } + + PG_RETURN_INT32(*tl); +} + +/* + * Convert external binary representation to internal representation + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_recv); +Datum vector_recv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo)PG_GETARG_POINTER(0); + int32 typmod = PG_GETARG_INT32(2); + Vector *result; + int16 dim; + int16 unused; + + dim = pq_getmsgint(buf, sizeof(int16)); + unused = pq_getmsgint(buf, sizeof(int16)); + + CheckDim(dim); + 
CheckExpectedDim(typmod, dim); + + if (unused != 0) { + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("expected unused to be 0, not %d", unused))); + } + + result = InitVector(dim); + for (int i = 0; i < dim; i++) { + result->x[i] = pq_getmsgfloat4(buf); + CheckElement(result->x[i]); + } + + PG_RETURN_POINTER(result); +} + +/* + * Convert internal representation to the external binary representation + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_send); +Datum vector_send(PG_FUNCTION_ARGS) +{ + Vector *vec = PG_GETARG_VECTOR_P(0); + StringInfoData buf; + + pq_begintypsend(&buf); + pq_sendint(&buf, vec->dim, sizeof(int16)); + pq_sendint(&buf, vec->unused, sizeof(int16)); + for (int i = 0; i < vec->dim; i++) + pq_sendfloat4(&buf, vec->x[i]); + + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + +/* + * Convert vector to vector + * This is needed to check the type modifier + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector); +Datum vector(PG_FUNCTION_ARGS) +{ + Vector *vec = PG_GETARG_VECTOR_P(0); + int32 typmod = PG_GETARG_INT32(1); + + CheckExpectedDim(typmod, vec->dim); + + PG_RETURN_POINTER(vec); +} + +/* + * Convert array to vector + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(array_to_vector); +Datum array_to_vector(PG_FUNCTION_ARGS) +{ + ArrayType *array = PG_GETARG_ARRAYTYPE_P(0); + int32 typmod = PG_GETARG_INT32(1); + Vector *result; + int16 typlen; + bool typbyval; + char typalign; + Datum *elemsp; + int nelemsp; + + if (ARR_NDIM(array) > 1) { + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("array must be 1-D"))); + } + + if (ARR_HASNULL(array) && array_contains_nulls(array)) { + ereport(ERROR, (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), errmsg("array must not contain nulls"))); + } + + get_typlenbyvalalign(ARR_ELEMTYPE(array), &typlen, &typbyval, &typalign); + deconstruct_array(array, ARR_ELEMTYPE(array), typlen, typbyval, typalign, &elemsp, NULL, &nelemsp); + + CheckDim(nelemsp); + CheckExpectedDim(typmod, nelemsp); + + result = InitVector(nelemsp); + + if 
(ARR_ELEMTYPE(array) == INT4OID) { + for (int i = 0; i < nelemsp; i++) + result->x[i] = DatumGetInt32(elemsp[i]); + } else if (ARR_ELEMTYPE(array) == FLOAT8OID) { + for (int i = 0; i < nelemsp; i++) + result->x[i] = DatumGetFloat8(elemsp[i]); + } else if (ARR_ELEMTYPE(array) == FLOAT4OID) { + for (int i = 0; i < nelemsp; i++) + result->x[i] = DatumGetFloat4(elemsp[i]); + } else if (ARR_ELEMTYPE(array) == NUMERICOID) { + for (int i = 0; i < nelemsp; i++) + result->x[i] = DatumGetFloat4(DirectFunctionCall1(numeric_float4, elemsp[i])); + } else { + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("unsupported array type"))); + } + + /* + * Free allocation from deconstruct_array. Do not free individual elements + * when pass-by-reference since they point to original array. + */ + pfree(elemsp); + + /* Check elements */ + for (int i = 0; i < result->dim; i++) { + CheckElement(result->x[i]); + } + + PG_RETURN_POINTER(result); +} + +/* + * Convert vector to float4[] + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_to_float4); +Datum vector_to_float4(PG_FUNCTION_ARGS) +{ + Vector *vec = PG_GETARG_VECTOR_P(0); + Datum *datums; + ArrayType *result; + + datums = (Datum *)palloc(sizeof(Datum) * vec->dim); + + for (int i = 0; i < vec->dim; i++) { + datums[i] = Float4GetDatum(vec->x[i]); + } + + /* Use TYPALIGN_INT for float4 */ + result = construct_array(datums, vec->dim, FLOAT4OID, sizeof(float4), true, TYPALIGN_INT); + + pfree(datums); + + PG_RETURN_POINTER(result); +} + +/* + * Convert half vector to vector + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_to_vector); +Datum halfvec_to_vector(PG_FUNCTION_ARGS) +{ + HalfVector *vec = PG_GETARG_HALFVEC_P(0); + int32 typmod = PG_GETARG_INT32(1); + Vector *result; + + CheckDim(vec->dim); + CheckExpectedDim(typmod, vec->dim); + + result = InitVector(vec->dim); + + for (int i = 0; i < vec->dim; i++) { + result->x[i] = HalfToFloat4(vec->x[i]); + } + + PG_RETURN_POINTER(result); +} + +VECTOR_TARGET_CLONES static float 
VectorL2SquaredDistance(int dim, float *ax, float *bx) +{ + float distance = 0.0; + + /* Auto-vectorized */ + for (int i = 0; i < dim; i++) { + float diff = ax[i] - bx[i]; + + distance += diff * diff; + } + + return distance; +} + +/* + * Get the L2 distance between vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(l2_distance); +Datum l2_distance(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + Vector *b = PG_GETARG_VECTOR_P(1); + + CheckDims(a, b); + + PG_RETURN_FLOAT8(sqrt((double)VectorL2SquaredDistance(a->dim, a->x, b->x))); +} + +/* + * Get the L2 squared distance between vectors + * This saves a sqrt calculation + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_l2_squared_distance); +Datum vector_l2_squared_distance(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + Vector *b = PG_GETARG_VECTOR_P(1); + + CheckDims(a, b); + + PG_RETURN_FLOAT8((double)VectorL2SquaredDistance(a->dim, a->x, b->x)); +} + +VECTOR_TARGET_CLONES static float VectorInnerProduct(int dim, float *ax, float *bx) +{ + float distance = 0.0; + + /* Auto-vectorized */ + for (int i = 0; i < dim; i++) { + distance += ax[i] * bx[i]; + } + + return distance; +} + +/* + * Get the inner product of two vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(inner_product); +Datum inner_product(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + Vector *b = PG_GETARG_VECTOR_P(1); + + CheckDims(a, b); + + PG_RETURN_FLOAT8((double)VectorInnerProduct(a->dim, a->x, b->x)); +} + +/* + * Get the negative inner product of two vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_negative_inner_product); +Datum vector_negative_inner_product(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + Vector *b = PG_GETARG_VECTOR_P(1); + + CheckDims(a, b); + + PG_RETURN_FLOAT8((double)-VectorInnerProduct(a->dim, a->x, b->x)); +} + +VECTOR_TARGET_CLONES static double VectorCosineSimilarity(int dim, float *ax, float *bx) +{ + float similarity = 0.0; + float norma = 0.0; + float normb = 0.0; + + /* 
Auto-vectorized */
+    for (int i = 0; i < dim; i++) {
+        similarity += ax[i] * bx[i];
+        norma += ax[i] * ax[i];
+        normb += bx[i] * bx[i];
+    }
+
+    /* Use sqrt(a * b) over sqrt(a) * sqrt(b); widen the float sums to double before dividing */
+    return static_cast<double>(similarity) / sqrt(static_cast<double>(norma) * static_cast<double>(normb));
+}
+
+/*
+ * Get the cosine distance between two vectors (1 - cosine similarity, similarity clamped to [-1, 1])
+ */
+PGDLLEXPORT PG_FUNCTION_INFO_V1(cosine_distance);
+Datum cosine_distance(PG_FUNCTION_ARGS)
+{
+    Vector *a = PG_GETARG_VECTOR_P(0);
+    Vector *b = PG_GETARG_VECTOR_P(1);
+    double similarity;
+
+    CheckDims(a, b);
+
+    similarity = VectorCosineSimilarity(a->dim, a->x, b->x);
+#ifdef _MSC_VER
+    /* /fp:fast may not propagate NaN */
+    if (isnan(similarity)) {
+        PG_RETURN_FLOAT8(NAN);
+    }
+#endif
+
+    /* Keep in range */
+    if (similarity > 1) {
+        similarity = 1.0;
+    } else if (similarity < -1) {
+        similarity = -1.0;
+    }
+
+    PG_RETURN_FLOAT8(1.0 - similarity);
+}
+
+/*
+ * Get the distance for spherical k-means
+ * Currently uses angular distance since needs to satisfy triangle inequality
+ * Assumes inputs are unit vectors (skips norm)
+ */
+PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_spherical_distance);
+Datum vector_spherical_distance(PG_FUNCTION_ARGS)
+{
+    Vector *a = PG_GETARG_VECTOR_P(0);
+    Vector *b = PG_GETARG_VECTOR_P(1);
+    double distance;
+
+    CheckDims(a, b);
+
+    distance = (double)VectorInnerProduct(a->dim, a->x, b->x);
+    /* Prevent NaN with acos with loss of precision */
+    if (distance > 1) {
+        distance = 1;
+    } else if (distance < -1) {
+        distance = -1;
+    }
+
+    PG_RETURN_FLOAT8(acos(distance) / M_PI);
+}
+
+/* Does not require FMA, but keep logic simple */
+VECTOR_TARGET_CLONES static float VectorL1Distance(int dim, float *ax, float *bx)
+{
+    float distance = 0.0;
+
+    /* Auto-vectorized */
+    for (int i = 0; i < dim; i++) {
+        distance += fabsf(ax[i] - bx[i]);
+    }
+
+    return distance;
+}
+
+/*
+ * Get the L1 distance between two vectors
+ */
+PGDLLEXPORT PG_FUNCTION_INFO_V1(l1_distance);
+Datum l1_distance(PG_FUNCTION_ARGS)
+{
+    Vector *a =
PG_GETARG_VECTOR_P(0); + Vector *b = PG_GETARG_VECTOR_P(1); + + CheckDims(a, b); + + PG_RETURN_FLOAT8((double)VectorL1Distance(a->dim, a->x, b->x)); +} + +/* + * Get the dimensions of a vector + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_dims); +Datum vector_dims(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + + PG_RETURN_INT32(a->dim); +} + +/* + * Get the L2 norm of a vector + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_norm); +Datum vector_norm(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + float *ax = a->x; + double norm = 0.0; + + /* Auto-vectorized */ + for (int i = 0; i < a->dim; i++) { + norm += (double)ax[i] * (double)ax[i]; + } + + PG_RETURN_FLOAT8(sqrt(norm)); +} + +/* + * Normalize a vector with the L2 norm + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(l2_normalize); +Datum l2_normalize(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + float *ax = a->x; + double norm = 0; + Vector *result; + float *rx; + + result = InitVector(a->dim); + rx = result->x; + + /* Auto-vectorized */ + for (int i = 0; i < a->dim; i++) { + norm += (double)ax[i] * (double)ax[i]; + } + + norm = sqrt(norm); + /* Return zero vector for zero norm */ + if (norm > 0) { + for (int i = 0; i < a->dim; i++) { + rx[i] = ax[i] / norm; + } + + /* Check for overflow */ + for (int i = 0; i < a->dim; i++) { + if (isinf(rx[i])) { + float_overflow_error(); + } + } + } + + PG_RETURN_POINTER(result); +} + +/* + * Add vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_add); +Datum vector_add(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + Vector *b = PG_GETARG_VECTOR_P(1); + float *ax = a->x; + float *bx = b->x; + Vector *result; + float *rx; + + CheckDims(a, b); + + result = InitVector(a->dim); + rx = result->x; + + /* Auto-vectorized */ + for (int i = 0, imax = a->dim; i < imax; i++) { + rx[i] = ax[i] + bx[i]; + } + + /* Check for overflow */ + for (int i = 0, imax = a->dim; i < imax; i++) { + if (isinf(rx[i])) { + float_overflow_error(); + } + } + + 
PG_RETURN_POINTER(result); +} + +/* + * Subtract vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_sub); +Datum vector_sub(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + Vector *b = PG_GETARG_VECTOR_P(1); + float *ax = a->x; + float *bx = b->x; + Vector *result; + float *rx; + + CheckDims(a, b); + + result = InitVector(a->dim); + rx = result->x; + + /* Auto-vectorized */ + for (int i = 0, imax = a->dim; i < imax; i++) { + rx[i] = ax[i] - bx[i]; + } + + /* Check for overflow */ + for (int i = 0, imax = a->dim; i < imax; i++) { + if (isinf(rx[i])) { + float_overflow_error(); + } + } + + PG_RETURN_POINTER(result); +} + +/* + * Multiply vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_mul); +Datum vector_mul(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + Vector *b = PG_GETARG_VECTOR_P(1); + float *ax = a->x; + float *bx = b->x; + Vector *result; + float *rx; + + CheckDims(a, b); + + result = InitVector(a->dim); + rx = result->x; + + /* Auto-vectorized */ + for (int i = 0, imax = a->dim; i < imax; i++) { + rx[i] = ax[i] * bx[i]; + } + + /* Check for overflow and underflow */ + for (int i = 0, imax = a->dim; i < imax; i++) { + if (isinf(rx[i])) { + float_overflow_error(); + } + + if (rx[i] == 0 && !(ax[i] == 0 || bx[i] == 0)) { + float_underflow_error(); + } + } + + PG_RETURN_POINTER(result); +} + +/* + * Concatenate vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_concat); +Datum vector_concat(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + Vector *b = PG_GETARG_VECTOR_P(1); + Vector *result; + int dim = a->dim + b->dim; + + CheckDim(dim); + result = InitVector(dim); + + for (int i = 0; i < a->dim; i++) { + result->x[i] = a->x[i]; + } + + for (int i = 0; i < b->dim; i++) { + result->x[i + a->dim] = b->x[i]; + } + + PG_RETURN_POINTER(result); +} + +/* + * Quantize a vector + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(binary_quantize); +Datum binary_quantize(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + float *ax = 
a->x; + VarBit *result = InitBitVector(a->dim); + unsigned char *rx = VARBITS(result); + + for (int i = 0; i < a->dim; i++) { + rx[i / 8] |= (ax[i] > 0) << (7 - (i % 8)); + } + + PG_RETURN_VARBIT_P(result); +} + +/* + * Get a subvector + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(subvector); +Datum subvector(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + int32 start = PG_GETARG_INT32(1); + int32 count = PG_GETARG_INT32(2); + int32 end; + float *ax = a->x; + Vector *result; + int dim; + + if (count < 1) { + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("vector must have at least 1 dimension"))); + } + + /* + * Check if (start + count > a->dim), avoiding integer overflow. a->dim + * and count are both positive, so a->dim - count won't overflow. + */ + if (start > a->dim - count) { + end = a->dim + 1; + } else { + end = start + count; + } + + /* Indexing starts at 1, like substring */ + if (start < 1) { + start = 1; + } else if (start > a->dim) { + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("vector must have at least 1 dimension"))); + } + + dim = end - start; + CheckDim(dim); + result = InitVector(dim); + + for (int i = 0; i < dim; i++) { + result->x[i] = ax[start - 1 + i]; + } + + PG_RETURN_POINTER(result); +} + +/* + * Internal helper to compare vectors + */ +int vector_cmp_internal(Vector *a, Vector *b) +{ + int dim = Min(a->dim, b->dim); + + /* Check values before dimensions to be consistent with Postgres arrays */ + for (int i = 0; i < dim; i++) { + if (a->x[i] < b->x[i]) { + return -1; + } + + if (a->x[i] > b->x[i]) { + return 1; + } + } + + if (a->dim < b->dim) { + return -1; + } + + if (a->dim > b->dim) { + return 1; + } + + return 0; +} + +/* + * Less than + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_lt); +Datum vector_lt(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + Vector *b = PG_GETARG_VECTOR_P(1); + + PG_RETURN_BOOL(vector_cmp_internal(a, b) < 0); +} + +/* + * Less than or equal + */ +PGDLLEXPORT 
PG_FUNCTION_INFO_V1(vector_le); +Datum vector_le(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + Vector *b = PG_GETARG_VECTOR_P(1); + + PG_RETURN_BOOL(vector_cmp_internal(a, b) <= 0); +} + +/* + * Equal + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_eq); +Datum vector_eq(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + Vector *b = PG_GETARG_VECTOR_P(1); + + PG_RETURN_BOOL(vector_cmp_internal(a, b) == 0); +} + +/* + * Not equal + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_ne); +Datum vector_ne(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + Vector *b = PG_GETARG_VECTOR_P(1); + + PG_RETURN_BOOL(vector_cmp_internal(a, b) != 0); +} + +/* + * Greater than or equal + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_ge); +Datum vector_ge(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + Vector *b = PG_GETARG_VECTOR_P(1); + + PG_RETURN_BOOL(vector_cmp_internal(a, b) >= 0); +} + +/* + * Greater than + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_gt); +Datum vector_gt(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + Vector *b = PG_GETARG_VECTOR_P(1); + + PG_RETURN_BOOL(vector_cmp_internal(a, b) > 0); +} + +/* + * Compare vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_cmp); +Datum vector_cmp(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + Vector *b = PG_GETARG_VECTOR_P(1); + + PG_RETURN_INT32(vector_cmp_internal(a, b)); +} + +/* + * Accumulate vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_accum); +Datum vector_accum(PG_FUNCTION_ARGS) +{ + ArrayType *statearray = PG_GETARG_ARRAYTYPE_P(0); + Vector *newval = PG_GETARG_VECTOR_P(1); + float8 *statevalues; + int16 dim; + bool newarr; + float8 n; + Datum *statedatums; + float *x = newval->x; + ArrayType *result; + + /* Check array before using */ + statevalues = CheckStateArray(statearray, "vector_accum"); + dim = STATE_DIMS(statearray); + newarr = dim == 0; + + if (newarr) { + dim = newval->dim; + } else { + CheckExpectedDim(dim, newval->dim); + } + + n = 
statevalues[0] + 1.0; + + statedatums = (Datum *)CreateStateDatums(dim); + statedatums[0] = Float8GetDatum(n); + + if (newarr) { + for (int i = 0; i < dim; i++) { + statedatums[i + 1] = Float8GetDatum((double)x[i]); + } + } else { + for (int i = 0; i < dim; i++) { + double v = statevalues[i + 1] + x[i]; + + /* Check for overflow */ + if (isinf(v)) { + float_overflow_error(); + } + + statedatums[i + 1] = Float8GetDatum(v); + } + } + + /* Use float8 array like float4_accum */ + result = construct_array(statedatums, dim + 1, FLOAT8OID, sizeof(float8), FLOAT8PASSBYVAL, TYPALIGN_DOUBLE); + + pfree(statedatums); + + PG_RETURN_ARRAYTYPE_P(result); +} + +/* + * Combine vectors or half vectors (also used for halfvec_combine) + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_combine); +Datum vector_combine(PG_FUNCTION_ARGS) +{ + /* Must also update parameters of halfvec_combine if modifying */ + ArrayType *statearray1 = PG_GETARG_ARRAYTYPE_P(0); + ArrayType *statearray2 = PG_GETARG_ARRAYTYPE_P(1); + float8 *statevalues1; + float8 *statevalues2; + float8 n; + float8 n1; + float8 n2; + int16 dim; + Datum *statedatums; + ArrayType *result; + + /* Check arrays before using */ + statevalues1 = CheckStateArray(statearray1, "vector_combine"); + statevalues2 = CheckStateArray(statearray2, "vector_combine"); + + n1 = statevalues1[0]; + n2 = statevalues2[0]; + + if (n1 == 0.0) { + n = n2; + dim = STATE_DIMS(statearray2); + statedatums = (Datum *)CreateStateDatums(dim); + for (int i = 1; i <= dim; i++) + statedatums[i] = Float8GetDatum(statevalues2[i]); + } else if (n2 == 0.0) { + n = n1; + dim = STATE_DIMS(statearray1); + statedatums = (Datum *)CreateStateDatums(dim); + for (int i = 1; i <= dim; i++) + statedatums[i] = Float8GetDatum(statevalues1[i]); + } else { + n = n1 + n2; + dim = STATE_DIMS(statearray1); + CheckExpectedDim(dim, STATE_DIMS(statearray2)); + statedatums = (Datum *)CreateStateDatums(dim); + for (int i = 1; i <= dim; i++) { + double v = statevalues1[i] + statevalues2[i]; + 
+ /* Check for overflow */ + if (isinf(v)) + float_overflow_error(); + + statedatums[i] = Float8GetDatum(v); + } + } + + statedatums[0] = Float8GetDatum(n); + + result = construct_array(statedatums, dim + 1, FLOAT8OID, sizeof(float8), FLOAT8PASSBYVAL, TYPALIGN_DOUBLE); + + pfree(statedatums); + + PG_RETURN_ARRAYTYPE_P(result); +} + +/* + * Average vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_avg); +Datum vector_avg(PG_FUNCTION_ARGS) +{ + ArrayType *statearray = PG_GETARG_ARRAYTYPE_P(0); + float8 *statevalues; + float8 n; + uint16 dim; + Vector *result; + + /* Check array before using */ + statevalues = CheckStateArray(statearray, "vector_avg"); + n = statevalues[0]; + + /* SQL defines AVG of no values to be NULL */ + if (n == 0.0) { + PG_RETURN_NULL(); + } + + /* Create vector */ + dim = STATE_DIMS(statearray); + CheckDim(dim); + result = InitVector(dim); + for (int i = 0; i < dim; i++) { + result->x[i] = statevalues[i + 1] / n; + CheckElement(result->x[i]); + } + + PG_RETURN_POINTER(result); +} + +/* + * Convert sparse vector to dense vector + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_to_vector); +Datum sparsevec_to_vector(PG_FUNCTION_ARGS) +{ + SparseVector *svec = PG_GETARG_SPARSEVEC_P(0); + int32 typmod = PG_GETARG_INT32(1); + Vector *result; + int dim = svec->dim; + float *values = SPARSEVEC_VALUES(svec); + + CheckDim(dim); + CheckExpectedDim(typmod, dim); + + result = InitVector(dim); + for (int i = 0; i < svec->nnz; i++) { + result->x[svec->indices[i]] = values[i]; + } + + PG_RETURN_POINTER(result); +} + +/* + * WAL-log a range of blocks in a relation. + * + * An image of all pages with block numbers 'startblk' <= X < 'endblk' is + * written to the WAL. If the range is large, this is done in multiple WAL + * records. + * + * If all page follows the standard page layout, with a PageHeader and unused + * space between pd_lower and pd_upper, set 'page_std' to true. 
That allows + * the unused space to be left out from the WAL records, making them smaller. + * + * NOTE: This function acquires exclusive-locks on the pages. Typically, this + * is used on a newly-built relation, and the caller is holding a + * AccessExclusiveLock on it, so no other backend can be accessing it at the + * same time. If that's not the case, you must ensure that this does not + * cause a deadlock through some other means. + */ +void LogNewpageRange(Relation rel, ForkNumber forknum, BlockNumber startblk, BlockNumber endblk, bool page_std) +{ + int flags; + BlockNumber blkno; + + flags = REGBUF_FORCE_IMAGE; + if (page_std) { + flags |= REGBUF_STANDARD; + } + + /* + * Iterate over all the pages in the range. They are collected into + * batches of XLR_MAX_BLOCK_ID pages, and a single WAL-record is written + * for each batch. + */ + XLogEnsureRecordSpace(XLR_MAX_BLOCK_ID - 1, 0); + + blkno = startblk; + while (blkno < endblk) { + Buffer bufpack[XLR_MAX_BLOCK_ID]; + XLogRecPtr recptr; + int nbufs; + int i; + + CHECK_FOR_INTERRUPTS(); + + /* Collect a batch of blocks. */ + nbufs = 0; + while (nbufs < XLR_MAX_BLOCK_ID && blkno < endblk) { + Buffer buf = ReadBufferExtended(rel, forknum, blkno, RBM_NORMAL, NULL); + + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + + /* + * Completely empty pages are not WAL-logged. Writing a WAL record + * would change the LSN, and we don't want that. We want the page + * to stay empty. + */ + if (!PageIsNew(BufferGetPage(buf))) { + bufpack[nbufs++] = buf; + } else { + UnlockReleaseBuffer(buf); + } + blkno++; + } + + /* Nothing more to do if all remaining blocks were empty. */ + if (nbufs == 0) { + break; + } + + /* Write WAL record for this batch. 
*/ + XLogBeginInsert(); + + START_CRIT_SECTION(); + for (i = 0; i < nbufs; i++) { + MarkBufferDirty(bufpack[i]); + XLogRegisterBuffer(i, bufpack[i], flags); + } + + recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI); + + for (i = 0; i < nbufs; i++) { + PageSetLSN(BufferGetPage(bufpack[i]), recptr); + UnlockReleaseBuffer(bufpack[i]); + } + END_CRIT_SECTION(); + } +} + +int PlanCreateIndexWorkers(Relation heapRelation, IndexInfo *indexInfo) +{ + int parallelWorkers = RelationGetParallelWorkers(heapRelation, 0); + int maxHashbucketIndexWorker = 32; + + if (parallelWorkers != 0) { + parallelWorkers = Min(maxHashbucketIndexWorker, parallelWorkers); + } + + if (indexInfo->ii_Concurrent && indexInfo->ii_ParallelWorkers > 0) { + ereport(NOTICE, (errmsg("switch off parallel mode when concurrently flag is set"))); + parallelWorkers = 0; + } + + if (heapRelation->rd_rel->relpersistence == RELPERSISTENCE_GLOBAL_TEMP && indexInfo->ii_ParallelWorkers > 0) { + ereport(NOTICE, (errmsg("switch off parallel mode for global temp table"))); + parallelWorkers = 0; + } + + /* disable parallel building index for system table */ + if (IsCatalogRelation(heapRelation)) { + parallelWorkers = 0; + } + return parallelWorkers; +} diff --git a/src/common/backend/utils/misc/guc/guc_sql.cpp b/src/common/backend/utils/misc/guc/guc_sql.cpp index 48f46f2f6a..6c5fdffe9c 100755 --- a/src/common/backend/utils/misc/guc/guc_sql.cpp +++ b/src/common/backend/utils/misc/guc/guc_sql.cpp @@ -148,6 +148,8 @@ #include "utils/xml.h" #include "workload/cpwlm.h" #include "workload/workload.h" +#include "access/datavec/hnsw.h" +#include "access/datavec/ivfflat.h" #include "utils/guc_sql.h" #define DEFAULT_USTATS_TRACKER_NAPTIME 20 @@ -2564,6 +2566,32 @@ static void InitSqlConfigureNamesInt() NULL, NULL, NULL}, + {{"hnsw.ef_search", + PGC_USERSET, + NODE_ALL, + QUERY_TUNING_OTHER, + gettext_noop("Sets the size of the dynamic candidate list for search"), + gettext_noop("Valid range is 1..1000.")}, + 
&u_sess->datavec_ctx.hnsw_ef_search, + HNSW_DEFAULT_EF_SEARCH, + HNSW_MIN_EF_SEARCH, + HNSW_MAX_EF_SEARCH, + NULL, + NULL, + NULL}, + {{"ivfflat.probes", + PGC_USERSET, + NODE_ALL, + QUERY_TUNING_OTHER, + gettext_noop("Sets the number of probes"), + gettext_noop("Valid range is 1..lists."),}, + &u_sess->datavec_ctx.ivfflat_probes, + IVFFLAT_DEFAULT_PROBES, + IVFFLAT_MIN_LISTS, + IVFFLAT_MAX_LISTS, + NULL, + NULL, + NULL}, #endif /* End-of-list marker */ {{NULL, diff --git a/src/gausskernel/CMakeLists.txt b/src/gausskernel/CMakeLists.txt index 3774f8e9b7..62208ea794 100755 --- a/src/gausskernel/CMakeLists.txt +++ b/src/gausskernel/CMakeLists.txt @@ -213,6 +213,7 @@ list(APPEND gaussdb_objects $ $ $ + $ $ $ $ diff --git a/src/gausskernel/process/threadpool/knl_session.cpp b/src/gausskernel/process/threadpool/knl_session.cpp index 7891194c08..da08250ed4 100755 --- a/src/gausskernel/process/threadpool/knl_session.cpp +++ b/src/gausskernel/process/threadpool/knl_session.cpp @@ -1476,6 +1476,12 @@ static void knl_u_libsw_init(knl_u_libsw_context* libsw_cxt) libsw_cxt->redirect_manager = New(CurrentMemoryContext) RedirectManager(); } +static void knl_u_datavec_init(knl_u_datavec_context* datavec_cxt) +{ + datavec_cxt->hnsw_ef_search = 0; + datavec_cxt->ivfflat_probes = 0; +} + void knl_session_init(knl_session_context* sess_cxt) { Assert (0 != strncmp(CurrentMemoryContext->name, "ErrorContext", sizeof("ErrorContext"))); diff --git a/src/gausskernel/storage/access/CMakeLists.txt b/src/gausskernel/storage/access/CMakeLists.txt index 6e9ab44f95..6f3f78929d 100755 --- a/src/gausskernel/storage/access/CMakeLists.txt +++ b/src/gausskernel/storage/access/CMakeLists.txt @@ -22,7 +22,8 @@ set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/table ${CMAKE_CURRENT_SOURCE_DIR}/transam ${CMAKE_CURRENT_SOURCE_DIR}/ubtree - ${CMAKE_CURRENT_SOURCE_DIR}/ustore + ${CMAKE_CURRENT_SOURCE_DIR}/ustore + ${CMAKE_CURRENT_SOURCE_DIR}/datavec ) if(NOT "${ENABLE_LITE_MODE}" STREQUAL "ON") @@ -50,3 
+51,4 @@ add_subdirectory(table) add_subdirectory(transam) add_subdirectory(ubtree) add_subdirectory(ustore) +add_subdirectory(datavec) diff --git a/src/gausskernel/storage/access/Makefile b/src/gausskernel/storage/access/Makefile index 52c39249cb..fe5b2c81d4 100644 --- a/src/gausskernel/storage/access/Makefile +++ b/src/gausskernel/storage/access/Makefile @@ -2,7 +2,7 @@ subdir = src/gausskernel/storage/access top_builddir = ../../../.. include $(top_builddir)/src/Makefile.global -SUBDIRS = cbtree common heap index nbtree ubtree psort rmgrdesc transam obs hash spgist gist gin hbstore redo table ustore +SUBDIRS = cbtree common heap index nbtree ubtree psort rmgrdesc transam obs hash spgist gist gin hbstore redo table ustore datavec ifeq ($(enable_lite_mode), no) SUBDIRS += archive endif diff --git a/src/gausskernel/storage/access/datavec/CMakeLists.txt b/src/gausskernel/storage/access/datavec/CMakeLists.txt new file mode 100644 index 0000000000..1b2a4b6c90 --- /dev/null +++ b/src/gausskernel/storage/access/datavec/CMakeLists.txt @@ -0,0 +1,16 @@ +#This is the main CMAKE for build all components. 
+AUX_SOURCE_DIRECTORY(${CMAKE_CURRENT_SOURCE_DIR} TGT_datavec_SRC) + +set(TGT_datavec_INC + ${PROJECT_SRC_DIR}/include + ${LZ4_INCLUDE_PATH} + ${LIBCGROUP_INCLUDE_PATH} + ${EVENT_INCLUDE_PATH} + ${ZLIB_INCLUDE_PATH} +) + +set(datavec_DEF_OPTIONS ${MACRO_OPTIONS}) +set(datavec_COMPILE_OPTIONS ${OPTIMIZE_OPTIONS} ${OS_OPTIONS} ${PROTECT_OPTIONS} ${WARNING_OPTIONS} ${BIN_SECURE_OPTIONS} ${CHECK_OPTIONS}) +set(datavec_LINK_OPTIONS ${BIN_LINK_OPTIONS}) +add_static_objtarget(gausskernel_storage_access_datavec TGT_datavec_SRC TGT_datavec_INC "${datavec_DEF_OPTIONS}" "${datavec_COMPILE_OPTIONS}" "${datavec_LINK_OPTIONS}") + diff --git a/src/gausskernel/storage/access/datavec/Makefile b/src/gausskernel/storage/access/datavec/Makefile new file mode 100644 index 0000000000..c6c1736f66 --- /dev/null +++ b/src/gausskernel/storage/access/datavec/Makefile @@ -0,0 +1,16 @@ +subdir = src/gausskernel/storage/access/datavec +top_builddir = ../../../../.. +include $(top_builddir)/src/Makefile.global + +ifneq "$(MAKECMDGOALS)" "clean" + ifneq "$(MAKECMDGOALS)" "distclean" + ifneq "$(shell which g++ |grep hutaf_llt |wc -l)" "1" + -include $(DEPEND) + endif + endif +endif + +OBJS = bitutils.o hnsw.o hnswbuild.o hnswdelete.o hnswinsert.o hnswscan.o hnswutils.o hnswvacuum.o \ + ivfbuild.o ivfflat.o ivfinsert.o ivfkmeans.o ivfscan.o ivfutils.o ivfvacuum.o vecindex.o + +include $(top_srcdir)/src/gausskernel/common.mk diff --git a/src/gausskernel/storage/access/datavec/bitutils.cpp b/src/gausskernel/storage/access/datavec/bitutils.cpp new file mode 100644 index 0000000000..dc245477de --- /dev/null +++ b/src/gausskernel/storage/access/datavec/bitutils.cpp @@ -0,0 +1,215 @@ +#include "postgres.h" + +#include "access/datavec/bitvec.h" +#include "access/datavec/halfvec.h" /* for USE_DISPATCH and USE_TARGET_CLONES */ +#include "port/pg_bitutils.h" + +#if defined(USE_DISPATCH) +#define BIT_DISPATCH +#endif + +#ifdef BIT_DISPATCH +#include + +#if defined(USE__GET_CPUID) +#include +#else +#include 
+#endif + +#define TARGET_AVX512_POPCOUNT +#endif + +/* Disable for LLVM due to crash with bitcode generation */ +#if defined(USE_TARGET_CLONES) && !defined(__POPCNT__) && !defined(__llvm__) +#define BIT_TARGET_CLONES __attribute__((target_clones("default", "popcnt"))) +#else +#define BIT_TARGET_CLONES +#endif + +/* Use built-ins when possible for inlining */ +#if defined(HAVE__BUILTIN_POPCOUNT) && defined(HAVE_LONG_INT_64) +#define popcount64(x) __builtin_popcountl(x) +#elif defined(HAVE__BUILTIN_POPCOUNT) && defined(HAVE_LONG_LONG_INT_64) +#define popcount64(x) __builtin_popcountll(x) +#elif !defined(_MSC_VER) +/* Fails to resolve with MSVC */ +#define popcount64(x) pg_popcount64(x) +#endif + +BIT_TARGET_CLONES static uint64 BitHammingDistanceDefault(uint32 bytes, unsigned char *ax, unsigned char *bx, + uint64 distance) +{ +#ifdef popcount64 + errno_t rc = EOK; + for (; bytes >= sizeof(uint64); bytes -= sizeof(uint64)) { + uint64 axs; + uint64 bxs; + + /* Ensure aligned */ + rc = memcpy_s(&axs, sizeof(uint64), ax, sizeof(uint64)); + securec_check(rc, "\0", "\0"); + rc = memcpy_s(&bxs, sizeof(uint64), bx, sizeof(uint64)); + securec_check(rc, "\0", "\0"); + + distance += popcount64(axs ^ bxs); + + ax += sizeof(uint64); + bx += sizeof(uint64); + } +#endif + + for (uint32 i = 0; i < bytes; i++) + distance += pg_number_of_ones[ax[i] ^ bx[i]]; + + return distance; +} + +#ifdef BIT_DISPATCH +TARGET_AVX512_POPCOUNT static uint64 BitHammingDistanceAvx512Popcount(uint32 bytes, unsigned char *ax, + unsigned char *bx, uint64 distance) +{ + __m512i dist = _mm512_setzero_si512(); + + for (; bytes >= sizeof(__m512i); bytes -= sizeof(__m512i)) { + __m512i axs = _mm512_loadu_si512((const __m512i *)ax); + __m512i bxs = _mm512_loadu_si512((const __m512i *)bx); + + dist = _mm512_add_epi64(dist, _mm512_popcnt_epi64(_mm512_xor_si512(axs, bxs))); + + ax += sizeof(__m512i); + bx += sizeof(__m512i); + } + + distance += _mm512_reduce_add_epi64(dist); + + return 
BitHammingDistanceDefault(bytes, ax, bx, distance); +} +#endif + +BIT_TARGET_CLONES static double BitJaccardDistanceDefault(uint32 bytes, unsigned char *ax, unsigned char *bx, uint64 ab, + uint64 aa, uint64 bb) +{ +#ifdef popcount64 + errno_t rc = EOK; + for (; bytes >= sizeof(uint64); bytes -= sizeof(uint64)) { + uint64 axs; + uint64 bxs; + + /* Ensure aligned */ + rc = memcpy_s(&axs, sizeof(uint64), ax, sizeof(uint64)); + securec_check(rc, "\0", "\0"); + rc = memcpy_s(&bxs, sizeof(uint64), bx, sizeof(uint64)); + securec_check(rc, "\0", "\0"); + + ab += popcount64(axs & bxs); + aa += popcount64(axs); + bb += popcount64(bxs); + + ax += sizeof(uint64); + bx += sizeof(uint64); + } +#endif + + for (uint32 i = 0; i < bytes; i++) { + ab += pg_number_of_ones[ax[i] & bx[i]]; + aa += pg_number_of_ones[ax[i]]; + bb += pg_number_of_ones[bx[i]]; + } + + if (ab == 0) { + return 1; + } else { + return 1 - (ab / ((double)(aa + bb - ab))); + } +} + +#ifdef BIT_DISPATCH +TARGET_AVX512_POPCOUNT static double BitJaccardDistanceAvx512Popcount(uint32 bytes, unsigned char *ax, + unsigned char *bx, uint64 ab, uint64 aa, + uint64 bb) +{ + __m512i abx = _mm512_setzero_si512(); + __m512i aax = _mm512_setzero_si512(); + __m512i bbx = _mm512_setzero_si512(); + + for (; bytes >= sizeof(__m512i); bytes -= sizeof(__m512i)) { + __m512i axs = _mm512_loadu_si512((const __m512i *)ax); + __m512i bxs = _mm512_loadu_si512((const __m512i *)bx); + + abx = _mm512_add_epi64(abx, _mm512_popcnt_epi64(_mm512_and_si512(axs, bxs))); + aax = _mm512_add_epi64(aax, _mm512_popcnt_epi64(axs)); + bbx = _mm512_add_epi64(bbx, _mm512_popcnt_epi64(bxs)); + + ax += sizeof(__m512i); + bx += sizeof(__m512i); + } + + ab += _mm512_reduce_add_epi64(abx); + aa += _mm512_reduce_add_epi64(aax); + bb += _mm512_reduce_add_epi64(bbx); + + return BitJaccardDistanceDefault(bytes, ax, bx, ab, aa, bb); +} +#endif + +#ifdef BIT_DISPATCH +#define CPU_FEATURE_OSXSAVE (1 << 27) /* F1 ECX */ +#define CPU_FEATURE_AVX512F (1 << 16) /* F7,0 
EBX */
+#define CPU_FEATURE_AVX512VPOPCNTDQ (1 << 14) /* F7,0 ECX */
+
+#ifdef _MSC_VER
+#define TARGET_XSAVE
+#else
+#define TARGET_XSAVE __attribute__((target("xsave")))
+#endif
+
+TARGET_XSAVE static bool SupportsAvx512Popcount()
+{
+    unsigned int exx[4] = {0, 0, 0, 0};
+
+#if defined(USE__GET_CPUID)
+    __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
+#else
+    __cpuid(exx, 1);
+#endif
+
+    /* Check OS supports XSAVE */
+    if ((exx[2] & CPU_FEATURE_OSXSAVE) != CPU_FEATURE_OSXSAVE)
+        return false;
+
+    /* Check XMM, YMM, and ZMM registers are enabled */
+    if ((_xgetbv(0) & 0xe6) != 0xe6)
+        return false;
+
+#if defined(USE__GET_CPUID)
+    __get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
+#else
+    __cpuidex(exx, 7, 0);
+#endif
+
+    /* Check AVX512F */
+    if ((exx[1] & CPU_FEATURE_AVX512F) != CPU_FEATURE_AVX512F)
+        return false;
+
+    /* Check AVX512VPOPCNTDQ */
+    return (exx[2] & CPU_FEATURE_AVX512VPOPCNTDQ) == CPU_FEATURE_AVX512VPOPCNTDQ;
+}
+#endif
+
+void BitvecInit(void)
+{
+    /*
+     * Could skip pointer when single function, but no difference in
+     * performance
+     */
+    BitHammingDistance = BitHammingDistanceDefault;
+    BitJaccardDistance = BitJaccardDistanceDefault;
+
+#ifdef BIT_DISPATCH
+    if (SupportsAvx512Popcount()) {
+        BitHammingDistance = BitHammingDistanceAvx512Popcount;
+        BitJaccardDistance = BitJaccardDistanceAvx512Popcount;
+    }
+#endif
+}
diff --git a/src/gausskernel/storage/access/datavec/hnsw.cpp b/src/gausskernel/storage/access/datavec/hnsw.cpp
new file mode 100644
index 0000000000..a2ad61e056
--- /dev/null
+++ b/src/gausskernel/storage/access/datavec/hnsw.cpp
@@ -0,0 +1,337 @@
+#include "postgres.h"
+
+#include <float.h> /* DBL_MAX, used by the cost estimator */
+#include <math.h>  /* log, used by the cost estimator */
+
+#include "access/amapi.h"
+#include "access/reloptions.h"
+#include "commands/vacuum.h"
+#include "access/datavec/hnsw.h"
+#include "miscadmin.h"
+#include "utils/guc.h"
+#include "utils/selfuncs.h"
+
+int hnsw_lock_tranche_id;
+static relopt_kind hnsw_relopt_kind;
+static THR_LOCAL bool HnswNeedInitialization = true;
+
+/*
+ * 
Initialize index options and variables
+ */
+void HnswInit(void)
+{
+    hnsw_relopt_kind = RELOPT_KIND_DATAVEC;
+    add_int_reloption(hnsw_relopt_kind, "m", "Max number of connections", HNSW_DEFAULT_M, HNSW_MIN_M, HNSW_MAX_M);
+    add_int_reloption(hnsw_relopt_kind, "ef_construction", "Size of the dynamic candidate list for construction",
+                      HNSW_DEFAULT_EF_CONSTRUCTION, HNSW_MIN_EF_CONSTRUCTION, HNSW_MAX_EF_CONSTRUCTION);
+    add_int_reloption(hnsw_relopt_kind, "pq_m", "Number of PQ subquantizer", HNSW_DEFAULT_PQ_M, HNSW_MIN_PQ_M,
+                      HNSW_MAX_PQ_M);
+    add_int_reloption(hnsw_relopt_kind, "pq_ksub", "Number of centroids for each PQ subquantizer", HNSW_DEFAULT_PQ_KSUB,
+                      HNSW_MIN_PQ_KSUB, HNSW_MAX_PQ_KSUB);
+    add_bool_reloption(hnsw_relopt_kind, "enable_pq", "Whether to enable PQ", HNSW_DEFAULT_ENABLE_PQ);
+}
+
+/*
+ * Estimate the cost of an index scan (paths without ORDER BY operators are priced at DBL_MAX)
+ */
+static void hnswcostestimate_internal(PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost,
+                                      Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation)
+{
+    GenericCosts costs;
+    int m;
+    int entryLevel;
+    Relation index;
+
+    /* Never use index without order */
+    if (path->indexorderbys == NULL) {
+        *indexStartupCost = DBL_MAX;
+        *indexTotalCost = DBL_MAX;
+        *indexSelectivity = 0;
+        *indexCorrelation = 0;
+        return;
+    }
+
+    MemSet(&costs, 0, sizeof(costs));
+
+    index = index_open(path->indexinfo->indexoid, NoLock);
+    HnswGetMetaPageInfo(index, &m, NULL);
+    index_close(index, NoLock);
+
+    /* Approximate entry level: scale by ml before truncating; count an empty index as one tuple (avoids log(inf)) */
+    entryLevel = (int)(-log(1.0 / Max(path->indexinfo->tuples, 1.0)) * HnswGetMl(m));
+
+    /* TODO Improve estimate of visited tuples (currently underestimates) */
+    /* Account for number of tuples (or entry level), m, and ef_search */
+    costs.numIndexTuples = (entryLevel + 2) * m;
+
+    genericcostestimate(root, path, loop_count, costs.numIndexTuples, &costs.indexStartupCost, &costs.indexTotalCost,
+                        &costs.indexSelectivity, &costs.indexCorrelation);
+
+    /* Use total cost since 
most work happens before first tuple is returned */ + *indexStartupCost = costs.indexTotalCost; + *indexTotalCost = costs.indexTotalCost; + *indexSelectivity = costs.indexSelectivity; + *indexCorrelation = costs.indexCorrelation; +} + +/* + * Parse and validate the reloptions + */ +static bytea *hnswoptions_internal(Datum reloptions, bool validate) +{ + static const relopt_parse_elt tab[] = { + {"m", RELOPT_TYPE_INT, offsetof(HnswOptions, m)}, + {"ef_construction", RELOPT_TYPE_INT, offsetof(HnswOptions, efConstruction)}, + {"enable_pq", RELOPT_TYPE_BOOL, offsetof(HnswOptions, enablePQ)}, + {"pq_m", RELOPT_TYPE_INT, offsetof(HnswOptions, pqM)}, + {"pq_ksub", RELOPT_TYPE_INT, offsetof(HnswOptions, pqKsub)}, + {"parallel_workers", RELOPT_TYPE_INT, offsetof(StdRdOptions, parallel_workers)}, + {"storage_type", RELOPT_TYPE_STRING, offsetof(HnswOptions, storage_type)}}; + +#if PG_VERSION_NUM >= 130000 + return (bytea *)build_reloptions(reloptions, validate, hnsw_relopt_kind, sizeof(HnswOptions), tab, lengthof(tab)); +#else + relopt_value *options; + int numoptions; + HnswOptions *rdopts; + + if (HnswNeedInitialization) { + HnswInit(); + HnswNeedInitialization = false; + } + options = parseRelOptions(reloptions, validate, hnsw_relopt_kind, &numoptions); + rdopts = (HnswOptions *)allocateReloptStruct(sizeof(HnswOptions), options, numoptions); + fillRelOptions((void *)rdopts, sizeof(HnswOptions), options, numoptions, validate, tab, lengthof(tab)); + + return (bytea *)rdopts; +#endif +} + +/* + * Validate catalog entries for the specified operator class + */ +static bool hnswvalidate_internal(Oid opclassoid) +{ + return true; +} + +/* + * Define index handler + * + * See https://www.postgresql.org/docs/current/index-api.html + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(hnswhandler); +Datum hnswhandler(PG_FUNCTION_ARGS) +{ + IndexAmRoutine *amroutine = makeNode(IndexAmRoutine); + + amroutine->amstrategies = 0; + amroutine->amsupport = 3; +#if PG_VERSION_NUM >= 130000 + 
amroutine->amoptsprocnum = 0;
+#endif
+    amroutine->amcanorder = false;
+    amroutine->amcanorderbyop = true; /* the cost estimator rejects paths that have no ORDER BY operator */
+    amroutine->amcanbackward = false; /* backward scans / changing direction mid-scan are not supported */
+    amroutine->amcanunique = false;
+    amroutine->amcanmulticol = false;
+    amroutine->amoptionalkey = true;
+    amroutine->amsearcharray = false;
+    amroutine->amsearchnulls = false;
+    amroutine->amstorage = false;
+    amroutine->amclusterable = false;
+    amroutine->ampredlocks = false;
+    amroutine->amcanparallel = false;
+    amroutine->amcaninclude = false;
+#if PG_VERSION_NUM >= 130000
+    amroutine->amusemaintenanceworkmem = false; /* not used during VACUUM */
+    amroutine->amparallelvacuumoptions = VACUUM_OPTION_PARALLEL_BULKDEL;
+#endif
+    amroutine->amkeytype = InvalidOid;
+
+    /* Interface functions: each callback is registered by name, copied into a NAMEDATALEN-bounded buffer */
+    errno_t rc = 0;
+    rc = strcpy_s(amroutine->ambuildfuncname, NAMEDATALEN, "hnswbuild");
+    securec_check(rc, "\0", "\0");
+    rc = strcpy_s(amroutine->ambuildemptyfuncname, NAMEDATALEN, "hnswbuildempty");
+    securec_check(rc, "\0", "\0");
+    rc = strcpy_s(amroutine->aminsertfuncname, NAMEDATALEN, "hnswinsert");
+    securec_check(rc, "\0", "\0");
+    rc = strcpy_s(amroutine->ambulkdeletefuncname, NAMEDATALEN, "hnswbulkdelete");
+    securec_check(rc, "\0", "\0");
+    rc = strcpy_s(amroutine->amvacuumcleanupfuncname, NAMEDATALEN, "hnswvacuumcleanup");
+    securec_check(rc, "\0", "\0");
+    rc = strcpy_s(amroutine->amcostestimatefuncname, NAMEDATALEN, "hnswcostestimate");
+    securec_check(rc, "\0", "\0");
+    rc = strcpy_s(amroutine->amoptionsfuncname, NAMEDATALEN, "hnswoptions");
+    securec_check(rc, "\0", "\0");
+    rc = strcpy_s(amroutine->amvalidatefuncname, NAMEDATALEN, "hnswvalidate");
+    securec_check(rc, "\0", "\0");
+    rc = strcpy_s(amroutine->ambeginscanfuncname, NAMEDATALEN, "hnswbeginscan");
+    securec_check(rc, "\0", "\0");
+    rc = strcpy_s(amroutine->amrescanfuncname, NAMEDATALEN, "hnswrescan");
+    securec_check(rc, "\0", "\0");
+    rc = strcpy_s(amroutine->amgettuplefuncname, NAMEDATALEN, "hnswgettuple");
+
securec_check(rc, "\0", "\0"); + rc = strcpy_s(amroutine->amendscanfuncname, NAMEDATALEN, "hnswendscan"); + securec_check(rc, "\0", "\0"); + rc = strcpy_s(amroutine->amdeletefuncname, NAMEDATALEN, "hnswdelete"); + securec_check(rc, "\0", "\0"); + + PG_RETURN_POINTER(amroutine); +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(hnswbuild); +Datum hnswbuild(PG_FUNCTION_ARGS) +{ + Relation heap = (Relation)PG_GETARG_POINTER(0); + Relation index = (Relation)PG_GETARG_POINTER(1); + IndexInfo *indexinfo = (IndexInfo *)PG_GETARG_POINTER(2); + IndexBuildResult *result = hnswbuild_internal(heap, index, indexinfo); + + PG_RETURN_POINTER(result); +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(hnswbuildempty); +Datum hnswbuildempty(PG_FUNCTION_ARGS) +{ + Relation index = (Relation)PG_GETARG_POINTER(0); + hnswbuildempty_internal(index); + + PG_RETURN_VOID(); +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(hnswinsert); +Datum hnswinsert(PG_FUNCTION_ARGS) +{ + Relation rel = (Relation)PG_GETARG_POINTER(0); + Datum *values = (Datum *)PG_GETARG_POINTER(1); + bool *isnull = reinterpret_cast(PG_GETARG_POINTER(2)); + ItemPointer ht_ctid = (ItemPointer)PG_GETARG_POINTER(3); + Relation heaprel = (Relation)PG_GETARG_POINTER(4); + IndexUniqueCheck checkunique = (IndexUniqueCheck)PG_GETARG_INT32(5); + bool result = hnswinsert_internal(rel, values, isnull, ht_ctid, heaprel, checkunique); + + PG_RETURN_BOOL(result); +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(hnswbulkdelete); +Datum hnswbulkdelete(PG_FUNCTION_ARGS) +{ + IndexVacuumInfo *info = (IndexVacuumInfo *)PG_GETARG_POINTER(0); + IndexBulkDeleteResult *volatile stats = (IndexBulkDeleteResult *)PG_GETARG_POINTER(1); + IndexBulkDeleteCallback callback = (IndexBulkDeleteCallback)PG_GETARG_POINTER(2); + void *callbackState = static_cast(PG_GETARG_POINTER(3)); + stats = hnswbulkdelete_internal(info, stats, callback, callbackState); + + PG_RETURN_POINTER(stats); +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(hnswvacuumcleanup); +Datum hnswvacuumcleanup(PG_FUNCTION_ARGS) +{ + 
IndexVacuumInfo *info = (IndexVacuumInfo *)PG_GETARG_POINTER(0); + IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *)PG_GETARG_POINTER(1); + stats = hnswvacuumcleanup_internal(info, stats); + + PG_RETURN_POINTER(stats); +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(hnswcostestimate); +Datum hnswcostestimate(PG_FUNCTION_ARGS) +{ + PlannerInfo *root = (PlannerInfo *)PG_GETARG_POINTER(0); + IndexPath *path = (IndexPath *)PG_GETARG_POINTER(1); + double loopcount = static_cast(PG_GETARG_FLOAT8(2)); + Cost *startupcost = (Cost *)PG_GETARG_POINTER(3); + Cost *totalcost = (Cost *)PG_GETARG_POINTER(4); + Selectivity *selectivity = (Selectivity *)PG_GETARG_POINTER(5); + double *correlation = reinterpret_cast(PG_GETARG_POINTER(6)); + hnswcostestimate_internal(root, path, loopcount, startupcost, totalcost, selectivity, correlation); + + PG_RETURN_VOID(); +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(hnswoptions); +Datum hnswoptions(PG_FUNCTION_ARGS) +{ + Datum reloptions = PG_GETARG_DATUM(0); + bool validate = PG_GETARG_BOOL(1); + bytea *result = hnswoptions_internal(reloptions, validate); + + if (NULL != result) + PG_RETURN_BYTEA_P(result); + + PG_RETURN_NULL(); +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(hnswvalidate); +Datum hnswvalidate(PG_FUNCTION_ARGS) +{ + Oid opclassoid = PG_GETARG_OID(0); + bool result = hnswvalidate_internal(opclassoid); + + PG_RETURN_BOOL(result); +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(hnswbeginscan); +Datum hnswbeginscan(PG_FUNCTION_ARGS) +{ + Relation rel = (Relation)PG_GETARG_POINTER(0); + int nkeys = PG_GETARG_INT32(1); + int norderbys = PG_GETARG_INT32(2); + IndexScanDesc scan = hnswbeginscan_internal(rel, nkeys, norderbys); + + PG_RETURN_POINTER(scan); +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(hnswrescan); +Datum hnswrescan(PG_FUNCTION_ARGS) +{ + IndexScanDesc scan = (IndexScanDesc)PG_GETARG_POINTER(0); + ScanKey scankey = (ScanKey)PG_GETARG_POINTER(1); + int nkeys = PG_GETARG_INT32(2); + ScanKey orderbys = (ScanKey)PG_GETARG_POINTER(3); + int norderbys = 
PG_GETARG_INT32(4); + hnswrescan_internal(scan, scankey, nkeys, orderbys, norderbys); + + PG_RETURN_VOID(); +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(hnswgettuple); +Datum hnswgettuple(PG_FUNCTION_ARGS) +{ + IndexScanDesc scan = (IndexScanDesc)PG_GETARG_POINTER(0); + ScanDirection direction = (ScanDirection)PG_GETARG_INT32(1); + + if (NULL == scan) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("Invalid arguments for function hnswgettuple"))); + + bool result = hnswgettuple_internal(scan, direction); + + PG_RETURN_BOOL(result); +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(hnswendscan); +Datum hnswendscan(PG_FUNCTION_ARGS) +{ + IndexScanDesc scan = (IndexScanDesc)PG_GETARG_POINTER(0); + hnswendscan_internal(scan); + + PG_RETURN_VOID(); +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(hnswdelete); +Datum hnswdelete(PG_FUNCTION_ARGS) +{ + Relation rel = (Relation)PG_GETARG_POINTER(0); + Datum *values = (Datum *)PG_GETARG_POINTER(1); + const bool *isnull = (const bool *)PG_GETARG_POINTER(2); + ItemPointer heapTCtid = (ItemPointer)PG_GETARG_POINTER(3); + bool isRollbackIndex = (bool)PG_GETARG_POINTER(4); + + bool result = hnswdelete_internal(rel, values, isnull, heapTCtid, isRollbackIndex); + + PG_RETURN_BOOL(result); +} \ No newline at end of file diff --git a/src/gausskernel/storage/access/datavec/hnswbuild.cpp b/src/gausskernel/storage/access/datavec/hnswbuild.cpp new file mode 100644 index 0000000000..af04d6f6ca --- /dev/null +++ b/src/gausskernel/storage/access/datavec/hnswbuild.cpp @@ -0,0 +1,1051 @@ +/* + * The HNSW build happens in two phases: + * + * 1. In-memory phase + * + * In this first phase, the graph is held completely in memory. When the graph + * is fully built, or we run out of memory reserved for the build (determined + * by maintenance_work_mem), we materialize the graph to disk (see + * FlushPages()), and switch to the on-disk phase. + * + * In a parallel build, a large contiguous chunk of shared memory is allocated + * to hold the graph. 
Each worker process has its own HnswBuildState struct in + * private memory, which contains information that doesn't change throughout + * the build, and pointers to the shared structs in shared memory. The shared + * memory area is mapped to a different address in each worker process, and + * 'HnswBuildState.hnswarea' points to the beginning of the shared area in the + * worker process's address space. All pointers used in the graph are + * "relative pointers", stored as an offset from 'hnswarea'. + * + * Each element is protected by an LWLock. It must be held when reading or + * modifying the element's neighbors or 'heaptids'. + * + * In a non-parallel build, the graph is held in backend-private memory. All + * the elements are allocated in a dedicated memory context, 'graphCtx', and + * the pointers used in the graph are regular pointers. + * + * 2. On-disk phase + * + * In the on-disk phase, the index is built by inserting each vector to the + * index one by one, just like on INSERT. The only difference is that we don't + * WAL-log the individual inserts. If the graph fit completely in memory and + * was fully built in the in-memory phase, the on-disk phase is skipped. + * + * After we have finished building the graph, we perform one more scan through + * the index and write all the pages to the WAL. 
+ */ +#include "postgres.h" + +#include + +#include "access/tableam.h" +#include "access/xact.h" +#include "access/xloginsert.h" +#include "postmaster/bgworker.h" +#include "catalog/index.h" +#include "access/datavec/hnsw.h" +#include "miscadmin.h" +#include "storage/buf/bufmgr.h" +#include "tcop/tcopprot.h" +#include "utils/datum.h" +#include "utils/memutils.h" + +#if PG_VERSION_NUM >= 140000 +#include "utils/backend_progress.h" +#else +#include "pgstat.h" +#endif + +#if PG_VERSION_NUM >= 130000 +#define CALLBACK_ITEM_POINTER ItemPointer tid +#else +#define CALLBACK_ITEM_POINTER HeapTuple hup +#endif + +#if PG_VERSION_NUM >= 140000 +#include "utils/backend_status.h" +#include "utils/wait_event.h" +#endif + +#define PARALLEL_KEY_HNSW_SHARED UINT64CONST(0xA000000000000001) +#define PARALLEL_KEY_HNSW_AREA UINT64CONST(0xA000000000000002) +#define PARALLEL_KEY_QUERY_TEXT UINT64CONST(0xA000000000000003) +#define PROGRESS_CREATEIDX_TUPLES_DONE 0 + +#if PG_VERSION_NUM < 130000 +#define GENERATIONCHUNK_RAWSIZE (SIZEOF_SIZE_T + SIZEOF_VOID_P * 2) +#endif + +/* + * Create the metapage + */ +static void CreateMetaPage(HnswBuildState *buildstate) +{ + Relation index = buildstate->index; + ForkNumber forkNum = buildstate->forkNum; + Buffer buf; + Page page; + HnswMetaPage metap; + + buf = HnswNewBuffer(index, forkNum); + page = BufferGetPage(buf); + HnswInitPage(buf, page); + + if (buildstate->isUStore) { + HnswPageGetOpaque(page)->pageType = HNSW_USTORE_PAGE_TYPE; + } + + /* Set metapage data */ + metap = HnswPageGetMeta(page); + metap->magicNumber = HNSW_MAGIC_NUMBER; + metap->version = HNSW_VERSION; + metap->dimensions = buildstate->dimensions; + metap->m = buildstate->m; + metap->efConstruction = buildstate->efConstruction; + metap->entryBlkno = InvalidBlockNumber; + metap->entryOffno = InvalidOffsetNumber; + metap->entryLevel = -1; + metap->insertPage = InvalidBlockNumber; + ((PageHeader)page)->pd_lower = ((char *)metap + sizeof(HnswMetaPageData)) - (char *)page; + + 
MarkBufferDirty(buf);
+    UnlockReleaseBuffer(buf);
+}
+
+/*
+ * Create the append metapage: HNSW parameters plus PQ table sizes and slot layout, written to a new buffer
+ */
+static void CreateAppendMetaPage(HnswBuildState *buildstate)
+{
+    Relation index = buildstate->index;
+    ForkNumber forkNum = buildstate->forkNum;
+    Buffer buf;
+    Page page;
+    HnswAppendMetaPage appMetap;
+    int slotTypeNum = 2; /* presumably element slots and neighbor slots (cf. elementInsertSlot/neighborInsertSlot) - confirm */
+
+    buf = HnswNewBuffer(index, forkNum);
+    page = BufferGetPage(buf);
+    HnswInitPage(buf, page);
+
+    /* Set append metapage data */
+    appMetap = HnswPageGetAppendMeta(page);
+    appMetap->magicNumber = HNSW_MAGIC_NUMBER;
+    appMetap->version = HNSW_VERSION;
+    appMetap->dimensions = buildstate->dimensions;
+    appMetap->m = buildstate->m;
+    appMetap->efConstruction = buildstate->efConstruction;
+    appMetap->entryBlkno = InvalidBlockNumber;
+    appMetap->entryOffno = InvalidOffsetNumber;
+    appMetap->entryLevel = -1;
+
+    /* set PQ info; all table sizes are zero when PQ is disabled */
+    appMetap->enablePQ = buildstate->enablePQ;
+    appMetap->pqM = buildstate->pqM;
+    appMetap->pqKsub = buildstate->pqKsub;
+    appMetap->pqcodeSize = buildstate->pqcodeSize;
+    if (buildstate->enablePQ) {
+        appMetap->centerTableSize = (uint32)buildstate->dimensions * sizeof(float);
+        appMetap->pqTableSize = (uint32)buildstate->dimensions * buildstate->pqKsub * sizeof(float);
+        appMetap->pqTableNblk =
+            (uint16)((appMetap->pqTableSize + HNSW_PQTABLE_STORAGE_SIZE - 1) / HNSW_PQTABLE_STORAGE_SIZE); /* ceiling division */
+    } else {
+        appMetap->centerTableSize = 0;
+        appMetap->pqTableSize = 0;
+        appMetap->pqTableNblk = 0;
+    }
+
+    /* set slot info: use the default pages-per-slot unless NBuffers / HNSW_BUFFER_THRESHOLD is too small for it */
+    appMetap->npages =
+        (HNSW_DEFAULT_NPAGES_PER_SLOT * slotTypeNum) < (g_instance.attr.attr_storage.NBuffers / HNSW_BUFFER_THRESHOLD)
+            ?
HNSW_DEFAULT_NPAGES_PER_SLOT
+            : (g_instance.attr.attr_storage.NBuffers / (slotTypeNum * HNSW_BUFFER_THRESHOLD));
+    appMetap->slotStartBlkno = HNSW_PQTABLE_START_BLKNO + appMetap->pqTableNblk; /* slots begin right after the PQ table blocks */
+    appMetap->elementInsertSlot = InvalidBlockNumber;
+    appMetap->neighborInsertSlot = InvalidBlockNumber;
+
+    ((PageHeader)page)->pd_lower = ((char *)appMetap + sizeof(HnswAppendMetaPageData)) - (char *)page;
+
+    MarkBufferDirty(buf);
+    UnlockReleaseBuffer(buf);
+}
+
+/*
+ * Add a new page: link it from the current page, release the current buffer, and make *buf / *page refer to the new one
+ */
+static void HnswBuildAppendPage(Relation index, Buffer *buf, Page *page, ForkNumber forkNum)
+{
+    /* Add a new page */
+    Buffer newbuf = HnswNewBuffer(index, forkNum);
+
+    /* Update previous page */
+    HnswPageGetOpaque(*page)->nextblkno = BufferGetBlockNumber(newbuf);
+
+    /* Commit */
+    MarkBufferDirty(*buf);
+    UnlockReleaseBuffer(*buf);
+
+    /* Can take a while, so ensure we can interrupt */
+    /* Needs to be called when no buffer locks are held */
+    LockBuffer(newbuf, BUFFER_LOCK_UNLOCK); /* drop the lock on the new buffer just for the interrupt check */
+    CHECK_FOR_INTERRUPTS();
+    LockBuffer(newbuf, BUFFER_LOCK_EXCLUSIVE);
+
+    /* Prepare new page */
+    *buf = newbuf;
+    *page = BufferGetPage(*buf);
+    HnswInitPage(*buf, *page);
+}
+
+/*
+ * Create graph pages: write one element tuple and one neighbor placeholder per in-memory element
+ */
+static void CreateGraphPages(HnswBuildState *buildstate)
+{
+    Relation index = buildstate->index;
+    ForkNumber forkNum = buildstate->forkNum;
+    Size maxSize;
+    HnswElementTuple etup;
+    HnswNeighborTuple ntup;
+    BlockNumber insertPage;
+    HnswElement entryPoint;
+    Buffer buf;
+    Page page;
+    HnswElementPtr iter = buildstate->graph->head;
+    char *base = buildstate->hnswarea;
+    IndexTransInfo *idxXid;
+
+    /* Calculate sizes */
+    maxSize = HNSW_MAX_SIZE;
+
+    /* Allocate once */
+    etup = (HnswElementTuple)palloc0(HNSW_TUPLE_ALLOC_SIZE);
+    ntup = (HnswNeighborTuple)palloc0(HNSW_TUPLE_ALLOC_SIZE);
+
+    /* Prepare first page */
+    buf = HnswNewBuffer(index, forkNum);
+    page = BufferGetPage(buf);
+    HnswInitPage(buf, page);
+
+    if (buildstate->isUStore) {
+        HnswPageGetOpaque(page)->pageType =
HNSW_USTORE_PAGE_TYPE;
+    }
+
+    while (!HnswPtrIsNull(base, iter)) {
+        HnswElement element = (HnswElement)HnswPtrAccess(base, iter);
+        Size etupSize;
+        Size ntupSize;
+        Size combinedSize;
+        Pointer valuePtr = (Pointer)HnswPtrAccess(base, element->value);
+
+        /* Update iterator */
+        iter = element->next;
+
+        /* Zero memory for each element */
+        MemSet(etup, 0, HNSW_TUPLE_ALLOC_SIZE);
+
+        /* Calculate sizes; combinedSize also counts one line pointer and, for ustore, one IndexTransInfo */
+        etupSize = HNSW_ELEMENT_TUPLE_SIZE(VARSIZE_ANY(valuePtr));
+        ntupSize = HNSW_NEIGHBOR_TUPLE_SIZE(element->level, buildstate->m);
+        combinedSize = etupSize + ntupSize + sizeof(ItemIdData);
+
+        if (buildstate->isUStore) {
+            combinedSize += sizeof(IndexTransInfo);
+        }
+
+        /* Initial size check */
+        if (etupSize > HNSW_TUPLE_ALLOC_SIZE) {
+            elog(ERROR, "index tuple too large");
+        }
+
+        HnswSetElementTuple(base, etup, element);
+
+        /* Keep element and neighbors on the same page if possible */
+        if (PageGetFreeSpace(page) < etupSize || (combinedSize <= maxSize && PageGetFreeSpace(page) < combinedSize)) {
+            HnswBuildAppendPage(index, &buf, &page, forkNum);
+            if (buildstate->isUStore) {
+                HnswPageGetOpaque(page)->pageType = HNSW_USTORE_PAGE_TYPE;
+            }
+        }
+
+        /* Calculate offsets: neighbors follow the element on the same page when both fit, else first slot of the next block */
+        element->blkno = BufferGetBlockNumber(buf);
+        element->offno = OffsetNumberNext(PageGetMaxOffsetNumber(page));
+        if (combinedSize <= maxSize) {
+            element->neighborPage = element->blkno;
+            element->neighborOffno = OffsetNumberNext(element->offno);
+        } else {
+            element->neighborPage = element->blkno + 1;
+            element->neighborOffno = FirstOffsetNumber;
+        }
+
+        ItemPointerSet(&etup->neighbortid, element->neighborPage, element->neighborOffno);
+
+        if (buildstate->isUStore) {
+            ((PageHeader)page)->pd_upper -= sizeof(IndexTransInfo); /* carve an IndexTransInfo out of the page's upper area */
+            idxXid = (IndexTransInfo *)(((char *)page) + ((PageHeader)page)->pd_upper);
+            idxXid->xmin = FrozenTransactionId;
+            idxXid->xmax = InvalidTransactionId;
+        }
+
+        /* Add element; it must land at the offset computed above */
+        if (PageAddItem(page, (Item)etup, etupSize, InvalidOffsetNumber, false, false) !=
element->offno) {
+            elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index));
+        }
+
+        /* Add new page if needed */
+        if (PageGetFreeSpace(page) < ntupSize) {
+            HnswBuildAppendPage(index, &buf, &page, forkNum);
+            if (buildstate->isUStore) {
+                HnswPageGetOpaque(page)->pageType = HNSW_USTORE_PAGE_TYPE;
+            }
+        }
+        /* Add placeholder for neighbors (ntup is still zero-filled here; WriteNeighborTuples overwrites it later) */
+        if (PageAddItem(page, (Item)ntup, ntupSize, InvalidOffsetNumber, false, false) != element->neighborOffno) {
+            elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index));
+        }
+    }
+
+    insertPage = BufferGetBlockNumber(buf); /* the last page written becomes the insert page recorded in the metapage */
+
+    /* Commit */
+    MarkBufferDirty(buf);
+    UnlockReleaseBuffer(buf);
+
+    entryPoint = (HnswElement)HnswPtrAccess(base, buildstate->graph->entryPoint);
+    HnswUpdateMetaPage(index, HNSW_UPDATE_ENTRY_ALWAYS, entryPoint, insertPage, forkNum, true);
+
+    pfree(etup);
+    pfree(ntup);
+}
+
+/*
+ * Write neighbor tuples: overwrite each element's placeholder at (neighborPage, neighborOffno) with its neighbor list
+ */
+static void WriteNeighborTuples(HnswBuildState *buildstate)
+{
+    Relation index = buildstate->index;
+    ForkNumber forkNum = buildstate->forkNum;
+    int m = buildstate->m;
+    HnswElementPtr iter = buildstate->graph->head;
+    char *base = buildstate->hnswarea;
+    HnswNeighborTuple ntup;
+
+    /* Allocate once */
+    ntup = (HnswNeighborTuple)palloc0(HNSW_TUPLE_ALLOC_SIZE);
+
+    while (!HnswPtrIsNull(base, iter)) {
+        HnswElement element = (HnswElement)HnswPtrAccess(base, iter);
+        Buffer buf;
+        Page page;
+        Size ntupSize = HNSW_NEIGHBOR_TUPLE_SIZE(element->level, m);
+
+        /* Update iterator */
+        iter = element->next;
+
+        /* Zero memory for each element */
+        MemSet(ntup, 0, HNSW_TUPLE_ALLOC_SIZE);
+
+        /* Can take a while, so ensure we can interrupt */
+        /* Needs to be called when no buffer locks are held */
+        CHECK_FOR_INTERRUPTS();
+
+        buf = ReadBufferExtended(index, forkNum, element->neighborPage, RBM_NORMAL, NULL);
+        LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+        page = BufferGetPage(buf);
+
+        HnswSetNeighborTuple(base, ntup, element, m);
+
+        if
(!page_index_tuple_overwrite(page, element->neighborOffno, (Item)ntup, ntupSize))
+            elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index));
+
+        /* Commit */
+        MarkBufferDirty(buf);
+        UnlockReleaseBuffer(buf);
+    }
+
+    pfree(ntup);
+}
+
+/*
+ * Flush pages: write the in-memory graph to disk, mark the graph flushed, and reset graphCtx
+ */
+static void FlushPages(HnswBuildState *buildstate)
+{
+#ifdef HNSW_MEMORY
+    elog(INFO, "memory: %zu MB", buildstate->graph->memoryUsed / (1024 * 1024));
+#endif
+
+    CreateMetaPage(buildstate);
+    CreateGraphPages(buildstate);
+    WriteNeighborTuples(buildstate);
+
+    buildstate->graph->flushed = true;
+    MemoryContextReset(buildstate->graphCtx);
+}
+
+/*
+ * Add a heap TID to an existing element; returns false when dup already holds HNSW_HEAPTIDS tids
+ */
+static bool AddDuplicateInMemory(HnswElement element, HnswElement dup)
+{
+    LWLockAcquire(&dup->lock, LW_EXCLUSIVE);
+
+    if (dup->heaptidsLength == HNSW_HEAPTIDS) {
+        LWLockRelease(&dup->lock);
+        return false;
+    }
+
+    HnswAddHeapTid(dup, &element->heaptids[0]);
+
+    LWLockRelease(&dup->lock);
+
+    return true;
+}
+
+/*
+ * Find duplicate element among the layer-0 neighbors; returns true if the element's heap TID was merged into one
+ */
+static bool FindDuplicateInMemory(char *base, HnswElement element)
+{
+    HnswNeighborArray *neighbors = HnswGetNeighbors(base, element, 0);
+    Datum value = HnswGetValue(base, element);
+
+    for (int i = 0; i < neighbors->length; i++) {
+        HnswCandidate *neighbor = &neighbors->items[i];
+        HnswElement neighborElement = (HnswElement)HnswPtrAccess(base, neighbor->element);
+        Datum neighborValue = HnswGetValue(base, neighborElement);
+        /* Exit early since ordered by distance */
+        if (!datumIsEqual(value, neighborValue, false, -1))
+            return false;
+
+        /* Check for space */
+        if (AddDuplicateInMemory(element, neighborElement))
+            return true;
+    }
+
+    return false;
+}
+
+/*
+ * Add to element list (pushes the element at the head of graph->head under the graph spinlock)
+ */
+static void AddElementInMemory(char *base, HnswGraph *graph, HnswElement element)
+{
+    SpinLockAcquire(&graph->lock);
+    element->next = graph->head;
+    HnswPtrStore(base, graph->head, element);
+    SpinLockRelease(&graph->lock);
+}
+
+/*
+ * Update neighbors
+
*/
+static void UpdateNeighborsInMemory(char *base, FmgrInfo *procinfo, Oid collation, HnswElement e, int m)
+{
+    for (int lc = e->level; lc >= 0; lc--) { /* walk every layer of e, top layer first */
+        int lm = HnswGetLayerM(m, lc);
+        HnswNeighborArray *neighbors = HnswGetNeighbors(base, e, lc);
+
+        for (int i = 0; i < neighbors->length; i++) {
+            HnswCandidate *hc = &neighbors->items[i];
+            HnswElement neighborElement = (HnswElement)HnswPtrAccess(base, hc->element);
+
+            /* Keep scan-build happy on Mac x86-64 */
+            Assert(neighborElement);
+
+            /* Use element for lock instead of hc since hc can be replaced */
+            LWLockAcquire(&neighborElement->lock, LW_EXCLUSIVE);
+            HnswUpdateConnection(base, e, hc, lm, lc, NULL, NULL, procinfo, collation);
+            LWLockRelease(&neighborElement->lock);
+        }
+    }
+}
+
+/*
+ * Update graph in memory (efConstruction is accepted but not used in this function)
+ */
+static void UpdateGraphInMemory(FmgrInfo *procinfo, Oid collation, HnswElement element, int m, int efConstruction,
+                                HnswElement entryPoint, HnswBuildState *buildstate)
+{
+    HnswGraph *graph = buildstate->graph;
+    char *base = buildstate->hnswarea;
+
+    /* Look for duplicate; a merged duplicate is not added to the graph */
+    if (FindDuplicateInMemory(base, element)) {
+        return;
+    }
+
+    /* Add element */
+    AddElementInMemory(base, graph, element);
+
+    /* Update neighbors */
+    UpdateNeighborsInMemory(base, procinfo, collation, element, m);
+
+    /* Update entry point if needed (already have lock) */
+    if (entryPoint == NULL || element->level > entryPoint->level) {
+        HnswPtrStore(base, graph->entryPoint, element);
+    }
+}
+
+/*
+ * Insert tuple in memory: pick the entry point under entryLock, find neighbors, then link the element into the graph
+ */
+static void InsertTupleInMemory(HnswBuildState *buildstate, HnswElement element)
+{
+    FmgrInfo *procinfo = buildstate->procinfo;
+    Oid collation = buildstate->collation;
+    HnswGraph *graph = buildstate->graph;
+    HnswElement entryPoint;
+    LWLock *entryLock = &graph->entryLock;
+    LWLock *entryWaitLock = &graph->entryWaitLock;
+    int efConstruction = buildstate->efConstruction;
+    int m = buildstate->m;
+    char *base = buildstate->hnswarea;
+
+    /* Wait if another process needs exclusive
lock on entry lock */
+    LWLockAcquire(entryWaitLock, LW_EXCLUSIVE);
+    LWLockRelease(entryWaitLock);
+
+    /* Get entry point */
+    LWLockAcquire(entryLock, LW_SHARED);
+    entryPoint = (HnswElement)HnswPtrAccess(base, graph->entryPoint);
+    /* Prevent concurrent inserts when likely updating entry point */
+    if (entryPoint == NULL || element->level > entryPoint->level) {
+        /* Release shared lock */
+        LWLockRelease(entryLock);
+
+        /* Tell other processes to wait and get exclusive lock */
+        LWLockAcquire(entryWaitLock, LW_EXCLUSIVE);
+        LWLockAcquire(entryLock, LW_EXCLUSIVE);
+        LWLockRelease(entryWaitLock);
+
+        /* Get latest entry point after lock is acquired */
+        entryPoint = (HnswElement)HnswPtrAccess(base, graph->entryPoint);
+    }
+
+    /* Find neighbors for element */
+    HnswFindElementNeighbors(base, element, entryPoint, NULL, procinfo, collation, m, efConstruction, false);
+
+    /* Update graph in memory */
+    UpdateGraphInMemory(procinfo, collation, element, m, efConstruction, entryPoint, buildstate);
+
+    /* Release entry lock (held shared or exclusive, depending on the branch above) */
+    LWLockRelease(entryLock);
+}
+
+/*
+ * Insert tuple: in memory while the graph fits, otherwise on disk; returns false if the value fails the norm check
+ */
+static bool InsertTuple(Relation index, Datum *values, const bool *isnull, ItemPointer heaptid,
+                        HnswBuildState *buildstate)
+{
+    const HnswTypeInfo *typeInfo = buildstate->typeInfo;
+    HnswGraph *graph = buildstate->graph;
+    HnswElement element;
+    HnswAllocator *allocator = &buildstate->allocator;
+    Size valueSize;
+    Pointer valuePtr;
+    LWLock *flushLock = &graph->flushLock;
+    char *base = buildstate->hnswarea;
+
+    /* Detoast once for all calls */
+    Datum value = PointerGetDatum(PG_DETOAST_DATUM(values[0]));
+
+    /* Check value */
+    if (typeInfo->checkValue != NULL) {
+        typeInfo->checkValue(DatumGetPointer(value));
+    }
+
+    /* Normalize if needed */
+    if (buildstate->normprocinfo != NULL) {
+        if (!HnswCheckNorm(buildstate->normprocinfo, buildstate->collation, value)) {
+            return false;
+        }
+
+        value = HnswNormValue(typeInfo, buildstate->collation, value);
+    }
+
+    /* Get datum size */
+
valueSize = VARSIZE_ANY(DatumGetPointer(value));
+
+    /* Ensure graph not flushed when inserting */
+    LWLockAcquire(flushLock, LW_SHARED);
+
+    /* Are we in the on-disk phase? */
+    if (graph->flushed) {
+        LWLockRelease(flushLock);
+
+        return HnswInsertTupleOnDisk(index, value, values, isnull, heaptid, true);
+    }
+
+    /*
+     * In a parallel build, the HnswElement is allocated from the shared
+     * memory area, so we need to coordinate with other processes.
+     */
+    LWLockAcquire(&graph->allocatorLock, LW_EXCLUSIVE);
+
+    /*
+     * Check that we have enough memory available for the new element now that
+     * we have the allocator lock, and flush pages if needed.
+     */
+    if (graph->memoryUsed >= graph->memoryTotal) {
+        LWLockRelease(&graph->allocatorLock);
+
+        LWLockRelease(flushLock); /* trade the shared flush lock for an exclusive one */
+        LWLockAcquire(flushLock, LW_EXCLUSIVE);
+
+        if (!graph->flushed) { /* re-check: another inserter may have flushed while no lock was held */
+            ereport(NOTICE, (errmsg("hnsw graph no longer fits into maintenance_work_mem after " INT64_FORMAT " tuples",
+                                    (int64)graph->indtuples),
+                             errdetail("Building will take significantly more time."),
+                             errhint("Increase maintenance_work_mem to speed up builds.")));
+
+            FlushPages(buildstate);
+        }
+
+        LWLockRelease(flushLock);
+
+        return HnswInsertTupleOnDisk(index, value, values, isnull, heaptid, true);
+    }
+
+    /* Ok, we can proceed to allocate the element */
+    element = HnswInitElement(base, heaptid, buildstate->m, buildstate->ml, buildstate->maxLevel, allocator);
+    valuePtr = (Pointer)HnswAlloc(allocator, valueSize);
+
+    /*
+     * We have now allocated the space needed for the element, so we don't
+     * need the allocator lock anymore. Release it and initialize the rest of
+     * the element.
+     */
+    LWLockRelease(&graph->allocatorLock);
+
+    /* Copy the datum */
+    errno_t rc = memcpy_s(valuePtr, valueSize, DatumGetPointer(value), valueSize);
+    securec_check(rc, "\0", "\0");
+    HnswPtrStore(base, element->value, valuePtr);
+
+    /* Create a lock for the element */
+    LWLockInitialize(&element->lock, hnsw_lock_tranche_id);
+
+    /* Insert tuple */
+    InsertTupleInMemory(buildstate, element);
+
+    /* Release flush lock */
+    LWLockRelease(flushLock);
+
+    return true;
+}
+
+/*
+ * Callback for table_index_build_scan; skips NULL values and counts each successfully inserted tuple
+ */
+static void BuildCallback(Relation index, CALLBACK_ITEM_POINTER, Datum *values, const bool *isnull, bool tupleIsAlive,
+                          void *state)
+{
+    HnswBuildState *buildstate = (HnswBuildState *)state;
+    HnswGraph *graph = buildstate->graph;
+    MemoryContext oldCtx;
+
+#if PG_VERSION_NUM < 130000
+    ItemPointer tid = &hup->t_self;
+#endif
+
+    /* Skip nulls */
+    if (isnull[0]) {
+        return;
+    }
+
+    /* Use memory context */
+    oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx);
+
+    /* Insert tuple */
+    if (InsertTuple(index, values, isnull, tid, buildstate)) {
+        /* Update progress; indtuples is incremented under the graph spinlock */
+        SpinLockAcquire(&graph->lock);
+        UpdateProgress(PROGRESS_CREATEIDX_TUPLES_DONE, ++graph->indtuples);
+        SpinLockRelease(&graph->lock);
+    }
+
+    /* Reset memory context */
+    MemoryContextSwitchTo(oldCtx);
+    MemoryContextReset(buildstate->tmpCtx);
+}
+
+/*
+ * Initialize the graph: empty element list and entry point, zeroed counters, and its locks
+ */
+static void InitGraph(HnswGraph *graph, char *base, long memoryTotal)
+{
+    HnswPtrStore(base, graph->head, (HnswElement)NULL);
+    HnswPtrStore(base, graph->entryPoint, (HnswElement)NULL);
+    graph->memoryUsed = 0;
+    graph->memoryTotal = memoryTotal;
+    graph->flushed = false;
+    graph->indtuples = 0;
+    SpinLockInit(&graph->lock);
+    LWLockInitialize(&graph->entryLock, hnsw_lock_tranche_id);
+    LWLockInitialize(&graph->entryWaitLock, hnsw_lock_tranche_id);
+    LWLockInitialize(&graph->allocatorLock, hnsw_lock_tranche_id);
+    LWLockInitialize(&graph->flushLock, hnsw_lock_tranche_id);
+}
+
+/*
+ * Initialize an
allocator
+ */
+static void InitAllocator(HnswAllocator *allocator, void *(*alloc)(Size size, void *state), void *state)
+{
+    allocator->alloc = alloc;
+    allocator->state = state;
+}
+
+/*
+ * Memory context allocator: allocates from graphCtx and keeps graphData.memoryUsed up to date
+ */
+static void *HnswMemoryContextAlloc(Size size, void *state)
+{
+    HnswBuildState *buildstate = (HnswBuildState *)state;
+    void *chunk = MemoryContextAlloc(buildstate->graphCtx, size);
+
+#if PG_VERSION_NUM >= 130000
+    buildstate->graphData.memoryUsed = MemoryContextMemAllocated(buildstate->graphCtx, false);
+#else
+    buildstate->graphData.memoryUsed += MAXALIGN(size);
+#endif
+
+    return chunk;
+}
+
+/*
+ * Shared memory allocator: hands out the next MAXALIGNed chunk of hnswarea; no bounds check is done here
+ */
+static void *HnswSharedMemoryAlloc(Size size, void *state)
+{
+    HnswBuildState *buildstate = (HnswBuildState *)state;
+    void *chunk = buildstate->hnswarea + buildstate->graph->memoryUsed;
+
+    buildstate->graph->memoryUsed += MAXALIGN(size);
+    return chunk;
+}
+
+/*
+ * Initialize the build state; raises an error for unsupported column types, dimensions, or option combinations
+ */
+static void InitBuildState(HnswBuildState *buildstate, Relation heap, Relation index, IndexInfo *indexInfo,
+                           ForkNumber forkNum)
+{
+    buildstate->heap = heap;
+    buildstate->index = index;
+    buildstate->indexInfo = indexInfo;
+    buildstate->forkNum = forkNum;
+    buildstate->typeInfo = HnswGetTypeInfo(index);
+
+    buildstate->m = HnswGetM(index);
+    buildstate->efConstruction = HnswGetEfConstruction(index);
+    buildstate->dimensions = TupleDescAttr(index->rd_att, 0)->atttypmod; /* dimensions come from the first column's typmod */
+
+    /* Disallow varbit since require fixed dimensions */
+    if (TupleDescAttr(index->rd_att, 0)->atttypid == VARBITOID) {
+        elog(ERROR, "type not supported for hnsw index");
+    }
+
+    /* Require column to have dimensions to be indexed */
+    if (buildstate->dimensions < 0) {
+        elog(ERROR, "column does not have dimensions");
+    }
+
+    if (buildstate->dimensions > buildstate->typeInfo->maxDimensions) {
+        elog(ERROR, "column cannot have more than %d dimensions for hnsw index", buildstate->typeInfo->maxDimensions);
+    }
+
+    if (buildstate->efConstruction < 2 *
buildstate->m) {
+        elog(ERROR, "ef_construction must be greater than or equal to 2 * m");
+    }
+
+    buildstate->reltuples = 0;
+    buildstate->indtuples = 0;
+
+    /* Get support functions */
+    buildstate->procinfo = index_getprocinfo(index, 1, HNSW_DISTANCE_PROC);
+    buildstate->normprocinfo = HnswOptionalProcInfo(index, HNSW_NORM_PROC);
+    buildstate->collation = index->rd_indcollation[0];
+
+    InitGraph(&buildstate->graphData, NULL, u_sess->attr.attr_memory.maintenance_work_mem * 1024L); /* presumably kB -> bytes - confirm unit */
+    buildstate->graph = &buildstate->graphData;
+    buildstate->ml = HnswGetMl(buildstate->m);
+    buildstate->maxLevel = HnswGetMaxLevel(buildstate->m);
+
+    buildstate->graphCtx =
+        AllocSetContextCreate(CurrentMemoryContext, "Hnsw build graph context", ALLOCSET_DEFAULT_SIZES);
+    buildstate->tmpCtx =
+        AllocSetContextCreate(CurrentMemoryContext, "Hnsw build temporary context", ALLOCSET_DEFAULT_SIZES);
+
+    InitAllocator(&buildstate->allocator, &HnswMemoryContextAlloc, buildstate); /* default: private-memory allocator */
+
+    buildstate->hnswleader = NULL;
+    buildstate->hnswshared = NULL;
+    buildstate->hnswarea = NULL;
+
+    buildstate->enablePQ = HnswGetEnablePQ(index);
+    buildstate->pqM = HnswGetPqM(index);
+    buildstate->pqKsub = HnswGetPqKsub(index);
+    buildstate->pqTable = NULL;
+    buildstate->centerTable = NULL;
+    buildstate->pqcodeSize = 0;
+
+    buildstate->isUStore = buildstate->heap ?
RelationIsUstoreFormat(buildstate->heap) : false;
+}
+
+/*
+ * Free resources (deletes the graph and temporary memory contexts)
+ */
+static void FreeBuildState(HnswBuildState *buildstate)
+{
+    MemoryContextDelete(buildstate->graphCtx);
+    MemoryContextDelete(buildstate->tmpCtx);
+}
+
+static double ParallelHeapScan(HnswBuildState *buildstate, int *nparticipanttuplesorts) /* leader side: returns the shared reltuples total */
+{
+    HnswShared *hnswshared = buildstate->hnswleader->hnswshared;
+    double reltuples;
+
+    BgworkerListWaitFinish(&buildstate->hnswleader->nparticipanttuplesorts);
+    pg_memory_barrier();
+
+    *nparticipanttuplesorts = buildstate->hnswleader->nparticipanttuplesorts;
+    buildstate->graph = &hnswshared->graphData; /* from here on the leader uses the shared graph and area */
+    buildstate->hnswarea = hnswshared->hnswarea;
+    reltuples = hnswshared->reltuples;
+
+    return reltuples;
+}
+
+/*
+ * Perform a worker's portion of a parallel insert
+ */
+static void HnswParallelScanAndInsert(Relation heapRel, Relation indexRel, HnswShared *hnswshared, char *hnswarea)
+{
+    HnswBuildState buildstate;
+    TableScanDesc scan;
+    double reltuples;
+    IndexInfo *indexInfo;
+
+    /* Join parallel scan; the private graph and allocator set by InitBuildState are replaced with the shared ones */
+    indexInfo = BuildIndexInfo(indexRel);
+    InitBuildState(&buildstate, heapRel, indexRel, indexInfo, MAIN_FORKNUM);
+    buildstate.graph = &hnswshared->graphData;
+    buildstate.hnswarea = hnswarea;
+    InitAllocator(&buildstate.allocator, &HnswSharedMemoryAlloc, &buildstate);
+    scan = tableam_scan_begin_parallel(heapRel, &hnswshared->heapdesc);
+    reltuples = tableam_index_build_scan(heapRel, indexRel, indexInfo, true, BuildCallback, (void *)&buildstate, scan);
+
+    /* Record statistics */
+    SpinLockAcquire(&hnswshared->mutex);
+    hnswshared->nparticipantsdone++;
+    hnswshared->reltuples += reltuples;
+    SpinLockRelease(&hnswshared->mutex);
+
+    FreeBuildState(&buildstate);
+}
+
+/*
+ * Perform work within a launched parallel process
+ */
+void HnswParallelBuildMain(const BgWorkerContext *bwc)
+{
+    HnswShared *hnswshared;
+    char *hnswarea;
+    Relation heapRel;
+    Relation indexRel;
+
+    /* Look up shared state */
+    hnswshared = (HnswShared
*)bwc->bgshared; + + /* Open relations within worker */ + heapRel = heap_open(hnswshared->heaprelid, NoLock); + indexRel = index_open(hnswshared->indexrelid, NoLock); + + hnswarea = hnswshared->hnswarea; + + /* Perform inserts */ + HnswParallelScanAndInsert(heapRel, indexRel, hnswshared, hnswarea); + + /* Close relations within worker */ + index_close(indexRel, NoLock); + heap_close(heapRel, NoLock); +} + +/* + * End parallel build + */ +static void HnswEndParallel() +{ + BgworkerListSyncQuit(); +} + +static HnswShared *HnswParallelInitshared(HnswBuildState *buildstate) +{ + HnswShared *hnswshared; + char *hnswarea; + Size esthnswarea; + Size estother; + + /* Store shared build state, for which we reserved space */ + hnswshared = + (HnswShared *)MemoryContextAllocZero(INSTANCE_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_STORAGE), sizeof(HnswShared)); + + /* Initialize immutable state */ + hnswshared->heaprelid = RelationGetRelid(buildstate->heap); + hnswshared->indexrelid = RelationGetRelid(buildstate->index); + SpinLockInit(&hnswshared->mutex); + /* Initialize mutable state */ + hnswshared->nparticipantsdone = 0; + hnswshared->reltuples = 0; + HeapParallelscanInitialize(&hnswshared->heapdesc, buildstate->heap); + + /* Leave space for other objects in shared memory */ + /* Docker has a default limit of 64 MB for shm_size */ + /* which happens to be the default value of maintenance_work_mem */ + esthnswarea = u_sess->attr.attr_memory.maintenance_work_mem * 1024L; + estother = 3 * 1024 * 1024; + if (esthnswarea > estother) + esthnswarea -= estother; + + hnswarea = (char *)palloc0_huge(INSTANCE_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_STORAGE), esthnswarea); + /* Report less than allocated so never fails */ + InitGraph(&hnswshared->graphData, hnswarea, esthnswarea - 1024 * 1024); + + /* + * Avoid base address for relptr for Postgres < 14.5 + * https://github.com/postgres/postgres/commit/7201cd18627afc64850537806da7f22150d1a83b + */ +#if PG_VERSION_NUM < 140005 + 
hnswshared->graphData.memoryUsed += MAXALIGN(1); +#endif + + hnswshared->hnswarea = hnswarea; + return hnswshared; +} + +/* + * Begin parallel build + */ +static void HnswBeginParallel(HnswBuildState *buildstate, int request) +{ + HnswShared *hnswshared; + HnswLeader *hnswleader = (HnswLeader *)palloc0(sizeof(HnswLeader)); + + Assert(request > 0); + + hnswshared = HnswParallelInitshared(buildstate); + /* Launch workers, saving status for leader/caller */ + hnswleader->nparticipanttuplesorts = LaunchBackgroundWorkers(request, hnswshared, HnswParallelBuildMain, NULL); + hnswleader->hnswshared = hnswshared; + + /* If no workers were successfully launched, back out (do serial build) */ + if (hnswleader->nparticipanttuplesorts == 0) { + HnswEndParallel(); + return; + } + + /* Log participants */ + ereport(DEBUG1, (errmsg("using %d parallel workers", hnswleader->nparticipanttuplesorts))); + + /* Save leader state now that it's clear build will be parallel */ + buildstate->hnswleader = hnswleader; +} + +/* + * Build graph + */ +static void BuildGraph(HnswBuildState *buildstate, ForkNumber forkNum) +{ + int parallel_workers = 0; + + /* Calculate parallel workers */ + if (buildstate->heap != NULL) { + parallel_workers = PlanCreateIndexWorkers(buildstate->heap, buildstate->indexInfo); + } + + /* Attempt to launch parallel worker scan when required */ + if (parallel_workers > 0) { + HnswBeginParallel(buildstate, parallel_workers); + } + + /* Add tuples to graph */ + if (buildstate->heap != NULL) { + if (!buildstate->hnswleader) { + serial_build: + buildstate->reltuples = tableam_index_build_scan(buildstate->heap, buildstate->index, buildstate->indexInfo, + true, BuildCallback, (void *)buildstate, NULL); + } else { + int nruns; + buildstate->reltuples = ParallelHeapScan(buildstate, &nruns); + if (nruns == 0) { + /* failed to startup any bgworker, retry to do serial build */ + goto serial_build; + } + } + + buildstate->indtuples = buildstate->graph->indtuples; + } + + /* Flush 
pages */ + if (!buildstate->graph->flushed) { + FlushPages(buildstate); + } + + /* End parallel build */ + if (buildstate->hnswleader) { + HnswEndParallel(); + } +} + +/* + * Build the index + */ +static void BuildIndex(Relation heap, Relation index, IndexInfo *indexInfo, HnswBuildState *buildstate, + ForkNumber forkNum) +{ +#ifdef HNSW_MEMORY + SeedRandom(42); +#endif + + InitBuildState(buildstate, heap, index, indexInfo, forkNum); + + BuildGraph(buildstate, forkNum); + + if (RelationNeedsWAL(index) || forkNum == INIT_FORKNUM) + LogNewpageRange(index, forkNum, 0, RelationGetNumberOfBlocksInFork(index, forkNum), true); + + FreeBuildState(buildstate); +} + +/* + * Build the index for a logged table + */ +IndexBuildResult *hnswbuild_internal(Relation heap, Relation index, IndexInfo *indexInfo) +{ + IndexBuildResult *result; + HnswBuildState buildstate; + + BuildIndex(heap, index, indexInfo, &buildstate, MAIN_FORKNUM); + + result = (IndexBuildResult *)palloc(sizeof(IndexBuildResult)); + result->heap_tuples = buildstate.reltuples; + result->index_tuples = buildstate.indtuples; + + return result; +} + +/* + * Build the index for an unlogged table + */ +void hnswbuildempty_internal(Relation index) +{ + IndexInfo *indexInfo = BuildIndexInfo(index); + HnswBuildState buildstate; + + BuildIndex(NULL, index, indexInfo, &buildstate, INIT_FORKNUM); +} diff --git a/src/gausskernel/storage/access/datavec/hnswdelete.cpp b/src/gausskernel/storage/access/datavec/hnswdelete.cpp new file mode 100644 index 0000000000..ff2a24e11f --- /dev/null +++ b/src/gausskernel/storage/access/datavec/hnswdelete.cpp @@ -0,0 +1,201 @@ +#include "access/ubtree.h" +#include "access/datavec/hnsw.h" +#include "access/datavec/vecindex.h" + +bool HnswIsTIDEquals(ItemPointer p1, ItemPointer p2) +{ + int id; + bool equal = true; + for (id = 0; id < HNSW_HEAPTIDS; id++) { + if (ItemPointerIsValid(&p1[id]) && ItemPointerIsValid(&p2[id])) { + equal = ItemPointerEquals(&p1[id], &p2[id]); + } else if 
(!ItemPointerIsValid(&p1[id]) && !ItemPointerIsValid(&p2[id])) { + continue; + } else { + equal = false; + } + + if (!equal) { + break; + } + } + + return equal; +} + +bool HnswIsETUPEqual(HnswElementTuple etup1, HnswElementTuple etup2) +{ + if (etup1 == NULL || etup2 == NULL) { + return false; + } + Size len1 = MAXALIGN(VARSIZE_ANY(&etup1->data)); + Size len2 = MAXALIGN(VARSIZE_ANY(&etup2->data)); + if (len1 == 0 || len2 == 0 || len1 != len2) { + return false; + } + return memcmp(&etup1->data, &etup2->data, len1) == 0; +} + +OffsetNumber HnswFindDeleteLocation(Relation index, Buffer buf, HnswElementTuple etup) +{ + OffsetNumber off; + OffsetNumber maxOff; + Page page; + TransactionId xmin; + TransactionId xmax; + + page = BufferGetPage(buf); + maxOff = PageGetMaxOffsetNumber(page); + + if (RelationIsGlobalIndex(index)) { + elog(ERROR, "the GLOBAL partitioned index is not supported.\n"); + } + + for (off = FirstOffsetNumber; off < maxOff; off++) { + ItemId iid; + HnswElementTuple tup; + + iid = PageGetItemId(page, off); + if (!ItemIdIsDead(iid)) { + tup = (HnswElementTuple)PageGetItem(page, iid); + if (!HnswIsTIDEquals(etup->heaptids, tup->heaptids)) { + continue; + } + + if (!HnswIsETUPEqual(etup, tup)) { + continue; + } + + bool xminCommitted = false; + bool xmaxCommitted = false; + bool isDead = VecItupGetXminXmax(page, off, InvalidTransactionId, &xmin, &xmax, &xminCommitted, + &xmaxCommitted, RelationGetNamespace(index) == PG_TOAST_NAMESPACE); + if (!isDead && !TransactionIdIsValid(xmax)) { + return off; + } + } + } + return InvalidOffsetNumber; +} + +void HnswDeleteOnPage(Relation index, Buffer buf, OffsetNumber offset) +{ + ItemId iid; + IndexTransInfo *idxXid; + Page page; + HnswElementTuple etup; + + page = BufferGetPage(buf); + iid = PageGetItemId(page, offset); + etup = (HnswElementTuple)PageGetItem(page, iid); + idxXid = (IndexTransInfo *)VecIndexTupleGetXid(etup); + + idxXid->xmax = GetCurrentTransactionId(); + + MarkBufferDirty(buf); +} + +bool 
IsHnswEntryPoint(Relation index, BlockNumber blkno, OffsetNumber offno) +{ + Buffer buf; + Page page; + HnswMetaPage metap; + bool res = false; + + buf = ReadBuffer(index, HNSW_METAPAGE_BLKNO); + LockBuffer(buf, BUFFER_LOCK_SHARE); + page = BufferGetPage(buf); + metap = HnswPageGetMeta(page); + if (blkno == metap->entryBlkno && offno == metap->entryOffno) { + res = true; + } + UnlockReleaseBuffer(buf); + return res; +} + +bool HnswDeleteIndex(Relation index, HnswElementTuple etup) +{ + bool found = false; + BlockNumber blkno; + Buffer buf; + char *base = NULL; + Datum q; + List *ep; + List *w; + ListCell *cell; + int m; + HnswElement entryPoint; + FmgrInfo *procinfo; + Oid collation; + OffsetNumber offset; + Page page; + + blkno = InvalidBlockNumber; + procinfo = index_getprocinfo(index, 1, 1); + collation = index->rd_indcollation[0]; + q = (Datum)(&etup->data); + HnswGetMetaPageInfo(index, &m, &entryPoint); + ep = list_make1(HnswEntryCandidate(base, entryPoint, q, index, procinfo, collation, false, NULL)); + + for (int lc = entryPoint->level; lc >= 0; lc--) { + w = HnswSearchLayer(base, q, ep, 1, lc, index, procinfo, collation, m, false, NULL); + ep = w; + } + + foreach (cell, ep) { + HnswCandidate *hc = (HnswCandidate *)lfirst(cell); + HnswElement element = (HnswElement)HnswPtrAccess(base, hc->element); + blkno = element->blkno; + } + + while (BlockNumberIsValid(blkno)) { + buf = ReadBuffer(index, blkno); + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + offset = HnswFindDeleteLocation(index, buf, etup); + if (offset != InvalidOffsetNumber && !IsHnswEntryPoint(index, blkno, offset)) { + HnswDeleteOnPage(index, buf, offset); + UnlockReleaseBuffer(buf); + found = true; + break; + } + + page = BufferGetPage(buf); + blkno = HnswPageGetOpaque(page)->nextblkno; + UnlockReleaseBuffer(buf); + } + return found; +} + +HnswElementTuple IndexFormHnswElementTuple(TupleDesc tupleDesc, Datum *values, const bool *isnull, + ItemPointer heapTCtid) +{ + Datum value; + HnswElementTuple 
etup; + Size etupSize; + errno_t rc = EOK; + + value = PointerGetDatum(PG_DETOAST_DATUM(values[0])); + + etup = (HnswElementTuple)palloc0(HNSW_TUPLE_ALLOC_SIZE); + etupSize = HNSW_ELEMENT_TUPLE_SIZE(VARSIZE_ANY(DatumGetPointer(value))); + + etup->heaptids[0] = *heapTCtid; + for (int i = 1; i < HNSW_HEAPTIDS; i++) { + ItemPointerSetInvalid(&etup->heaptids[i]); + } + + rc = memcpy_s(&etup->data, VARSIZE_ANY(DatumGetPointer(value)), DatumGetPointer(value), VARSIZE_ANY(DatumGetPointer(value))); + securec_check(rc, "\0", "\0"); + return etup; +} + +bool hnswdelete_internal(Relation index, Datum *values, const bool *isnull, ItemPointer heapTCtid, bool isRollbackIndex) +{ + bool found; + HnswElementTuple etup; + + etup = IndexFormHnswElementTuple(RelationGetDescr(index), values, isnull, heapTCtid); + found = HnswDeleteIndex(index, etup); + + return found; +} diff --git a/src/gausskernel/storage/access/datavec/hnswinsert.cpp b/src/gausskernel/storage/access/datavec/hnswinsert.cpp new file mode 100644 index 0000000000..fc8043e0cf --- /dev/null +++ b/src/gausskernel/storage/access/datavec/hnswinsert.cpp @@ -0,0 +1,652 @@ +#include "postgres.h" + +#include + +#include "access/generic_xlog.h" +#include "access/xact.h" +#include "access/datavec/hnsw.h" +#include "storage/buf/bufmgr.h" +#include "storage/lmgr.h" +#include "utils/datum.h" +#include "utils/memutils.h" + +/* + * Get the insert page + */ +static BlockNumber GetInsertPage(Relation index) +{ + Buffer buf; + Page page; + HnswMetaPage metap; + BlockNumber insertPage; + + buf = ReadBuffer(index, HNSW_METAPAGE_BLKNO); + LockBuffer(buf, BUFFER_LOCK_SHARE); + page = BufferGetPage(buf); + metap = HnswPageGetMeta(page); + + insertPage = metap->insertPage; + + UnlockReleaseBuffer(buf); + + return insertPage; +} + +/* + * Check for a free offset + */ +static bool HnswFreeOffset(Relation index, Buffer buf, Page page, HnswElement element, Size ntupSize, Buffer *nbuf, + Page *npage, OffsetNumber *freeOffno, OffsetNumber 
*freeNeighborOffno, + BlockNumber *newInsertPage) +{ + OffsetNumber offno; + OffsetNumber maxoffno = PageGetMaxOffsetNumber(page); + + for (offno = FirstOffsetNumber; offno <= maxoffno; offno = OffsetNumberNext(offno)) { + HnswElementTuple etup = (HnswElementTuple)PageGetItem(page, PageGetItemId(page, offno)); + /* Skip neighbor tuples */ + if (!HnswIsElementTuple(etup)) + continue; + + if (etup->deleted) { + BlockNumber elementPage = BufferGetBlockNumber(buf); + BlockNumber neighborPage = ItemPointerGetBlockNumber(&etup->neighbortid); + OffsetNumber neighborOffno = ItemPointerGetOffsetNumber(&etup->neighbortid); + ItemId itemid; + + if (!BlockNumberIsValid(*newInsertPage)) + *newInsertPage = elementPage; + + if (neighborPage == elementPage) { + *nbuf = buf; + *npage = page; + } else { + *nbuf = ReadBuffer(index, neighborPage); + LockBuffer(*nbuf, BUFFER_LOCK_EXCLUSIVE); + + /* Skip WAL for now */ + *npage = BufferGetPage(*nbuf); + } + + itemid = PageGetItemId(*npage, neighborOffno); + /* Check for space on neighbor tuple page */ + if (PageGetFreeSpace(*npage) + ItemIdGetLength(itemid) - sizeof(ItemIdData) >= ntupSize) { + *freeOffno = offno; + *freeNeighborOffno = neighborOffno; + return true; + } else if (*nbuf != buf) + UnlockReleaseBuffer(*nbuf); + } + } + + return false; +} + +/* + * Add a new page + */ +static void HnswInsertAppendPage(Relation index, Buffer *nbuf, Page *npage, GenericXLogState *state, Page page, + bool building) +{ + /* Add a new page */ + LockRelationForExtension(index, ExclusiveLock); + *nbuf = HnswNewBuffer(index, MAIN_FORKNUM); + UnlockRelationForExtension(index, ExclusiveLock); + + /* Init new page */ + if (building) + *npage = BufferGetPage(*nbuf); + else + *npage = GenericXLogRegisterBuffer(state, *nbuf, GENERIC_XLOG_FULL_IMAGE); + + HnswInitPage(*nbuf, *npage); + + /* Update previous buffer */ + HnswPageGetOpaque(page)->nextblkno = BufferGetBlockNumber(*nbuf); +} + +/* + * Add to element and neighbor pages + */ +static void 
AddElementOnDisk(Relation index, HnswElement e, int m, BlockNumber insertPage, + BlockNumber *updatedInsertPage, bool building) +{ + Buffer buf; + Page page; + GenericXLogState *state; + Size etupSize; + Size ntupSize; + Size combinedSize; + Size maxSize; + Size minCombinedSize; + HnswElementTuple etup; + BlockNumber currentPage = insertPage; + HnswNeighborTuple ntup; + Buffer nbuf; + Page npage; + OffsetNumber freeOffno = InvalidOffsetNumber; + OffsetNumber freeNeighborOffno = InvalidOffsetNumber; + BlockNumber newInsertPage = InvalidBlockNumber; + char *base = NULL; + bool isUStore; + IndexTransInfo *idxXid; + + /* Calculate sizes */ + etupSize = HNSW_ELEMENT_TUPLE_SIZE(VARSIZE_ANY(HnswPtrAccess(base, e->value))); + ntupSize = HNSW_NEIGHBOR_TUPLE_SIZE(e->level, m); + combinedSize = etupSize + ntupSize + sizeof(ItemIdData); + maxSize = HNSW_MAX_SIZE; + minCombinedSize = etupSize + HNSW_NEIGHBOR_TUPLE_SIZE(0, m) + sizeof(ItemIdData); + + /* Prepare element tuple */ + etup = (HnswElementTuple)palloc0(etupSize); + HnswSetElementTuple(base, etup, e); + + /* Prepare neighbor tuple */ + ntup = (HnswNeighborTuple)palloc0(ntupSize); + HnswSetNeighborTuple(base, ntup, e, m); + + /* Find a page (or two if needed) to insert the tuples */ + for (;;) { + buf = ReadBuffer(index, currentPage); + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + + if (building) { + state = NULL; + page = BufferGetPage(buf); + } else { + state = GenericXLogStart(index); + page = GenericXLogRegisterBuffer(state, buf, 0); + } + + isUStore = HnswPageGetOpaque(page)->pageType == HNSW_USTORE_PAGE_TYPE; + /* Keep track of first page where element at level 0 can fit */ + if (!BlockNumberIsValid(newInsertPage) && PageGetFreeSpace(page) >= minCombinedSize) { + newInsertPage = currentPage; + } + + /* First, try the fastest path */ + /* Space for both tuples on the current page */ + /* This can split existing tuples in rare cases */ + if (PageGetFreeSpace(page) >= combinedSize) { + nbuf = buf; + npage = page; + 
break; + } + + /* Next, try space from a deleted element */ + if (HnswFreeOffset(index, buf, page, e, ntupSize, &nbuf, &npage, &freeOffno, &freeNeighborOffno, + &newInsertPage)) { + if (nbuf != buf) { + if (building) { + npage = BufferGetPage(nbuf); + } else { + npage = GenericXLogRegisterBuffer(state, nbuf, 0); + } + } + + break; + } + + /* Finally, try space for element only if last page */ + /* Skip if both tuples can fit on the same page */ + if (combinedSize > maxSize && PageGetFreeSpace(page) >= etupSize && + !BlockNumberIsValid(HnswPageGetOpaque(page)->nextblkno)) { + HnswInsertAppendPage(index, &nbuf, &npage, state, page, building); + if (isUStore) { + HnswPageGetOpaque(npage)->pageType = HNSW_USTORE_PAGE_TYPE; + } + break; + } + + currentPage = HnswPageGetOpaque(page)->nextblkno; + if (BlockNumberIsValid(currentPage)) { + /* Move to next page */ + if (!building) + GenericXLogAbort(state); + UnlockReleaseBuffer(buf); + } else { + Buffer newbuf; + Page newpage; + + HnswInsertAppendPage(index, &newbuf, &newpage, state, page, building); + if (isUStore) { + HnswPageGetOpaque(npage)->pageType = HNSW_USTORE_PAGE_TYPE; + } + /* Commit */ + if (building) { + MarkBufferDirty(buf); + } else { + GenericXLogFinish(state); + } + + /* Unlock previous buffer */ + UnlockReleaseBuffer(buf); + + /* Prepare new buffer */ + buf = newbuf; + if (building) { + state = NULL; + page = BufferGetPage(buf); + } else { + state = GenericXLogStart(index); + page = GenericXLogRegisterBuffer(state, buf, 0); + } + + /* Create new page for neighbors if needed */ + if (PageGetFreeSpace(page) < combinedSize) { + HnswInsertAppendPage(index, &nbuf, &npage, state, page, building); + if (isUStore) { + HnswPageGetOpaque(npage)->pageType = HNSW_USTORE_PAGE_TYPE; + } + } else { + nbuf = buf; + npage = page; + } + + break; + } + } + + e->blkno = BufferGetBlockNumber(buf); + e->neighborPage = BufferGetBlockNumber(nbuf); + + /* Added tuple to new page if newInsertPage is not set */ + /* So can set to 
neighbor page instead of element page */ + if (!BlockNumberIsValid(newInsertPage)) { + newInsertPage = e->neighborPage; + } + + if (OffsetNumberIsValid(freeOffno)) { + e->offno = freeOffno; + e->neighborOffno = freeNeighborOffno; + } else { + e->offno = OffsetNumberNext(PageGetMaxOffsetNumber(page)); + if (nbuf == buf) { + e->neighborOffno = OffsetNumberNext(e->offno); + } else { + e->neighborOffno = FirstOffsetNumber; + } + } + + ItemPointerSet(&etup->neighbortid, e->neighborPage, e->neighborOffno); + + /* Add element and neighbors */ + if (OffsetNumberIsValid(freeOffno)) { + if (isUStore) { + ItemId item_id = PageGetItemId(page, e->offno); + Size aligned_size = MAXALIGN(ItemIdGetLength(item_id)); + unsigned offset = ItemIdGetOffset(item_id); + idxXid = (IndexTransInfo *)((char *)page + offset + aligned_size); + idxXid->xmin = GetCurrentTransactionId(); + idxXid->xmax = InvalidTransactionId; + } + if (!page_index_tuple_overwrite(page, e->offno, (Item)etup, etupSize)) { + elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index)); + } + + if (!page_index_tuple_overwrite(npage, e->neighborOffno, (Item)ntup, ntupSize)) { + elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index)); + } + } else { + if (isUStore) { + ((PageHeader)page)->pd_upper -= sizeof(IndexTransInfo); + idxXid = (IndexTransInfo *)(((char *)page) + ((PageHeader)page)->pd_upper); + idxXid->xmin = GetCurrentTransactionId(); + idxXid->xmax = InvalidTransactionId; + } + if (PageAddItem(page, (Item)etup, etupSize, InvalidOffsetNumber, false, false) != e->offno) { + elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index)); + } + + if (PageAddItem(npage, (Item)ntup, ntupSize, InvalidOffsetNumber, false, false) != e->neighborOffno) { + elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index)); + } + } + + /* Commit */ + if (building) { + MarkBufferDirty(buf); + if (nbuf != buf) + MarkBufferDirty(nbuf); + } else { 
+ GenericXLogFinish(state); + } + UnlockReleaseBuffer(buf); + if (nbuf != buf) + UnlockReleaseBuffer(nbuf); + + /* Update the insert page */ + if (BlockNumberIsValid(newInsertPage) && newInsertPage != insertPage) + *updatedInsertPage = newInsertPage; +} + +/* + * Check if connection already exists + */ +static bool ConnectionExists(HnswElement e, HnswNeighborTuple ntup, int startIdx, int lm) +{ + for (int i = 0; i < lm; i++) { + ItemPointer indextid = &ntup->indextids[startIdx + i]; + + if (!ItemPointerIsValid(indextid)) { + break; + } + + if (ItemPointerGetBlockNumber(indextid) == e->blkno && ItemPointerGetOffsetNumber(indextid) == e->offno) { + return true; + } + } + + return false; +} + +/* + * Update neighbors + */ +void HnswUpdateNeighborsOnDisk(Relation index, FmgrInfo *procinfo, Oid collation, HnswElement e, int m, + bool checkExisting, bool building) +{ + char *base = NULL; + + for (int lc = e->level; lc >= 0; lc--) { + int lm = HnswGetLayerM(m, lc); + HnswNeighborArray *neighbors = HnswGetNeighbors(base, e, lc); + + for (int i = 0; i < neighbors->length; i++) { + HnswCandidate *hc = &neighbors->items[i]; + Buffer buf; + Page page; + GenericXLogState *state; + HnswNeighborTuple ntup; + int idx = -1; + int startIdx; + HnswElement neighborElement = (HnswElement)HnswPtrAccess(base, hc->element); + OffsetNumber offno = neighborElement->neighborOffno; + + /* Get latest neighbors since they may have changed */ + /* Do not lock yet since selecting neighbors can take time */ + HnswLoadNeighbors(neighborElement, index, m); + + /* + * Could improve performance for vacuuming by checking neighbors + * against list of elements being deleted to find index. It's + * important to exclude already deleted elements for this since + * they can be replaced at any time. 
+ */ + + /* Select neighbors */ + HnswUpdateConnection(NULL, e, hc, lm, lc, &idx, index, procinfo, collation); + + /* New element was not selected as a neighbor */ + if (idx == -1) + continue; + + /* Register page */ + buf = ReadBuffer(index, neighborElement->neighborPage); + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + if (building) { + state = NULL; + page = BufferGetPage(buf); + } else { + state = GenericXLogStart(index); + page = GenericXLogRegisterBuffer(state, buf, 0); + } + + /* Get tuple */ + ntup = (HnswNeighborTuple)PageGetItem(page, PageGetItemId(page, offno)); + + /* Calculate index for update */ + startIdx = (neighborElement->level - lc) * m; + + /* Check for existing connection */ + if (checkExisting && ConnectionExists(e, ntup, startIdx, lm)) + idx = -1; + else if (idx == -2) { + /* Find free offset if still exists */ + /* TODO Retry updating connections if not */ + for (int j = 0; j < lm; j++) { + if (!ItemPointerIsValid(&ntup->indextids[startIdx + j])) { + idx = startIdx + j; + break; + } + } + } else + idx += startIdx; + + /* Make robust to issues */ + if (idx >= 0 && idx < ntup->count) { + ItemPointer indextid = &ntup->indextids[idx]; + + /* Update neighbor on the buffer */ + ItemPointerSet(indextid, e->blkno, e->offno); + + /* Commit */ + if (building) + MarkBufferDirty(buf); + else + GenericXLogFinish(state); + } else if (!building) + GenericXLogAbort(state); + + UnlockReleaseBuffer(buf); + } + } +} + +/* + * Add a heap TID to an existing element + */ +static bool AddDuplicateOnDisk(Relation index, HnswElement element, HnswElement dup, bool building) +{ + Buffer buf; + Page page; + GenericXLogState *state; + HnswElementTuple etup; + int i; + + /* Read page */ + buf = ReadBuffer(index, dup->blkno); + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + if (building) { + state = NULL; + page = BufferGetPage(buf); + } else { + state = GenericXLogStart(index); + page = GenericXLogRegisterBuffer(state, buf, 0); + } + + /* Find space */ + etup = 
(HnswElementTuple)PageGetItem(page, PageGetItemId(page, dup->offno)); + for (i = 0; i < HNSW_HEAPTIDS; i++) { + if (!ItemPointerIsValid(&etup->heaptids[i])) + break; + } + + /* Either being deleted or we lost our chance to another backend */ + if (i == 0 || i == HNSW_HEAPTIDS) { + if (!building) + GenericXLogAbort(state); + UnlockReleaseBuffer(buf); + return false; + } + + /* Add heap TID, modifying the tuple on the page directly */ + etup->heaptids[i] = element->heaptids[0]; + + /* Commit */ + if (building) + MarkBufferDirty(buf); + else + GenericXLogFinish(state); + UnlockReleaseBuffer(buf); + + return true; +} + +/* + * Find duplicate element + */ +static bool FindDuplicateOnDisk(Relation index, HnswElement element, bool building) +{ + char *base = NULL; + HnswNeighborArray *neighbors = HnswGetNeighbors(base, element, 0); + Datum value = HnswGetValue(base, element); + + for (int i = 0; i < neighbors->length; i++) { + HnswCandidate *neighbor = &neighbors->items[i]; + HnswElement neighborElement = (HnswElement)HnswPtrAccess(base, neighbor->element); + Datum neighborValue = HnswGetValue(base, neighborElement); + /* Exit early since ordered by distance */ + if (!datumIsEqual(value, neighborValue, false, -1)) + return false; + + if (AddDuplicateOnDisk(index, element, neighborElement, building)) + return true; + } + + return false; +} + +/* + * Update graph on disk + */ +static void UpdateGraphOnDisk(Relation index, FmgrInfo *procinfo, Oid collation, HnswElement element, int m, + int efConstruction, HnswElement entryPoint, bool building) +{ + BlockNumber newInsertPage = InvalidBlockNumber; + + /* Look for duplicate */ + if (FindDuplicateOnDisk(index, element, building)) { + return; + } + + /* Add element */ + AddElementOnDisk(index, element, m, GetInsertPage(index), &newInsertPage, building); + + /* Update insert page if needed */ + if (BlockNumberIsValid(newInsertPage)) { + HnswUpdateMetaPage(index, 0, NULL, newInsertPage, MAIN_FORKNUM, building); + } + + /* Update 
neighbors */ + HnswUpdateNeighborsOnDisk(index, procinfo, collation, element, m, false, building); + + /* Update entry point if needed */ + if (entryPoint == NULL || element->level > entryPoint->level) { + HnswUpdateMetaPage(index, HNSW_UPDATE_ENTRY_GREATER, element, InvalidBlockNumber, MAIN_FORKNUM, building); + } +} + +/* + * Insert a tuple into the index + */ +bool HnswInsertTupleOnDisk(Relation index, Datum value, Datum *values, const bool *isnull, ItemPointer heap_tid, + bool building) +{ + HnswElement entryPoint; + HnswElement element; + int m; + int efConstruction = HnswGetEfConstruction(index); + FmgrInfo *procinfo = index_getprocinfo(index, 1, HNSW_DISTANCE_PROC); + Oid collation = index->rd_indcollation[0]; + LOCKMODE lockmode = ShareLock; + char *base = NULL; + + /* + * Get a shared lock. This allows vacuum to ensure no in-flight inserts + * before repairing graph. Use a page lock so it does not interfere with + * buffer lock (or reads when vacuuming). + */ + LockPage(index, HNSW_UPDATE_LOCK, lockmode); + + /* Get m and entry point */ + HnswGetMetaPageInfo(index, &m, &entryPoint); + + /* Create an element */ + element = HnswInitElement(base, heap_tid, m, HnswGetMl(m), HnswGetMaxLevel(m), NULL); + HnswPtrStore(base, element->value, DatumGetPointer(value)); + + /* Prevent concurrent inserts when likely updating entry point */ + if (entryPoint == NULL || element->level > entryPoint->level) { + /* Release shared lock */ + UnlockPage(index, HNSW_UPDATE_LOCK, lockmode); + + /* Get exclusive lock */ + lockmode = ExclusiveLock; + LockPage(index, HNSW_UPDATE_LOCK, lockmode); + + /* Get latest entry point after lock is acquired */ + entryPoint = HnswGetEntryPoint(index); + } + + /* Find neighbors for element */ + HnswFindElementNeighbors(base, element, entryPoint, index, procinfo, collation, m, efConstruction, false); + + /* Update graph on disk */ + UpdateGraphOnDisk(index, procinfo, collation, element, m, efConstruction, entryPoint, building); + + /* Release 
lock */ + UnlockPage(index, HNSW_UPDATE_LOCK, lockmode); + + return true; +} + +/* + * Insert a tuple into the index + */ +static void HnswInsertTuple(Relation index, Datum *values, bool *isnull, ItemPointer heap_tid) +{ + Datum value; + const HnswTypeInfo *typeInfo = HnswGetTypeInfo(index); + FmgrInfo *normprocinfo; + Oid collation = index->rd_indcollation[0]; + + /* Detoast once for all calls */ + value = PointerGetDatum(PG_DETOAST_DATUM(values[0])); + + /* Check value */ + if (typeInfo->checkValue != NULL) { + typeInfo->checkValue(DatumGetPointer(value)); + } + + /* Normalize if needed */ + normprocinfo = HnswOptionalProcInfo(index, HNSW_NORM_PROC); + if (normprocinfo != NULL) { + if (!HnswCheckNorm(normprocinfo, collation, value)) { + return; + } + + value = HnswNormValue(typeInfo, collation, value); + } + + HnswInsertTupleOnDisk(index, value, values, isnull, heap_tid, false); +} + +/* + * Insert a tuple into the index + */ +bool hnswinsert_internal(Relation index, Datum *values, bool *isnull, ItemPointer heap_tid, Relation heap, + IndexUniqueCheck checkUnique) +{ + MemoryContext oldCtx; + MemoryContext insertCtx; + + /* Skip nulls */ + if (isnull[0]) { + return false; + } + + /* Create memory context */ + insertCtx = AllocSetContextCreate(CurrentMemoryContext, "Hnsw insert temporary context", ALLOCSET_DEFAULT_SIZES); + oldCtx = MemoryContextSwitchTo(insertCtx); + + /* Insert tuple */ + HnswInsertTuple(index, values, isnull, heap_tid); + + /* Delete memory context */ + MemoryContextSwitchTo(oldCtx); + MemoryContextDelete(insertCtx); + + return false; +} diff --git a/src/gausskernel/storage/access/datavec/hnswscan.cpp b/src/gausskernel/storage/access/datavec/hnswscan.cpp new file mode 100644 index 0000000000..fb919d60ae --- /dev/null +++ b/src/gausskernel/storage/access/datavec/hnswscan.cpp @@ -0,0 +1,211 @@ +#include "postgres.h" + +#include "access/relscan.h" +#include "access/datavec/hnsw.h" +#include "pgstat.h" +#include "storage/buf/bufmgr.h" +#include 
"storage/lmgr.h" +#include "utils/memutils.h" + +/* + * Algorithm 5 from paper + */ +static List *GetScanItems(IndexScanDesc scan, Datum q) +{ + HnswScanOpaque so = (HnswScanOpaque)scan->opaque; + Relation index = scan->indexRelation; + FmgrInfo *procinfo = so->procinfo; + Oid collation = so->collation; + List *ep; + List *w; + int m; + HnswElement entryPoint; + char *base = NULL; + + /* Get m and entry point */ + HnswGetMetaPageInfo(index, &m, &entryPoint); + + if (entryPoint == NULL) + return NIL; + + ep = list_make1(HnswEntryCandidate(base, entryPoint, q, index, procinfo, collation, false, scan)); + + for (int lc = entryPoint->level; lc >= 1; lc--) { + w = HnswSearchLayer(base, q, ep, 1, lc, index, procinfo, collation, m, false, NULL, scan); + ep = w; + } + + int hnsw_ef_search = u_sess->datavec_ctx.hnsw_ef_search; + return HnswSearchLayer(base, q, ep, hnsw_ef_search, 0, index, procinfo, collation, m, false, NULL, scan); +} + +/* + * Get scan value + */ +static Datum GetScanValue(IndexScanDesc scan) +{ + HnswScanOpaque so = (HnswScanOpaque)scan->opaque; + Datum value; + + if (scan->orderByData->sk_flags & SK_ISNULL) { + value = PointerGetDatum(NULL); + } else { + value = scan->orderByData->sk_argument; + + /* Value should not be compressed or toasted */ + Assert(!VARATT_IS_COMPRESSED(DatumGetPointer(value))); + Assert(!VARATT_IS_EXTENDED(DatumGetPointer(value))); + + /* Normalize if needed */ + if (so->normprocinfo != NULL) { + value = HnswNormValue(so->typeInfo, so->collation, value); + } + } + + return value; +} + +/* + * Prepare for an index scan + */ +IndexScanDesc hnswbeginscan_internal(Relation index, int nkeys, int norderbys) +{ + IndexScanDesc scan; + HnswScanOpaque so; + + scan = RelationGetIndexScan(index, nkeys, norderbys); + + so = (HnswScanOpaque)palloc(sizeof(HnswScanOpaqueData)); + so->typeInfo = HnswGetTypeInfo(index); + so->first = true; + so->tmpCtx = AllocSetContextCreate(CurrentMemoryContext, "Hnsw scan temporary context", 
ALLOCSET_DEFAULT_SIZES); + + so->vs.buf = InvalidBuffer; + so->vs.lastSelfModifiedItup = NULL; + so->vs.lastSelfModifiedItupBufferSize = 0; + + /* Set support functions */ + so->procinfo = index_getprocinfo(index, 1, HNSW_DISTANCE_PROC); + so->normprocinfo = HnswOptionalProcInfo(index, HNSW_NORM_PROC); + so->collation = index->rd_indcollation[0]; + + scan->opaque = so; + + return scan; +} + +/* + * Start or restart an index scan + */ +void hnswrescan_internal(IndexScanDesc scan, ScanKey keys, int nkeys, ScanKey orderbys, int norderbys) +{ + HnswScanOpaque so = (HnswScanOpaque)scan->opaque; + errno_t rc = EOK; + + if (so->vs.lastSelfModifiedItup) { + IndexTupleSetSize(((IndexTuple)(so->vs.lastSelfModifiedItup)), 0); /* clear */ + } + + so->first = true; + MemoryContextReset(so->tmpCtx); + + if (keys && scan->numberOfKeys > 0) { + rc = memmove_s(scan->keyData, scan->numberOfKeys * sizeof(ScanKeyData), keys, scan->numberOfKeys * sizeof(ScanKeyData)); + securec_check(rc, "\0", "\0"); + } + + if (orderbys && scan->numberOfOrderBys > 0) { + rc = memmove_s(scan->orderByData, scan->numberOfOrderBys * sizeof(ScanKeyData), orderbys, scan->numberOfOrderBys * sizeof(ScanKeyData)); + securec_check(rc, "\0", "\0"); + } +} + +/* + * Fetch the next tuple in the given scan + */ +bool hnswgettuple_internal(IndexScanDesc scan, ScanDirection dir) +{ + HnswScanOpaque so = (HnswScanOpaque)scan->opaque; + MemoryContext oldCtx = MemoryContextSwitchTo(so->tmpCtx); + + /* + * Index can be used to scan backward, but Postgres doesn't support + * backward scan on operators + */ + Assert(ScanDirectionIsForward(dir)); + + if (so->first) { + Datum value; + + /* Count index scan for stats */ + pgstat_count_index_scan(scan->indexRelation); + + /* Safety check */ + if (scan->orderByData == NULL) + elog(ERROR, "cannot scan hnsw index without order"); + + /* Requires MVCC-compliant snapshot as not able to maintain a pin */ + /* https://www.postgresql.org/docs/current/index-locking.html */ + if 
(!IsMVCCSnapshot(scan->xs_snapshot)) + elog(ERROR, "non-MVCC snapshots are not supported with hnsw"); + + /* Get scan value */ + value = GetScanValue(scan); + + /* + * Get a shared lock. This allows vacuum to ensure no in-flight scans + * before marking tuples as deleted. + */ + LockPage(scan->indexRelation, HNSW_SCAN_LOCK, ShareLock); + + so->w = GetScanItems(scan, value); + + /* Release shared lock */ + UnlockPage(scan->indexRelation, HNSW_SCAN_LOCK, ShareLock); + + so->first = false; + +#if defined(HNSW_MEMORY) && PG_VERSION_NUM >= 130000 + elog(INFO, "memory: %zu MB", MemoryContextMemAllocated(so->tmpCtx, false) / MEM_INFO_NUM); +#endif + } + + while (list_length(so->w) > 0) { + char *base = NULL; + HnswCandidate *hc = (HnswCandidate *)linitial(so->w); + HnswElement element = (HnswElement)HnswPtrAccess(base, hc->element); + ItemPointer heaptid; + + /* Move to next element if no valid heap TIDs */ + if (element->heaptidsLength == 0) { + so->w = list_delete_first(so->w); + continue; + } + + heaptid = &element->heaptids[--element->heaptidsLength]; + + MemoryContextSwitchTo(oldCtx); + + scan->xs_ctup.t_self = *heaptid; + scan->xs_recheck = false; + return true; + } + + MemoryContextSwitchTo(oldCtx); + return false; +} + +/* + * End a scan and release resources + */ +void hnswendscan_internal(IndexScanDesc scan) +{ + HnswScanOpaque so = (HnswScanOpaque)scan->opaque; + + FREE_POINTER(so->vs.lastSelfModifiedItup); + + MemoryContextDelete(so->tmpCtx); + + pfree(so); + scan->opaque = NULL; +} diff --git a/src/gausskernel/storage/access/datavec/hnswutils.cpp b/src/gausskernel/storage/access/datavec/hnswutils.cpp new file mode 100644 index 0000000000..67db50e6ef --- /dev/null +++ b/src/gausskernel/storage/access/datavec/hnswutils.cpp @@ -0,0 +1,1393 @@ +#include "postgres.h" + +#include <math.h> + +#include "access/generic_xlog.h" +#include "catalog/pg_type.h" +#include "fmgr.h" +#include "access/datavec/hnsw.h" +#include "lib/pairingheap.h" +#include "access/datavec/halfvec.h"
+#include "access/datavec/sparsevec.h" +#include "storage/buf/bufmgr.h" +#include "utils/datum.h" +#include "utils/rel.h" + +#if PG_VERSION_NUM >= 130000 +#include "common/hashfn.h" +#else +#include "utils/hashutils.h" +#endif + +#if PG_VERSION_NUM < 170000 +static inline uint64 murmurhash64(uint64 data) +{ + uint64 h = data; + + h ^= h >> 33; + h *= 0xff51afd7ed558ccd; + h ^= h >> 33; + h *= 0xc4ceb9fe1a85ec53; + h ^= h >> 33; + + return h; +} +#endif + +/* TID hash table */ +static uint32 hash_tid(ItemPointerData tid) +{ + union { + uint64 i; + ItemPointerData tid; + } x; + + /* Initialize unused bytes */ + x.i = 0; + x.tid = tid; + + return murmurhash64(x.i); +} + +#define VALGRIND_MAKE_MEM_DEFINED(addr, size) \ + do { \ + } while (0) + +#define SH_PREFIX tidhash +#define SH_ELEMENT_TYPE TidHashEntry +#define SH_KEY_TYPE ItemPointerData +#define SH_KEY tid +#define SH_HASH_KEY(tb, key) hash_tid(key) +#define SH_EQUAL(tb, a, b) ItemPointerEquals(&(a), &(b)) +#define SH_SCOPE extern +#define SH_DEFINE +#include "lib/simplehash.h" + +/* Pointer hash table */ +static uint32 hash_pointer(uintptr_t ptr) +{ +#if SIZEOF_VOID_P == 8 + return murmurhash64((uint64)ptr); +#else + return murmurhash32((uint32)ptr); +#endif +} + +#define SH_PREFIX pointerhash +#define SH_ELEMENT_TYPE PointerHashEntry +#define SH_KEY_TYPE uintptr_t +#define SH_KEY ptr +#define SH_HASH_KEY(tb, key) hash_pointer(key) +#define SH_EQUAL(tb, a, b) ((a) == (b)) +#define SH_SCOPE extern +#define SH_DEFINE +#include "lib/simplehash.h" + +/* Offset hash table */ +static uint32 hash_offset(Size offset) +{ +#if SIZEOF_SIZE_T == 8 + return murmurhash64((uint64)offset); +#else + return murmurhash32((uint32)offset); +#endif +} + +#define SH_PREFIX offsethash +#define SH_ELEMENT_TYPE OffsetHashEntry +#define SH_KEY_TYPE Size +#define SH_KEY offset +#define SH_HASH_KEY(tb, key) hash_offset(key) +#define SH_EQUAL(tb, a, b) ((a) == (b)) +#define SH_SCOPE extern +#define SH_DEFINE +#include "lib/simplehash.h" + 
+typedef union { + pointerhash_hash *pointers; + offsethash_hash *offsets; + tidhash_hash *tids; +} VisitedHash; + +/* + * Get the max number of connections in an upper layer for each element in the index + */ +int HnswGetM(Relation index) +{ + HnswOptions *opts = (HnswOptions *)index->rd_options; + + if (opts) + return opts->m; + + return HNSW_DEFAULT_M; +} + +/* + * Get the size of the dynamic candidate list in the index + */ +int HnswGetEfConstruction(Relation index) +{ + HnswOptions *opts = (HnswOptions *)index->rd_options; + + if (opts) + return opts->efConstruction; + + return HNSW_DEFAULT_EF_CONSTRUCTION; +} + +/* + * Get whether to enable PQ + */ +bool HnswGetEnablePQ(Relation index) +{ + HnswOptions *opts = (HnswOptions *)index->rd_options; + + if (opts) { + return opts->enablePQ; + } + + return HNSW_DEFAULT_ENABLE_PQ; +} + +/* + * Get the number of subquantizer + */ +int HnswGetPqM(Relation index) +{ + HnswOptions *opts = (HnswOptions *)index->rd_options; + + if (opts) { + return opts->pqM; + } + + return HNSW_DEFAULT_PQ_M; +} + +/* + * Get the number of centroids for each subquantizer + */ +int HnswGetPqKsub(Relation index) +{ + HnswOptions *opts = (HnswOptions *)index->rd_options; + + if (opts) { + return opts->pqKsub; + } + + return HNSW_DEFAULT_PQ_KSUB; +} + +/* + * Get proc + */ +FmgrInfo *HnswOptionalProcInfo(Relation index, uint16 procnum) +{ + if (!OidIsValid(index_getprocid(index, 1, procnum))) + return NULL; + + return index_getprocinfo(index, 1, procnum); +} + +/* + * Normalize value + */ +Datum HnswNormValue(const HnswTypeInfo *typeInfo, Oid collation, Datum value) +{ + return DirectFunctionCall1Coll(typeInfo->normalize, collation, value); +} + +/* + * Check if non-zero norm + */ +bool HnswCheckNorm(FmgrInfo *procinfo, Oid collation, Datum value) +{ + return DatumGetFloat8(FunctionCall1Coll(procinfo, collation, value)) > 0; +} + +/* + * New buffer + */ +Buffer HnswNewBuffer(Relation index, ForkNumber forkNum) +{ + Buffer buf = 
ReadBufferExtended(index, forkNum, P_NEW, RBM_NORMAL, NULL); + + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + return buf; +} + +/* + * Init page + */ +void HnswInitPage(Buffer buf, Page page) +{ + PageInit(page, BufferGetPageSize(buf), sizeof(HnswPageOpaqueData)); + HnswPageGetOpaque(page)->nextblkno = InvalidBlockNumber; + HnswPageGetOpaque(page)->pageType = HNSW_DEFAULT_PAGE_TYPE; + HnswPageGetOpaque(page)->page_id = HNSW_PAGE_ID; +} + +/* + * Allocate a neighbor array + */ +static HnswNeighborArray *HnswInitNeighborArray(int lm, HnswAllocator *allocator) +{ + HnswNeighborArray *a = (HnswNeighborArray *)HnswAlloc(allocator, HNSW_NEIGHBOR_ARRAY_SIZE(lm)); + + a->length = 0; + a->closerSet = false; + return a; +} + +/* + * Allocate neighbors + */ +void HnswInitNeighbors(char *base, HnswElement element, int m, HnswAllocator *allocator) +{ + int level = element->level; + HnswNeighborArrayPtr *neighborList = + (HnswNeighborArrayPtr *)HnswAlloc(allocator, sizeof(HnswNeighborArrayPtr) * (level + 1)); + + HnswPtrStore(base, element->neighbors, neighborList); + + for (int lc = 0; lc <= level; lc++) + HnswPtrStore(base, neighborList[lc], HnswInitNeighborArray(HnswGetLayerM(m, lc), allocator)); +} + +/* + * Allocate memory from the allocator + */ +void *HnswAlloc(HnswAllocator *allocator, Size size) +{ + if (allocator) + return (*(allocator)->alloc)(size, (allocator)->state); + + return palloc(size); +} + +/* + * Allocate an element + */ +HnswElement HnswInitElement(char *base, ItemPointer heaptid, int m, double ml, int maxLevel, HnswAllocator *allocator) +{ + HnswElement element = (HnswElement)HnswAlloc(allocator, sizeof(HnswElementData)); + + int level = static_cast<int>(-log(RandomDouble()) * ml); + /* Cap level */ + if (level > maxLevel) { + level = maxLevel; + } + + element->heaptidsLength = 0; + HnswAddHeapTid(element, heaptid); + + element->level = level; + element->deleted = 0; + + HnswInitNeighbors(base, element, m, allocator); + + HnswPtrStore(base, element->value,
(Pointer)NULL); + + return element; +} + +/* + * Add a heap TID to an element + */ +void HnswAddHeapTid(HnswElement element, ItemPointer heaptid) +{ + element->heaptids[element->heaptidsLength++] = *heaptid; +} + +/* + * Allocate an element from block and offset numbers + */ +HnswElement HnswInitElementFromBlock(BlockNumber blkno, OffsetNumber offno) +{ + HnswElement element = (HnswElement)palloc(sizeof(HnswElementData)); + char *base = NULL; + + element->blkno = blkno; + element->offno = offno; + HnswPtrStore(base, element->neighbors, (HnswNeighborArrayPtr *)NULL); + HnswPtrStore(base, element->value, (Pointer)NULL); + return element; +} + +/* + * Get the metapage info + */ +void HnswGetMetaPageInfo(Relation index, int *m, HnswElement *entryPoint) +{ + Buffer buf; + Page page; + HnswMetaPage metap; + + buf = ReadBuffer(index, HNSW_METAPAGE_BLKNO); + LockBuffer(buf, BUFFER_LOCK_SHARE); + page = BufferGetPage(buf); + metap = HnswPageGetMeta(page); + if (unlikely(metap->magicNumber != HNSW_MAGIC_NUMBER)) + elog(ERROR, "hnsw index is not valid"); + + if (m != NULL) + *m = metap->m; + + if (entryPoint != NULL) { + if (BlockNumberIsValid(metap->entryBlkno)) { + *entryPoint = HnswInitElementFromBlock(metap->entryBlkno, metap->entryOffno); + (*entryPoint)->level = metap->entryLevel; + } else { + *entryPoint = NULL; + } + } + + UnlockReleaseBuffer(buf); +} + +/* + * Get the entry point + */ +HnswElement HnswGetEntryPoint(Relation index) +{ + HnswElement entryPoint; + + HnswGetMetaPageInfo(index, NULL, &entryPoint); + + return entryPoint; +} + +/* + * Update the metapage info + */ +static void HnswUpdateMetaPageInfo(Page page, int updateEntry, HnswElement entryPoint, BlockNumber insertPage) +{ + HnswMetaPage metap = HnswPageGetMeta(page); + + if (updateEntry) { + if (entryPoint == NULL) { + metap->entryBlkno = InvalidBlockNumber; + metap->entryOffno = InvalidOffsetNumber; + metap->entryLevel = -1; + } else if (entryPoint->level > metap->entryLevel || updateEntry == 
HNSW_UPDATE_ENTRY_ALWAYS) { + metap->entryBlkno = entryPoint->blkno; + metap->entryOffno = entryPoint->offno; + metap->entryLevel = entryPoint->level; + } + } + + if (BlockNumberIsValid(insertPage)) + metap->insertPage = insertPage; +} + +/* + * Update the append metapage info + */ +static void HnswUpdateAppendMetaPageInfo(Page page, int updateEntry, HnswElement entryPoint, + BlockNumber eleInsertSlotStartPage, BlockNumber neiInsertSlotStartPage) +{ + HnswAppendMetaPage metap = HnswPageGetAppendMeta(page); + + if (updateEntry) { + if (entryPoint == NULL) { + metap->entryBlkno = InvalidBlockNumber; + metap->entryOffno = InvalidOffsetNumber; + metap->entryLevel = -1; + } else if (entryPoint->level > metap->entryLevel || updateEntry == HNSW_UPDATE_ENTRY_ALWAYS) { + metap->entryBlkno = entryPoint->blkno; + metap->entryOffno = entryPoint->offno; + metap->entryLevel = entryPoint->level; + } + } + + if (BlockNumberIsValid(eleInsertSlotStartPage)) { + metap->elementInsertSlot = eleInsertSlotStartPage; + } + + if (BlockNumberIsValid(neiInsertSlotStartPage)) { + metap->neighborInsertSlot = neiInsertSlotStartPage; + } +} + +/* + * Update the metapage + */ +void HnswUpdateMetaPage(Relation index, int updateEntry, HnswElement entryPoint, BlockNumber insertPage, + ForkNumber forkNum, bool building) +{ + Buffer buf; + Page page; + GenericXLogState *state; + + buf = ReadBufferExtended(index, forkNum, HNSW_METAPAGE_BLKNO, RBM_NORMAL, NULL); + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + if (building) { + state = NULL; + page = BufferGetPage(buf); + } else { + state = GenericXLogStart(index); + page = GenericXLogRegisterBuffer(state, buf, 0); + } + + HnswUpdateMetaPageInfo(page, updateEntry, entryPoint, insertPage); + + if (building) + MarkBufferDirty(buf); + else + GenericXLogFinish(state); + UnlockReleaseBuffer(buf); +} + +/* + * Update the append metapage + */ +void HnswUpdateAppendMetaPage(Relation index, int updateEntry, HnswElement entryPoint, BlockNumber eleInsertPage, + 
BlockNumber neiInsertPage, ForkNumber forkNum, bool building) +{ + Buffer buf; + Page page; + GenericXLogState *state; + + buf = ReadBufferExtended(index, forkNum, HNSW_METAPAGE_BLKNO, RBM_NORMAL, NULL); + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + if (building) { + state = NULL; + page = BufferGetPage(buf); + } else { + state = GenericXLogStart(index); + page = GenericXLogRegisterBuffer(state, buf, 0); + } + + HnswUpdateAppendMetaPageInfo(page, updateEntry, entryPoint, eleInsertPage, neiInsertPage); + + if (building) { + MarkBufferDirty(buf); + } else { + GenericXLogFinish(state); + } + UnlockReleaseBuffer(buf); +} + +/* + * Set element tuple, except for neighbor info + */ +void HnswSetElementTuple(char *base, HnswElementTuple etup, HnswElement element) +{ + Pointer valuePtr = (Pointer)HnswPtrAccess(base, element->value); + errno_t rc = EOK; + + etup->type = HNSW_ELEMENT_TUPLE_TYPE; + etup->level = element->level; + etup->deleted = 0; + for (int i = 0; i < HNSW_HEAPTIDS; i++) { + if (i < element->heaptidsLength) + etup->heaptids[i] = element->heaptids[i]; + else + ItemPointerSetInvalid(&etup->heaptids[i]); + } + rc = memcpy_s(&etup->data, VARSIZE_ANY(valuePtr), valuePtr, VARSIZE_ANY(valuePtr)); + securec_check(rc, "\0", "\0"); +} + +/* + * Set neighbor tuple + */ +void HnswSetNeighborTuple(char *base, HnswNeighborTuple ntup, HnswElement e, int m) +{ + int idx = 0; + + ntup->type = HNSW_NEIGHBOR_TUPLE_TYPE; + + for (int lc = e->level; lc >= 0; lc--) { + HnswNeighborArray *neighbors = HnswGetNeighbors(base, e, lc); + int lm = HnswGetLayerM(m, lc); + + for (int i = 0; i < lm; i++) { + ItemPointer indextid = &ntup->indextids[idx++]; + + if (i < neighbors->length) { + HnswCandidate *hc = &neighbors->items[i]; + HnswElement hce = (HnswElement)HnswPtrAccess(base, hc->element); + + ItemPointerSet(indextid, hce->blkno, hce->offno); + } else { + ItemPointerSetInvalid(indextid); + } + } + } + + ntup->count = idx; +} + +/* + * Load neighbors from page + */ +static void 
LoadNeighborsFromPage(HnswElement element, Relation index, Page page, int m) +{ + char *base = NULL; + + HnswNeighborTuple ntup = (HnswNeighborTuple)PageGetItem(page, PageGetItemId(page, element->neighborOffno)); + int neighborCount = (element->level + 2) * m; + + Assert(HnswIsNeighborTuple(ntup)); + + HnswInitNeighbors(base, element, m, NULL); + + /* Ensure expected neighbors */ + if (ntup->count != neighborCount) { + return; + } + + for (int i = 0; i < neighborCount; i++) { + HnswElement e; + int level; + HnswCandidate *hc; + ItemPointer indextid; + HnswNeighborArray *neighbors; + + indextid = &ntup->indextids[i]; + + if (!ItemPointerIsValid(indextid)) { + continue; + } + + e = HnswInitElementFromBlock(ItemPointerGetBlockNumber(indextid), ItemPointerGetOffsetNumber(indextid)); + + /* Calculate level based on offset */ + level = element->level - i / m; + if (level < 0) { + level = 0; + } + + neighbors = HnswGetNeighbors(base, element, level); + hc = &neighbors->items[neighbors->length++]; + HnswPtrStore(base, hc->element, e); + } +} + +/* + * Load neighbors + */ +void HnswLoadNeighbors(HnswElement element, Relation index, int m) +{ + Buffer buf; + Page page; + + buf = ReadBuffer(index, element->neighborPage); + LockBuffer(buf, BUFFER_LOCK_SHARE); + page = BufferGetPage(buf); + + LoadNeighborsFromPage(element, index, page, m); + + UnlockReleaseBuffer(buf); +} + +/* + * Load an element from a tuple + */ +void HnswLoadElementFromTuple(HnswElement element, HnswElementTuple etup, bool loadHeaptids, bool loadVec) +{ + element->level = etup->level; + element->deleted = etup->deleted; + element->neighborPage = ItemPointerGetBlockNumber(&etup->neighbortid); + element->neighborOffno = ItemPointerGetOffsetNumber(&etup->neighbortid); + element->heaptidsLength = 0; + + if (loadHeaptids) { + for (int i = 0; i < HNSW_HEAPTIDS; i++) { + /* Can stop at first invalid */ + if (!ItemPointerIsValid(&etup->heaptids[i])) + break; + + HnswAddHeapTid(element, &etup->heaptids[i]); + } + } 
+ + if (loadVec) { + char *base = NULL; + Datum value = datumCopy(PointerGetDatum(&etup->data), false, -1); + + HnswPtrStore(base, element->value, DatumGetPointer(value)); + } +} + +/* + * Load an element and optionally get its distance from q + */ +bool HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, + bool loadVec, float *maxDistance, IndexScanDesc scan) +{ + Buffer buf; + Page page; + HnswElementTuple etup; + bool needRecheck = false; + bool isVisible = true; + + /* Read vector */ + buf = ReadBuffer(index, element->blkno); + LockBuffer(buf, BUFFER_LOCK_SHARE); + page = BufferGetPage(buf); + if (scan != NULL && HnswPageGetOpaque(page)->pageType == HNSW_USTORE_PAGE_TYPE) { + HnswScanOpaque so = (HnswScanOpaque)scan->opaque; + so->vs.buf = buf; + isVisible = VecVisibilityCheck(scan, page, element->offno, &needRecheck); + } + + etup = (HnswElementTuple)PageGetItem(page, PageGetItemId(page, element->offno)); + + Assert(HnswIsElementTuple(etup)); + + /* Calculate distance */ + if (distance != NULL) { + if (DatumGetPointer(*q) == NULL) { + *distance = 0; + } else { + *distance = (float)DatumGetFloat8(FunctionCall2Coll(procinfo, collation, *q, PointerGetDatum(&etup->data))); + } + } + + /* Load element */ + if (distance == NULL || maxDistance == NULL || *distance < *maxDistance) { + HnswLoadElementFromTuple(element, etup, true, loadVec); + } + + UnlockReleaseBuffer(buf); + return isVisible; +} + +/* + * Get the distance for a candidate + */ +static float GetCandidateDistance(char *base, HnswCandidate *hc, Datum q, FmgrInfo *procinfo, Oid collation) +{ + HnswElement hce = (HnswElement)HnswPtrAccess(base, hc->element); + Datum value = HnswGetValue(base, hce); + + return DatumGetFloat8(FunctionCall2Coll(procinfo, collation, q, value)); +} + +/* + * Create a candidate for the entry point + */ +HnswCandidate *HnswEntryCandidate(char *base, HnswElement entryPoint, Datum q, Relation index, FmgrInfo *procinfo, + 
Oid collation, bool loadVec, IndexScanDesc scan) +{ + HnswCandidate *hc = (HnswCandidate *)palloc(sizeof(HnswCandidate)); + + HnswPtrStore(base, hc->element, entryPoint); + if (index == NULL) { + hc->distance = GetCandidateDistance(base, hc, q, procinfo, collation); + } else { + bool isVisible = + HnswLoadElement(entryPoint, &hc->distance, &q, index, procinfo, collation, loadVec, NULL, scan); + if (!isVisible) { + elog(ERROR, "hnsw entryPoint is invisible\n"); + } + } + return hc; +} + +/* + * Compare candidate distances + */ +static int CompareNearestCandidates(const pairingheap_node *a, const pairingheap_node *b, void *arg) +{ + if (((const HnswPairingHeapNode *)a)->inner->distance < ((const HnswPairingHeapNode *)b)->inner->distance) { + return 1; + } + + if (((const HnswPairingHeapNode *)a)->inner->distance > ((const HnswPairingHeapNode *)b)->inner->distance) { + return -1; + } + + return 0; +} + +/* + * Compare candidate distances + */ +static int CompareFurthestCandidates(const pairingheap_node *a, const pairingheap_node *b, void *arg) +{ + if (((const HnswPairingHeapNode *)a)->inner->distance < ((const HnswPairingHeapNode *)b)->inner->distance) { + return -1; + } + + if (((const HnswPairingHeapNode *)a)->inner->distance > ((const HnswPairingHeapNode *)b)->inner->distance) { + return 1; + } + + return 0; +} + +/* + * Create a pairing heap node for a candidate + */ +static HnswPairingHeapNode *CreatePairingHeapNode(HnswCandidate *c) +{ + HnswPairingHeapNode *node = (HnswPairingHeapNode *)palloc(sizeof(HnswPairingHeapNode)); + + node->inner = c; + return node; +} + +/* + * Init visited + */ +static inline void InitVisited(char *base, VisitedHash *v, Relation index, int ef, int m) +{ + if (index != NULL) { + v->tids = tidhash_create(CurrentMemoryContext, ef * m * 2, NULL); + } else if (base != NULL) { + v->offsets = offsethash_create(CurrentMemoryContext, ef * m * 2, NULL); + } else { + v->pointers = pointerhash_create(CurrentMemoryContext, ef * m * 2, NULL); + } 
+} + +/* + * Add to visited + */ +static inline void AddToVisited(char *base, VisitedHash *v, HnswCandidate *hc, Relation index, bool *found) +{ + if (index != NULL) { + HnswElement element = (HnswElement)HnswPtrAccess(base, hc->element); + ItemPointerData indextid; + + ItemPointerSet(&indextid, element->blkno, element->offno); + tidhash_insert(v->tids, indextid, found); + } else if (base != NULL) { +#if PG_VERSION_NUM >= 130000 + HnswElement element = (HnswElement)HnswPtrAccess(base, hc->element); + + offsethash_insert_hash(v->offsets, HnswPtrOffset(hc->element), element->hash, found); +#else + offsethash_insert(v->offsets, HnswPtrOffset(hc->element), found); +#endif + } else { +#if PG_VERSION_NUM >= 130000 + HnswElement element = (HnswElement)HnswPtrAccess(base, hc->element); + + pointerhash_insert_hash(v->pointers, (uintptr_t)HnswPtrPointer(hc->element), element->hash, found); +#else + pointerhash_insert(v->pointers, (uintptr_t)HnswPtrPointer(hc->element), found); +#endif + } +} + +/* + * Count element towards ef + */ +static inline bool CountElement(char *base, HnswElement skipElement, HnswCandidate *hc) +{ + HnswElement e; + + if (skipElement == NULL) { + return true; + } + + /* Ensure does not access heaptidsLength during in-memory build */ + pg_memory_barrier(); + + e = (HnswElement)HnswPtrAccess(base, hc->element); + return e->heaptidsLength != 0; +} + +/* + * Algorithm 2 from paper + */ +List *HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, FmgrInfo *procinfo, Oid collation, + int m, bool inserting, HnswElement skipElement, IndexScanDesc scan) +{ + List *w = NIL; + pairingheap *C = pairingheap_allocate(CompareNearestCandidates, NULL); + pairingheap *W = pairingheap_allocate(CompareFurthestCandidates, NULL); + int wlen = 0; + VisitedHash v; + ListCell *lc2; + HnswNeighborArray *neighborhoodData = NULL; + Size neighborhoodSize; + bool isVisible = true; + errno_t rc = EOK; + + InitVisited(base, &v, index, ef, m); + + /* Create 
local memory for neighborhood if needed */ + if (index == NULL) { + neighborhoodSize = HNSW_NEIGHBOR_ARRAY_SIZE(HnswGetLayerM(m, lc)); + neighborhoodData = (HnswNeighborArray *)palloc(neighborhoodSize); + } + + /* Add entry points to v, C, and W */ + foreach (lc2, ep) { + HnswCandidate *hc = (HnswCandidate *)lfirst(lc2); + bool found; + + AddToVisited(base, &v, hc, index, &found); + + pairingheap_add(C, &(CreatePairingHeapNode(hc)->ph_node)); + pairingheap_add(W, &(CreatePairingHeapNode(hc)->ph_node)); + + /* + * Do not count elements being deleted towards ef when vacuuming. It + * would be ideal to do this for inserts as well, but this could + * affect insert performance. + */ + if (CountElement(base, skipElement, hc)) + wlen++; + } + + while (!pairingheap_is_empty(C)) { + HnswNeighborArray *neighborhood; + HnswCandidate *c = ((HnswPairingHeapNode *)pairingheap_remove_first(C))->inner; + HnswCandidate *f = ((HnswPairingHeapNode *)pairingheap_first(W))->inner; + HnswElement cElement; + + if (c->distance > f->distance) + break; + + cElement = (HnswElement)HnswPtrAccess(base, c->element); + if (HnswPtrIsNull(base, cElement->neighbors)) + HnswLoadNeighbors(cElement, index, m); + + /* Get the neighborhood at layer lc */ + neighborhood = HnswGetNeighbors(base, cElement, lc); + + /* Copy neighborhood to local memory if needed */ + if (index == NULL) { + LWLockAcquire(&cElement->lock, LW_SHARED); + rc = memcpy_s(neighborhoodData, neighborhoodSize, neighborhood, neighborhoodSize); + securec_check(rc, "\0", "\0"); + LWLockRelease(&cElement->lock); + neighborhood = neighborhoodData; + } + + for (int i = 0; i < neighborhood->length; i++) { + HnswCandidate *e = &neighborhood->items[i]; + bool visited; + + AddToVisited(base, &v, e, index, &visited); + + if (!visited) { + float eDistance; + HnswElement eElement = (HnswElement)HnswPtrAccess(base, e->element); + bool alwaysAdd = wlen < ef; + + f = ((HnswPairingHeapNode *)pairingheap_first(W))->inner; + + if (index == NULL) { + 
eDistance = GetCandidateDistance(base, e, q, procinfo, collation); + } else { + isVisible = HnswLoadElement(eElement, &eDistance, &q, index, procinfo, collation, inserting, + alwaysAdd ? NULL : &f->distance, scan); + } + if (!isVisible) { + continue; + } + + if (eDistance < f->distance || alwaysAdd) { + HnswCandidate *ec; + + Assert(!eElement->deleted); + + /* Make robust to issues */ + if (eElement->level < lc) + continue; + + /* Copy e */ + ec = (HnswCandidate *)palloc(sizeof(HnswCandidate)); + HnswPtrStore(base, ec->element, eElement); + ec->distance = eDistance; + + pairingheap_add(C, &(CreatePairingHeapNode(ec)->ph_node)); + pairingheap_add(W, &(CreatePairingHeapNode(ec)->ph_node)); + + /* + * Do not count elements being deleted towards ef when + * vacuuming. It would be ideal to do this for inserts as + * well, but this could affect insert performance. + */ + if (CountElement(base, skipElement, e)) { + wlen++; + + /* No need to decrement wlen */ + if (wlen > ef) + pairingheap_remove_first(W); + } + } + } + } + } + + /* Add each element of W to w */ + while (!pairingheap_is_empty(W)) { + HnswCandidate *hc = ((HnswPairingHeapNode *)pairingheap_remove_first(W))->inner; + + w = lcons(hc, w); + } + + return w; +} + +/* + * Compare candidate distances with pointer tie-breaker + */ +static int +#if PG_VERSION_NUM >= 130000 + CompareCandidateDistances(const ListCell *a, const ListCell *b) +{ + HnswCandidate *hca = (HnswCandidate *)lfirst(a); + HnswCandidate *hcb = (HnswCandidate *)lfirst(b); +#else + CompareCandidateDistances(const void *a, const void *b) +{ + HnswCandidate *hca = (HnswCandidate *)lfirst(*(ListCell **)a); + HnswCandidate *hcb = (HnswCandidate *)lfirst(*(ListCell **)b); +#endif + + if (hca->distance < hcb->distance) { + return 1; + } + + if (hca->distance > hcb->distance) { + return -1; + } + + if (HnswPtrPointer(hca->element) < HnswPtrPointer(hcb->element)) { + return 1; + } + + if (HnswPtrPointer(hca->element) > HnswPtrPointer(hcb->element)) { + 
return -1; + } + + return 0; +} + +/* + * Compare candidate distances with offset tie-breaker + */ +static int +#if PG_VERSION_NUM >= 130000 + CompareCandidateDistancesOffset(const ListCell *a, const ListCell *b) +{ + HnswCandidate *hca = (HnswCandidate *)lfirst(a); + HnswCandidate *hcb = (HnswCandidate *)lfirst(b); +#else + CompareCandidateDistancesOffset(const void *a, const void *b) +{ + HnswCandidate *hca = (HnswCandidate *)lfirst(*(ListCell **)a); + HnswCandidate *hcb = (HnswCandidate *)lfirst(*(ListCell **)b); +#endif + + if (hca->distance < hcb->distance) { + return 1; + } + + if (hca->distance > hcb->distance) { + return -1; + } + + if (HnswPtrOffset(hca->element) < HnswPtrOffset(hcb->element)) { + return 1; + } + + if (HnswPtrOffset(hca->element) > HnswPtrOffset(hcb->element)) { + return -1; + } + + return 0; +} + +/* + * Calculate the distance between elements + */ +static float HnswGetDistance(char *base, HnswElement a, HnswElement b, FmgrInfo *procinfo, Oid collation) +{ + Datum aValue = HnswGetValue(base, a); + Datum bValue = HnswGetValue(base, b); + + return DatumGetFloat8(FunctionCall2Coll(procinfo, collation, aValue, bValue)); +} + +/* + * Check if an element is closer to q than any element from R + */ +static bool CheckElementCloser(char *base, HnswCandidate *e, List *r, FmgrInfo *procinfo, Oid collation) +{ + HnswElement eElement = (HnswElement)HnswPtrAccess(base, e->element); + ListCell *lc2; + + foreach (lc2, r) { + HnswCandidate *ri = (HnswCandidate *)lfirst(lc2); + HnswElement riElement = (HnswElement)HnswPtrAccess(base, ri->element); + float distance = HnswGetDistance(base, eElement, riElement, procinfo, collation); + + if (distance <= e->distance) { + return false; + } + } + + return true; +} + +/* + * Algorithm 4 from paper + */ +static List *SelectNeighbors(char *base, List *c, int lm, int lc, FmgrInfo *procinfo, Oid collation, HnswElement e2, + HnswCandidate *newCandidate, HnswCandidate **pruned, bool sortCandidates) +{ + List *r = NIL; + 
List *w = list_copy(c); + HnswCandidate **wd; + int wdlen = 0; + int wdoff = 0; + HnswNeighborArray *neighbors = HnswGetNeighbors(base, e2, lc); + bool mustCalculate = !neighbors->closerSet; + List *added = NIL; + bool removedAny = false; + + if (list_length(w) <= lm) { + return w; + } + + wd = (HnswCandidate **)palloc(sizeof(HnswCandidate *) * list_length(w)); + + /* Ensure order of candidates is deterministic for closer caching */ + if (sortCandidates) { + if (base == NULL) { + list_sort(w, CompareCandidateDistances); + } else { + list_sort(w, CompareCandidateDistancesOffset); + } + } + + while (list_length(w) > 0 && list_length(r) < lm) { + /* Assumes w is already ordered desc */ + HnswCandidate *e = (HnswCandidate *)linitial(w); + + w = list_delete_first(w); + + /* Use previous state of r and wd to skip work when possible */ + if (mustCalculate) { + e->closer = CheckElementCloser(base, e, r, procinfo, collation); + } else if (list_length(added) > 0) { + /* Keep Valgrind happy for in-memory, parallel builds */ + if (base != NULL) { + VALGRIND_MAKE_MEM_DEFINED(&e->closer, 1); + } + + /* + * If the current candidate was closer, we only need to compare it + * with the other candidates that we have added. + */ + if (e->closer) { + e->closer = CheckElementCloser(base, e, added, procinfo, collation); + + if (!e->closer) { + removedAny = true; + } + } else { + /* + * If we have removed any candidates from closer, a candidate + * that was not closer earlier might now be. 
+ */ + if (removedAny) { + e->closer = CheckElementCloser(base, e, r, procinfo, collation); + if (e->closer) { + added = lappend(added, e); + } + } + } + } else if (e == newCandidate) { + e->closer = CheckElementCloser(base, e, r, procinfo, collation); + if (e->closer) { + added = lappend(added, e); + } + } + + /* Keep Valgrind happy for in-memory, parallel builds */ + if (base != NULL) { + VALGRIND_MAKE_MEM_DEFINED(&e->closer, 1); + } + + if (e->closer) { + r = lappend(r, e); + } else { + wd[wdlen++] = e; + } + } + + /* Cached value can only be used in future if sorted deterministically */ + neighbors->closerSet = sortCandidates; + + /* Keep pruned connections */ + while (wdoff < wdlen && list_length(r) < lm) { + r = lappend(r, wd[wdoff++]); + } + + /* Return pruned for update connections */ + if (pruned != NULL) { + if (wdoff < wdlen) { + *pruned = wd[wdoff]; + } else { + *pruned = (HnswCandidate *)linitial(w); + } + } + + return r; +} + +/* + * Add connections + */ +static void AddConnections(char *base, HnswElement element, List *neighbors, int lc) +{ + ListCell *lc2; + HnswNeighborArray *a = HnswGetNeighbors(base, element, lc); + + foreach (lc2, neighbors) + a->items[a->length++] = *((HnswCandidate *)lfirst(lc2)); +} + +/* + * Update connections + */ +void HnswUpdateConnection(char *base, HnswElement element, HnswCandidate *hc, int lm, int lc, int *updateIdx, + Relation index, FmgrInfo *procinfo, Oid collation) +{ + HnswElement hce = (HnswElement)HnswPtrAccess(base, hc->element); + HnswNeighborArray *currentNeighbors = HnswGetNeighbors(base, hce, lc); + HnswCandidate hc2; + + HnswPtrStore(base, hc2.element, element); + hc2.distance = hc->distance; + + if (currentNeighbors->length < lm) { + currentNeighbors->items[currentNeighbors->length++] = hc2; + + /* Track update */ + if (updateIdx != NULL) { + *updateIdx = -2; + } + } else { + /* Shrink connections */ + HnswCandidate *pruned = NULL; + + /* Load elements on insert */ + if (index != NULL) { + Datum q = 
HnswGetValue(base, hce); + + for (int i = 0; i < currentNeighbors->length; i++) { + HnswCandidate *hc3 = &currentNeighbors->items[i]; + HnswElement hc3Element = (HnswElement)HnswPtrAccess(base, hc3->element); + + if (HnswPtrIsNull(base, hc3Element->value)) + HnswLoadElement(hc3Element, &hc3->distance, &q, index, procinfo, collation, true, NULL); + else + hc3->distance = GetCandidateDistance(base, hc3, q, procinfo, collation); + + /* Prune element if being deleted */ + if (hc3Element->heaptidsLength == 0) { + pruned = &currentNeighbors->items[i]; + break; + } + } + } + + if (pruned == NULL) { + List *c = NIL; + + /* Add candidates */ + for (int i = 0; i < currentNeighbors->length; i++) { + c = lappend(c, &currentNeighbors->items[i]); + } + c = lappend(c, &hc2); + + SelectNeighbors(base, c, lm, lc, procinfo, collation, hce, &hc2, &pruned, true); + + /* Should not happen */ + if (pruned == NULL) + return; + } + + /* Find and replace the pruned element */ + for (int i = 0; i < currentNeighbors->length; i++) { + if (HnswPtrEqual(base, currentNeighbors->items[i].element, pruned->element)) { + currentNeighbors->items[i] = hc2; + + /* Track update */ + if (updateIdx != NULL) { + *updateIdx = i; + } + + break; + } + } + } +} + +/* + * Remove elements being deleted or skipped + */ +static List *RemoveElements(char *base, List *w, HnswElement skipElement) +{ + ListCell *lc2; + List *w2 = NIL; + + /* Ensure does not access heaptidsLength during in-memory build */ + pg_memory_barrier(); + + foreach (lc2, w) { + HnswCandidate *hc = (HnswCandidate *)lfirst(lc2); + HnswElement hce = (HnswElement)HnswPtrAccess(base, hc->element); + + /* Skip self for vacuuming update */ + if (skipElement != NULL && hce->blkno == skipElement->blkno && hce->offno == skipElement->offno) { + continue; + } + + if (hce->heaptidsLength != 0) { + w2 = lappend(w2, hc); + } + } + + return w2; +} + +#if PG_VERSION_NUM >= 130000 +/* + * Precompute hash + */ +static void PrecomputeHash(char *base, HnswElement element) +{ +
HnswElementPtr ptr; + + HnswPtrStore(base, ptr, element); + + if (base == NULL) { + element->hash = hash_pointer((uintptr_t)HnswPtrPointer(ptr)); + } else { + element->hash = hash_offset(HnswPtrOffset(ptr)); + } +} +#endif + +/* + * Algorithm 1 from paper + */ +void HnswFindElementNeighbors(char *base, HnswElement element, HnswElement entryPoint, Relation index, + FmgrInfo *procinfo, Oid collation, int m, int efConstruction, bool existing) +{ + List *ep; + List *w; + int level = element->level; + int entryLevel; + Datum q = HnswGetValue(base, element); + HnswElement skipElement = existing ? element : NULL; + +#if PG_VERSION_NUM >= 130000 + /* Precompute hash */ + if (index == NULL) + PrecomputeHash(base, element); +#endif + + /* No neighbors if no entry point */ + if (entryPoint == NULL) + return; + + /* Get entry point and level */ + ep = list_make1(HnswEntryCandidate(base, entryPoint, q, index, procinfo, collation, true)); + entryLevel = entryPoint->level; + + /* 1st phase: greedy search to insert level */ + for (int lc = entryLevel; lc >= level + 1; lc--) { + w = HnswSearchLayer(base, q, ep, 1, lc, index, procinfo, collation, m, true, skipElement); + ep = w; + } + + if (level > entryLevel) { + level = entryLevel; + } + + /* Add one for existing element */ + if (existing) { + efConstruction++; + } + /* 2nd phase */ + for (int lc = level; lc >= 0; lc--) { + int lm = HnswGetLayerM(m, lc); + List *neighbors; + List *lw; + + w = HnswSearchLayer(base, q, ep, efConstruction, lc, index, procinfo, collation, m, true, skipElement); + + /* Elements being deleted or skipped can help with search */ + /* but should be removed before selecting neighbors */ + if (index != NULL) + lw = RemoveElements(base, w, skipElement); + else + lw = w; + + /* + * Candidates are sorted, but not deterministically. Could set + * sortCandidates to true for in-memory builds to enable closer + * caching, but there does not seem to be a difference in performance. 
+ */ + neighbors = SelectNeighbors(base, lw, lm, lc, procinfo, collation, element, NULL, NULL, false); + + AddConnections(base, element, neighbors, lc); + + ep = w; + } +} + +static void SparsevecCheckValue(Pointer v) +{ + SparseVector *vec = (SparseVector *)v; + + if (vec->nnz > HNSW_MAX_NNZ) { + elog(ERROR, "sparsevec cannot have more than %d non-zero elements for hnsw index", HNSW_MAX_NNZ); + } +} + +/* + * Get type info + */ +const HnswTypeInfo *HnswGetTypeInfo(Relation index) +{ + FmgrInfo *procinfo = HnswOptionalProcInfo(index, HNSW_TYPE_INFO_PROC); + + if (procinfo == NULL) { + static const HnswTypeInfo typeInfo = { + .maxDimensions = HNSW_MAX_DIM, .normalize = l2_normalize, .checkValue = NULL}; + + return (&typeInfo); + } else { + return (const HnswTypeInfo *)DatumGetPointer(OidFunctionCall0Coll(procinfo->fn_oid, InvalidOid)); + } +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(hnsw_halfvec_support); +Datum hnsw_halfvec_support(PG_FUNCTION_ARGS) +{ + static const HnswTypeInfo typeInfo = { + .maxDimensions = HNSW_MAX_DIM * 2, .normalize = halfvec_l2_normalize, .checkValue = NULL}; + + PG_RETURN_POINTER(&typeInfo); +}; + +PGDLLEXPORT PG_FUNCTION_INFO_V1(hnsw_bit_support); +Datum hnsw_bit_support(PG_FUNCTION_ARGS) +{ + static const HnswTypeInfo typeInfo = {.maxDimensions = HNSW_MAX_DIM * 32, .normalize = NULL, .checkValue = NULL}; + + PG_RETURN_POINTER(&typeInfo); +}; + +PGDLLEXPORT PG_FUNCTION_INFO_V1(hnsw_sparsevec_support); +Datum hnsw_sparsevec_support(PG_FUNCTION_ARGS) +{ + static const HnswTypeInfo typeInfo = { + .maxDimensions = SPARSEVEC_MAX_DIM, .normalize = sparsevec_l2_normalize, .checkValue = SparsevecCheckValue}; + + PG_RETURN_POINTER(&typeInfo); +}; diff --git a/src/gausskernel/storage/access/datavec/hnswvacuum.cpp b/src/gausskernel/storage/access/datavec/hnswvacuum.cpp new file mode 100644 index 0000000000..c67bae483a --- /dev/null +++ b/src/gausskernel/storage/access/datavec/hnswvacuum.cpp @@ -0,0 +1,606 @@ +#include "postgres.h" + +#include + +#include 
"access/generic_xlog.h" +#include "commands/vacuum.h" +#include "access/datavec/hnsw.h" +#include "storage/buf/bufmgr.h" +#include "storage/lmgr.h" +#include "utils/memutils.h" + +/* + * Check if deleted list contains an index TID + */ +static bool DeletedContains(tidhash_hash *deleted, ItemPointer indextid) +{ + return tidhash_lookup(deleted, *indextid) != NULL; +} + +/* + * Remove deleted heap TIDs + * + * OK to remove for entry point, since always considered for searches and inserts + */ +static void RemoveHeapTids(HnswVacuumState *vacuumstate) +{ + BlockNumber blkno = HNSW_HEAD_BLKNO; + HnswElement highestPoint = &vacuumstate->highestPoint; + Relation index = vacuumstate->index; + BufferAccessStrategy bas = vacuumstate->bas; + HnswElement entryPoint = HnswGetEntryPoint(vacuumstate->index); + IndexBulkDeleteResult *stats = vacuumstate->stats; + + /* Store separately since highestPoint.level is uint8 */ + int highestLevel = -1; + + /* Initialize highest point */ + highestPoint->blkno = InvalidBlockNumber; + highestPoint->offno = InvalidOffsetNumber; + + while (BlockNumberIsValid(blkno)) { + Buffer buf; + Page page; + GenericXLogState *state; + OffsetNumber offno; + OffsetNumber maxoffno; + bool updated = false; + + vacuum_delay_point(); + + buf = ReadBufferExtended(index, MAIN_FORKNUM, blkno, RBM_NORMAL, bas); + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + state = GenericXLogStart(index); + page = GenericXLogRegisterBuffer(state, buf, 0); + maxoffno = PageGetMaxOffsetNumber(page); + + /* Iterate over nodes */ + for (offno = FirstOffsetNumber; offno <= maxoffno; offno = OffsetNumberNext(offno)) { + HnswElementTuple etup = (HnswElementTuple)PageGetItem(page, PageGetItemId(page, offno)); + int idx = 0; + bool itemUpdated = false; + + /* Skip neighbor tuples */ + if (!HnswIsElementTuple(etup)) + continue; + + if (ItemPointerIsValid(&etup->heaptids[0])) { + for (int i = 0; i < HNSW_HEAPTIDS; i++) { + /* Stop at first unused */ + if 
(!ItemPointerIsValid(&etup->heaptids[i])) + break; + + if (vacuumstate->callback(&etup->heaptids[i], vacuumstate->callbackState, InvalidOid, + InvalidBktId)) { + itemUpdated = true; + stats->tuples_removed++; + } else { + /* Move to front of list */ + etup->heaptids[idx++] = etup->heaptids[i]; + stats->num_index_tuples++; + } + } + + if (itemUpdated) { + /* Mark rest as invalid */ + for (int i = idx; i < HNSW_HEAPTIDS; i++) + ItemPointerSetInvalid(&etup->heaptids[i]); + + updated = true; + } + } + + if (!ItemPointerIsValid(&etup->heaptids[0])) { + ItemPointerData ip; + bool found; + + /* Add to deleted list */ + ItemPointerSet(&ip, blkno, offno); + + tidhash_insert(vacuumstate->deleted, ip, &found); + Assert(!found); + } else if (etup->level > highestLevel && + !(entryPoint != NULL && blkno == entryPoint->blkno && offno == entryPoint->offno)) { + /* Keep track of highest non-entry point */ + highestPoint->blkno = blkno; + highestPoint->offno = offno; + highestPoint->level = etup->level; + highestLevel = etup->level; + } + } + + blkno = HnswPageGetOpaque(page)->nextblkno; + + if (updated) + GenericXLogFinish(state); + else + GenericXLogAbort(state); + + UnlockReleaseBuffer(buf); + } +} + +/* + * Check for deleted neighbors + */ +static bool NeedsUpdated(HnswVacuumState *vacuumstate, HnswElement element) +{ + Relation index = vacuumstate->index; + BufferAccessStrategy bas = vacuumstate->bas; + Buffer buf; + Page page; + HnswNeighborTuple ntup; + bool needsUpdated = false; + + buf = ReadBufferExtended(index, MAIN_FORKNUM, element->neighborPage, RBM_NORMAL, bas); + LockBuffer(buf, BUFFER_LOCK_SHARE); + page = BufferGetPage(buf); + ntup = (HnswNeighborTuple)PageGetItem(page, PageGetItemId(page, element->neighborOffno)); + + Assert(HnswIsNeighborTuple(ntup)); + + /* Check neighbors */ + for (int i = 0; i < ntup->count; i++) { + ItemPointer indextid = &ntup->indextids[i]; + + if (!ItemPointerIsValid(indextid)) + continue; + + /* Check if in deleted list */ + if 
(DeletedContains(vacuumstate->deleted, indextid)) { + needsUpdated = true; + break; + } + } + + /* Also update if layer 0 is not full */ + /* This could indicate too many candidates being deleted during insert */ + if (!needsUpdated) + needsUpdated = !ItemPointerIsValid(&ntup->indextids[ntup->count - 1]); + + UnlockReleaseBuffer(buf); + + return needsUpdated; +} + +/* + * Repair graph for a single element + */ +static void RepairGraphElement(HnswVacuumState *vacuumstate, HnswElement element, HnswElement entryPoint) +{ + Relation index = vacuumstate->index; + Buffer buf; + Page page; + GenericXLogState *state; + int m = vacuumstate->m; + int efConstruction = vacuumstate->efConstruction; + FmgrInfo *procinfo = vacuumstate->procinfo; + Oid collation = vacuumstate->collation; + BufferAccessStrategy bas = vacuumstate->bas; + HnswNeighborTuple ntup = vacuumstate->ntup; + Size ntupSize = HNSW_NEIGHBOR_TUPLE_SIZE(element->level, m); + char *base = NULL; + + /* Skip if element is entry point */ + if (entryPoint != NULL && element->blkno == entryPoint->blkno && element->offno == entryPoint->offno) { + return; + } + + /* Init fields */ + HnswInitNeighbors(base, element, m, NULL); + element->heaptidsLength = 0; + + /* Find neighbors for element, skipping itself */ + HnswFindElementNeighbors(base, element, entryPoint, index, procinfo, collation, m, efConstruction, true); + + /* Zero memory for each element */ + MemSet(ntup, 0, HNSW_TUPLE_ALLOC_SIZE); + + /* Update neighbor tuple */ + /* Do this before getting page to minimize locking */ + HnswSetNeighborTuple(base, ntup, element, m); + + /* Get neighbor page */ + buf = ReadBufferExtended(index, MAIN_FORKNUM, element->neighborPage, RBM_NORMAL, bas); + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + state = GenericXLogStart(index); + page = GenericXLogRegisterBuffer(state, buf, 0); + /* Overwrite tuple */ + if (!page_index_tuple_overwrite(page, element->neighborOffno, (Item)ntup, ntupSize)) + elog(ERROR, "failed to add index item to 
\"%s\"", RelationGetRelationName(index)); + + /* Commit */ + GenericXLogFinish(state); + UnlockReleaseBuffer(buf); + + /* Update neighbors */ + HnswUpdateNeighborsOnDisk(index, procinfo, collation, element, m, true, false); +} + +/* + * Repair graph entry point + */ +static void RepairGraphEntryPoint(HnswVacuumState *vacuumstate) +{ + Relation index = vacuumstate->index; + HnswElement highestPoint = &vacuumstate->highestPoint; + HnswElement entryPoint; + MemoryContext oldCtx = MemoryContextSwitchTo(vacuumstate->tmpCtx); + + if (!BlockNumberIsValid(highestPoint->blkno)) + highestPoint = NULL; + + /* + * Repair graph for highest non-entry point. Highest point may be outdated + * due to inserts that happen during and after RemoveHeapTids. + */ + if (highestPoint != NULL) { + /* Get a shared lock */ + LockPage(index, HNSW_UPDATE_LOCK, ShareLock); + + /* Load element */ + HnswLoadElement(highestPoint, NULL, NULL, index, vacuumstate->procinfo, vacuumstate->collation, true, NULL); + + /* Repair if needed */ + if (NeedsUpdated(vacuumstate, highestPoint)) + RepairGraphElement(vacuumstate, highestPoint, HnswGetEntryPoint(index)); + + /* Release lock */ + UnlockPage(index, HNSW_UPDATE_LOCK, ShareLock); + } + + /* Prevent concurrent inserts when possibly updating entry point */ + LockPage(index, HNSW_UPDATE_LOCK, ExclusiveLock); + + /* Get latest entry point */ + entryPoint = HnswGetEntryPoint(index); + if (entryPoint != NULL) { + ItemPointerData epData; + + ItemPointerSet(&epData, entryPoint->blkno, entryPoint->offno); + + if (DeletedContains(vacuumstate->deleted, &epData)) { + /* + * Replace the entry point with the highest point. If highest + * point is outdated and empty, the entry point will be empty + * until an element is repaired. + */ + HnswUpdateMetaPage(index, HNSW_UPDATE_ENTRY_ALWAYS, highestPoint, InvalidBlockNumber, MAIN_FORKNUM, false); + } else { + /* + * Repair the entry point with the highest point. 
If highest point + * is outdated, this can remove connections at higher levels in + * the graph until they are repaired, but this should be fine. + */ + HnswLoadElement(entryPoint, NULL, NULL, index, vacuumstate->procinfo, vacuumstate->collation, true, NULL); + + if (NeedsUpdated(vacuumstate, entryPoint)) { + /* Reset neighbors from previous update */ + if (highestPoint != NULL) + HnswPtrStore((char *)NULL, highestPoint->neighbors, (HnswNeighborArrayPtr *)NULL); + + RepairGraphElement(vacuumstate, entryPoint, highestPoint); + } + } + } + + /* Release lock */ + UnlockPage(index, HNSW_UPDATE_LOCK, ExclusiveLock); + + /* Reset memory context */ + MemoryContextSwitchTo(oldCtx); + MemoryContextReset(vacuumstate->tmpCtx); +} + +/* + * Repair graph for all elements + */ +static void RepairGraph(HnswVacuumState *vacuumstate) +{ + Relation index = vacuumstate->index; + BufferAccessStrategy bas = vacuumstate->bas; + BlockNumber blkno = HNSW_HEAD_BLKNO; + + /* + * Wait for inserts to complete. Inserts before this point may have + * neighbors about to be deleted. Inserts after this point will not. 
+ */ + LockPage(index, HNSW_UPDATE_LOCK, ExclusiveLock); + UnlockPage(index, HNSW_UPDATE_LOCK, ExclusiveLock); + + /* Repair entry point first */ + RepairGraphEntryPoint(vacuumstate); + + while (BlockNumberIsValid(blkno)) { + Buffer buf; + Page page; + OffsetNumber offno; + OffsetNumber maxoffno; + List *elements = NIL; + ListCell *lc2; + MemoryContext oldCtx; + + vacuum_delay_point(); + + oldCtx = MemoryContextSwitchTo(vacuumstate->tmpCtx); + + buf = ReadBufferExtended(index, MAIN_FORKNUM, blkno, RBM_NORMAL, bas); + LockBuffer(buf, BUFFER_LOCK_SHARE); + page = BufferGetPage(buf); + maxoffno = PageGetMaxOffsetNumber(page); + + /* Load items into memory to minimize locking */ + for (offno = FirstOffsetNumber; offno <= maxoffno; offno = OffsetNumberNext(offno)) { + HnswElementTuple etup = (HnswElementTuple)PageGetItem(page, PageGetItemId(page, offno)); + HnswElement element; + + /* Skip neighbor tuples */ + if (!HnswIsElementTuple(etup)) + continue; + + /* Skip updating neighbors if being deleted */ + if (!ItemPointerIsValid(&etup->heaptids[0])) + continue; + + /* Create an element */ + element = HnswInitElementFromBlock(blkno, offno); + HnswLoadElementFromTuple(element, etup, false, true); + + elements = lappend(elements, element); + } + + blkno = HnswPageGetOpaque(page)->nextblkno; + + UnlockReleaseBuffer(buf); + + /* Update neighbor pages */ + foreach (lc2, elements) { + HnswElement element = (HnswElement)lfirst(lc2); + HnswElement entryPoint; + LOCKMODE lockmode = ShareLock; + + /* Check if any neighbors point to deleted values */ + if (!NeedsUpdated(vacuumstate, element)) + continue; + + /* Get a shared lock */ + LockPage(index, HNSW_UPDATE_LOCK, lockmode); + + /* Refresh entry point for each element */ + entryPoint = HnswGetEntryPoint(index); + /* Prevent concurrent inserts when likely updating entry point */ + if (entryPoint == NULL || element->level > entryPoint->level) { + /* Release shared lock */ + UnlockPage(index, HNSW_UPDATE_LOCK, lockmode); + + /* Get 
exclusive lock */ + lockmode = ExclusiveLock; + LockPage(index, HNSW_UPDATE_LOCK, lockmode); + + /* Get latest entry point after lock is acquired */ + entryPoint = HnswGetEntryPoint(index); + } + + /* Repair connections */ + RepairGraphElement(vacuumstate, element, entryPoint); + + /* + * Update metapage if needed. Should only happen if entry point + * was replaced and highest point was outdated. + */ + if (entryPoint == NULL || element->level > entryPoint->level) + HnswUpdateMetaPage(index, HNSW_UPDATE_ENTRY_GREATER, element, InvalidBlockNumber, MAIN_FORKNUM, false); + + /* Release lock */ + UnlockPage(index, HNSW_UPDATE_LOCK, lockmode); + } + + /* Reset memory context */ + MemoryContextSwitchTo(oldCtx); + MemoryContextReset(vacuumstate->tmpCtx); + } +} + +/* + * Mark items as deleted + */ +static void MarkDeleted(HnswVacuumState *vacuumstate) +{ + BlockNumber blkno = HNSW_HEAD_BLKNO; + BlockNumber insertPage = InvalidBlockNumber; + Relation index = vacuumstate->index; + BufferAccessStrategy bas = vacuumstate->bas; + + /* + * Wait for index scans to complete. Scans before this point may contain + * tuples about to be deleted. Scans after this point will not, since the + * graph has been repaired. 
+ */ + LockPage(index, HNSW_SCAN_LOCK, ExclusiveLock); + UnlockPage(index, HNSW_SCAN_LOCK, ExclusiveLock); + + while (BlockNumberIsValid(blkno)) { + Buffer buf; + Page page; + GenericXLogState *state; + OffsetNumber offno; + OffsetNumber maxoffno; + + vacuum_delay_point(); + + buf = ReadBufferExtended(index, MAIN_FORKNUM, blkno, RBM_NORMAL, bas); + + /* + * ambulkdelete cannot delete entries from pages that are pinned by + * other backends + * + * https://www.postgresql.org/docs/current/index-locking.html + */ + LockBufferForCleanup(buf); + + state = GenericXLogStart(index); + page = GenericXLogRegisterBuffer(state, buf, 0); + maxoffno = PageGetMaxOffsetNumber(page); + + /* Update element and neighbors together */ + for (offno = FirstOffsetNumber; offno <= maxoffno; offno = OffsetNumberNext(offno)) { + HnswElementTuple etup = (HnswElementTuple)PageGetItem(page, PageGetItemId(page, offno)); + HnswNeighborTuple ntup; + Buffer nbuf; + Page npage; + BlockNumber neighborPage; + OffsetNumber neighborOffno; + + /* Skip neighbor tuples */ + if (!HnswIsElementTuple(etup)) + continue; + + /* Skip deleted tuples */ + if (etup->deleted) { + /* Set to first free page */ + if (!BlockNumberIsValid(insertPage)) + insertPage = blkno; + + continue; + } + + /* Skip live tuples */ + if (ItemPointerIsValid(&etup->heaptids[0])) + continue; + + /* Get neighbor page */ + neighborPage = ItemPointerGetBlockNumber(&etup->neighbortid); + neighborOffno = ItemPointerGetOffsetNumber(&etup->neighbortid); + + if (neighborPage == blkno) { + nbuf = buf; + npage = page; + } else { + nbuf = ReadBufferExtended(index, MAIN_FORKNUM, neighborPage, RBM_NORMAL, bas); + LockBuffer(nbuf, BUFFER_LOCK_EXCLUSIVE); + npage = GenericXLogRegisterBuffer(state, nbuf, 0); + } + + ntup = (HnswNeighborTuple)PageGetItem(npage, PageGetItemId(npage, neighborOffno)); + + /* Overwrite element */ + etup->deleted = 1; + MemSet(&etup->data, 0, VARSIZE_ANY(&etup->data)); + + /* Overwrite neighbors */ + for (int i = 0; i < 
ntup->count; i++) + ItemPointerSetInvalid(&ntup->indextids[i]); + + /* + * We modified the tuples in place, no need to call + * page_index_tuple_overwrite + */ + + /* Commit */ + GenericXLogFinish(state); + if (nbuf != buf) + UnlockReleaseBuffer(nbuf); + + /* Set to first free page */ + if (!BlockNumberIsValid(insertPage)) + insertPage = blkno; + + /* Prepare new xlog */ + state = GenericXLogStart(index); + page = GenericXLogRegisterBuffer(state, buf, 0); + } + + blkno = HnswPageGetOpaque(page)->nextblkno; + + GenericXLogAbort(state); + UnlockReleaseBuffer(buf); + } + + /* Update insert page last, after everything has been marked as deleted */ + HnswUpdateMetaPage(index, 0, NULL, insertPage, MAIN_FORKNUM, false); +} + +/* + * Initialize the vacuum state + */ +static void InitVacuumState(HnswVacuumState *vacuumstate, IndexVacuumInfo *info, IndexBulkDeleteResult *stats, + IndexBulkDeleteCallback callback, void *callbackState) +{ + Relation index = info->index; + + if (stats == NULL) + stats = (IndexBulkDeleteResult *)palloc0(sizeof(IndexBulkDeleteResult)); + + vacuumstate->index = index; + vacuumstate->stats = stats; + vacuumstate->callback = callback; + vacuumstate->callbackState = callbackState; + vacuumstate->efConstruction = HnswGetEfConstruction(index); + vacuumstate->bas = GetAccessStrategy(BAS_BULKREAD); + vacuumstate->procinfo = index_getprocinfo(index, 1, HNSW_DISTANCE_PROC); + vacuumstate->collation = index->rd_indcollation[0]; + vacuumstate->ntup = (HnswNeighborTuple)palloc0(HNSW_TUPLE_ALLOC_SIZE); + vacuumstate->tmpCtx = + AllocSetContextCreate(CurrentMemoryContext, "Hnsw vacuum temporary context", ALLOCSET_DEFAULT_SIZES); + + /* Get m from metapage */ + HnswGetMetaPageInfo(index, &vacuumstate->m, NULL); + + /* Create hash table */ + vacuumstate->deleted = tidhash_create(CurrentMemoryContext, 256, NULL); +} + +/* + * Free resources + */ +static void FreeVacuumState(HnswVacuumState *vacuumstate) +{ + tidhash_destroy(vacuumstate->deleted); + 
FreeAccessStrategy(vacuumstate->bas); + pfree(vacuumstate->ntup); + MemoryContextDelete(vacuumstate->tmpCtx); +} + +/* + * Bulk delete tuples from the index + */ +IndexBulkDeleteResult *hnswbulkdelete_internal(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, + IndexBulkDeleteCallback callback, void *callbackState) +{ + HnswVacuumState vacuumstate; + + InitVacuumState(&vacuumstate, info, stats, callback, callbackState); + + /* Pass 1: Remove heap TIDs */ + RemoveHeapTids(&vacuumstate); + + /* Pass 2: Repair graph */ + RepairGraph(&vacuumstate); + + /* Pass 3: Mark as deleted */ + MarkDeleted(&vacuumstate); + + FreeVacuumState(&vacuumstate); + + return vacuumstate.stats; +} + +/* + * Clean up after a VACUUM operation + */ +IndexBulkDeleteResult *hnswvacuumcleanup_internal(IndexVacuumInfo *info, IndexBulkDeleteResult *stats) +{ + Relation rel = info->index; + + if (info->analyze_only) + return stats; + + /* stats is NULL if ambulkdelete not called */ + /* OK to return NULL if index not changed */ + if (stats == NULL) + return NULL; + + stats->num_pages = RelationGetNumberOfBlocks(rel); + + return stats; +} diff --git a/src/gausskernel/storage/access/datavec/ivfbuild.cpp b/src/gausskernel/storage/access/datavec/ivfbuild.cpp new file mode 100644 index 0000000000..3c4a8f4323 --- /dev/null +++ b/src/gausskernel/storage/access/datavec/ivfbuild.cpp @@ -0,0 +1,881 @@ +#include "postgres.h" + +#include + +#include "access/tableam.h" +#include "access/xact.h" +#include "access/datavec/bitvec.h" +#include "catalog/index.h" +#include "access/datavec/halfvec.h" +#include "access/datavec/ivfflat.h" +#include "miscadmin.h" +#include "storage/buf/bufmgr.h" +#include "tcop/tcopprot.h" +#include "utils/memutils.h" +#include "access/datavec/vector.h" +#include "postmaster/bgworker.h" +#include "commands/vacuum.h" + +#if PG_VERSION_NUM >= 140000 +#include "utils/backend_progress.h" +#else +#include "pgstat.h" +#endif + +#if PG_VERSION_NUM >= 130000 +#define CALLBACK_ITEM_POINTER 
ItemPointer tid +#else +#define CALLBACK_ITEM_POINTER HeapTuple hup +#endif + +#if PG_VERSION_NUM >= 140000 +#include "utils/backend_status.h" +#include "utils/wait_event.h" +#endif + +#define PARALLEL_KEY_IVFFLAT_SHARED UINT64CONST(0xA000000000000001) +#define PARALLEL_KEY_TUPLESORT UINT64CONST(0xA000000000000002) +#define PARALLEL_KEY_IVFFLAT_CENTERS UINT64CONST(0xA000000000000003) +#define PARALLEL_KEY_QUERY_TEXT UINT64CONST(0xA000000000000004) + +/* + * Add sample + */ +static void AddSample(Datum *values, IvfflatBuildState *buildstate) +{ + VectorArray samples = buildstate->samples; + int targsamples = samples->maxlen; + + /* Detoast once for all calls */ + Datum value = PointerGetDatum(PG_DETOAST_DATUM(values[0])); + + /* + * Normalize with KMEANS_NORM_PROC since spherical distance function + * expects unit vectors + */ + if (buildstate->kmeansnormprocinfo != NULL) { + if (!IvfflatCheckNorm(buildstate->kmeansnormprocinfo, buildstate->collation, value)) { + return; + } + + value = IvfflatNormValue(buildstate->typeInfo, buildstate->collation, value); + } + + if (samples->length < targsamples) { + VectorArraySet(samples, samples->length, DatumGetPointer(value)); + samples->length++; + } else { + if (buildstate->rowstoskip < 0) { + buildstate->rowstoskip = anl_get_next_S(samples->length, targsamples, &buildstate->rstate); + } + + if (buildstate->rowstoskip <= 0) { + int k = static_cast(targsamples * anl_random_fract()); + Assert(k >= 0 && k < targsamples); + VectorArraySet(samples, k, DatumGetPointer(value)); + } + + buildstate->rowstoskip -= 1; + } +} + +/* + * Callback for sampling + */ +static void SampleCallback(Relation index, CALLBACK_ITEM_POINTER, Datum *values, const bool *isnull, bool tupleIsAlive, + void *state) +{ + IvfflatBuildState *buildstate = (IvfflatBuildState *)state; + MemoryContext oldCtx; + + /* Skip nulls */ + if (isnull[0]) { + return; + } + + /* Use memory context since detoast can allocate */ + oldCtx = 
MemoryContextSwitchTo(buildstate->tmpCtx); + + /* Add sample */ + AddSample(values, buildstate); + + /* Reset memory context */ + MemoryContextSwitchTo(oldCtx); + MemoryContextReset(buildstate->tmpCtx); +} + +/* + * Sample rows with same logic as ANALYZE + */ +static void SampleRows(IvfflatBuildState *buildstate) +{ + int targsamples = buildstate->samples->maxlen; + BlockNumber totalblocks = RelationGetNumberOfBlocks(buildstate->heap); + + buildstate->rowstoskip = -1; + + BlockSampler_Init(&buildstate->bs, totalblocks, targsamples); + + buildstate->rstate = anl_init_selection_state(targsamples); + while (BlockSampler_HasMore(&buildstate->bs)) { + BlockNumber targblock = BlockSampler_Next(&buildstate->bs); + + tableam_index_build_scan(buildstate->heap, buildstate->index, buildstate->indexInfo, false, SampleCallback, + (void *)buildstate, NULL, targblock, 1); + } +} + +/* + * Add tuple to sort + */ +static void AddTupleToSort(Relation index, ItemPointer tid, Datum *values, IvfflatBuildState *buildstate) +{ + double distance; + double minDistance = DBL_MAX; + int closestCenter = 0; + VectorArray centers = buildstate->centers; + TupleTableSlot *slot = buildstate->slot; + + /* Detoast once for all calls */ + Datum value = PointerGetDatum(PG_DETOAST_DATUM(values[0])); + + /* Normalize if needed */ + if (buildstate->normprocinfo != NULL) { + if (!IvfflatCheckNorm(buildstate->normprocinfo, buildstate->collation, value)) { + return; + } + + value = IvfflatNormValue(buildstate->typeInfo, buildstate->collation, value); + } + + /* Find the list that minimizes the distance */ + for (int i = 0; i < centers->length; i++) { + distance = DatumGetFloat8(FunctionCall2Coll(buildstate->procinfo, buildstate->collation, value, + PointerGetDatum(VectorArrayGet(centers, i)))); + if (distance < minDistance) { + minDistance = distance; + closestCenter = i; + } + } + +#ifdef IVFFLAT_KMEANS_DEBUG + buildstate->inertia += minDistance; + buildstate->listSums[closestCenter] += minDistance; + 
buildstate->listCounts[closestCenter]++; +#endif + + /* Create a virtual tuple */ + ExecClearTuple(slot); + slot->tts_values[0] = Int32GetDatum(closestCenter); + slot->tts_isnull[0] = false; + slot->tts_values[1] = PointerGetDatum(tid); + slot->tts_isnull[1] = false; + slot->tts_values[2] = value; + slot->tts_isnull[2] = false; + ExecStoreVirtualTuple(slot); + + /* + * Add tuple to sort + * + * tuplesort_puttupleslot comment: Input data is always copied; the caller + * need not save it. + */ + tuplesort_puttupleslot(buildstate->sortstate, slot); + + buildstate->indtuples++; +} + +/* + * Callback for table_index_build_scan + */ +static void BuildCallback(Relation index, CALLBACK_ITEM_POINTER, Datum *values, const bool *isnull, bool tupleIsAlive, + void *state) +{ + IvfflatBuildState *buildstate = (IvfflatBuildState *)state; + MemoryContext oldCtx; + +#if PG_VERSION_NUM < 130000 + ItemPointer tid = &hup->t_self; +#endif + + /* Skip nulls */ + if (isnull[0]) { + return; + } + + /* Use memory context since detoast can allocate */ + oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx); + + /* Add tuple to sort */ + AddTupleToSort(index, tid, values, buildstate); + + /* Reset memory context */ + MemoryContextSwitchTo(oldCtx); + MemoryContextReset(buildstate->tmpCtx); +} + +/* + * Get index tuple from sort state + */ +static inline void GetNextTuple(Tuplesortstate *sortstate, TupleDesc tupdesc, TupleTableSlot *slot, IndexTuple *itup, + int *list) +{ + Datum value; + bool isnull; + + if (tuplesort_gettupleslot(sortstate, true, slot, NULL)) { + *list = DatumGetInt32(heap_slot_getattr(slot, 1, &isnull)); + value = heap_slot_getattr(slot, 3, &isnull); + + /* Form the index tuple */ + *itup = index_form_tuple(tupdesc, &value, &isnull); + (*itup)->t_tid = *((ItemPointer)DatumGetPointer(heap_slot_getattr(slot, 2, &isnull))); + } else { + *list = -1; + } +} + +/* + * Create initial entry pages + */ +static void InsertTuples(Relation index, IvfflatBuildState *buildstate, ForkNumber 
forkNum) +{ + int list; + IndexTuple itup = NULL; /* silence compiler warning */ + int64 inserted = 0; + + TupleTableSlot *slot = MakeSingleTupleTableSlot(buildstate->tupdesc); + TupleDesc tupdesc = RelationGetDescr(index); + + GetNextTuple(buildstate->sortstate, tupdesc, slot, &itup, &list); + + for (int i = 0; i < buildstate->centers->length; i++) { + Buffer buf; + Page page; + GenericXLogState *state; + BlockNumber startPage; + BlockNumber insertPage; + + /* Can take a while, so ensure we can interrupt */ + /* Needs to be called when no buffer locks are held */ + CHECK_FOR_INTERRUPTS(); + + buf = IvfflatNewBuffer(index, forkNum); + IvfflatInitRegisterPage(index, &buf, &page, &state); + + startPage = BufferGetBlockNumber(buf); + + /* Get all tuples for list */ + while (list == i) { + /* Check for free space */ + Size itemsz = MAXALIGN(IndexTupleSize(itup)); + if (PageGetFreeSpace(page) < itemsz) + IvfflatAppendPage(index, &buf, &page, &state, forkNum); + + /* Add the item */ + if (PageAddItem(page, (Item)itup, itemsz, InvalidOffsetNumber, false, false) == InvalidOffsetNumber) + elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index)); + + pfree(itup); + + UpdateProgress(PROGRESS_CREATEIDX_TUPLES_DONE, ++inserted); + + GetNextTuple(buildstate->sortstate, tupdesc, slot, &itup, &list); + } + + insertPage = BufferGetBlockNumber(buf); + + IvfflatCommitBuffer(buf, state); + + /* Set the start and insert pages */ + IvfflatUpdateList(index, buildstate->listInfo[i], insertPage, InvalidBlockNumber, startPage, forkNum); + } +} + +/* + * Initialize the build state + */ +static void InitBuildState(IvfflatBuildState *buildstate, Relation heap, Relation index, IndexInfo *indexInfo) +{ + buildstate->heap = heap; + buildstate->index = index; + buildstate->indexInfo = indexInfo; + buildstate->typeInfo = IvfflatGetTypeInfo(index); + + buildstate->lists = IvfflatGetLists(index); + buildstate->dimensions = TupleDescAttr(index->rd_att, 0)->atttypmod; + + /* 
Disallow varbit since require fixed dimensions */ + if (TupleDescAttr(index->rd_att, 0)->atttypid == VARBITOID) + elog(ERROR, "type not supported for ivfflat index"); + + /* Require column to have dimensions to be indexed */ + if (buildstate->dimensions < 0) + elog(ERROR, "column does not have dimensions"); + + if (buildstate->dimensions > buildstate->typeInfo->maxDimensions) + elog(ERROR, "column cannot have more than %d dimensions for ivfflat index", + buildstate->typeInfo->maxDimensions); + + buildstate->reltuples = 0; + buildstate->indtuples = 0; + + /* Get support functions */ + buildstate->procinfo = index_getprocinfo(index, 1, IVFFLAT_DISTANCE_PROC); + buildstate->normprocinfo = IvfflatOptionalProcInfo(index, IVFFLAT_NORM_PROC); + buildstate->kmeansnormprocinfo = IvfflatOptionalProcInfo(index, IVFFLAT_KMEANS_NORM_PROC); + buildstate->collation = index->rd_indcollation[0]; + + /* Require more than one dimension for spherical k-means */ + if (buildstate->kmeansnormprocinfo != NULL && buildstate->dimensions == 1) + elog(ERROR, "dimensions must be greater than one for this opclass"); + + /* Create tuple description for sorting */ + buildstate->tupdesc = CreateTemplateTupleDesc(3, false); + TupleDescInitEntry(buildstate->tupdesc, (AttrNumber)1, "list", INT4OID, -1, 0); + TupleDescInitEntry(buildstate->tupdesc, (AttrNumber)2, "tid", TIDOID, -1, 0); + TupleDescInitEntry(buildstate->tupdesc, (AttrNumber)3, "vector", RelationGetDescr(index)->attrs[0].atttypid, -1, 0); + + buildstate->slot = MakeSingleTupleTableSlot(buildstate->tupdesc); + + buildstate->centers = VectorArrayInit(buildstate->lists, buildstate->dimensions, + buildstate->typeInfo->itemSize(buildstate->dimensions)); + buildstate->listInfo = (ListInfo *)palloc(sizeof(ListInfo) * buildstate->lists); + + buildstate->tmpCtx = + AllocSetContextCreate(CurrentMemoryContext, "Ivfflat build temporary context", ALLOCSET_DEFAULT_SIZES); + +#ifdef IVFFLAT_KMEANS_DEBUG + buildstate->inertia = 0; + buildstate->listSums 
= palloc0(sizeof(double) * buildstate->lists);
+    buildstate->listCounts = palloc0(sizeof(int) * buildstate->lists);
+#endif
+    buildstate->ivfleader = NULL;
+}
+
+/*
+ * Free resources
+ *
+ * Releases the centers array, the per-list location array, the optional
+ * k-means debug arrays, and the temporary build memory context.
+ */
+static void FreeBuildState(IvfflatBuildState *buildstate)
+{
+    VectorArrayFree(buildstate->centers);
+    pfree(buildstate->listInfo);
+
+#ifdef IVFFLAT_KMEANS_DEBUG
+    pfree(buildstate->listSums);
+    pfree(buildstate->listCounts);
+#endif
+
+    MemoryContextDelete(buildstate->tmpCtx);
+}
+
+/*
+ * Compute centers
+ *
+ * Samples rows from the heap (when there is one), runs k-means over the
+ * samples to fill buildstate->centers, then frees the samples.
+ */
+static void ComputeCenters(IvfflatBuildState *buildstate)
+{
+    int numSamples;
+
+    /* Target 50 samples per list, with at least 10000 samples */
+    /* The number of samples has a large effect on index build time */
+    numSamples = buildstate->lists * 50;
+    if (numSamples < 10000) {
+        numSamples = 10000;
+    }
+
+    /* Skip samples for unlogged table (no heap is passed in that case) */
+    if (buildstate->heap == NULL) {
+        numSamples = 1;
+    }
+
+    /* Sample rows */
+    /* TODO Ensure within maintenance_work_mem */
+    buildstate->samples = VectorArrayInit(numSamples, buildstate->dimensions, buildstate->centers->itemsize);
+    if (buildstate->heap != NULL) {
+        SampleRows(buildstate);
+        /* Fewer samples than lists: warn, but still build the index */
+        if (buildstate->samples->length < buildstate->lists) {
+            ereport(NOTICE, (errmsg("ivfflat index created with little data"), errdetail("This will cause low recall."),
+                             errhint("Drop the index until the table has more data.")));
+        }
+    }
+
+    /* Calculate centers */
+    IvfflatBench("k-means",
+                 IvfflatKmeans(buildstate->index, buildstate->samples, buildstate->centers, buildstate->typeInfo));
+
+    /* Free samples before we allocate more memory */
+    VectorArrayFree(buildstate->samples);
+}
+
+/*
+ * Create the metapage
+ *
+ * Allocates a new buffer in the given fork and stores the magic number,
+ * version, dimensions and list count in it.
+ */
+static void CreateMetaPage(Relation index, int dimensions, int lists, ForkNumber forkNum)
+{
+    Buffer buf;
+    Page page;
+    GenericXLogState *state;
+    IvfflatMetaPage metap;
+
+    buf = IvfflatNewBuffer(index, forkNum);
+    IvfflatInitRegisterPage(index, &buf, &page, &state);
+
+    /* Set metapage data */
+    metap =
IvfflatPageGetMeta(page); + metap->magicNumber = IVFFLAT_MAGIC_NUMBER; + metap->version = IVFFLAT_VERSION; + metap->dimensions = dimensions; + metap->lists = lists; + ((PageHeader)page)->pd_lower = ((char *)metap + sizeof(IvfflatMetaPageData)) - (char *)page; + + IvfflatCommitBuffer(buf, state); +} + +/* + * Create list pages + */ +static void CreateListPages(Relation index, VectorArray centers, int dimensions, int lists, ForkNumber forkNum, + ListInfo **listInfo) +{ + Buffer buf; + Page page; + GenericXLogState *state; + Size listSize; + IvfflatList list; + errno_t rc = EOK; + + listSize = MAXALIGN(IVFFLAT_LIST_SIZE(centers->itemsize)); + list = (IvfflatList)palloc0(listSize); + + buf = IvfflatNewBuffer(index, forkNum); + IvfflatInitRegisterPage(index, &buf, &page, &state); + + for (int i = 0; i < lists; i++) { + OffsetNumber offno; + + /* Zero memory for each list */ + MemSet(list, 0, listSize); + + /* Load list */ + list->startPage = InvalidBlockNumber; + list->insertPage = InvalidBlockNumber; + rc = memcpy_s(&list->center, VARSIZE_ANY(VectorArrayGet(centers, i)), VectorArrayGet(centers, i), VARSIZE_ANY(VectorArrayGet(centers, i))); + securec_check(rc, "\0", "\0"); + + /* Ensure free space */ + if (PageGetFreeSpace(page) < listSize) + IvfflatAppendPage(index, &buf, &page, &state, forkNum); + + /* Add the item */ + offno = PageAddItem(page, (Item)list, listSize, InvalidOffsetNumber, false, false); + if (offno == InvalidOffsetNumber) + elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index)); + + /* Save location info */ + (*listInfo)[i].blkno = BufferGetBlockNumber(buf); + (*listInfo)[i].offno = offno; + } + + IvfflatCommitBuffer(buf, state); + + pfree(list); +} + +#ifdef IVFFLAT_KMEANS_DEBUG +/* + * Print k-means metrics + */ +static void PrintKmeansMetrics(IvfflatBuildState *buildstate) +{ + elog(INFO, "inertia: %.3e", buildstate->inertia); + + /* Calculate Davies-Bouldin index */ + if (buildstate->lists > 1) { + double db = 0.0; + + /* 
Calculate average distance */ + for (int i = 0; i < buildstate->lists; i++) { + if (buildstate->listCounts[i] > 0) + buildstate->listSums[i] /= buildstate->listCounts[i]; + } + + for (int i = 0; i < buildstate->lists; i++) { + double max = 0.0; + double distance; + + for (int j = 0; j < buildstate->lists; j++) { + if (j == i) + continue; + + distance = DatumGetFloat8(FunctionCall2Coll(buildstate->procinfo, buildstate->collation, + PointerGetDatum(VectorArrayGet(buildstate->centers, i)), + PointerGetDatum(VectorArrayGet(buildstate->centers, j)))); + distance = (buildstate->listSums[i] + buildstate->listSums[j]) / distance; + + if (distance > max) + max = distance; + } + db += max; + } + db /= buildstate->lists; + elog(INFO, "davies-bouldin: %.3f", db); + } +} +#endif + +/* + * Within leader, wait for end of heap scan + */ +static double ParallelHeapScan(IvfflatBuildState *buildstate) +{ + IvfflatShared *ivfshared = buildstate->ivfleader->ivfshared; + double reltuples; + + BgworkerListWaitFinish(&buildstate->ivfleader->nparticipanttuplesorts); + pg_memory_barrier(); + + /* all done, update to the actual number of participants */ + if (ivfshared->sharedsort != NULL) { + ivfshared->sharedsort->actualParticipants = buildstate->ivfleader->nparticipanttuplesorts; + } + + buildstate->indtuples = ivfshared->indtuples; + reltuples = ivfshared->reltuples; +#ifdef IVFFLAT_KMEANS_DEBUG + buildstate->inertia = ivfshared->inertia; +#endif + + return reltuples; +} + +/* + * Perform a worker's portion of a parallel sort + */ +static void IvfflatParallelScanAndSort(IvfflatSpool *ivfspool, IvfflatShared *ivfshared, Vector *ivfcenters) +{ + SortCoordinate coordinate; + IvfflatBuildState buildstate; + TableScanDesc scan; + double reltuples; + IndexInfo *indexInfo; + errno_t rc = EOK; + + /* Sort options, which must match AssignTuples */ + AttrNumber attNums[] = {1}; + Oid sortOperators[] = {INT4LTOID}; + Oid sortCollations[] = {InvalidOid}; + bool nullsFirstFlags[] = {false}; + + /* 
Initialize local tuplesort coordination state */ + coordinate = (SortCoordinate)palloc0(sizeof(SortCoordinateData)); + coordinate->isWorker = true; + coordinate->nParticipants = -1; + coordinate->sharedsort = ivfshared->sharedsort; + + int sortmem = ivfshared->workmem / ivfshared->scantuplesortstates; + + /* Join parallel scan */ + indexInfo = BuildIndexInfo(ivfspool->index); + indexInfo->ii_Concurrent = false; + InitBuildState(&buildstate, ivfspool->heap, ivfspool->index, indexInfo); + rc = memcpy_s(buildstate.centers->items, VECTOR_SIZE(buildstate.centers->dim) * buildstate.centers->maxlen, ivfcenters, VECTOR_SIZE(buildstate.centers->dim) * buildstate.centers->maxlen); + securec_check(rc, "\0", "\0"); + buildstate.centers->length = buildstate.centers->maxlen; + ivfspool->sortstate = tuplesort_begin_heap(buildstate.tupdesc, 1, attNums, sortOperators, sortCollations, + nullsFirstFlags, sortmem, false, 0, 0, 1, coordinate); + buildstate.sortstate = ivfspool->sortstate; + + scan = tableam_scan_begin_parallel(ivfspool->heap, &ivfshared->heapdesc); + reltuples = tableam_index_build_scan(ivfspool->heap, ivfspool->index, indexInfo, true, BuildCallback, + (void *)&buildstate, scan); + + /* Execute this worker's part of the sort */ + tuplesort_performsort(ivfspool->sortstate); + + /* Record statistics */ + SpinLockAcquire(&ivfshared->mutex); + ivfshared->nparticipantsdone++; + ivfshared->reltuples += reltuples; + ivfshared->indtuples += buildstate.indtuples; +#ifdef IVFFLAT_KMEANS_DEBUG + ivfshared->inertia += buildstate.inertia; +#endif + SpinLockRelease(&ivfshared->mutex); + + /* We can end tuplesorts immediately */ + tuplesort_end(ivfspool->sortstate); + + FreeBuildState(&buildstate); +} + +/* + * Perform work within a launched parallel process + */ +void IvfflatParallelBuildMain(const BgWorkerContext *bwc) +{ + IvfflatSpool *ivfspool; + IvfflatShared *ivfshared; + Relation heapRel; + Relation indexRel; + + ivfshared = (IvfflatShared *)bwc->bgshared; + + /* Open 
relations within worker */ + heapRel = heap_open(ivfshared->heaprelid, NoLock); + indexRel = index_open(ivfshared->indexrelid, NoLock); + + /* Initialize worker's own spool */ + ivfspool = (IvfflatSpool *)palloc0(sizeof(IvfflatSpool)); + ivfspool->heap = heapRel; + ivfspool->index = indexRel; + + IvfflatParallelScanAndSort(ivfspool, ivfshared, ivfshared->ivfcenters); + + /* Close relations within worker */ + index_close(indexRel, NoLock); + heap_close(heapRel, NoLock); +} + +/* + * End parallel build + */ +static void IvfflatParallelCleanup(const BgWorkerContext *bwc) +{ + IvfflatShared *ivfshared = (IvfflatShared *)bwc->bgshared; + + /* delete shared fileset */ + Assert(ivfshared->sharedsort); + SharedFileSetDeleteAll(&ivfshared->sharedsort->fileset); + pfree_ext(ivfshared->sharedsort); +} + +static IvfflatShared *IvfflatParallelInitshared(IvfflatBuildState *buildstate, int workmem, int scantuplesortstates) +{ + IvfflatShared *ivfshared; + Sharedsort *sharedsort; + Size estsort; + Size estcenters; + char *ivfcenters; + + /* Store shared build state, for which we reserved space */ + ivfshared = (IvfflatShared *)MemoryContextAllocZero(INSTANCE_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_STORAGE), + sizeof(IvfflatShared)); + + /* Initialize immutable state */ + ivfshared->heaprelid = RelationGetRelid(buildstate->heap); + ivfshared->indexrelid = RelationGetRelid(buildstate->index); + ivfshared->scantuplesortstates = scantuplesortstates; + SpinLockInit(&ivfshared->mutex); + + /* Initialize mutable state */ + ivfshared->nparticipantsdone = 0; + ivfshared->reltuples = 0; + ivfshared->indtuples = 0; + ivfshared->workmem = workmem; +#ifdef IVFFLAT_KMEANS_DEBUG + ivfshared->inertia = 0; +#endif + HeapParallelscanInitialize(&ivfshared->heapdesc, buildstate->heap); + + /* Store shared tuplesort-private state, for which we reserved space */ + estsort = tuplesort_estimate_shared(scantuplesortstates); + sharedsort = (Sharedsort 
*)MemoryContextAllocZero(INSTANCE_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_STORAGE), estsort); + tuplesort_initialize_shared(sharedsort, scantuplesortstates); + ivfshared->sharedsort = sharedsort; + + estcenters = VECTOR_SIZE(buildstate->dimensions) * buildstate->lists; + ivfcenters = (char *)MemoryContextAllocZero(INSTANCE_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_STORAGE), estcenters); + errno_t rc = memcpy_s(ivfcenters, estcenters, buildstate->centers->items, estcenters); + securec_check(rc, "\0", "\0"); + ivfshared->ivfcenters = (Vector *)ivfcenters; + + return ivfshared; +} + +/* + * Begin parallel build + */ +static void IvfflatBeginParallel(IvfflatBuildState *buildstate, int request, int workmem) +{ + IvfflatShared *ivfshared; + IvfflatLeader *ivfleader = (IvfflatLeader *)palloc0(sizeof(IvfflatLeader)); + + Assert(request > 0); + ivfshared = IvfflatParallelInitshared(buildstate, workmem, request); + + /* Launch workers, saving status for leader/caller */ + ivfleader->nparticipanttuplesorts = + LaunchBackgroundWorkers(request, ivfshared, IvfflatParallelBuildMain, IvfflatParallelCleanup); + + /* If no workers were successfully launched, back out (do serial build) */ + if (ivfleader->nparticipanttuplesorts == 0) { + pfree_ext(ivfshared); + pfree_ext(ivfleader); + return; + } + + /* Log participants */ + ereport(DEBUG1, (errmsg("using %d parallel workers", ivfleader->nparticipanttuplesorts))); + + ivfleader->ivfshared = ivfshared; + /* Save leader state now that it's clear build will be parallel */ + buildstate->ivfleader = ivfleader; +} + +static double AssignTupleUtility(IvfflatBuildState *buildstate) +{ + Relation heap = buildstate->heap; + Relation index = buildstate->index; + IndexInfo *indexInfo = buildstate->indexInfo; + double reltuples = 0; + + /* Fill spool using either serial or parallel heap scan */ + if (!buildstate->ivfleader) { + serial_build: + reltuples = tableam_index_build_scan(heap, index, indexInfo, true, BuildCallback, (void *)buildstate, NULL); + } else { 
+        reltuples = ParallelHeapScan(buildstate);
+        IvfflatShared *ivfshared = buildstate->ivfleader->ivfshared;
+        int nruns = ivfshared->sharedsort->actualParticipants;
+        if (nruns == 0) {
+            /* failed to startup any bgworker, retry to do serial build */
+            goto serial_build;
+        }
+    }
+    return reltuples;
+}
+
+/*
+ * Shut down workers, destroy parallel context, and end parallel mode.
+ */
+void IvfflatEndParallel()
+{
+    BgworkerListSyncQuit();
+}
+
+/*
+ * Scan table for tuples to index
+ *
+ * Decides whether to use parallel workers, starts the serial or leader
+ * tuplesort into buildstate->sortstate, and feeds it from the heap when
+ * one is present.
+ */
+static void AssignTuples(IvfflatBuildState *buildstate)
+{
+    SortCoordinate coordinate = NULL;
+    int parallel_workers = 0;
+    IndexInfo *indexInfo = buildstate->indexInfo;
+    UtilityDesc *desc = &indexInfo->ii_desc;
+    int workmem;
+
+    /* Sort options, which must match IvfflatParallelScanAndSort */
+    AttrNumber attNums[] = {1};
+    Oid sortOperators[] = {INT4LTOID};
+    Oid sortCollations[] = {InvalidOid};
+    bool nullsFirstFlags[] = {false};
+
+    /* Memory budget handed to the parallel workers (split among them there) */
+    workmem = (desc->query_mem[0] > 0) ? (desc->query_mem[0] - SIMPLE_THRESHOLD)
+                                       : u_sess->attr.attr_memory.maintenance_work_mem;
+
+    /* Calculate parallel workers */
+    if (buildstate->heap != NULL)
+        parallel_workers = PlanCreateIndexWorkers(buildstate->heap, indexInfo);
+
+    /* Attempt to launch parallel worker scan when required */
+    if (parallel_workers > 0) {
+        Assert(!indexInfo->ii_Concurrent);
+        IvfflatBeginParallel(buildstate, parallel_workers, workmem);
+    }
+
+    /* Set up coordination state if at least one worker launched */
+    if (buildstate->ivfleader) {
+        coordinate = (SortCoordinate)palloc0(sizeof(SortCoordinateData));
+        coordinate->isWorker = false;
+        coordinate->nParticipants = buildstate->ivfleader->nparticipanttuplesorts;
+        coordinate->sharedsort = buildstate->ivfleader->ivfshared->sharedsort;
+    }
+
+    /* Begin serial/leader tuplesort */
+    /* NOTE(review): this sort uses maintenance_work_mem, not workmem - confirm that is intended */
+    buildstate->sortstate =
+        tuplesort_begin_heap(buildstate->tupdesc, 1, attNums, sortOperators, sortCollations, nullsFirstFlags,
+                             u_sess->attr.attr_memory.maintenance_work_mem, false, 0, 0, 1, coordinate);
+
+    /* Add tuples to sort */
+    if (buildstate->heap != NULL) {
+        buildstate->reltuples = AssignTupleUtility(buildstate);
+
+#ifdef IVFFLAT_KMEANS_DEBUG
+        PrintKmeansMetrics(buildstate);
+#endif
+    }
+}
+
+/*
+ * Create entry pages
+ *
+ * Runs the three timed phases (assign, sort, load), then ends the sort and,
+ * for a parallel build, shuts the workers down.
+ */
+static void CreateEntryPages(IvfflatBuildState *buildstate, ForkNumber forkNum)
+{
+    /* Assign */
+    IvfflatBench("assign tuples", AssignTuples(buildstate));
+
+    /* Sort */
+    IvfflatBench("sort tuples", tuplesort_performsort(buildstate->sortstate));
+
+    /* Load */
+    IvfflatBench("load tuples", InsertTuples(buildstate->index, buildstate, forkNum));
+
+    /* End sort */
+    tuplesort_end(buildstate->sortstate);
+
+    /* End parallel build */
+    if (buildstate->ivfleader) {
+        IvfflatEndParallel();
+    }
+}
+
+/*
+ * Build the index
+ *
+ * Initializes the build state, computes the centers, writes the meta, list
+ * and entry pages into forkNum, and frees the build state. heap is NULL
+ * when building the init fork.
+ */
+static void BuildIndex(Relation heap, Relation index, IndexInfo *indexInfo, IvfflatBuildState *buildstate,
+                       ForkNumber forkNum)
+{
+    InitBuildState(buildstate, heap, index, indexInfo);
+
+    ComputeCenters(buildstate);
+
+    /* Create pages */
+    CreateMetaPage(index, buildstate->dimensions, buildstate->lists, forkNum);
+    CreateListPages(index, buildstate->centers, buildstate->dimensions, buildstate->lists, forkNum,
+                    &buildstate->listInfo);
+    CreateEntryPages(buildstate, forkNum);
+
+    /* Write WAL for initialization fork since GenericXLog functions do not */
+    if (forkNum == INIT_FORKNUM)
+        LogNewpageRange(index, forkNum, 0, RelationGetNumberOfBlocksInFork(index, forkNum), true);
+
+    FreeBuildState(buildstate);
+}
+
+/*
+ * Build the index for a logged table
+ *
+ * Returns a palloc'd result holding the heap and index tuple counts.
+ */
+IndexBuildResult *ivfflatbuild_internal(Relation heap, Relation index, IndexInfo *indexInfo)
+{
+    IndexBuildResult *result;
+    IvfflatBuildState buildstate;
+
+    BuildIndex(heap, index, indexInfo, &buildstate, MAIN_FORKNUM);
+
+    result = (IndexBuildResult *)palloc(sizeof(IndexBuildResult));
+    result->heap_tuples = buildstate.reltuples;
+    result->index_tuples = buildstate.indtuples;
+
+    return result;
+}
+
+/*
+ * Build the index for an unlogged
table + */ +void ivfflatbuildempty_internal(Relation index) +{ + IndexInfo *indexInfo = BuildIndexInfo(index); + IvfflatBuildState buildstate; + + BuildIndex(NULL, index, indexInfo, &buildstate, INIT_FORKNUM); +} diff --git a/src/gausskernel/storage/access/datavec/ivfflat.cpp b/src/gausskernel/storage/access/datavec/ivfflat.cpp new file mode 100644 index 0000000000..b81cd522b6 --- /dev/null +++ b/src/gausskernel/storage/access/datavec/ivfflat.cpp @@ -0,0 +1,340 @@ +#include "postgres.h" + +#include + +#include "access/amapi.h" +#include "access/reloptions.h" +#include "commands/vacuum.h" +#include "access/datavec/ivfflat.h" +#include "utils/guc.h" +#include "utils/selfuncs.h" +#include "utils/spccache.h" + +static relopt_kind ivfflat_relopt_kind; +static THR_LOCAL bool IvfflatNeedInitialization = true; + +/* + * Initialize index options and variables + */ +void IvfflatInit(void) +{ + ivfflat_relopt_kind = add_reloption_kind(); + add_int_reloption(ivfflat_relopt_kind, "lists", "Number of inverted lists", IVFFLAT_DEFAULT_LISTS, + IVFFLAT_MIN_LISTS, IVFFLAT_MAX_LISTS); +} + +/* + * Estimate the cost of an index scan + */ +static void ivfflatcostestimate_internal(PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, + Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation) +{ + GenericCosts costs; + int lists; + double ratio; + double spcSeqPageCost; + Relation index; + double half = 0.5; + + /* Never use index without order */ + if (path->indexorderbys == NULL) { + *indexStartupCost = DBL_MAX; + *indexTotalCost = DBL_MAX; + *indexSelectivity = 0; + *indexCorrelation = 0; + return; + } + + MemSet(&costs, 0, sizeof(costs)); + + index = index_open(path->indexinfo->indexoid, NoLock); + IvfflatGetMetaPageInfo(index, &lists, NULL); + index_close(index, NoLock); + + /* Get the ratio of lists that we need to visit */ + ratio = (static_cast(u_sess->datavec_ctx.ivfflat_probes)) / lists; + if (ratio > 1.0) { + ratio = 1.0; + } + + 
/* + * This gives us the subset of tuples to visit. This value is passed into + * the generic cost estimator to determine the number of pages to visit + * during the index scan. + */ + costs.numIndexTuples = path->indexinfo->tuples * ratio; + + genericcostestimate(root, path, loop_count, costs.numIndexTuples, &costs.indexStartupCost, &costs.indexTotalCost, + &costs.indexSelectivity, &costs.indexCorrelation); + + get_tablespace_page_costs(path->indexinfo->reltablespace, NULL, &spcSeqPageCost); + + /* Adjust cost if needed since TOAST not included in seq scan cost */ + if (costs.numIndexPages > path->indexinfo->rel->pages && ratio < half) { + /* Change all page cost from random to sequential */ + costs.indexTotalCost -= costs.numIndexPages * (costs.spc_random_page_cost - spcSeqPageCost); + + /* Remove cost of extra pages */ + costs.indexTotalCost -= (costs.numIndexPages - path->indexinfo->rel->pages) * spcSeqPageCost; + } else { + /* Change some page cost from random to sequential */ + costs.indexTotalCost -= half * costs.numIndexPages * (costs.spc_random_page_cost - spcSeqPageCost); + } + + /* + * If the list selectivity is lower than what is returned from the generic + * cost estimator, use that. 
+ */ + if (ratio < costs.indexSelectivity) { + costs.indexSelectivity = ratio; + } + + /* Use total cost since most work happens before first tuple is returned */ + *indexStartupCost = costs.indexTotalCost; + *indexTotalCost = costs.indexTotalCost; + *indexSelectivity = costs.indexSelectivity; + *indexCorrelation = costs.indexCorrelation; +} + +/* + * Parse and validate the reloptions + */ +static bytea *ivfflatoptions_internal(Datum reloptions, bool validate) +{ + static const relopt_parse_elt tab[] = { + {"lists", RELOPT_TYPE_INT, offsetof(IvfflatOptions, lists)}, + {"parallel_workers", RELOPT_TYPE_INT, offsetof(StdRdOptions, parallel_workers)}}; + +#if PG_VERSION_NUM >= 130000 + return (bytea *)build_reloptions(reloptions, validate, ivfflat_relopt_kind, sizeof(IvfflatOptions), tab, + lengthof(tab)); +#else + relopt_value *options; + int numoptions; + IvfflatOptions *rdopts; + + if (IvfflatNeedInitialization) { + IvfflatInit(); + IvfflatNeedInitialization = false; + } + + options = parseRelOptions(reloptions, validate, ivfflat_relopt_kind, &numoptions); + rdopts = (IvfflatOptions *)allocateReloptStruct(sizeof(IvfflatOptions), options, numoptions); + fillRelOptions((void *)rdopts, sizeof(IvfflatOptions), options, numoptions, validate, tab, lengthof(tab)); + + return (bytea *)rdopts; +#endif +} + +/* + * Validate catalog entries for the specified operator class + */ +static bool ivfflatvalidate_internal(Oid opclassoid) +{ + return true; +} + +/* + * Define index handler + * + * See https://www.postgresql.org/docs/current/index-api.html + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(ivfflathandler); +Datum ivfflathandler(PG_FUNCTION_ARGS) +{ + IndexAmRoutine *amroutine = makeNode(IndexAmRoutine); + + amroutine->amstrategies = 0; + amroutine->amsupport = 5; +#if PG_VERSION_NUM >= 130000 + amroutine->amoptsprocnum = 0; +#endif + amroutine->amcanorder = false; + amroutine->amcanorderbyop = true; + amroutine->amcanbackward = false; /* can change direction mid-scan */ + 
amroutine->amcanunique = false; + amroutine->amcanmulticol = false; + amroutine->amoptionalkey = true; + amroutine->amsearcharray = false; + amroutine->amsearchnulls = false; + amroutine->amstorage = false; + amroutine->amclusterable = false; + amroutine->ampredlocks = false; + amroutine->amcanparallel = false; + amroutine->amcaninclude = false; +#if PG_VERSION_NUM >= 130000 + amroutine->amusemaintenanceworkmem = false; /* not used during VACUUM */ + amroutine->amparallelvacuumoptions = VACUUM_OPTION_PARALLEL_BULKDEL; +#endif + amroutine->amkeytype = InvalidOid; + + /* Interface functions */ + errno_t rc; + rc = strcpy_s(amroutine->ambuildfuncname, NAMEDATALEN, "ivfflatbuild"); + securec_check(rc, "\0", "\0"); + rc = strcpy_s(amroutine->ambuildemptyfuncname, NAMEDATALEN, "ivfflatbuildempty"); + securec_check(rc, "\0", "\0"); + rc = strcpy_s(amroutine->aminsertfuncname, NAMEDATALEN, "ivfflatinsert"); + securec_check(rc, "\0", "\0"); + rc = strcpy_s(amroutine->ambulkdeletefuncname, NAMEDATALEN, "ivfflatbulkdelete"); + securec_check(rc, "\0", "\0"); + rc = strcpy_s(amroutine->amvacuumcleanupfuncname, NAMEDATALEN, "ivfflatvacuumcleanup"); + securec_check(rc, "\0", "\0"); + rc = strcpy_s(amroutine->amcostestimatefuncname, NAMEDATALEN, "ivfflatcostestimate"); + securec_check(rc, "\0", "\0"); + rc = strcpy_s(amroutine->amoptionsfuncname, NAMEDATALEN, "ivfflatoptions"); + securec_check(rc, "\0", "\0"); + rc = strcpy_s(amroutine->amvalidatefuncname, NAMEDATALEN, "ivfflatvalidate"); + securec_check(rc, "\0", "\0"); + rc = strcpy_s(amroutine->ambeginscanfuncname, NAMEDATALEN, "ivfflatbeginscan"); + securec_check(rc, "\0", "\0"); + rc = strcpy_s(amroutine->amrescanfuncname, NAMEDATALEN, "ivfflatrescan"); + securec_check(rc, "\0", "\0"); + rc = strcpy_s(amroutine->amgettuplefuncname, NAMEDATALEN, "ivfflatgettuple"); + securec_check(rc, "\0", "\0"); + rc = strcpy_s(amroutine->amendscanfuncname, NAMEDATALEN, "ivfflatendscan"); + securec_check(rc, "\0", "\0"); + + 
PG_RETURN_POINTER(amroutine); +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(ivfflatbuild); +Datum ivfflatbuild(PG_FUNCTION_ARGS) +{ + Relation heap = (Relation)PG_GETARG_POINTER(0); + Relation index = (Relation)PG_GETARG_POINTER(1); + IndexInfo *indexinfo = (IndexInfo *)PG_GETARG_POINTER(2); + IndexBuildResult *result = ivfflatbuild_internal(heap, index, indexinfo); + + PG_RETURN_POINTER(result); +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(ivfflatbuildempty); +Datum ivfflatbuildempty(PG_FUNCTION_ARGS) +{ + Relation index = (Relation)PG_GETARG_POINTER(0); + ivfflatbuildempty_internal(index); + + PG_RETURN_VOID(); +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(ivfflatinsert); +Datum ivfflatinsert(PG_FUNCTION_ARGS) +{ + Relation rel = (Relation)PG_GETARG_POINTER(0); + Datum *values = (Datum *)PG_GETARG_POINTER(1); + bool *isnull = reinterpret_cast(PG_GETARG_POINTER(2)); + ItemPointer ht_ctid = (ItemPointer)PG_GETARG_POINTER(3); + Relation heaprel = (Relation)PG_GETARG_POINTER(4); + IndexUniqueCheck checkunique = (IndexUniqueCheck)PG_GETARG_INT32(5); + bool result = ivfflatinsert_internal(rel, values, isnull, ht_ctid, heaprel, checkunique); + + PG_RETURN_BOOL(result); +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(ivfflatbulkdelete); +Datum ivfflatbulkdelete(PG_FUNCTION_ARGS) +{ + IndexVacuumInfo *info = (IndexVacuumInfo *)PG_GETARG_POINTER(0); + IndexBulkDeleteResult *volatile stats = (IndexBulkDeleteResult *)PG_GETARG_POINTER(1); + IndexBulkDeleteCallback callback = (IndexBulkDeleteCallback)PG_GETARG_POINTER(2); + void *callbackState = static_cast(PG_GETARG_POINTER(3)); + stats = ivfflatbulkdelete_internal(info, stats, callback, callbackState); + + PG_RETURN_POINTER(stats); +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(ivfflatvacuumcleanup); +Datum ivfflatvacuumcleanup(PG_FUNCTION_ARGS) +{ + IndexVacuumInfo *info = (IndexVacuumInfo *)PG_GETARG_POINTER(0); + IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *)PG_GETARG_POINTER(1); + stats = ivfflatvacuumcleanup_internal(info, stats); + + 
PG_RETURN_POINTER(stats); +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(ivfflatcostestimate); +Datum ivfflatcostestimate(PG_FUNCTION_ARGS) +{ + PlannerInfo *root = (PlannerInfo *)PG_GETARG_POINTER(0); + IndexPath *path = (IndexPath *)PG_GETARG_POINTER(1); + double loopcount = static_cast(PG_GETARG_FLOAT8(2)); + Cost *startupcost = (Cost *)PG_GETARG_POINTER(3); + Cost *totalcost = (Cost *)PG_GETARG_POINTER(4); + Selectivity *selectivity = (Selectivity *)PG_GETARG_POINTER(5); + double *correlation = reinterpret_cast(PG_GETARG_POINTER(6)); + ivfflatcostestimate_internal(root, path, loopcount, startupcost, totalcost, selectivity, correlation); + + PG_RETURN_VOID(); +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(ivfflatoptions); +Datum ivfflatoptions(PG_FUNCTION_ARGS) +{ + Datum reloptions = PG_GETARG_DATUM(0); + bool validate = PG_GETARG_BOOL(1); + bytea *result = ivfflatoptions_internal(reloptions, validate); + + if (NULL != result) + PG_RETURN_BYTEA_P(result); + + PG_RETURN_NULL(); +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(ivfflatvalidate); +Datum ivfflatvalidate(PG_FUNCTION_ARGS) +{ + Oid opclassoid = PG_GETARG_OID(0); + bool result = ivfflatvalidate_internal(opclassoid); + + PG_RETURN_BOOL(result); +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(ivfflatbeginscan); +Datum ivfflatbeginscan(PG_FUNCTION_ARGS) +{ + Relation rel = (Relation)PG_GETARG_POINTER(0); + int nkeys = PG_GETARG_INT32(1); + int norderbys = PG_GETARG_INT32(2); + IndexScanDesc scan = ivfflatbeginscan_internal(rel, nkeys, norderbys); + + PG_RETURN_POINTER(scan); +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(ivfflatrescan); +Datum ivfflatrescan(PG_FUNCTION_ARGS) +{ + IndexScanDesc scan = (IndexScanDesc)PG_GETARG_POINTER(0); + ScanKey scankey = (ScanKey)PG_GETARG_POINTER(1); + int nkeys = PG_GETARG_INT32(2); + ScanKey orderbys = (ScanKey)PG_GETARG_POINTER(3); + int norderbys = PG_GETARG_INT32(4); + ivfflatrescan_internal(scan, scankey, nkeys, orderbys, norderbys); + + PG_RETURN_VOID(); +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(ivfflatgettuple); 
+Datum ivfflatgettuple(PG_FUNCTION_ARGS) +{ + IndexScanDesc scan = (IndexScanDesc)PG_GETARG_POINTER(0); + ScanDirection direction = (ScanDirection)PG_GETARG_INT32(1); + + if (NULL == scan) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("Invalid arguments for function ivfflatgettuple"))); + + bool result = ivfflatgettuple_internal(scan, direction); + + PG_RETURN_BOOL(result); +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(ivfflatendscan); +Datum ivfflatendscan(PG_FUNCTION_ARGS) +{ + IndexScanDesc scan = (IndexScanDesc)PG_GETARG_POINTER(0); + ivfflatendscan_internal(scan); + + PG_RETURN_VOID(); +} diff --git a/src/gausskernel/storage/access/datavec/ivfinsert.cpp b/src/gausskernel/storage/access/datavec/ivfinsert.cpp new file mode 100644 index 0000000000..0d19d9738d --- /dev/null +++ b/src/gausskernel/storage/access/datavec/ivfinsert.cpp @@ -0,0 +1,198 @@ +#include "postgres.h" + +#include + +#include "access/generic_xlog.h" +#include "access/datavec/ivfflat.h" +#include "storage/buf/bufmgr.h" +#include "storage/lmgr.h" +#include "utils/memutils.h" + +/* + * Find the list that minimizes the distance function + */ +static void FindInsertPage(Relation index, Datum *values, BlockNumber *insertPage, ListInfo *listInfo) +{ + double minDistance = DBL_MAX; + BlockNumber nextblkno = IVFFLAT_HEAD_BLKNO; + FmgrInfo *procinfo; + Oid collation; + + /* Avoid compiler warning */ + listInfo->blkno = nextblkno; + listInfo->offno = FirstOffsetNumber; + + procinfo = index_getprocinfo(index, 1, IVFFLAT_DISTANCE_PROC); + collation = index->rd_indcollation[0]; + + /* Search all list pages */ + while (BlockNumberIsValid(nextblkno)) { + Buffer cbuf; + Page cpage; + OffsetNumber maxoffno; + + cbuf = ReadBuffer(index, nextblkno); + LockBuffer(cbuf, BUFFER_LOCK_SHARE); + cpage = BufferGetPage(cbuf); + maxoffno = PageGetMaxOffsetNumber(cpage); + + for (OffsetNumber offno = FirstOffsetNumber; offno <= maxoffno; offno = OffsetNumberNext(offno)) { + IvfflatList list; + double 
distance;
+
+            list = (IvfflatList)PageGetItem(cpage, PageGetItemId(cpage, offno));
+            distance =
+                DatumGetFloat8(FunctionCall2Coll(procinfo, collation, values[0], PointerGetDatum(&list->center)));
+            /* Take the first list unconditionally, then only strictly closer ones */
+            if (distance < minDistance || !BlockNumberIsValid(*insertPage)) {
+                *insertPage = list->insertPage;
+                listInfo->blkno = nextblkno;
+                listInfo->offno = offno;
+                minDistance = distance;
+            }
+        }
+
+        nextblkno = IvfflatPageGetOpaque(cpage)->nextblkno;
+
+        UnlockReleaseBuffer(cbuf);
+    }
+}
+
+/*
+ * Insert a tuple into the index
+ *
+ * Normalizes the value when the operator class has a norm function (and
+ * skips the row if IvfflatCheckNorm rejects it), picks the closest list,
+ * then appends the tuple to the first page in that list's chain with
+ * enough room, extending the chain by one page if none has. The list's
+ * recorded insert page is updated when it moved.
+ */
+static void InsertTuple(Relation index, Datum *values, const bool *isnull, ItemPointer heap_tid, Relation heapRel)
+{
+    const IvfflatTypeInfo *typeInfo = IvfflatGetTypeInfo(index);
+    IndexTuple itup;
+    Datum value;
+    FmgrInfo *normprocinfo;
+    Buffer buf;
+    Page page;
+    GenericXLogState *state;
+    Size itemsz;
+    BlockNumber insertPage = InvalidBlockNumber;
+    ListInfo listInfo;
+    BlockNumber originalInsertPage;
+
+    /* Detoast once for all calls */
+    value = PointerGetDatum(PG_DETOAST_DATUM(values[0]));
+
+    /* Normalize if needed */
+    normprocinfo = IvfflatOptionalProcInfo(index, IVFFLAT_NORM_PROC);
+    if (normprocinfo != NULL) {
+        Oid collation = index->rd_indcollation[0];
+
+        if (!IvfflatCheckNorm(normprocinfo, collation, value)) {
+            return;
+        }
+
+        value = IvfflatNormValue(typeInfo, collation, value);
+    }
+
+    /* Ensure index is valid */
+    IvfflatGetMetaPageInfo(index, NULL, NULL);
+
+    /* Find the insert page - sets the page and list info */
+    /* NOTE(review): the original values are passed here, not the normalized value - confirm intended */
+    FindInsertPage(index, values, &insertPage, &listInfo);
+    Assert(BlockNumberIsValid(insertPage));
+    originalInsertPage = insertPage;
+
+    /* Form tuple */
+    itup = index_form_tuple(RelationGetDescr(index), &value, isnull);
+    itup->t_tid = *heap_tid;
+
+    /* Get tuple size */
+    itemsz = MAXALIGN(IndexTupleSize(itup));
+    Assert(itemsz <=
+           BLCKSZ - MAXALIGN(SizeOfPageHeaderData) - MAXALIGN(sizeof(IvfflatPageOpaqueData)) - sizeof(ItemIdData));
+
+    /* Find a page to insert the item */
+    for (;;) {
+        buf = ReadBuffer(index, insertPage);
+        LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+
+        state = GenericXLogStart(index);
+        page = GenericXLogRegisterBuffer(state, buf, 0);
+        /* Leave the loop holding buf locked and registered in state */
+        if (PageGetFreeSpace(page) >= itemsz) {
+            break;
+        }
+
+        insertPage = IvfflatPageGetOpaque(page)->nextblkno;
+        if (BlockNumberIsValid(insertPage)) {
+            /* Move to next page */
+            GenericXLogAbort(state);
+            UnlockReleaseBuffer(buf);
+        } else {
+            Buffer newbuf;
+            Page newpage;
+
+            /* Add a new page */
+            LockRelationForExtension(index, ExclusiveLock);
+            newbuf = IvfflatNewBuffer(index, MAIN_FORKNUM);
+            UnlockRelationForExtension(index, ExclusiveLock);
+
+            /* Init new page */
+            newpage = GenericXLogRegisterBuffer(state, newbuf, GENERIC_XLOG_FULL_IMAGE);
+            IvfflatInitPage(newbuf, newpage);
+
+            /* Update insert page */
+            insertPage = BufferGetBlockNumber(newbuf);
+
+            /* Update previous buffer */
+            IvfflatPageGetOpaque(page)->nextblkno = insertPage;
+
+            /* Commit */
+            GenericXLogFinish(state);
+
+            /* Unlock previous buffer */
+            UnlockReleaseBuffer(buf);
+
+            /* Prepare new buffer */
+            state = GenericXLogStart(index);
+            buf = newbuf;
+            page = GenericXLogRegisterBuffer(state, buf, 0);
+            break;
+        }
+    }
+
+    /* Add to next offset */
+    if (PageAddItem(page, (Item)itup, itemsz, InvalidOffsetNumber, false, false) == InvalidOffsetNumber)
+        elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index));
+
+    IvfflatCommitBuffer(buf, state);
+
+    /* Update the insert page */
+    if (insertPage != originalInsertPage)
+        IvfflatUpdateList(index, listInfo, insertPage, originalInsertPage, InvalidBlockNumber, MAIN_FORKNUM);
+}
+
+/*
+ * Insert a tuple into the index
+ *
+ * NULL values are not indexed. The work runs in a temporary memory context
+ * that is deleted before returning. Always returns false.
+ */
+bool ivfflatinsert_internal(Relation index, Datum *values, const bool *isnull, ItemPointer heap_tid, Relation heap,
+                            IndexUniqueCheck checkUnique)
+{
+    MemoryContext oldCtx;
+    MemoryContext insertCtx;
+
+    /* Skip nulls */
+    if (isnull[0]) {
+        return false;
+    }
+
+    /*
+     * Use memory context since detoast, IvfflatNormValue, and
+     * index_form_tuple
can allocate + */ + insertCtx = AllocSetContextCreate(CurrentMemoryContext, "Ivfflat insert temporary context", ALLOCSET_DEFAULT_SIZES); + oldCtx = MemoryContextSwitchTo(insertCtx); + + /* Insert tuple */ + InsertTuple(index, values, isnull, heap_tid, heap); + + /* Delete memory context */ + MemoryContextSwitchTo(oldCtx); + MemoryContextDelete(insertCtx); + + return false; +} diff --git a/src/gausskernel/storage/access/datavec/ivfkmeans.cpp b/src/gausskernel/storage/access/datavec/ivfkmeans.cpp new file mode 100644 index 0000000000..34af346dd6 --- /dev/null +++ b/src/gausskernel/storage/access/datavec/ivfkmeans.cpp @@ -0,0 +1,558 @@ +#include "postgres.h" + +#include +#include + +#include "access/datavec/bitvec.h" +#include "access/datavec/halfutils.h" +#include "access/datavec/halfvec.h" +#include "access/datavec/ivfflat.h" +#include "miscadmin.h" +#include "utils/builtins.h" +#include "utils/datum.h" +#include "utils/memutils.h" +#include "access/datavec/vector.h" + +/* + * Initialize with kmeans++ + * + * https://theory.stanford.edu/~sergei/papers/kMeansPP-soda.pdf + */ +static void InitCenters(Relation index, VectorArray samples, VectorArray centers, float *lowerBound) +{ + FmgrInfo *procinfo; + Oid collation; + int64 j; + float *weight = (float *)palloc(samples->length * sizeof(float)); + int numCenters = centers->maxlen; + int numSamples = samples->length; + + procinfo = index_getprocinfo(index, 1, IVFFLAT_KMEANS_DISTANCE_PROC); + collation = index->rd_indcollation[0]; + + /* Choose an initial center uniformly at random */ + VectorArraySet(centers, 0, VectorArrayGet(samples, RandomInt() % samples->length)); + centers->length++; + + for (j = 0; j < numSamples; j++) + weight[j] = FLT_MAX; + + for (int i = 0; i < numCenters; i++) { + double sum; + double choice; + + CHECK_FOR_INTERRUPTS(); + + sum = 0.0; + + for (j = 0; j < numSamples; j++) { + Datum vec = PointerGetDatum(VectorArrayGet(samples, j)); + double distance; + + /* Only need to compute distance for 
new center */ + /* TODO Use triangle inequality to reduce distance calculations */ + distance = DatumGetFloat8( + FunctionCall2Coll(procinfo, collation, vec, PointerGetDatum(VectorArrayGet(centers, i)))); + + /* Set lower bound */ + lowerBound[j * numCenters + i] = distance; + + /* Use distance squared for weighted probability distribution */ + distance *= distance; + + if (distance < weight[j]) + weight[j] = distance; + + sum += weight[j]; + } + + /* Only compute lower bound on last iteration */ + if (i + 1 == numCenters) { + break; + } + + /* Choose new center using weighted probability distribution. */ + choice = sum * RandomDouble(); + for (j = 0; j < numSamples - 1; j++) { + choice -= weight[j]; + if (choice <= 0) + break; + } + + VectorArraySet(centers, i + 1, VectorArrayGet(samples, j)); + centers->length++; + } + + pfree(weight); +} + +/* + * Norm centers + */ +static void NormCenters(const IvfflatTypeInfo *typeInfo, Oid collation, VectorArray centers) +{ + MemoryContext normCtx = + AllocSetContextCreate(CurrentMemoryContext, "Ivfflat norm temporary context", ALLOCSET_DEFAULT_SIZES); + MemoryContext oldCtx = MemoryContextSwitchTo(normCtx); + errno_t rc = EOK; + + for (int j = 0; j < centers->length; j++) { + Datum center = PointerGetDatum(VectorArrayGet(centers, j)); + Datum newCenter = IvfflatNormValue(typeInfo, collation, center); + Size size = VARSIZE_ANY(DatumGetPointer(newCenter)); + if (size > centers->itemsize) + elog(ERROR, "safety check failed"); + + rc = memcpy_s(DatumGetPointer(center), size, DatumGetPointer(newCenter), size); + securec_check(rc, "\0", "\0"); + MemoryContextReset(normCtx); + } + + MemoryContextSwitchTo(oldCtx); + MemoryContextDelete(normCtx); +} + +/* + * Quick approach if we have no data + */ +static void RandomCenters(Relation index, VectorArray centers, const IvfflatTypeInfo *typeInfo) +{ + int dimensions = centers->dim; + FmgrInfo *normprocinfo = IvfflatOptionalProcInfo(index, IVFFLAT_KMEANS_NORM_PROC); + Oid collation = 
index->rd_indcollation[0]; + float *x = static_cast(palloc(sizeof(float) * dimensions)); + + /* Fill with random data */ + while (centers->length < centers->maxlen) { + Pointer center = VectorArrayGet(centers, centers->length); + + for (int i = 0; i < dimensions; i++) { + x[i] = static_cast(RandomDouble()); + } + + typeInfo->updateCenter(center, dimensions, x); + + centers->length++; + } + + if (normprocinfo != NULL) + NormCenters(typeInfo, collation, centers); +} + +#ifdef IVFFLAT_MEMORY +/* + * Show memory usage + */ +static void ShowMemoryUsage(MemoryContext context, Size estimatedSize) +{ +#if PG_VERSION_NUM >= 130000 + elog(INFO, "total memory: %zu MB", MemoryContextMemAllocated(context, true) / (1024 * 1024)); +#else + MemoryContextStats(context); +#endif + elog(INFO, "estimated memory: %zu MB", estimatedSize / (1024 * 1024)); +} +#endif + +/* + * Sum centers + */ +static void SumCenters(VectorArray samples, float *agg, int *closestCenters, const IvfflatTypeInfo *typeInfo) +{ + for (int j = 0; j < samples->length; j++) { + float *x = agg + ((int64)closestCenters[j] * samples->dim); + + typeInfo->sumCenter(VectorArrayGet(samples, j), x); + } +} + +/* + * Update centers + */ +static void UpdateCenters(float *agg, VectorArray centers, const IvfflatTypeInfo *typeInfo) +{ + for (int j = 0; j < centers->length; j++) { + float *x = agg + ((int64)j * centers->dim); + + typeInfo->updateCenter(VectorArrayGet(centers, j), centers->dim, x); + } +} + +/* + * Compute new centers + */ +static void ComputeNewCenters(VectorArray samples, float *agg, VectorArray newCenters, int *centerCounts, + int *closestCenters, FmgrInfo *normprocinfo, Oid collation, + const IvfflatTypeInfo *typeInfo) +{ + int dimensions = newCenters->dim; + int numCenters = newCenters->length; + int numSamples = samples->length; + + /* Reset sum and count */ + for (int j = 0; j < numCenters; j++) { + float *x = agg + ((int64)j * dimensions); + + for (int k = 0; k < dimensions; k++) { + x[k] = 0.0; + } + + 
centerCounts[j] = 0; + } + + /* Increment sum of closest center */ + SumCenters(samples, agg, closestCenters, typeInfo); + + /* Increment count of closest center */ + for (int j = 0; j < numSamples; j++) { + centerCounts[closestCenters[j]] += 1; + } + + /* Divide sum by count */ + for (int j = 0; j < numCenters; j++) { + float *x = agg + ((int64)j * dimensions); + + if (centerCounts[j] > 0) { + /* Double avoids overflow, but requires more memory */ + /* TODO Update bounds */ + for (int k = 0; k < dimensions; k++) { + if (isinf(x[k])) { + x[k] = x[k] > 0 ? FLT_MAX : -FLT_MAX; + } + } + + for (int k = 0; k < dimensions; k++) { + x[k] /= centerCounts[j]; + } + } else { + /* TODO Handle empty centers properly */ + for (int k = 0; k < dimensions; k++) { + x[k] = RandomDouble(); + } + } + } + + /* Set new centers */ + UpdateCenters(agg, newCenters, typeInfo); + + /* Normalize if needed */ + if (normprocinfo != NULL) + NormCenters(typeInfo, collation, newCenters); +} + +/* + * Use Elkan for performance. This requires distance function to satisfy triangle inequality. 
+ * + * We use L2 distance for L2 (not L2 squared like index scan) + * and angular distance for inner product and cosine distance + * + * https://www.aaai.org/Papers/ICML/2003/ICML03-022.pdf + */ +static void ElkanKmeans(Relation index, VectorArray samples, VectorArray centers, const IvfflatTypeInfo *typeInfo) +{ + FmgrInfo *procinfo; + FmgrInfo *normprocinfo; + Oid collation; + int dimensions = centers->dim; + int numCenters = centers->maxlen; + int numSamples = samples->length; + VectorArray newCenters; + float *agg; + int *centerCounts; + int *closestCenters; + float *lowerBound; + float *upperBound; + float *s; + float *halfcdist; + float *newcdist; + + /* Calculate allocation sizes */ + Size samplesSize = VECTOR_ARRAY_SIZE(samples->maxlen, samples->itemsize); + Size centersSize = VECTOR_ARRAY_SIZE(centers->maxlen, centers->itemsize); + Size newCentersSize = VECTOR_ARRAY_SIZE(numCenters, centers->itemsize); + Size aggSize = sizeof(float) * (int64)numCenters * dimensions; + Size centerCountsSize = sizeof(int) * numCenters; + Size closestCentersSize = sizeof(int) * numSamples; + Size lowerBoundSize = sizeof(float) * numSamples * numCenters; + Size upperBoundSize = sizeof(float) * numSamples; + Size sSize = sizeof(float) * numCenters; + Size halfcdistSize = sizeof(float) * numCenters * numCenters; + Size newcdistSize = sizeof(float) * numCenters; + + /* Calculate total size */ + Size totalSize = samplesSize + centersSize + newCentersSize + aggSize + centerCountsSize + closestCentersSize + + lowerBoundSize + upperBoundSize + sSize + halfcdistSize + newcdistSize; + + /* Check memory requirements */ + /* Add one to error message to ceil */ + if (totalSize > (Size)u_sess->attr.attr_memory.maintenance_work_mem * 1024L) + ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("memory required is %zu MB, maintenance_work_mem is %d MB", + totalSize / (1024 * 1024) + 1, u_sess->attr.attr_memory.maintenance_work_mem / 1024))); + + /* Ensure indexing does not 
overflow */ + if (numCenters * numCenters > INT_MAX) + elog(ERROR, "Indexing overflow detected. Please report a bug."); + + /* Set support functions */ + procinfo = index_getprocinfo(index, 1, IVFFLAT_KMEANS_DISTANCE_PROC); + normprocinfo = IvfflatOptionalProcInfo(index, IVFFLAT_KMEANS_NORM_PROC); + collation = index->rd_indcollation[0]; + + /* Allocate space */ + /* Use float instead of double to save memory */ + agg = (float *)palloc(aggSize); + centerCounts = (int *)palloc(centerCountsSize); + closestCenters = (int *)palloc(closestCentersSize); + lowerBound = (float *)MemoryContextAllocExtended(CurrentMemoryContext, lowerBoundSize, MCXT_ALLOC_HUGE); + upperBound = (float *)palloc(upperBoundSize); + s = (float *)palloc(sSize); + halfcdist = (float *)palloc_extended(halfcdistSize, MCXT_ALLOC_HUGE); + newcdist = (float *)palloc(newcdistSize); + + /* Initialize new centers */ + newCenters = VectorArrayInit(numCenters, dimensions, centers->itemsize); + newCenters->length = numCenters; + +#ifdef IVFFLAT_MEMORY + ShowMemoryUsage(MemoryContextGetParent(CurrentMemoryContext)); +#endif + + /* Pick initial centers */ + InitCenters(index, samples, centers, lowerBound); + + /* Assign each x to its closest initial center c(x) = argmin d(x,c) */ + for (int64 j = 0; j < numSamples; j++) { + float minDistance = FLT_MAX; + int closestCenter = 0; + + /* Find closest center */ + for (int64 k = 0; k < numCenters; k++) { + /* TODO Use Lemma 1 in k-means++ initialization */ + float distance = lowerBound[j * numCenters + k]; + + if (distance < minDistance) { + minDistance = distance; + closestCenter = k; + } + } + + upperBound[j] = minDistance; + closestCenters[j] = closestCenter; + } + + /* Give 500 iterations to converge */ + for (int iteration = 0; iteration < 500; iteration++) { + int changes = 0; + bool rjreset; + + /* Can take a while, so ensure we can interrupt */ + CHECK_FOR_INTERRUPTS(); + + /* Step 1: For all centers, compute distance */ + for (int64 j = 0; j < numCenters; 
j++) { + Datum vec = PointerGetDatum(VectorArrayGet(centers, j)); + + for (int64 k = j + 1; k < numCenters; k++) { + float distance = 0.5 * DatumGetFloat8(FunctionCall2Coll(procinfo, collation, vec, + PointerGetDatum(VectorArrayGet(centers, k)))); + + halfcdist[j * numCenters + k] = distance; + halfcdist[k * numCenters + j] = distance; + } + } + + /* For all centers c, compute s(c) */ + for (int64 j = 0; j < numCenters; j++) { + float minDistance = FLT_MAX; + + for (int64 k = 0; k < numCenters; k++) { + float distance; + + if (j == k) + continue; + + distance = halfcdist[j * numCenters + k]; + if (distance < minDistance) + minDistance = distance; + } + + s[j] = minDistance; + } + + rjreset = iteration != 0; + + for (int64 j = 0; j < numSamples; j++) { + bool rj; + + /* Step 2: Identify all points x such that u(x) <= s(c(x)) */ + if (upperBound[j] <= s[closestCenters[j]]) + continue; + + rj = rjreset; + + for (int64 k = 0; k < numCenters; k++) { + Datum vec; + float dxcx; + + /* Step 3: For all remaining points x and centers c */ + if (k == closestCenters[j]) + continue; + + if (upperBound[j] <= lowerBound[j * numCenters + k]) + continue; + + if (upperBound[j] <= halfcdist[closestCenters[j] * numCenters + k]) + continue; + + vec = PointerGetDatum(VectorArrayGet(samples, j)); + + /* Step 3a */ + if (rj) { + dxcx = DatumGetFloat8(FunctionCall2Coll( + procinfo, collation, vec, PointerGetDatum(VectorArrayGet(centers, closestCenters[j])))); + + /* d(x,c(x)) computed, which is a form of d(x,c) */ + lowerBound[j * numCenters + closestCenters[j]] = dxcx; + upperBound[j] = dxcx; + + rj = false; + } else + dxcx = upperBound[j]; + + /* Step 3b */ + if (dxcx > lowerBound[j * numCenters + k] || dxcx > halfcdist[closestCenters[j] * numCenters + k]) { + float dxc = DatumGetFloat8( + FunctionCall2Coll(procinfo, collation, vec, PointerGetDatum(VectorArrayGet(centers, k)))); + + /* d(x,c) calculated */ + lowerBound[j * numCenters + k] = dxc; + + if (dxc < dxcx) { + closestCenters[j] 
= k; + + /* c(x) changed */ + upperBound[j] = dxc; + + changes++; + } + } + } + } + + /* Step 4: For each center c, let m(c) be mean of all points assigned */ + ComputeNewCenters(samples, agg, newCenters, centerCounts, closestCenters, normprocinfo, collation, typeInfo); + + /* Step 5 */ + for (int j = 0; j < numCenters; j++) + newcdist[j] = + DatumGetFloat8(FunctionCall2Coll(procinfo, collation, PointerGetDatum(VectorArrayGet(centers, j)), + PointerGetDatum(VectorArrayGet(newCenters, j)))); + + for (int64 j = 0; j < numSamples; j++) { + for (int64 k = 0; k < numCenters; k++) { + float distance = lowerBound[j * numCenters + k] - newcdist[k]; + + if (distance < 0) { + distance = 0; + } + + lowerBound[j * numCenters + k] = distance; + } + } + + /* Step 6 */ + /* We reset r(x) before Step 3 in the next iteration */ + for (int j = 0; j < numSamples; j++) { + upperBound[j] += newcdist[closestCenters[j]]; + } + + /* Step 7 */ + for (int j = 0; j < numCenters; j++) { + VectorArraySet(centers, j, VectorArrayGet(newCenters, j)); + } + + if (changes == 0 && iteration != 0) { + break; + } + } +} + +/* + * Ensure no NaN or infinite values + */ +static void CheckElements(VectorArray centers, const IvfflatTypeInfo *typeInfo) +{ + float *scratch = (float *)palloc(sizeof(float) * centers->dim); + + for (int i = 0; i < centers->length; i++) { + for (int j = 0; j < centers->dim; j++) + scratch[j] = 0; + + /* /fp:fast may not propagate NaN with MSVC, but that's alright */ + typeInfo->sumCenter(VectorArrayGet(centers, i), scratch); + + for (int j = 0; j < centers->dim; j++) { + if (isnan(scratch[j])) + elog(ERROR, "NaN detected. Please report a bug."); + + if (isinf(scratch[j])) + elog(ERROR, "Infinite value detected. 
Please report a bug."); + } + } +} + +/* + * Ensure no zero vectors for cosine distance + */ +static void CheckNorms(VectorArray centers, Relation index) +{ + /* Check NORM_PROC instead of KMEANS_NORM_PROC */ + FmgrInfo *normprocinfo = IvfflatOptionalProcInfo(index, IVFFLAT_NORM_PROC); + Oid collation = index->rd_indcollation[0]; + + if (normprocinfo == NULL) { + return; + } + + for (int i = 0; i < centers->length; i++) { + double norm = + DatumGetFloat8(FunctionCall1Coll(normprocinfo, collation, PointerGetDatum(VectorArrayGet(centers, i)))); + if (norm == 0) { + elog(ERROR, "Zero norm detected. Please report a bug."); + } + } +} + +/* + * Detect issues with centers + */ +static void CheckCenters(Relation index, VectorArray centers, const IvfflatTypeInfo *typeInfo) +{ + if (centers->length != centers->maxlen) + elog(ERROR, "Not enough centers. Please report a bug."); + + CheckElements(centers, typeInfo); + CheckNorms(centers, index); +} + +/* + * Perform naive k-means centering + * We use spherical k-means for inner product and cosine + */ +void IvfflatKmeans(Relation index, VectorArray samples, VectorArray centers, const IvfflatTypeInfo *typeInfo) +{ + MemoryContext kmeansCtx = + AllocSetContextCreate(CurrentMemoryContext, "Ivfflat kmeans temporary context", ALLOCSET_DEFAULT_SIZES); + MemoryContext oldCtx = MemoryContextSwitchTo(kmeansCtx); + + if (samples->length == 0) + RandomCenters(index, centers, typeInfo); + else + ElkanKmeans(index, samples, centers, typeInfo); + + CheckCenters(index, centers, typeInfo); + + MemoryContextSwitchTo(oldCtx); + MemoryContextDelete(kmeansCtx); +} diff --git a/src/gausskernel/storage/access/datavec/ivfscan.cpp b/src/gausskernel/storage/access/datavec/ivfscan.cpp new file mode 100644 index 0000000000..d24f7979f9 --- /dev/null +++ b/src/gausskernel/storage/access/datavec/ivfscan.cpp @@ -0,0 +1,347 @@ +#include "postgres.h" + +#include + +#include "access/relscan.h" +#include "lib/pairingheap.h" +#include "access/datavec/ivfflat.h" 
+#include "miscadmin.h" +#include "pgstat.h" +#include "storage/buf/bufmgr.h" + +/* + * Compare list distances + */ +static int CompareLists(const pairingheap_node *a, const pairingheap_node *b, void *arg) +{ + if (((const IvfflatScanList *)a)->distance > ((const IvfflatScanList *)b)->distance) { + return 1; + } + + if (((const IvfflatScanList *)a)->distance < ((const IvfflatScanList *)b)->distance) { + return -1; + } + + return 0; +} + +/* + * Get lists and sort by distance + */ +static void GetScanLists(IndexScanDesc scan, Datum value) +{ + IvfflatScanOpaque so = (IvfflatScanOpaque)scan->opaque; + BlockNumber nextblkno = IVFFLAT_HEAD_BLKNO; + int listCount = 0; + double maxDistance = DBL_MAX; + + /* Search all list pages */ + while (BlockNumberIsValid(nextblkno)) { + Buffer cbuf; + Page cpage; + OffsetNumber maxoffno; + + cbuf = ReadBuffer(scan->indexRelation, nextblkno); + LockBuffer(cbuf, BUFFER_LOCK_SHARE); + cpage = BufferGetPage(cbuf); + + maxoffno = PageGetMaxOffsetNumber(cpage); + + for (OffsetNumber offno = FirstOffsetNumber; offno <= maxoffno; offno = OffsetNumberNext(offno)) { + IvfflatList list = (IvfflatList)PageGetItem(cpage, PageGetItemId(cpage, offno)); + double distance; + + /* Use procinfo from the index instead of scan key for performance */ + distance = DatumGetFloat8(so->distfunc(so->procinfo, so->collation, PointerGetDatum(&list->center), value)); + + if (listCount < so->probes) { + IvfflatScanList *scanlist; + + scanlist = &so->lists[listCount]; + scanlist->startPage = list->startPage; + scanlist->distance = distance; + listCount++; + + /* Add to heap */ + pairingheap_add(so->listQueue, &scanlist->ph_node); + + /* Calculate max distance */ + if (listCount == so->probes) + maxDistance = ((IvfflatScanList *)pairingheap_first(so->listQueue))->distance; + } else if (distance < maxDistance) { + IvfflatScanList *scanlist; + + /* Remove */ + scanlist = (IvfflatScanList *)pairingheap_remove_first(so->listQueue); + + /* Reuse */ + scanlist->startPage 
= list->startPage; + scanlist->distance = distance; + pairingheap_add(so->listQueue, &scanlist->ph_node); + + /* Update max distance */ + maxDistance = ((IvfflatScanList *)pairingheap_first(so->listQueue))->distance; + } + } + + nextblkno = IvfflatPageGetOpaque(cpage)->nextblkno; + + UnlockReleaseBuffer(cbuf); + } +} + +/* + * Get items + */ +static void GetScanItems(IndexScanDesc scan, Datum value) +{ + IvfflatScanOpaque so = (IvfflatScanOpaque)scan->opaque; + TupleDesc tupdesc = RelationGetDescr(scan->indexRelation); + double tuples = 0; + TupleTableSlot *slot = MakeSingleTupleTableSlot(so->tupdesc); + + /* + * Reuse same set of shared buffers for scan + * + * See postgres/src/backend/storage/buffer/README for description + */ + BufferAccessStrategy bas = GetAccessStrategy(BAS_BULKREAD); + + /* Search closest probes lists */ + while (!pairingheap_is_empty(so->listQueue)) { + BlockNumber searchPage = ((IvfflatScanList *)pairingheap_remove_first(so->listQueue))->startPage; + + /* Search all entry pages for list */ + while (BlockNumberIsValid(searchPage)) { + Buffer buf; + Page page; + OffsetNumber maxoffno; + + buf = ReadBufferExtended(scan->indexRelation, MAIN_FORKNUM, searchPage, RBM_NORMAL, bas); + LockBuffer(buf, BUFFER_LOCK_SHARE); + page = BufferGetPage(buf); + maxoffno = PageGetMaxOffsetNumber(page); + + for (OffsetNumber offno = FirstOffsetNumber; offno <= maxoffno; offno = OffsetNumberNext(offno)) { + IndexTuple itup; + Datum datum; + bool isnull; + ItemId itemid = PageGetItemId(page, offno); + + itup = (IndexTuple)PageGetItem(page, itemid); + datum = index_getattr(itup, 1, tupdesc, &isnull); + + /* + * Add virtual tuple + * + * Use procinfo from the index instead of scan key for + * performance + */ + ExecClearTuple(slot); + slot->tts_values[0] = so->distfunc(so->procinfo, so->collation, datum, value); + slot->tts_isnull[0] = false; + slot->tts_values[1] = PointerGetDatum(&itup->t_tid); + slot->tts_isnull[1] = false; + ExecStoreVirtualTuple(slot); + + 
tuplesort_puttupleslot(so->sortstate, slot); + + tuples++; + } + + searchPage = IvfflatPageGetOpaque(page)->nextblkno; + + UnlockReleaseBuffer(buf); + } + } + + FreeAccessStrategy(bas); + + if (tuples < 100) + ereport(DEBUG1, + (errmsg("index scan found few tuples"), errdetail("Index may have been created with little data."), + errhint("Recreate the index and possibly decrease lists."))); + + tuplesort_performsort(so->sortstate); +} + +/* + * Zero distance + */ +static Datum ZeroDistance(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2) +{ + return Float8GetDatum(0.0); +} + +/* + * Get scan value + */ +static Datum GetScanValue(IndexScanDesc scan) +{ + IvfflatScanOpaque so = (IvfflatScanOpaque)scan->opaque; + Datum value; + + if (scan->orderByData->sk_flags & SK_ISNULL) { + value = PointerGetDatum(NULL); + so->distfunc = ZeroDistance; + } else { + value = scan->orderByData->sk_argument; + so->distfunc = FunctionCall2Coll; + + /* Value should not be compressed or toasted */ + Assert(!VARATT_IS_COMPRESSED(DatumGetPointer(value))); + Assert(!VARATT_IS_EXTENDED(DatumGetPointer(value))); + + /* Normalize if needed */ + if (so->normprocinfo != NULL) + value = IvfflatNormValue(so->typeInfo, so->collation, value); + } + + return value; +} + +/* + * Prepare for an index scan + */ +IndexScanDesc ivfflatbeginscan_internal(Relation index, int nkeys, int norderbys) +{ + IndexScanDesc scan; + IvfflatScanOpaque so; + int lists; + int dimensions; + AttrNumber attNums[] = {1}; + Oid sortOperators[] = {FLOAT8LTOID}; + Oid sortCollations[] = {InvalidOid}; + bool nullsFirstFlags[] = {false}; + int probes = u_sess->datavec_ctx.ivfflat_probes; + int natts = 2; + int attDistance = 1; + int attHeaptid = 2; + + scan = RelationGetIndexScan(index, nkeys, norderbys); + + /* Get lists and dimensions from metapage */ + IvfflatGetMetaPageInfo(index, &lists, &dimensions); + + if (probes > lists) { + probes = lists; + } + + so = (IvfflatScanOpaque)palloc(offsetof(IvfflatScanOpaqueData, 
lists) + probes * sizeof(IvfflatScanList)); + so->typeInfo = IvfflatGetTypeInfo(index); + so->first = true; + so->probes = probes; + so->dimensions = dimensions; + + /* Set support functions */ + so->procinfo = index_getprocinfo(index, 1, IVFFLAT_DISTANCE_PROC); + so->normprocinfo = IvfflatOptionalProcInfo(index, IVFFLAT_NORM_PROC); + so->collation = index->rd_indcollation[0]; + + /* Create tuple description for sorting */ + so->tupdesc = CreateTemplateTupleDesc(natts, false); + TupleDescInitEntry(so->tupdesc, (AttrNumber)attDistance, "distance", FLOAT8OID, -1, 0); + TupleDescInitEntry(so->tupdesc, (AttrNumber)attHeaptid, "heaptid", TIDOID, -1, 0); + + /* Prep sort */ + so->sortstate = tuplesort_begin_heap(so->tupdesc, 1, attNums, sortOperators, sortCollations, nullsFirstFlags, + u_sess->attr.attr_memory.work_mem, NULL, false); + + so->slot = MakeSingleTupleTableSlot(so->tupdesc); + + so->listQueue = pairingheap_allocate(CompareLists, scan); + + scan->opaque = so; + + return scan; +} + +/* + * Start or restart an index scan + */ +void ivfflatrescan_internal(IndexScanDesc scan, ScanKey keys, int nkeys, ScanKey orderbys, int norderbys) +{ + IvfflatScanOpaque so = (IvfflatScanOpaque)scan->opaque; + errno_t rc = EOK; + +#if PG_VERSION_NUM >= 130000 + if (!so->first) + tuplesort_reset(so->sortstate); +#endif + + so->first = true; + pairingheap_reset(so->listQueue); + + if (keys && scan->numberOfKeys > 0) { + rc = memmove_s(scan->keyData, scan->numberOfKeys * sizeof(ScanKeyData), keys, scan->numberOfKeys * sizeof(ScanKeyData)); + securec_check(rc, "\0", "\0"); + } + + if (orderbys && scan->numberOfOrderBys > 0) { + rc = memmove_s(scan->orderByData, scan->numberOfOrderBys * sizeof(ScanKeyData), orderbys, scan->numberOfOrderBys * sizeof(ScanKeyData)); + securec_check(rc, "\0", "\0"); + } +} + +/* + * Fetch the next tuple in the given scan + */ +bool ivfflatgettuple_internal(IndexScanDesc scan, ScanDirection dir) +{ + IvfflatScanOpaque so = (IvfflatScanOpaque)scan->opaque; 
+ + /* + * Index can be used to scan backward, but Postgres doesn't support + * backward scan on operators + */ + Assert(ScanDirectionIsForward(dir)); + + if (so->first) { + Datum value; + + /* Count index scan for stats */ + pgstat_count_index_scan(scan->indexRelation); + + /* Safety check */ + if (scan->orderByData == NULL) + elog(ERROR, "cannot scan ivfflat index without order"); + + /* Requires MVCC-compliant snapshot as not able to pin during sorting */ + /* https://www.postgresql.org/docs/current/index-locking.html */ + if (!IsMVCCSnapshot(scan->xs_snapshot)) + elog(ERROR, "non-MVCC snapshots are not supported with ivfflat"); + + value = GetScanValue(scan); + IvfflatBench("GetScanLists", GetScanLists(scan, value)); + IvfflatBench("GetScanItems", GetScanItems(scan, value)); + so->first = false; + + /* Clean up if we allocated a new value */ + if (value != scan->orderByData->sk_argument) + pfree(DatumGetPointer(value)); + } + + if (tuplesort_gettupleslot(so->sortstate, true, so->slot, NULL)) { + ItemPointer heaptid = (ItemPointer)DatumGetPointer(heap_slot_getattr(so->slot, 2, &so->isnull)); + + scan->xs_ctup.t_self = *heaptid; + scan->xs_recheck = false; + return true; + } + + return false; +} + +/* + * End a scan and release resources + */ +void ivfflatendscan_internal(IndexScanDesc scan) +{ + IvfflatScanOpaque so = (IvfflatScanOpaque)scan->opaque; + + pairingheap_free(so->listQueue); + tuplesort_end(so->sortstate); + + pfree(so); + scan->opaque = NULL; +} diff --git a/src/gausskernel/storage/access/datavec/ivfutils.cpp b/src/gausskernel/storage/access/datavec/ivfutils.cpp new file mode 100644 index 0000000000..79ab8ea4d1 --- /dev/null +++ b/src/gausskernel/storage/access/datavec/ivfutils.cpp @@ -0,0 +1,332 @@ +#include "postgres.h" + +#include "access/generic_xlog.h" +#include "access/datavec/bitvec.h" +#include "catalog/pg_type.h" +#include "fmgr.h" +#include "access/datavec/halfutils.h" +#include "access/datavec/halfvec.h" +#include 
"access/datavec/ivfflat.h" +#include "storage/buf/bufmgr.h" + +/* + * Allocate a vector array + */ +VectorArray VectorArrayInit(int maxlen, int dimensions, Size itemsize) +{ + VectorArray res = (VectorArray)palloc(sizeof(VectorArrayData)); + + /* Ensure items are aligned to prevent UB */ + itemsize = MAXALIGN(itemsize); + + res->length = 0; + res->maxlen = maxlen; + res->dim = dimensions; + res->itemsize = itemsize; + res->items = (char *)palloc_extended(maxlen * itemsize, MCXT_ALLOC_ZERO | MCXT_ALLOC_HUGE); + return res; +} + +/* + * Free a vector array + */ +void VectorArrayFree(VectorArray arr) +{ + pfree(arr->items); + pfree(arr); +} + +/* + * Get the number of lists in the index + */ +int IvfflatGetLists(Relation index) +{ + IvfflatOptions *opts = (IvfflatOptions *)index->rd_options; + + if (opts) + return opts->lists; + + return IVFFLAT_DEFAULT_LISTS; +} + +/* + * Get proc + */ +FmgrInfo *IvfflatOptionalProcInfo(Relation index, uint16 procnum) +{ + if (!OidIsValid(index_getprocid(index, 1, procnum))) + return NULL; + + return index_getprocinfo(index, 1, procnum); +} + +/* + * Normalize value + */ +Datum IvfflatNormValue(const IvfflatTypeInfo *typeInfo, Oid collation, Datum value) +{ + return DirectFunctionCall1Coll(typeInfo->normalize, collation, value); +} + +/* + * Check if non-zero norm + */ +bool IvfflatCheckNorm(FmgrInfo *procinfo, Oid collation, Datum value) +{ + return DatumGetFloat8(FunctionCall1Coll(procinfo, collation, value)) > 0; +} + +/* + * New buffer + */ +Buffer IvfflatNewBuffer(Relation index, ForkNumber forkNum) +{ + Buffer buf = ReadBufferExtended(index, forkNum, P_NEW, RBM_NORMAL, NULL); + + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + return buf; +} + +/* + * Init page + */ +void IvfflatInitPage(Buffer buf, Page page) +{ + PageInit(page, BufferGetPageSize(buf), sizeof(IvfflatPageOpaqueData)); + IvfflatPageGetOpaque(page)->nextblkno = InvalidBlockNumber; + IvfflatPageGetOpaque(page)->page_id = IVFFLAT_PAGE_ID; +} + +/* + * Init and register 
page + */ +void IvfflatInitRegisterPage(Relation index, Buffer *buf, Page *page, GenericXLogState **state) +{ + *state = GenericXLogStart(index); + *page = GenericXLogRegisterBuffer(*state, *buf, GENERIC_XLOG_FULL_IMAGE); + IvfflatInitPage(*buf, *page); +} + +/* + * Commit buffer + */ +void IvfflatCommitBuffer(Buffer buf, GenericXLogState *state) +{ + GenericXLogFinish(state); + UnlockReleaseBuffer(buf); +} + +/* + * Add a new page + * + * The order is very important!! + */ +void IvfflatAppendPage(Relation index, Buffer *buf, Page *page, GenericXLogState **state, ForkNumber forkNum) +{ + /* Get new buffer */ + Buffer newbuf = IvfflatNewBuffer(index, forkNum); + Page newpage = GenericXLogRegisterBuffer(*state, newbuf, GENERIC_XLOG_FULL_IMAGE); + + /* Update the previous buffer */ + IvfflatPageGetOpaque(*page)->nextblkno = BufferGetBlockNumber(newbuf); + + /* Init new page */ + IvfflatInitPage(newbuf, newpage); + + /* Commit */ + GenericXLogFinish(*state); + + /* Unlock */ + UnlockReleaseBuffer(*buf); + + *state = GenericXLogStart(index); + *page = GenericXLogRegisterBuffer(*state, newbuf, GENERIC_XLOG_FULL_IMAGE); + *buf = newbuf; +} + +/* + * Get the metapage info + */ +void IvfflatGetMetaPageInfo(Relation index, int *lists, int *dimensions) +{ + Buffer buf; + Page page; + IvfflatMetaPage metap; + + buf = ReadBuffer(index, IVFFLAT_METAPAGE_BLKNO); + LockBuffer(buf, BUFFER_LOCK_SHARE); + page = BufferGetPage(buf); + metap = IvfflatPageGetMeta(page); + if (unlikely(metap->magicNumber != IVFFLAT_MAGIC_NUMBER)) + elog(ERROR, "ivfflat index is not valid"); + + if (lists != NULL) + *lists = metap->lists; + + if (dimensions != NULL) + *dimensions = metap->dimensions; + + UnlockReleaseBuffer(buf); +} + +/* + * Update the start or insert page of a list + */ +void IvfflatUpdateList(Relation index, ListInfo listInfo, BlockNumber insertPage, BlockNumber originalInsertPage, + BlockNumber startPage, ForkNumber forkNum) +{ + Buffer buf; + Page page; + GenericXLogState *state; + 
IvfflatList list; + bool changed = false; + + buf = ReadBufferExtended(index, forkNum, listInfo.blkno, RBM_NORMAL, NULL); + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + state = GenericXLogStart(index); + page = GenericXLogRegisterBuffer(state, buf, 0); + list = (IvfflatList)PageGetItem(page, PageGetItemId(page, listInfo.offno)); + if (BlockNumberIsValid(insertPage) && insertPage != list->insertPage) { + /* Skip update if insert page is lower than original insert page */ + /* This is needed to prevent insert from overwriting vacuum */ + if (!BlockNumberIsValid(originalInsertPage) || insertPage >= originalInsertPage) { + list->insertPage = insertPage; + changed = true; + } + } + + if (BlockNumberIsValid(startPage) && startPage != list->startPage) { + list->startPage = startPage; + changed = true; + } + + /* Only commit if changed */ + if (changed) { + IvfflatCommitBuffer(buf, state); + } else { + GenericXLogAbort(state); + UnlockReleaseBuffer(buf); + } +} + +static Size VectorItemSize(int dimensions) +{ + return VECTOR_SIZE(dimensions); +} + +static Size HalfvecItemSize(int dimensions) +{ + return HALFVEC_SIZE(dimensions); +} + +static Size BitItemSize(int dimensions) +{ + return VARBITTOTALLEN(dimensions); +} + +static void VectorUpdateCenter(Pointer v, int dimensions, float *x) +{ + Vector *vec = (Vector *)v; + + SET_VARSIZE(vec, VECTOR_SIZE(dimensions)); + vec->dim = dimensions; + + for (int k = 0; k < dimensions; k++) + vec->x[k] = x[k]; +} + +static void HalfvecUpdateCenter(Pointer v, int dimensions, float *x) +{ + HalfVector *vec = (HalfVector *)v; + + SET_VARSIZE(vec, HALFVEC_SIZE(dimensions)); + vec->dim = dimensions; + + for (int k = 0; k < dimensions; k++) + vec->x[k] = Float4ToHalfUnchecked(x[k]); +} + +static void BitUpdateCenter(Pointer v, int dimensions, float *x) +{ + VarBit *vec = (VarBit *)v; + unsigned char *nx = VARBITS(vec); + + SET_VARSIZE(vec, VARBITTOTALLEN(dimensions)); + VARBITLEN(vec) = dimensions; + + for (uint32 k = 0; k < VARBITBYTES(vec); 
k++) { + nx[k] = 0; + } + + for (int k = 0; k < dimensions; k++) { + nx[k / 8] |= (x[k] > 0.5 ? 1 : 0) << (7 - (k % 8)); + } +} + +static void VectorSumCenter(Pointer v, float *x) +{ + Vector *vec = (Vector *)v; + + for (int k = 0; k < vec->dim; k++) + x[k] += vec->x[k]; +} + +static void HalfvecSumCenter(Pointer v, float *x) +{ + HalfVector *vec = (HalfVector *)v; + + for (int k = 0; k < vec->dim; k++) + x[k] += HalfToFloat4(vec->x[k]); +} + +static void BitSumCenter(Pointer v, float *x) +{ + VarBit *vec = (VarBit *)v; + + for (int k = 0; k < VARBITLEN(vec); k++) + x[k] += (float)(((VARBITS(vec)[k / 8]) >> (7 - (k % 8))) & 0x01); +} + +/* + * Get type info + */ +const IvfflatTypeInfo *IvfflatGetTypeInfo(Relation index) +{ + FmgrInfo *procinfo = IvfflatOptionalProcInfo(index, IVFFLAT_TYPE_INFO_PROC); + + if (procinfo == NULL) { + static const IvfflatTypeInfo typeInfo = {.maxDimensions = IVFFLAT_MAX_DIM, + .normalize = l2_normalize, + .itemSize = VectorItemSize, + .updateCenter = VectorUpdateCenter, + .sumCenter = VectorSumCenter}; + + return (&typeInfo); + } else { + return (const IvfflatTypeInfo *)DatumGetPointer(OidFunctionCall0Coll(procinfo->fn_oid, InvalidOid)); + } +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(ivfflat_halfvec_support); +Datum ivfflat_halfvec_support(PG_FUNCTION_ARGS) +{ + static const IvfflatTypeInfo typeInfo = {.maxDimensions = IVFFLAT_MAX_DIM * 2, + .normalize = halfvec_l2_normalize, + .itemSize = HalfvecItemSize, + .updateCenter = HalfvecUpdateCenter, + .sumCenter = HalfvecSumCenter}; + + PG_RETURN_POINTER(&typeInfo); +}; + +PGDLLEXPORT PG_FUNCTION_INFO_V1(ivfflat_bit_support); +Datum ivfflat_bit_support(PG_FUNCTION_ARGS) +{ + static const IvfflatTypeInfo typeInfo = {.maxDimensions = IVFFLAT_MAX_DIM * 32, + .normalize = NULL, + .itemSize = BitItemSize, + .updateCenter = BitUpdateCenter, + .sumCenter = BitSumCenter}; + + PG_RETURN_POINTER(&typeInfo); +}; diff --git a/src/gausskernel/storage/access/datavec/ivfvacuum.cpp 
b/src/gausskernel/storage/access/datavec/ivfvacuum.cpp new file mode 100644 index 0000000000..5a5f256d16 --- /dev/null +++ b/src/gausskernel/storage/access/datavec/ivfvacuum.cpp @@ -0,0 +1,145 @@ +#include "postgres.h" + +#include "access/generic_xlog.h" +#include "commands/vacuum.h" +#include "access/datavec/ivfflat.h" +#include "storage/buf/bufmgr.h" + +/* + * Bulk delete tuples from the index + */ +IndexBulkDeleteResult *ivfflatbulkdelete_internal(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, + IndexBulkDeleteCallback callback, void *callbackState) +{ + Relation index = info->index; + BlockNumber blkno = IVFFLAT_HEAD_BLKNO; + BufferAccessStrategy bas = GetAccessStrategy(BAS_BULKREAD); + + if (stats == NULL) + stats = (IndexBulkDeleteResult *)palloc0(sizeof(IndexBulkDeleteResult)); + + /* Iterate over list pages */ + while (BlockNumberIsValid(blkno)) { + Buffer cbuf; + Page cpage; + OffsetNumber coffno; + OffsetNumber cmaxoffno; + BlockNumber startPages[MaxOffsetNumber]; + ListInfo listInfo; + + cbuf = ReadBuffer(index, blkno); + LockBuffer(cbuf, BUFFER_LOCK_SHARE); + cpage = BufferGetPage(cbuf); + + cmaxoffno = PageGetMaxOffsetNumber(cpage); + + /* Iterate over lists */ + for (coffno = FirstOffsetNumber; coffno <= cmaxoffno; coffno = OffsetNumberNext(coffno)) { + IvfflatList list = (IvfflatList)PageGetItem(cpage, PageGetItemId(cpage, coffno)); + + startPages[coffno - FirstOffsetNumber] = list->startPage; + } + + listInfo.blkno = blkno; + blkno = IvfflatPageGetOpaque(cpage)->nextblkno; + + UnlockReleaseBuffer(cbuf); + + for (coffno = FirstOffsetNumber; coffno <= cmaxoffno; coffno = OffsetNumberNext(coffno)) { + BlockNumber searchPage = startPages[coffno - FirstOffsetNumber]; + BlockNumber insertPage = InvalidBlockNumber; + + /* Iterate over entry pages */ + while (BlockNumberIsValid(searchPage)) { + Buffer buf; + Page page; + GenericXLogState *state; + OffsetNumber offno; + OffsetNumber maxoffno; + OffsetNumber deletable[MaxOffsetNumber]; + int ndeletable; 
+ + vacuum_delay_point(); + + buf = ReadBufferExtended(index, MAIN_FORKNUM, searchPage, RBM_NORMAL, bas); + + /* + * ambulkdelete cannot delete entries from pages that are + * pinned by other backends + * + * https://www.postgresql.org/docs/current/index-locking.html + */ + LockBufferForCleanup(buf); + + state = GenericXLogStart(index); + page = GenericXLogRegisterBuffer(state, buf, 0); + + maxoffno = PageGetMaxOffsetNumber(page); + ndeletable = 0; + + /* Find deleted tuples */ + for (offno = FirstOffsetNumber; offno <= maxoffno; offno = OffsetNumberNext(offno)) { + IndexTuple itup = (IndexTuple)PageGetItem(page, PageGetItemId(page, offno)); + ItemPointer htup = &(itup->t_tid); + + if (callback(htup, callbackState, InvalidOid, InvalidBktId)) { + deletable[ndeletable++] = offno; + stats->tuples_removed++; + } else + stats->num_index_tuples++; + } + + /* Set to first free page */ + /* Must be set before searchPage is updated */ + if (!BlockNumberIsValid(insertPage) && ndeletable > 0) + insertPage = searchPage; + + searchPage = IvfflatPageGetOpaque(page)->nextblkno; + + if (ndeletable > 0) { + /* Delete tuples */ + PageIndexMultiDelete(page, deletable, ndeletable); + GenericXLogFinish(state); + } else + GenericXLogAbort(state); + + UnlockReleaseBuffer(buf); + } + + /* + * Update after all tuples deleted. + * + * We don't add or delete items from lists pages, so offset won't + * change. 
+ */ + if (BlockNumberIsValid(insertPage)) { + listInfo.offno = coffno; + IvfflatUpdateList(index, listInfo, insertPage, InvalidBlockNumber, InvalidBlockNumber, MAIN_FORKNUM); + } + } + } + + FreeAccessStrategy(bas); + + return stats; +} + +/* + * Clean up after a VACUUM operation + */ +IndexBulkDeleteResult *ivfflatvacuumcleanup_internal(IndexVacuumInfo *info, IndexBulkDeleteResult *stats) +{ + Relation rel = info->index; + + if (info->analyze_only) + return stats; + + /* stats is NULL if ambulkdelete not called */ + /* OK to return NULL if index not changed */ + if (stats == NULL) + return NULL; + + stats->num_pages = RelationGetNumberOfBlocks(rel); + + return stats; +} diff --git a/src/gausskernel/storage/access/datavec/vecindex.cpp b/src/gausskernel/storage/access/datavec/vecindex.cpp new file mode 100644 index 0000000000..618cf632d3 --- /dev/null +++ b/src/gausskernel/storage/access/datavec/vecindex.cpp @@ -0,0 +1,292 @@ +#include "access/transam.h" +#include "access/datavec/hnsw.h" +#include "storage/procarray.h" +#include "access/datavec/vecindex.h" + +VectorScanData *VecGetScanData(IndexScanDesc scan) +{ + switch (scan->indexRelation->rd_rel->relam) { + case HNSW_AM_OID: + return &((HnswScanOpaque)scan->opaque)->vs; + default: + break; + } + return NULL; +} + +size_t VecDefaultMaxItemSize(IndexScanDesc scan) +{ + switch (scan->indexRelation->rd_rel->relam) { + case HNSW_AM_OID: + return HnswDefaultMaxItemSize; + default: + break; + } + return (size_t)-1; +} + +TransactionIdStatus HnswCheckXid(TransactionId xid) +{ + TransactionIdStatus ts = TransactionIdGetStatus(xid); + /* Please refer to HeapTupleSatisfiesVacuum: an in-progress status is re-checked and may resolve to committed/aborted */ + if (ts == XID_INPROGRESS) { + if (TransactionIdIsInProgress(xid)) { + /* still in progress, keep XID_INPROGRESS */ + } else if (TransactionIdDidCommit(xid)) { + ts = XID_COMMITTED; + } else { + ts = XID_ABORTED; + } + } + return ts; +} + +bool VecItupGetXminXmax(Page page, OffsetNumber offnum, TransactionId oldest_xmin, TransactionId *xmin, + TransactionId *xmax, bool 
*xminCommitted, bool *xmaxCommitted, bool isToast) +{ + ItemId iid = PageGetItemId(page, offnum); + HnswElementTuple itup = (HnswElementTuple)PageGetItem(page, iid); + IndexTransInfo *idxXid = (IndexTransInfo *)VecIndexTupleGetXid(itup); + bool isDead = false; + bool needCheckXmin = true; + + *xminCommitted = *xmaxCommitted = false; + + if (ItemIdIsDead(iid)) { + *xmin = InvalidTransactionId; + *xmax = InvalidTransactionId; + return true; + } + + *xmin = idxXid->xmin; + *xmax = idxXid->xmax; + + /* examine xmax */ + if (TransactionIdIsValid(*xmax)) { + TransactionIdStatus ts = HnswCheckXid(*xmax); + switch (ts) { + case XID_INPROGRESS: + if (TransactionIdEquals(*xmin, *xmax)) { + needCheckXmin = false; + } + break; + case XID_COMMITTED: + *xminCommitted = *xmaxCommitted = true; + needCheckXmin = false; + break; + case XID_ABORTED: + if (TransactionIdEquals(*xmin, *xmax)) { + /* xmin and xmax are the same aborted xid; must be compared before xmax is cleared below */ + idxXid->xmin = InvalidTransactionId; + *xmin = InvalidTransactionId; + needCheckXmin = false; + } + idxXid->xmax = InvalidTransactionId; + *xmax = InvalidTransactionId; + break; + } + } + + /* examine xmin */ + if (needCheckXmin) { + if (IndexItemIdIsFrozen(iid)) { + *xminCommitted = true; + } else if (TransactionIdIsValid(*xmin)) { + TransactionIdStatus ts = HnswCheckXid(*xmin); + switch (ts) { + case XID_INPROGRESS: + break; + case XID_COMMITTED: + *xminCommitted = true; + break; + case XID_ABORTED: + idxXid->xmin = InvalidTransactionId; + *xmin = InvalidTransactionId; + break; + } + } + } + + /* if no oldest_xmin was passed in, use the current oldest_xmin */ + if (!TransactionIdIsValid(oldest_xmin)) { + if (isToast) { + GetOldestXminForUndo(&oldest_xmin); + } else { + oldest_xmin = u_sess->utils_cxt.RecentGlobalDataXmin; + } + } + /* we can't do bypass in hot standby read mode, or index scan and seq scan could return different results */ + if (RecoveryInProgress()) { + oldest_xmin = InvalidTransactionId; + } + + if (!TransactionIdIsValid(*xmin)) { + isDead 
= true; + } + /* before we mark the tuple as DEAD because of xmax, must confirm that xmax has committed */ + if (*xmaxCommitted && TransactionIdPrecedes(*xmax, oldest_xmin)) { + isDead = true; + } + + /* before we mark the tuple as FROZEN, must confirm that xmin has committed */ + if (IndexItemIdIsFrozen(iid)) { + *xmin = FrozenTransactionId; + } else if (*xminCommitted && TransactionIdPrecedes(*xmin, oldest_xmin)) { + IndexItemIdSetFrozen(iid); + *xmin = FrozenTransactionId; + } + + if (isDead) { + ItemIdMarkDead(iid); + *xmin = InvalidTransactionId; + *xmax = InvalidTransactionId; + *xminCommitted = *xmaxCommitted = false; + } + + return isDead; +} + +static bool VecItupEquals(IndexTuple itup1, IndexTuple itup2) +{ + if (itup1 == NULL || itup2 == NULL) { + return false; + } + if (IndexTupleSize(itup1) == 0 || IndexTupleSize(itup2) == 0) { + return false; + } + /* + * compare the binary directly, sizes first so memcmp never reads past the shorter tuple. If these index + * tuples are formed from the same uheap tuple, they should be exactly the same. + */ + return IndexTupleSize(itup1) == IndexTupleSize(itup2) && memcmp(itup1, itup2, IndexTupleSize(itup1)) == 0; +} + +static bool VecVisibilityCheckCid(IndexScanDesc scan, IndexTuple itup, bool *needRecheck) +{ + VectorScanData *vs = VecGetScanData(scan); + Assert(vs != NULL); + + if (VecItupEquals((IndexTuple)vs->lastSelfModifiedItup, itup)) { + *needRecheck = false; + return false; /* tuples with same key and TID will only be returned once */ + } + + /* save this index tuple as lastSelfModifiedItup */ + /* Step1: Check that the buffer space is large enough. */ + size_t maxItemSize = VecDefaultMaxItemSize(scan); + uint newSize = 0; + int multiSize = 2; + if (vs->lastSelfModifiedItup == NULL) { + newSize = IndexTupleSize(itup); + } else if (vs->lastSelfModifiedItupBufferSize < IndexTupleSize(itup)) { + newSize = MAX(vs->lastSelfModifiedItupBufferSize * multiSize, IndexTupleSize(itup)); + newSize = MIN(newSize, maxItemSize); + pfree(vs->lastSelfModifiedItup); + } + /* Step2: Extend when necessary. 
*/ + if (newSize != 0) { + vs->lastSelfModifiedItup = (char *)palloc(newSize); + vs->lastSelfModifiedItupBufferSize = newSize; + } + /* Step3: Save the current IndexTuple; the copy is bounded by the real buffer capacity, not the per-AM maximum. */ + errno_t rc = 0; + rc = memcpy_s(vs->lastSelfModifiedItup, vs->lastSelfModifiedItupBufferSize, itup, IndexTupleSize(itup)); + securec_check(rc, "\0", "\0"); + + *needRecheck = true; + return true; /* treat as visible, but need recheck */ +} + +static bool VecXidSatisfiesMVCC(TransactionId xid, bool committed, Snapshot snapshot, Buffer buffer) +{ + TransactionIdStatus ignore; + + if (!TransactionIdIsValid(xid)) { + return false; /* invisible */ + } + if (xid == FrozenTransactionId) { + return true; /* frozen */ + } + + /* + * We can use snapshot's xmin/xmax as fast bypass after they become valid again. + * Currently, snapshot's csn and xmin/xmax may be inconsistent. The reason is + * that there is a problem with the cooperation of committing and subtransaction. + */ + + /* we can't tell visibility by snapshot's xmin/xmax alone, check snapshot */ + return XidVisibleInSnapshot(xid, snapshot, &ignore, (RecoveryInProgress() ? 
buffer : InvalidBuffer), NULL); +} + +static bool VecVisibilityCheckXid(TransactionId xmin, TransactionId xmax, bool xminCommitted, bool xmaxCommitted, + Snapshot snapshot, Buffer buffer, bool isUpsert) +{ + if (snapshot->satisfies == SNAPSHOT_DIRTY && isUpsert) { + bool xmaxVisible = xmaxCommitted || TransactionIdIsCurrentTransactionId(xmax); + if (xmaxVisible) { + return false; + } + return true; + } + + /* only support MVCC and NOW, ereport used to locate bug */ + if (snapshot->satisfies != SNAPSHOT_VERSION_MVCC && snapshot->satisfies != SNAPSHOT_MVCC && + snapshot->satisfies != SNAPSHOT_NOW) { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("unsupported snapshot type %u for UBTree index.", snapshot->satisfies), + errhint("This kind of operation may not supported."))); + } + + /* handle snapshot MVCC */ + if (snapshot->satisfies == SNAPSHOT_VERSION_MVCC || snapshot->satisfies == SNAPSHOT_MVCC) { + if (VecXidSatisfiesMVCC(xmax, xmaxCommitted, snapshot, buffer)) { + return false; /* already deleted */ + } + if (!VecXidSatisfiesMVCC(xmin, xminCommitted, snapshot, buffer)) { + return false; /* have not inserted yet */ + } + } + + /* handle snapshot NOW */ + if (snapshot->satisfies == SNAPSHOT_NOW) { + return xminCommitted && !xmaxCommitted; + } + + return true; +} + +bool VecVisibilityCheck(IndexScanDesc scan, Page page, OffsetNumber offnum, bool *needRecheck) +{ + bool needVisibilityCheck = + scan->xs_snapshot->satisfies != SNAPSHOT_ANY && scan->xs_snapshot->satisfies != SNAPSHOT_TOAST; + TransactionId xmin, xmax; + bool xminCommitted = false; + bool xmaxCommitted = false; + bool isDead = VecItupGetXminXmax(page, offnum, InvalidTransactionId, &xmin, &xmax, &xminCommitted, &xmaxCommitted, + RelationGetNamespace(scan->indexRelation) == PG_TOAST_NAMESPACE); + + if (needRecheck == NULL) { + *needRecheck = false; + } + + bool isVisible = !isDead; + if (needVisibilityCheck && !isDead) { + /* + * If this IndexTuple is not visible to the current 
Snapshot, try to get the next one. + * We're not going to tell heap to skip visibility check, because it doesn't cost a lot and we need heap + * to check the visibility with CID when snapshot's xid equals to xmin or xmax. + */ + if (scan->xs_snapshot->satisfies == SNAPSHOT_MVCC && + (TransactionIdIsCurrentTransactionId(xmin) || TransactionIdIsCurrentTransactionId(xmax))) { + ItemId iid = PageGetItemId(page, offnum); + IndexTuple tuple = (IndexTuple)PageGetItem(page, iid); + isVisible = VecVisibilityCheckCid(scan, tuple, needRecheck); /* need check cid */ + } else { + VectorScanData *vs = VecGetScanData(scan); + isVisible = VecVisibilityCheckXid(xmin, xmax, xminCommitted, xmaxCommitted, scan->xs_snapshot, vs->buf, + scan->isUpsert); + } + } + + return isVisible; +} diff --git a/src/include/access/datavec/bitvec.h b/src/include/access/datavec/bitvec.h new file mode 100644 index 0000000000..9e6cc588b7 --- /dev/null +++ b/src/include/access/datavec/bitvec.h @@ -0,0 +1,18 @@ +#ifndef BITVEC_H +#define BITVEC_H + +#include "postgres.h" +#include "utils/varbit.h" + +extern uint64 (*BitHammingDistance)(uint32 bytes, unsigned char *ax, unsigned char *bx, uint64 distance); +extern double (*BitJaccardDistance)(uint32 bytes, unsigned char *ax, unsigned char *bx, uint64 ab, uint64 aa, + uint64 bb); + +void BitvecInit(void); + +VarBit *InitBitVector(int dim); + +Datum hamming_distance(PG_FUNCTION_ARGS); +Datum jaccard_distance(PG_FUNCTION_ARGS); + +#endif diff --git a/src/include/access/datavec/halfutils.h b/src/include/access/datavec/halfutils.h new file mode 100644 index 0000000000..7afceafe5c --- /dev/null +++ b/src/include/access/datavec/halfutils.h @@ -0,0 +1,229 @@ +#ifndef HALFUTILS_H +#define HALFUTILS_H + +#include <math.h> + +#include "access/datavec/halfvec.h" +#include "access/datavec/shortest_dec.h" + +#ifdef F16C_SUPPORT +#include <immintrin.h> +#endif + +extern float (*HalfvecL2SquaredDistance)(int dim, half *ax, half *bx); +extern float (*HalfvecInnerProduct)(int dim, half *ax, half 
*bx); +extern double (*HalfvecCosineSimilarity)(int dim, half *ax, half *bx); +extern float (*HalfvecL1Distance)(int dim, half *ax, half *bx); + +void HalfvecInit(void); + +/* + * Check if half is NaN + */ +static inline bool HalfIsNan(half num) +{ +#ifdef FLT16_SUPPORT + return isnan(num); +#else + return (num & 0x7C00) == 0x7C00 && (num & 0x7FFF) != 0x7C00; +#endif +} + +/* + * Check if half is infinite + */ +static inline bool HalfIsInf(half num) +{ +#ifdef FLT16_SUPPORT + return isinf(num); +#else + return (num & 0x7FFF) == 0x7C00; +#endif +} + +/* + * Check if half is zero + */ +static inline bool HalfIsZero(half num) +{ +#ifdef FLT16_SUPPORT + return num == 0; +#else + return (num & 0x7FFF) == 0x0000; +#endif +} + +/* + * Convert a half to a float4 + */ +static inline float HalfToFloat4(half num) +{ +#if defined(F16C_SUPPORT) + return _cvtsh_ss(num); +#elif defined(FLT16_SUPPORT) + return (float)num; +#else + union { + float f; + uint32 i; + } swapfloat; + + union { + half h; + uint16 i; + } swaphalf; + + uint16 bin; + uint32 exponent; + uint32 mantissa; + uint32 result; + + swaphalf.h = num; + bin = swaphalf.i; + exponent = (bin & 0x7C00) >> 10; + mantissa = bin & 0x03FF; + + /* Sign */ + result = (bin & 0x8000) << 16; + + if (unlikely(exponent == 31)) { + if (mantissa == 0) { + /* Infinite */ + result |= 0x7F800000; + } else { + /* NaN */ + result |= 0x7FC00000; + } + } else if (unlikely(exponent == 0)) { + /* Subnormal */ + if (mantissa != 0) { + exponent = -14; + + for (int i = 0; i < 10; i++) { + mantissa <<= 1; + exponent -= 1; + + if ((mantissa >> 10) % 2 == 1) { + mantissa &= 0x03ff; + break; + } + } + + result |= (exponent + 127) << 23; + } + } else { + /* Normal */ + result |= (exponent - 15 + 127) << 23; + } + + result |= mantissa << 13; + + swapfloat.i = result; + return swapfloat.f; +#endif +} + +/* + * Convert a float4 to a half + */ +static inline half Float4ToHalfUnchecked(float num) +{ +#if defined(F16C_SUPPORT) + return _cvtss_sh(num, 0); 
+#elif defined(FLT16_SUPPORT) + return num; +#else + union { + float f; + uint32 i; + } swapfloat; + + union { + half h; + uint16 i; + } swaphalf; + + uint32 bin; + int exponent; + int mantissa; + uint16 result; + + swapfloat.f = num; + bin = swapfloat.i; + exponent = (bin & 0x7F800000) >> 23; + mantissa = bin & 0x007FFFFF; + + /* Sign */ + result = (bin & 0x80000000) >> 16; + + if (isinf(num)) { + /* Infinite */ + result |= 0x7C00; + } else if (isnan(num)) { + /* NaN */ + result |= 0x7E00; + result |= mantissa >> 13; + } else if (exponent > 98) { + int m; + int gr; + int s; + + exponent -= 127; + s = mantissa & 0x00000FFF; + + /* Subnormal */ + if (exponent < -14) { + int diff = -exponent - 14; + + mantissa >>= diff; + mantissa += 1 << (23 - diff); + s |= mantissa & 0x00000FFF; + } + + m = mantissa >> 13; + + /* Round */ + gr = (mantissa >> 12) % 4; + if (gr == 3 || (gr == 1 && s != 0)) + m += 1; + + if (m == 1024) { + m = 0; + exponent += 1; + } + + if (exponent > 15) { + /* Infinite */ + result |= 0x7C00; + } else { + if (exponent >= -14) + result |= (exponent + 15) << 10; + + result |= m; + } + } + + swaphalf.i = result; + return swaphalf.h; +#endif +} + +/* + * Convert a float4 to a half + */ +static inline half Float4ToHalf(float num) +{ + half result = Float4ToHalfUnchecked(num); + if (unlikely(HalfIsInf(result)) && !isinf(num)) { + char *buf = (char *)palloc(FLOAT_SHORTEST_DECIMAL_LEN); + + FloatToShortestDecimalBuf(num, buf); + + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), errmsg("\"%s\" is out of range for type halfvec", buf))); + } + + return result; +} + +#endif diff --git a/src/include/access/datavec/halfvec.h b/src/include/access/datavec/halfvec.h new file mode 100644 index 0000000000..ffee898f19 --- /dev/null +++ b/src/include/access/datavec/halfvec.h @@ -0,0 +1,111 @@ +#ifndef HALFVEC_H +#define HALFVEC_H + +#define __STDC_WANT_IEC_60559_TYPES_EXT__ + +#include +#include "fmgr.h" + +/* We use two types of dispatching: intrinsics 
and target_clones */ +/* TODO Move to better place */ +#ifndef DISABLE_DISPATCH +/* Only enable for more recent compilers to keep build process simple */ +#if defined(__x86_64__) && defined(__GNUC__) && __GNUC__ >= 11 +#define USE_DISPATCH +#elif defined(__x86_64__) && defined(__clang_major__) && __clang_major__ >= 7 +#define USE_DISPATCH +#elif defined(_M_AMD64) && defined(_MSC_VER) && _MSC_VER >= 1920 +#define USE_DISPATCH +#endif +#endif + +/* target_clones requires glibc */ +#if defined(USE_DISPATCH) && defined(__gnu_linux__) && defined(__has_attribute) +/* Use separate line for portability */ +#if __has_attribute(target_clones) +#define USE_TARGET_CLONES +#endif +#endif + +/* Apple clang check needed for universal binaries on Mac */ +#if defined(USE_DISPATCH) && (defined(HAVE__GET_CPUID) || defined(__apple_build_version__)) +#define USE__GET_CPUID +#endif + +#if defined(USE_DISPATCH) +#define HALFVEC_DISPATCH +#endif + +/* F16C has better performance than _Float16 (on x86-64) */ +#if defined(__F16C__) +#define F16C_SUPPORT +#elif defined(__FLT16_MAX__) && !defined(HALFVEC_DISPATCH) +#define FLT16_SUPPORT +#endif + +// TODO support _Float16 +#ifdef FLT16_SUPPORT +#define half float +#define HALF_MAX FLT16_MAX +#else +#define half uint16 +#define HALF_MAX 65504 +#endif + +#define HALFVEC_MAX_DIM 16000 + +#define HALFVEC_SIZE(_dim) (offsetof(HalfVector, x) + sizeof(half) * (_dim)) +#define DatumGetHalfVector(x) ((HalfVector *)PG_DETOAST_DATUM(x)) +#define PG_GETARG_HALFVEC_P(x) DatumGetHalfVector(PG_GETARG_DATUM(x)) +#define PG_RETURN_HALFVEC_P(x) PG_RETURN_POINTER(x) + +typedef struct HalfVector { + int32 vl_len_; /* varlena header (do not touch directly!) 
*/ + int16 dim; /* number of dimensions */ + int16 unused; /* reserved for future use, always zero */ + half x[FLEXIBLE_ARRAY_MEMBER]; +} HalfVector; + +HalfVector *InitHalfVector(int dim); + +Datum halfvec_in(PG_FUNCTION_ARGS); +Datum halfvec_out(PG_FUNCTION_ARGS); +Datum halfvec_typmod_in(PG_FUNCTION_ARGS); +Datum halfvec_recv(PG_FUNCTION_ARGS); +Datum halfvec_send(PG_FUNCTION_ARGS); +Datum halfvec_l2_distance(PG_FUNCTION_ARGS); +Datum halfvec_inner_product(PG_FUNCTION_ARGS); +Datum halfvec_cosine_distance(PG_FUNCTION_ARGS); +Datum halfvec_l1_distance(PG_FUNCTION_ARGS); +Datum halfvec_vector_dims(PG_FUNCTION_ARGS); +Datum halfvec_l2_norm(PG_FUNCTION_ARGS); +Datum halfvec_l2_normalize(PG_FUNCTION_ARGS); +Datum halfvec_binary_quantize(PG_FUNCTION_ARGS); +Datum halfvec_subvector(PG_FUNCTION_ARGS); +Datum halfvec_add(PG_FUNCTION_ARGS); +Datum halfvec_sub(PG_FUNCTION_ARGS); +Datum halfvec_mul(PG_FUNCTION_ARGS); +Datum halfvec_concat(PG_FUNCTION_ARGS); +Datum halfvec_lt(PG_FUNCTION_ARGS); +Datum halfvec_le(PG_FUNCTION_ARGS); +Datum halfvec_eq(PG_FUNCTION_ARGS); +Datum halfvec_ne(PG_FUNCTION_ARGS); +Datum halfvec_ge(PG_FUNCTION_ARGS); +Datum halfvec_gt(PG_FUNCTION_ARGS); +Datum halfvec_cmp(PG_FUNCTION_ARGS); +Datum halfvec_l2_squared_distance(PG_FUNCTION_ARGS); +Datum halfvec_negative_inner_product(PG_FUNCTION_ARGS); +Datum halfvec_spherical_distance(PG_FUNCTION_ARGS); +Datum halfvec_accum(PG_FUNCTION_ARGS); +Datum halfvec_avg(PG_FUNCTION_ARGS); +Datum halfvec_combine(PG_FUNCTION_ARGS); +Datum halfvec(PG_FUNCTION_ARGS); +Datum halfvec_to_vector(PG_FUNCTION_ARGS); +Datum vector_to_halfvec(PG_FUNCTION_ARGS); +Datum array_to_halfvec(PG_FUNCTION_ARGS); +Datum array_to_halfvec(PG_FUNCTION_ARGS); +Datum array_to_halfvec(PG_FUNCTION_ARGS); +Datum array_to_halfvec(PG_FUNCTION_ARGS); +Datum halfvec_to_float4(PG_FUNCTION_ARGS); + +#endif diff --git a/src/include/access/datavec/hnsw.h b/src/include/access/datavec/hnsw.h new file mode 100644 index 0000000000..cb2bd468bd --- 
/dev/null +++ b/src/include/access/datavec/hnsw.h @@ -0,0 +1,630 @@ +#ifndef HNSW_H +#define HNSW_H + +#include "postgres.h" + +#include "access/genam.h" +#include "lib/pairingheap.h" +#include "nodes/execnodes.h" +#include "port.h" /* for random() */ +#include "access/datavec/vector.h" +#include "access/datavec/vecindex.h" + +#define HNSW_MAX_DIM 2000 +#define HNSW_MAX_NNZ 1000 + +/* Support functions */ +#define HNSW_DISTANCE_PROC 1 +#define HNSW_NORM_PROC 2 +#define HNSW_TYPE_INFO_PROC 3 + +#define HNSW_VERSION 1 +#define HNSW_MAGIC_NUMBER 0xA953A953 +#define HNSW_PAGE_ID 0xFF90 + +/* Preserved page numbers */ +#define HNSW_METAPAGE_BLKNO 0 +#define HNSW_HEAD_BLKNO 1 /* first element page */ +#define HNSW_PQTABLE_START_BLKNO 1 /* pqtable start page */ +#define HNSW_PQTABLE_STORAGE_SIZE (uint16)(6 * 1024) /* pqtable storage size in each page */ + +/* Append page slot info */ +#define HNSW_DEFAULT_NPAGES_PER_SLOT 50 +#define HNSW_BUFFER_THRESHOLD 4 + +/* Must correspond to page numbers since page lock is used */ +#define HNSW_UPDATE_LOCK 0 +#define HNSW_SCAN_LOCK 1 + +/* HNSW parameters */ +#define HNSW_DEFAULT_M 16 +#define HNSW_MIN_M 2 +#define HNSW_MAX_M 100 +#define HNSW_DEFAULT_EF_CONSTRUCTION 64 +#define HNSW_MIN_EF_CONSTRUCTION 4 +#define HNSW_MAX_EF_CONSTRUCTION 1000 +#define HNSW_DEFAULT_EF_SEARCH 40 +#define HNSW_MIN_EF_SEARCH 1 +#define HNSW_MAX_EF_SEARCH 1000 +#define HNSW_DEFAULT_ENABLE_PQ false +#define HNSW_DEFAULT_PQ_M 1 +#define HNSW_MIN_PQ_M 1 +#define HNSW_MAX_PQ_M 65535 +#define HNSW_DEFAULT_PQ_KSUB 1 +#define HNSW_MIN_PQ_KSUB 1 +#define HNSW_MAX_PQ_KSUB 65535 + +/* Tuple types */ +#define HNSW_ELEMENT_TUPLE_TYPE 1 +#define HNSW_NEIGHBOR_TUPLE_TYPE 2 + +/* page types */ +#define HNSW_DEFAULT_PAGE_TYPE 0 +#define HNSW_ELEMENT_PAGE_TYPE 1 +#define HNSW_NEIGHBOR_PAGE_TYPE 2 +#define HNSW_USTORE_PAGE_TYPE 3 + +/* Make graph robust against non-HOT updates */ +#define HNSW_HEAPTIDS 10 + +#define HNSW_UPDATE_ENTRY_GREATER 1 +#define 
HNSW_UPDATE_ENTRY_ALWAYS 2 + +/* Build phases */ +/* PROGRESS_CREATEIDX_SUBPHASE_INITIALIZE is 1 */ +#define PROGRESS_HNSW_PHASE_LOAD 2 + +#define HNSW_MAX_SIZE \ + (BLCKSZ - MAXALIGN(SizeOfPageHeaderData) - MAXALIGN(sizeof(HnswPageOpaqueData)) - sizeof(ItemIdData)) +#define HNSW_TUPLE_ALLOC_SIZE BLCKSZ + +#define HNSW_ELEMENT_TUPLE_SIZE(size) MAXALIGN(offsetof(HnswElementTupleData, data) + (size)) +#define HNSW_NEIGHBOR_TUPLE_SIZE(level, m) \ + MAXALIGN(offsetof(HnswNeighborTupleData, indextids) + ((level) + 2) * (m) * sizeof(ItemPointerData)) + +#define HNSW_NEIGHBOR_ARRAY_SIZE(lm) (offsetof(HnswNeighborArray, items) + sizeof(HnswCandidate) * (lm)) + +#define HnswPageGetOpaque(page) ((HnswPageOpaque)PageGetSpecialPointer(page)) +#define HnswPageGetMeta(page) ((HnswMetaPageData *)PageGetContents(page)) +#define HnswPageGetAppendMeta(page) ((HnswAppendMetaPageData *)PageGetContents(page)) + +#define HnswDefaultMaxItemSize \ + MAXALIGN_DOWN((BLCKSZ - MAXALIGN(SizeOfPageHeaderData + sizeof(ItemIdData) + sizeof(ItemPointerData)) - \ + MAXALIGN(sizeof(HnswPageOpaqueData)))) + +#if PG_VERSION_NUM >= 150000 +#define RandomDouble() pg_prng_double(&pg_global_prng_state) +#define SeedRandom(seed) pg_prng_seed(&pg_global_prng_state, seed) +#else +#define RandomDouble() (((double)random()) / MAX_RANDOM_VALUE) +#define SeedRandom(seed) srandom(seed) +#endif + +#if PG_VERSION_NUM < 130000 +#define list_delete_last(list) list_truncate(list, list_length(list) - 1) +#define list_sort(list, cmp) \ + do { \ + ListCell *cell; \ + int i; \ + int len = list_length(list); \ + ListCell **list_arr; \ + List *new_list; \ + \ + if (len == 0) { \ + list = NIL; \ + return list; \ + } \ + i = 0; \ + list_arr = (ListCell **)palloc(sizeof(ListCell *) * len); \ + foreach (cell, list) \ + list_arr[i++] = cell; \ + \ + qsort(list_arr, len, sizeof(ListCell *), cmp); \ + \ + new_list = (List *)palloc(sizeof(List)); \ + new_list->type = (list->type); \ + new_list->length = len; \ + new_list->head = 
list_arr[len - 1]; \ + new_list->tail = list_arr[0]; \ + \ + for (i = len - 1; i > 0; i--) \ + list_arr[i]->next = list_arr[i - 1]; \ + \ + list_arr[0]->next = NULL; \ + pfree(list_arr); \ + list = new_list; \ + } while (0) +#endif + +#define HnswIsElementTuple(tup) ((tup)->type == HNSW_ELEMENT_TUPLE_TYPE) +#define HnswIsNeighborTuple(tup) ((tup)->type == HNSW_NEIGHBOR_TUPLE_TYPE) + +/* 2 * M connections for ground layer */ +#define HnswGetLayerM(m, layer) ((layer == 0) ? (m) * 2 : (m)) + +/* Optimal ML from paper */ +#define HnswGetMl(m) (1 / log(m)) + +/* Ensure fits on page and in uint8 */ +#define HnswGetMaxLevel(m) \ + Min(((BLCKSZ - MAXALIGN(SizeOfPageHeaderData) - MAXALIGN(sizeof(HnswPageOpaqueData)) - \ + offsetof(HnswNeighborTupleData, indextids) - sizeof(ItemIdData)) / \ + (sizeof(ItemPointerData)) / (m)) - \ + 2, \ + 255) + +#define HnswGetValue(base, element) PointerGetDatum(HnswPtrAccess(base, (element)->value)) + +#if PG_VERSION_NUM < 140005 +#define relptr_offset(rp) ((rp).relptr_off - 1) +#endif + +/* Pointer macros */ +#define HnswPtrAccess(base, hp) ((base) == NULL ? (hp).ptr : relptr_access(base, (hp).relptr)) +#define HnswPtrStore(base, hp, value) \ + ((base) == NULL ? (void)((hp).ptr = (value)) : (void)relptr_store(base, (hp).relptr, value)) +#define HnswPtrIsNull(base, hp) ((base) == NULL ? (hp).ptr == NULL : relptr_is_null((hp).relptr)) +#define HnswPtrEqual(base, hp1, hp2) \ + ((base) == NULL ? 
(hp1).ptr == (hp2).ptr : relptr_offset((hp1).relptr) == relptr_offset((hp2).relptr)) + +/* For code paths dedicated to each type */ +#define HnswPtrPointer(hp) (hp).ptr +#define HnswPtrOffset(hp) relptr_offset((hp).relptr) + +/* Variables */ +extern int hnsw_lock_tranche_id; + +typedef struct HnswElementData HnswElementData; +typedef struct HnswNeighborArray HnswNeighborArray; + +#define relptr(type) \ + union { \ + type *relptr_type; \ + Size relptr_off; \ + } + +#define relptr_declare(type, relptrtype) typedef relptr(type) (relptrtype) + +#ifdef HAVE__BUILTIN_TYPES_COMPATIBLE_P +#define relptr_access(base, rp) \ + (AssertVariableIsOfTypeMacro(base, char *), \ + (__typeof__((rp).relptr_type))((rp).relptr_off == 0 ? NULL : (base) + (rp).relptr_off - 1)) +#else +/* + * If we don't have __builtin_types_compatible_p, assume we might not have + * __typeof__ either. + */ +#define relptr_access(base, rp) \ + (AssertVariableIsOfTypeMacro(base, char *), (void *)((rp).relptr_off == 0 ? NULL : (base) + (rp).relptr_off - 1)) +#endif + +#define relptr_is_null(rp) ((rp).relptr_off == 0) + +#define relptr_offset(rp) ((rp).relptr_off - 1) + +/* We use this inline to avoid double eval of "val" in relptr_store */ +static inline Size relptr_store_eval(char *base, char *val) +{ + if (val == NULL) { + return 0; + } else { + Assert(val >= base); + return val - base + 1; + } +} + +#ifdef HAVE__BUILTIN_TYPES_COMPATIBLE_P +#define relptr_store(base, rp, val) \ + (AssertVariableIsOfTypeMacro(base, char *), AssertVariableIsOfTypeMacro(val, __typeof__((rp).relptr_type)), \ + (rp).relptr_off = relptr_store_eval((base), (char *)(val))) +#else +/* + * If we don't have __builtin_types_compatible_p, assume we might not have + * __typeof__ either. 
+ */ +#define relptr_store(base, rp, val) \ + (AssertVariableIsOfTypeMacro(base, char *), (rp).relptr_off = relptr_store_eval((base), (char *)(val))) +#endif + +#define HnswPtrDeclare(type, relptrtype, ptrtype) \ + relptr_declare(type, relptrtype); \ + typedef union { \ + type *ptr; \ + relptrtype relptr; \ + } (ptrtype); + +/* Pointers that can be absolute or relative */ +/* Use char for HnswDatumPtr so works with Pointer */ +HnswPtrDeclare(HnswElementData, HnswElementRelptr, HnswElementPtr); +HnswPtrDeclare(HnswNeighborArray, HnswNeighborArrayRelptr, HnswNeighborArrayPtr); +HnswPtrDeclare(HnswNeighborArrayPtr, HnswNeighborsRelptr, HnswNeighborsPtr); +HnswPtrDeclare(char, DatumRelptr, HnswDatumPtr); + +struct HnswElementData { + HnswElementPtr next; + ItemPointerData heaptids[HNSW_HEAPTIDS]; + uint8 heaptidsLength; + uint8 level; + uint8 deleted; + uint32 hash; + HnswNeighborsPtr neighbors; + BlockNumber blkno; + OffsetNumber offno; + OffsetNumber neighborOffno; + BlockNumber neighborPage; + HnswDatumPtr value; + uint8 *pqcodes; + LWLock lock; +}; + +typedef HnswElementData *HnswElement; + +typedef struct HnswCandidate { + HnswElementPtr element; + float distance; + bool closer; +} HnswCandidate; + +struct HnswNeighborArray { + int length; + bool closerSet; + HnswCandidate items[FLEXIBLE_ARRAY_MEMBER]; +}; + +typedef struct HnswPairingHeapNode { + pairingheap_node ph_node; + HnswCandidate *inner; +} HnswPairingHeapNode; + +/* HNSW index options */ +typedef struct HnswOptions { + int32 vl_len_; /* varlena header (do not touch directly!) 
*/ + int m; /* number of connections */ + int efConstruction; /* size of dynamic candidate list */ + bool enablePQ; + int pqM; /* number of subquantizer */ + int pqKsub; /* number of centroids for each subquantizer */ + char *storage_type; /* table access method kind */ +} HnswOptions; + +typedef struct HnswGraph { + /* Graph state */ + slock_t lock; + HnswElementPtr head; + double indtuples; + + /* Entry state */ + LWLock entryLock; + LWLock entryWaitLock; + HnswElementPtr entryPoint; + + /* Allocations state */ + LWLock allocatorLock; + long memoryUsed; + long memoryTotal; + + /* Flushed state */ + LWLock flushLock; + bool flushed; +} HnswGraph; + +typedef struct HnswShared { + /* Immutable state */ + Oid heaprelid; + Oid indexrelid; + + /* Mutex for mutable state */ + slock_t mutex; + + /* Mutable state */ + int nparticipantsdone; + double reltuples; + HnswGraph graphData; + + char *hnswarea; + ParallelHeapScanDescData heapdesc; +} HnswShared; + +typedef struct HnswLeader { + int nparticipanttuplesorts; + HnswShared *hnswshared; +} HnswLeader; + +typedef struct HnswAllocator { + void *(*alloc)(Size size, void *state); + void *state; +} HnswAllocator; + +typedef struct HnswTypeInfo { + int maxDimensions; + Datum (*normalize)(PG_FUNCTION_ARGS); + void (*checkValue)(Pointer v); +} HnswTypeInfo; + +typedef struct HnswBuildState { + /* Info */ + Relation heap; + Relation index; + IndexInfo *indexInfo; + ForkNumber forkNum; + const HnswTypeInfo *typeInfo; + + /* Settings */ + int dimensions; + int m; + int efConstruction; + + /* Statistics */ + double indtuples; + double reltuples; + + /* Support functions */ + FmgrInfo *procinfo; + FmgrInfo *normprocinfo; + Oid collation; + + /* Variables */ + HnswGraph graphData; + HnswGraph *graph; + double ml; + int maxLevel; + + /* Memory */ + MemoryContext graphCtx; + MemoryContext tmpCtx; + HnswAllocator allocator; + + /* Parallel builds */ + HnswLeader *hnswleader; + HnswShared *hnswshared; + char *hnswarea; + + /* PQ info */ 
+ bool enablePQ; + int pqM; + int pqKsub; + float *pqTable; + float *centerTable; + uint16 pqcodeSize; + + /* storage page info */ + bool isUStore; /* false means astore */ +} HnswBuildState; + +typedef struct HnswMetaPageData { + uint32 magicNumber; + uint32 version; + uint32 dimensions; + uint16 m; + uint16 efConstruction; + BlockNumber entryBlkno; + OffsetNumber entryOffno; + int16 entryLevel; + BlockNumber insertPage; +} HnswMetaPageData; + +typedef HnswMetaPageData *HnswMetaPage; + +typedef struct HnswAppendMetaPageData { + uint32 magicNumber; + uint32 version; + uint32 dimensions; + uint16 m; + uint16 efConstruction; + BlockNumber entryBlkno; + OffsetNumber entryOffno; + int16 entryLevel; + + /* PQ info */ + bool enablePQ; + uint16 pqM; /* number of subquantizer */ + uint16 pqKsub; /* number of centroids for each subquantizer */ + uint16 pqcodeSize; /* number of bits per quantization index */ + uint32 centerTableSize; /* dim * sizeof(float) */ + uint32 pqTableSize; /* dim * pqKsub * sizeof(float) */ + uint16 pqTableNblk; /* total number of blks pqtable */ + + /* slot info */ + int npages; /* number of pages per slot */ + BlockNumber slotStartBlkno; + BlockNumber elementInsertSlot; /* the first page of the element type to be inserted into the slot */ + BlockNumber neighborInsertSlot; /* the first page of the neighbor type to be inserted into the slot */ +} HnswAppendMetaPageData; + +typedef HnswAppendMetaPageData *HnswAppendMetaPage; + +typedef struct HnswPageOpaqueData { + BlockNumber nextblkno; + uint8 pageType; /* element or neighbor page */ + uint8 unused; + uint16 page_id; /* for identification of HNSW indexes */ +} HnswPageOpaqueData; + +typedef HnswPageOpaqueData *HnswPageOpaque; + +typedef struct HnswElementTupleData { + uint8 type; + uint8 level; + uint8 deleted; + uint8 unused; + ItemPointerData heaptids[HNSW_HEAPTIDS]; + ItemPointerData neighbortid; + uint16 unused2; + Vector data; +} HnswElementTupleData; + +typedef HnswElementTupleData 
*HnswElementTuple; + +typedef struct HnswNeighborTupleData { + uint8 type; + uint8 unused; + uint16 count; + ItemPointerData indextids[FLEXIBLE_ARRAY_MEMBER]; +} HnswNeighborTupleData; + +typedef HnswNeighborTupleData *HnswNeighborTuple; + +typedef struct HnswScanOpaqueData { + const HnswTypeInfo *typeInfo; + bool first; + List *w; + MemoryContext tmpCtx; + + /* Support functions */ + FmgrInfo *procinfo; + FmgrInfo *normprocinfo; + Oid collation; + + /* used in ustore only */ + VectorScanData vs; +} HnswScanOpaqueData; + +typedef HnswScanOpaqueData *HnswScanOpaque; + +typedef struct HnswVacuumState { + /* Info */ + Relation index; + IndexBulkDeleteResult *stats; + IndexBulkDeleteCallback callback; + void *callbackState; + + /* Settings */ + int m; + int efConstruction; + + /* Support functions */ + FmgrInfo *procinfo; + Oid collation; + + /* Variables */ + struct tidhash_hash *deleted; + BufferAccessStrategy bas; + HnswNeighborTuple ntup; + HnswElementData highestPoint; + + /* Memory */ + MemoryContext tmpCtx; +} HnswVacuumState; + +/* Methods */ +int HnswGetM(Relation index); +int HnswGetEfConstruction(Relation index); +bool HnswGetEnablePQ(Relation index); +int HnswGetPqM(Relation index); +int HnswGetPqKsub(Relation index); +FmgrInfo *HnswOptionalProcInfo(Relation index, uint16 procnum); +Datum HnswNormValue(const HnswTypeInfo *typeInfo, Oid collation, Datum value); +bool HnswCheckNorm(FmgrInfo *procinfo, Oid collation, Datum value); +Buffer HnswNewBuffer(Relation index, ForkNumber forkNum); +void HnswInitPage(Buffer buf, Page page); +void HnswInit(void); +List *HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, FmgrInfo *procinfo, Oid collation, + int m, bool inserting, HnswElement skipElement, IndexScanDesc scan = NULL); +HnswElement HnswGetEntryPoint(Relation index); +void HnswGetMetaPageInfo(Relation index, int *m, HnswElement *entryPoint); +void *HnswAlloc(HnswAllocator *allocator, Size size); +HnswElement HnswInitElement(char 
*base, ItemPointer tid, int m, double ml, int maxLevel, HnswAllocator *alloc); +HnswElement HnswInitElementFromBlock(BlockNumber blkno, OffsetNumber offno); +void HnswFindElementNeighbors(char *base, HnswElement element, HnswElement entryPoint, Relation index, + FmgrInfo *procinfo, Oid collation, int m, int efConstruction, bool existing); +HnswCandidate *HnswEntryCandidate(char *base, HnswElement em, Datum q, Relation rel, FmgrInfo *procinfo, Oid collation, + bool loadVec, IndexScanDesc scan = NULL); +void HnswUpdateMetaPage(Relation index, int updateEntry, HnswElement entryPoint, BlockNumber insertPage, + ForkNumber forkNum, bool building); +void HnswSetNeighborTuple(char *base, HnswNeighborTuple ntup, HnswElement e, int m); +void HnswAddHeapTid(HnswElement element, ItemPointer heaptid); +void HnswInitNeighbors(char *base, HnswElement element, int m, HnswAllocator *alloc); +bool HnswInsertTupleOnDisk(Relation index, Datum value, Datum *values, const bool *isnull, ItemPointer heap_tid, + bool building); +void HnswUpdateNeighborsOnDisk(Relation index, FmgrInfo *procinfo, Oid collation, HnswElement e, int m, + bool checkExisting, bool building); +void HnswLoadElementFromTuple(HnswElement element, HnswElementTuple etup, bool loadHeaptids, bool loadVec); +bool HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, + bool loadVec, float *maxDistance, IndexScanDesc scan = NULL); +void HnswSetElementTuple(char *base, HnswElementTuple etup, HnswElement element); +void HnswUpdateConnection(char *base, HnswElement element, HnswCandidate *hc, int lm, int lc, int *updateIdx, + Relation index, FmgrInfo *procinfo, Oid collation); +void HnswLoadNeighbors(HnswElement element, Relation index, int m); +const HnswTypeInfo *HnswGetTypeInfo(Relation index); +bool HnswDelete(Relation index, Datum *values, const bool *isnull, ItemPointer heapTCtid, bool isRollbackIndex); + +Datum hnswhandler(PG_FUNCTION_ARGS); +Datum 
hnswbuild(PG_FUNCTION_ARGS); +Datum hnswbuildempty(PG_FUNCTION_ARGS); +Datum hnswinsert(PG_FUNCTION_ARGS); +Datum hnswbulkdelete(PG_FUNCTION_ARGS); +Datum hnswvacuumcleanup(PG_FUNCTION_ARGS); +Datum hnswcostestimate(PG_FUNCTION_ARGS); +Datum hnswoptions(PG_FUNCTION_ARGS); +Datum hnswvalidate(PG_FUNCTION_ARGS); +Datum hnswbeginscan(PG_FUNCTION_ARGS); +Datum hnswrescan(PG_FUNCTION_ARGS); +Datum hnswgettuple(PG_FUNCTION_ARGS); +Datum hnswendscan(PG_FUNCTION_ARGS); +Datum hnswdelete(PG_FUNCTION_ARGS); +Datum hnsw_halfvec_support(PG_FUNCTION_ARGS); +Datum hnsw_bit_support(PG_FUNCTION_ARGS); +Datum hnsw_sparsevec_support(PG_FUNCTION_ARGS); + +/* Index access methods */ +IndexBuildResult *hnswbuild_internal(Relation heap, Relation index, IndexInfo *indexInfo); +void hnswbuildempty_internal(Relation index); +bool hnswinsert_internal(Relation index, Datum *values, bool *isnull, ItemPointer heap_tid, Relation heap, + IndexUniqueCheck checkUnique); +IndexBulkDeleteResult *hnswbulkdelete_internal(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, + IndexBulkDeleteCallback callback, void *callbackState); +IndexBulkDeleteResult *hnswvacuumcleanup_internal(IndexVacuumInfo *info, IndexBulkDeleteResult *stats); +IndexScanDesc hnswbeginscan_internal(Relation index, int nkeys, int norderbys); +void hnswrescan_internal(IndexScanDesc scan, ScanKey keys, int nkeys, ScanKey orderbys, int norderbys); +bool hnswgettuple_internal(IndexScanDesc scan, ScanDirection dir); +void hnswendscan_internal(IndexScanDesc scan); +bool hnswdelete_internal(Relation index, Datum *values, const bool *isnull, ItemPointer heapTCtid, + bool isRollbackIndex); + +static inline HnswNeighborArray *HnswGetNeighbors(char *base, HnswElement element, int lc) /* returns the neighbor array of "element" for layer "lc" */ +{ + HnswNeighborArrayPtr *neighborList = (HnswNeighborArrayPtr *)HnswPtrAccess(base, element->neighbors); /* element->neighbors resolves (via base) to an array of per-layer pointers */ + + Assert(element->level >= lc); /* lc must not exceed the element's own level */ + + return (HnswNeighborArray *)HnswPtrAccess(base, neighborList[lc]); /* the per-layer entry is itself resolved against base */ +} + +/* Hash tables */ +typedef struct 
TidHashEntry { + ItemPointerData tid; + char status; +} TidHashEntry; + +#define SH_PREFIX tidhash +#define SH_ELEMENT_TYPE TidHashEntry +#define SH_KEY_TYPE ItemPointerData +#define SH_SCOPE extern +#define SH_DECLARE +#include "lib/simplehash.h" + +typedef struct PointerHashEntry { + uintptr_t ptr; + char status; +} PointerHashEntry; + +#define SH_PREFIX pointerhash +#define SH_ELEMENT_TYPE PointerHashEntry +#define SH_KEY_TYPE uintptr_t +#define SH_SCOPE extern +#define SH_DECLARE +#include "lib/simplehash.h" + +typedef struct OffsetHashEntry { + Size offset; + char status; +} OffsetHashEntry; + +#define SH_PREFIX offsethash +#define SH_ELEMENT_TYPE OffsetHashEntry +#define SH_KEY_TYPE Size +#define SH_SCOPE extern +#define SH_DECLARE +#include "lib/simplehash.h" + +#endif diff --git a/src/include/access/datavec/ivfflat.h b/src/include/access/datavec/ivfflat.h new file mode 100644 index 0000000000..c722709b23 --- /dev/null +++ b/src/include/access/datavec/ivfflat.h @@ -0,0 +1,318 @@ +#ifndef IVFFLAT_H +#define IVFFLAT_H + +#include "postgres.h" + +#include "access/genam.h" +#include "access/generic_xlog.h" +#include "catalog/pg_operator.h" +#include "lib/pairingheap.h" +#include "nodes/execnodes.h" +#include "port.h" /* for random() */ +#include "sampling.h" +#include "utils/tuplesort.h" +#include "access/datavec/vector.h" +#include "postmaster/bgworker.h" + +#if PG_VERSION_NUM >= 150000 +#include "common/pg_prng.h" +#endif + +#ifdef IVFFLAT_BENCH +#include "portability/instr_time.h" +#endif + +#define IVFFLAT_MAX_DIM 2000 + +/* Support functions */ +#define IVFFLAT_DISTANCE_PROC 1 +#define IVFFLAT_NORM_PROC 2 +#define IVFFLAT_KMEANS_DISTANCE_PROC 3 +#define IVFFLAT_KMEANS_NORM_PROC 4 +#define IVFFLAT_TYPE_INFO_PROC 5 + +#define IVFFLAT_VERSION 1 +#define IVFFLAT_MAGIC_NUMBER 0x14FF1A7 +#define IVFFLAT_PAGE_ID 0xFF84 + +/* Preserved page numbers */ +#define IVFFLAT_METAPAGE_BLKNO 0 +#define IVFFLAT_HEAD_BLKNO 1 /* first list page */ + +/* IVFFlat parameters */ 
+#define IVFFLAT_DEFAULT_LISTS 100 +#define IVFFLAT_MIN_LISTS 1 +#define IVFFLAT_MAX_LISTS 32768 +#define IVFFLAT_DEFAULT_PROBES 1 + +/* Build phases */ +/* PROGRESS_CREATEIDX_SUBPHASE_INITIALIZE is 1 */ +#define PROGRESS_IVFFLAT_PHASE_KMEANS 2 +#define PROGRESS_IVFFLAT_PHASE_ASSIGN 3 +#define PROGRESS_IVFFLAT_PHASE_LOAD 4 + +#define IVFFLAT_LIST_SIZE(size) (offsetof(IvfflatListData, center) + (size)) + +#define IvfflatPageGetOpaque(page) ((IvfflatPageOpaque)PageGetSpecialPointer(page)) +#define IvfflatPageGetMeta(page) ((IvfflatMetaPageData *)PageGetContents(page)) + +#ifdef IVFFLAT_BENCH +#define IvfflatBench(name, code) \ + do { \ + instr_time start; \ + instr_time duration; \ + INSTR_TIME_SET_CURRENT(start); \ + (code); \ + INSTR_TIME_SET_CURRENT(duration); \ + INSTR_TIME_SUBTRACT(duration, start); \ + elog(INFO, "%s: %.3f ms", name, INSTR_TIME_GET_MILLISEC(duration)); \ + } while (0) +#else +#define IvfflatBench(name, code) (code) +#endif + +#if PG_VERSION_NUM >= 150000 +#define RandomDouble() pg_prng_double(&pg_global_prng_state) +#define RandomInt() pg_prng_uint32(&pg_global_prng_state) +#else +#define RandomDouble() (((double)random()) / MAX_RANDOM_VALUE) +#define RandomInt() random() +#endif + +typedef struct VectorArrayData { + int length; + int maxlen; + int dim; + Size itemsize; + char *items; +} VectorArrayData; + +typedef VectorArrayData *VectorArray; + +typedef struct ListInfo { + BlockNumber blkno; + OffsetNumber offno; +} ListInfo; + +/* IVFFlat index options */ +typedef struct IvfflatOptions { + int32 vl_len_; /* varlena header (do not touch directly!) 
*/ + int lists; /* number of lists */ +} IvfflatOptions; + +typedef struct IvfflatSpool { + Tuplesortstate *sortstate; + Relation heap; + Relation index; +} IvfflatSpool; + +typedef struct IvfflatShared { + /* Immutable state */ + Oid heaprelid; + Oid indexrelid; + int scantuplesortstates; + + /* Mutex for mutable state */ + slock_t mutex; + + /* Mutable state */ + int nparticipantsdone; + double reltuples; + double indtuples; + + Sharedsort *sharedsort; + Vector *ivfcenters; + int workmem; + +#ifdef IVFFLAT_KMEANS_DEBUG + double inertia; +#endif + ParallelHeapScanDescData heapdesc; // must come last +} IvfflatShared; + +#define ParallelTableScanFromIvfflatShared(shared) \ + (ParallelTableScanDesc)((char *)(shared) + BUFFERALIGN(sizeof(IvfflatShared))) + +typedef struct IvfflatLeader { + int nparticipanttuplesorts; + IvfflatShared *ivfshared; +} IvfflatLeader; + +typedef struct IvfflatTypeInfo { + int maxDimensions; + Datum (*normalize)(PG_FUNCTION_ARGS); + Size (*itemSize)(int dimensions); + void (*updateCenter)(Pointer v, int dimensions, float *x); + void (*sumCenter)(Pointer v, float *x); +} IvfflatTypeInfo; + +typedef struct IvfflatBuildState { + /* Info */ + Relation heap; + Relation index; + IndexInfo *indexInfo; + const IvfflatTypeInfo *typeInfo; + + /* Settings */ + int dimensions; + int lists; + + /* Statistics */ + double indtuples; + double reltuples; + + /* Support functions */ + FmgrInfo *procinfo; + FmgrInfo *normprocinfo; + FmgrInfo *kmeansnormprocinfo; + Oid collation; + + /* Variables */ + VectorArray samples; + VectorArray centers; + ListInfo *listInfo; + +#ifdef IVFFLAT_KMEANS_DEBUG + double inertia; + double *listSums; + int *listCounts; +#endif + + /* Sampling */ + BlockSamplerData bs; + double rstate; + int rowstoskip; + + /* Sorting */ + Tuplesortstate *sortstate; + TupleDesc tupdesc; + TupleTableSlot *slot; + + /* Memory */ + MemoryContext tmpCtx; + + /* Parallel builds */ + IvfflatLeader *ivfleader; +} IvfflatBuildState; + +typedef struct 
IvfflatMetaPageData { + uint32 magicNumber; + uint32 version; + uint16 dimensions; + uint16 lists; +} IvfflatMetaPageData; + +typedef IvfflatMetaPageData *IvfflatMetaPage; + +typedef struct IvfflatPageOpaqueData { + BlockNumber nextblkno; + uint16 unused; + uint16 page_id; /* for identification of IVFFlat indexes */ +} IvfflatPageOpaqueData; + +typedef IvfflatPageOpaqueData *IvfflatPageOpaque; + +typedef struct IvfflatListData { + BlockNumber startPage; + BlockNumber insertPage; + Vector center; +} IvfflatListData; + +typedef IvfflatListData *IvfflatList; + +typedef struct IvfflatScanList { + pairingheap_node ph_node; + BlockNumber startPage; + double distance; +} IvfflatScanList; + +typedef struct IvfflatScanOpaqueData { + const IvfflatTypeInfo *typeInfo; + int probes; + int dimensions; + bool first; + + /* Sorting */ + Tuplesortstate *sortstate; + TupleDesc tupdesc; + TupleTableSlot *slot; + bool isnull; + + /* Support functions */ + FmgrInfo *procinfo; + FmgrInfo *normprocinfo; + Oid collation; + Datum (*distfunc)(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2); + + /* Lists */ + pairingheap *listQueue; + IvfflatScanList lists[FLEXIBLE_ARRAY_MEMBER]; /* must come last */ +} IvfflatScanOpaqueData; + +typedef IvfflatScanOpaqueData *IvfflatScanOpaque; + +#define VECTOR_ARRAY_SIZE(_length, _size) (sizeof(VectorArrayData) + (_length) * MAXALIGN(_size)) + +/* Use functions instead of macros to avoid double evaluation */ + +static inline Pointer VectorArrayGet(VectorArray arr, int offset) /* address of slot "offset"; slots are arr->itemsize bytes apart */ +{ + return ((char *)arr->items) + (offset * arr->itemsize); +} + +static inline void VectorArraySet(VectorArray arr, int offset, Pointer val) /* copies the varlena "val" (VARSIZE_ANY(val) bytes) into slot "offset" */ +{ + errno_t rc = memcpy_s(VectorArrayGet(arr, offset), arr->itemsize, val, VARSIZE_ANY(val)); /* bound the copy by the slot size, not the source size, so an oversized value fails the check instead of overrunning the next slot */ + securec_check(rc, "\0", "\0"); +} + +/* Methods */ +VectorArray VectorArrayInit(int maxlen, int dimensions, Size itemsize); +void VectorArrayFree(VectorArray arr); +void IvfflatKmeans(Relation index, VectorArray samples, VectorArray 
centers, const IvfflatTypeInfo *typeInfo); +FmgrInfo *IvfflatOptionalProcInfo(Relation index, uint16 procnum); +Datum IvfflatNormValue(const IvfflatTypeInfo *typeInfo, Oid collation, Datum value); +bool IvfflatCheckNorm(FmgrInfo *procinfo, Oid collation, Datum value); +int IvfflatGetLists(Relation index); +void IvfflatGetMetaPageInfo(Relation index, int *lists, int *dimensions); +void IvfflatUpdateList(Relation index, ListInfo listInfo, BlockNumber insertPage, BlockNumber originalInsertPage, + BlockNumber startPage, ForkNumber forkNum); +void IvfflatCommitBuffer(Buffer buf, GenericXLogState *state); +void IvfflatAppendPage(Relation index, Buffer *buf, Page *page, GenericXLogState **state, ForkNumber forkNum); +Buffer IvfflatNewBuffer(Relation index, ForkNumber forkNum); +void IvfflatInitPage(Buffer buf, Page page); +void IvfflatInitRegisterPage(Relation index, Buffer *buf, Page *page, GenericXLogState **state); +PGDLLEXPORT void IvfflatParallelBuildMain(const BgWorkerContext *bwc); +void IvfflatInit(void); +const IvfflatTypeInfo *IvfflatGetTypeInfo(Relation index); + +Datum ivfflathandler(PG_FUNCTION_ARGS); +Datum ivfflatbuild(PG_FUNCTION_ARGS); +Datum ivfflatbuildempty(PG_FUNCTION_ARGS); +Datum ivfflatinsert(PG_FUNCTION_ARGS); +Datum ivfflatbulkdelete(PG_FUNCTION_ARGS); +Datum ivfflatvacuumcleanup(PG_FUNCTION_ARGS); +Datum ivfflatcostestimate(PG_FUNCTION_ARGS); +Datum ivfflatoptions(PG_FUNCTION_ARGS); +Datum ivfflatvalidate(PG_FUNCTION_ARGS); +Datum ivfflatbeginscan(PG_FUNCTION_ARGS); +Datum ivfflatrescan(PG_FUNCTION_ARGS); +Datum ivfflatgettuple(PG_FUNCTION_ARGS); +Datum ivfflatendscan(PG_FUNCTION_ARGS); +Datum ivfflat_halfvec_support(PG_FUNCTION_ARGS); +Datum ivfflat_bit_support(PG_FUNCTION_ARGS); + +/* Index access methods */ +IndexBuildResult *ivfflatbuild_internal(Relation heap, Relation index, IndexInfo *indexInfo); +void ivfflatbuildempty_internal(Relation index); +bool ivfflatinsert_internal(Relation index, Datum *values, const bool *isnull, ItemPointer 
heap_tid, Relation heap, + IndexUniqueCheck checkUnique); +IndexBulkDeleteResult *ivfflatbulkdelete_internal(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, + IndexBulkDeleteCallback callback, void *callbackState); +IndexBulkDeleteResult *ivfflatvacuumcleanup_internal(IndexVacuumInfo *info, IndexBulkDeleteResult *stats); +IndexScanDesc ivfflatbeginscan_internal(Relation index, int nkeys, int norderbys); +void ivfflatrescan_internal(IndexScanDesc scan, ScanKey keys, int nkeys, ScanKey orderbys, int norderbys); +bool ivfflatgettuple_internal(IndexScanDesc scan, ScanDirection dir); +void ivfflatendscan_internal(IndexScanDesc scan); + +#endif diff --git a/src/include/access/datavec/pg_prng.h b/src/include/access/datavec/pg_prng.h new file mode 100644 index 0000000000..73a53ce66a --- /dev/null +++ b/src/include/access/datavec/pg_prng.h @@ -0,0 +1,58 @@ +/*------------------------------------------------------------------------- + * + * Pseudo-Random Number Generator + * + * Copyright (c) 2021-2024, PostgreSQL Global Development Group + * + * src/include/common/pg_prng.h + * + *------------------------------------------------------------------------- + */ +#ifndef PG_PRNG_H +#define PG_PRNG_H + +/* + * State vector for PRNG generation. Callers should treat this as an + * opaque typedef, but we expose its definition to allow it to be + * embedded in other structs. + */ +typedef struct pg_prng_state { + uint64 s0, s1; +} pg_prng_state; + +/* + * Callers not needing local PRNG series may use this global state vector, + * after initializing it with one of the pg_prng_...seed functions. + */ +extern PGDLLIMPORT pg_prng_state pg_global_prng_state; + +extern void pg_prng_seed(pg_prng_state *state, uint64 seed); +extern void pg_prng_fseed(pg_prng_state *state, double fseed); +extern bool pg_prng_seed_check(pg_prng_state *state); + +/* + * Initialize the PRNG state from the pg_strong_random source, + * taking care that we don't produce all-zeroes. 
If this returns false, + * caller should initialize the PRNG state from some other random seed, + * using pg_prng_[f]seed. + * + * We implement this as a macro, so that the pg_strong_random() call is + * in the caller. If it were in pg_prng.c, programs using pg_prng.c + * but not needing strong seeding would nonetheless be forced to pull in + * pg_strong_random.c and thence OpenSSL. + */ +#define pg_prng_strong_seed(state) \ + (pg_strong_random((void *)(state), sizeof(pg_prng_state)) ? pg_prng_seed_check(state) : false) + +extern uint64 pg_prng_uint64(pg_prng_state *state); +extern uint64 pg_prng_uint64_range(pg_prng_state *state, uint64 rmin, uint64 rmax); +extern int64 pg_prng_int64(pg_prng_state *state); +extern int64 pg_prng_int64p(pg_prng_state *state); +extern uint32 pg_prng_uint32(pg_prng_state *state); +extern int32 pg_prng_int32(pg_prng_state *state); +extern int32 pg_prng_int32p(pg_prng_state *state); +extern double pg_prng_double(pg_prng_state *state); +extern double pg_prng_double_normal(pg_prng_state *state); +extern bool pg_prng_bool(pg_prng_state *state); + +#endif /* PG_PRNG_H */ diff --git a/src/include/access/datavec/ryu_common.h b/src/include/access/datavec/ryu_common.h new file mode 100644 index 0000000000..3d61a5c06b --- /dev/null +++ b/src/include/access/datavec/ryu_common.h @@ -0,0 +1,146 @@ +/*--------------------------------------------------------------------------- + * + * Common routines for Ryu floating-point output. + * + * Portions Copyright (c) 2018-2024, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/common/ryu_common.h + * + * This is a modification of code taken from github.com/ulfjack/ryu under the + * terms of the Boost license (not the Apache license). The original copyright + * notice follows: + * + * Copyright 2018 Ulf Adams + * + * The contents of this file may be used under the terms of the Apache + * License, Version 2.0. 
+ * + * (See accompanying file LICENSE-Apache or copy at + * http://www.apache.org/licenses/LICENSE-2.0) + * + * Alternatively, the contents of this file may be used under the terms of the + * Boost Software License, Version 1.0. + * + * (See accompanying file LICENSE-Boost or copy at + * https://www.boost.org/LICENSE_1_0.txt) + * + * Unless required by applicable law or agreed to in writing, this software is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. + * + *--------------------------------------------------------------------------- + */ +#ifndef RYU_COMMON_H +#define RYU_COMMON_H + +/* + * Upstream Ryu's output is always the shortest possible. But we adjust that + * slightly to improve portability: we avoid outputting the exact midpoint + * value between two representable floats, since that relies on the reader + * getting the round-to-even rule correct, which seems to be the common + * failure mode. + * + * Defining this to 1 would restore the upstream behavior. + */ +#define STRICTLY_SHORTEST 0 + +#if SIZEOF_SIZE_T < 8 +#define RYU_32_BIT_PLATFORM +#endif + +/* + * A table of all two-digit numbers. This is used to speed up decimal digit + * generation by copying pairs of digits into the final output. 
+ */ +static const char DIGIT_TABLE[200] = { + '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', '0', '7', '0', '8', '0', '9', '1', '0', '1', + '1', '1', '2', '1', '3', '1', '4', '1', '5', '1', '6', '1', '7', '1', '8', '1', '9', '2', '0', '2', '1', '2', '2', + '2', '3', '2', '4', '2', '5', '2', '6', '2', '7', '2', '8', '2', '9', '3', '0', '3', '1', '3', '2', '3', '3', '3', + '4', '3', '5', '3', '6', '3', '7', '3', '8', '3', '9', '4', '0', '4', '1', '4', '2', '4', '3', '4', '4', '4', '5', + '4', '6', '4', '7', '4', '8', '4', '9', '5', '0', '5', '1', '5', '2', '5', '3', '5', '4', '5', '5', '5', '6', '5', + '7', '5', '8', '5', '9', '6', '0', '6', '1', '6', '2', '6', '3', '6', '4', '6', '5', '6', '6', '6', '7', '6', '8', + '6', '9', '7', '0', '7', '1', '7', '2', '7', '3', '7', '4', '7', '5', '7', '6', '7', '7', '7', '8', '7', '9', '8', + '0', '8', '1', '8', '2', '8', '3', '8', '4', '8', '5', '8', '6', '8', '7', '8', '8', '8', '9', '9', '0', '9', '1', + '9', '2', '9', '3', '9', '4', '9', '5', '9', '6', '9', '7', '9', '8', '9', '9'}; + +/* Returns e == 0 ? 1 : ceil(log_2(5^e)). */ +static inline uint32 pow5bits(const int32 e) +{ + /* + * This approximation works up to the point that the multiplication + * overflows at e = 3529. + * + * If the multiplication were done in 64 bits, it would fail at 5^4004 + * which is just greater than 2^9297. + */ + Assert(e >= 0); + Assert(e <= 3528); + return ((((uint32)e) * 1217359) >> 19) + 1; +} + +/* Returns floor(log_10(2^e)). */ +static inline int32 log10Pow2(const int32 e) +{ + /* + * The first value this approximation fails for is 2^1651 which is just + * greater than 10^297. + */ + Assert(e >= 0); + Assert(e <= 1650); + return (int32)((((uint32)e) * 78913) >> 18); +} + +/* Returns floor(log_10(5^e)). */ +static inline int32 log10Pow5(const int32 e) +{ + /* + * The first value this approximation fails for is 5^2621 which is just + * greater than 10^1832. 
+ */ + Assert(e >= 0); + Assert(e <= 2620); + return (int32)((((uint32)e) * 732923) >> 20); +} + +static inline int copy_special_str(char *const result, const bool sign, const bool exponent, const bool mantissa) /* writes "NaN", "[-]Infinity" or "[-]0" into result and returns the number of chars written; no terminating NUL is added */ +{ + errno_t rc = EOK; + if (mantissa) { + rc = memcpy_s(result, 3, "NaN", 3); /* NaN is emitted without a sign */ + securec_check(rc, "\0", "\0"); + return 3; + } + if (sign) { + result[0] = '-'; + } + if (exponent) { + rc = memcpy_s(result + sign, 8, "Infinity", 8); /* sign is 0 or 1, so it doubles as the write offset */ + securec_check(rc, "\0", "\0"); + return sign + 8; + } + result[sign] = '0'; + return sign + 1; +} + +static inline uint32 float_to_bits(const float f) /* returns the object representation of f as a uint32 */ +{ + uint32 bits = 0; + errno_t rc = EOK; + + rc = memcpy_s(&bits, sizeof(bits), &f, sizeof(f)); /* destination bound is the size of the destination object */ + securec_check(rc, "\0", "\0"); + return bits; +} + +static inline uint64 double_to_bits(const double d) /* returns the object representation of d as a uint64 */ +{ + uint64 bits = 0; + errno_t rc = EOK; + + rc = memcpy_s(&bits, sizeof(bits), &d, sizeof(d)); /* destination bound is the size of the destination object */ + securec_check(rc, "\0", "\0"); + return bits; +} + +#endif /* RYU_COMMON_H */ diff --git a/src/include/access/datavec/sampling.h b/src/include/access/datavec/sampling.h new file mode 100644 index 0000000000..6d96ff49eb --- /dev/null +++ b/src/include/access/datavec/sampling.h @@ -0,0 +1,34 @@ +#ifndef SAMPLING_H +#define SAMPLING_H + +#include "access/datavec/pg_prng.h" +#include "storage/buf/block.h" + +extern void sampler_random_init_state(uint32 seed, pg_prng_state *randstate); +extern double sampler_random_fract(pg_prng_state *randstate); + +typedef struct { + BlockNumber N; /* number of blocks, known in advance */ + uint32 n; /* desired sample size */ + BlockNumber t; /* current block number */ + uint32 m; /* blocks selected so far */ + pg_prng_state randstate; /* random generator state */ +} BlockSamplerData2; + +typedef BlockSamplerData2 *BlockSampler2; + +extern BlockNumber BlockSampler_Init2(BlockSampler2 bs, BlockNumber nblocks, int samplesize, uint32 randseed); +extern bool BlockSampler_HasMore2(BlockSampler2 bs); +extern BlockNumber 
BlockSampler_Next2(BlockSampler2 bs); + +typedef struct { + double W; + pg_prng_state randstate; /* random generator state */ +} ReservoirStateData; + +typedef ReservoirStateData *ReservoirState; + +extern void reservoir_init_selection_state(ReservoirState rs, int n); +extern double reservoir_get_next_S(ReservoirState rs, double t, int n); + +#endif /* SAMPLING_H */ diff --git a/src/include/access/datavec/shortest_dec.h b/src/include/access/datavec/shortest_dec.h new file mode 100644 index 0000000000..b0312b6f24 --- /dev/null +++ b/src/include/access/datavec/shortest_dec.h @@ -0,0 +1,9 @@ +#ifndef SHORTEST_DEC_H +#define SHORTEST_DEC_H + +#define FLOAT_SHORTEST_DECIMAL_LEN 16 + +int FloatToShortestDecimalBufn(float f, char *result); +int FloatToShortestDecimalBuf(float f, char *result); + +#endif /* SHORTEST_DEC_H */ diff --git a/src/include/access/datavec/sparsevec.h b/src/include/access/datavec/sparsevec.h new file mode 100644 index 0000000000..a1a2fb9fdd --- /dev/null +++ b/src/include/access/datavec/sparsevec.h @@ -0,0 +1,64 @@ +#ifndef SPARSEVEC_H +#define SPARSEVEC_H + +#define SPARSEVEC_MAX_DIM 1000000000 +#define SPARSEVEC_MAX_NNZ 16000 + +#define DatumGetSparseVector(x) ((SparseVector *)PG_DETOAST_DATUM(x)) +#define PG_GETARG_SPARSEVEC_P(x) DatumGetSparseVector(PG_GETARG_DATUM(x)) +#define PG_RETURN_SPARSEVEC_P(x) PG_RETURN_POINTER(x) + +/* + * Indices use 0-based numbering for the on-disk (and binary) format (consistent with C) + * and are always sorted. Values come after indices. 
+ */ + +Datum sparsevec_in(PG_FUNCTION_ARGS); +Datum sparsevec_out(PG_FUNCTION_ARGS); +Datum sparsevec_typmod_in(PG_FUNCTION_ARGS); +Datum sparsevec_recv(PG_FUNCTION_ARGS); +Datum sparsevec_send(PG_FUNCTION_ARGS); +Datum sparsevec_l2_distance(PG_FUNCTION_ARGS); +Datum sparsevec_inner_product(PG_FUNCTION_ARGS); +Datum sparsevec_cosine_distance(PG_FUNCTION_ARGS); +Datum sparsevec_l1_distance(PG_FUNCTION_ARGS); +Datum sparsevec_l2_norm(PG_FUNCTION_ARGS); +Datum sparsevec_l2_normalize(PG_FUNCTION_ARGS); +Datum sparsevec_lt(PG_FUNCTION_ARGS); +Datum sparsevec_le(PG_FUNCTION_ARGS); +Datum sparsevec_eq(PG_FUNCTION_ARGS); +Datum sparsevec_ne(PG_FUNCTION_ARGS); +Datum sparsevec_ge(PG_FUNCTION_ARGS); +Datum sparsevec_gt(PG_FUNCTION_ARGS); +Datum sparsevec_cmp(PG_FUNCTION_ARGS); +Datum sparsevec_l2_squared_distance(PG_FUNCTION_ARGS); +Datum sparsevec_negative_inner_product(PG_FUNCTION_ARGS); +Datum sparsevec(PG_FUNCTION_ARGS); +Datum vector_to_sparsevec(PG_FUNCTION_ARGS); +Datum sparsevec_to_vector(PG_FUNCTION_ARGS); +Datum halfvec_to_sparsevec(PG_FUNCTION_ARGS); +Datum sparsevec_to_halfvec(PG_FUNCTION_ARGS); + +typedef struct SparseVector { + int32 vl_len_; /* varlena header (do not touch directly!) 
*/ + int32 dim; /* number of dimensions */ + int32 nnz; /* number of non-zero elements */ + int32 unused; /* reserved for future use, always zero */ + int32 indices[FLEXIBLE_ARRAY_MEMBER]; /* nnz indices, followed in memory by nnz float values (see SPARSEVEC_VALUES) */ +} SparseVector; + +/* Use functions instead of macros to avoid double evaluation */ + +static inline Size SPARSEVEC_SIZE(int nnz) /* total bytes for nnz entries: fixed header + nnz int32 indices + nnz float values */ +{ + return offsetof(SparseVector, indices) + (nnz * sizeof(int32)) + (nnz * sizeof(float)); +} + +static inline float *SPARSEVEC_VALUES(SparseVector *x) /* start of the float values, located right after the x->nnz indices */ +{ + return (float *)(((char *)x) + offsetof(SparseVector, indices) + (x->nnz * sizeof(int32))); +} + +SparseVector *InitSparseVector(int dim, int nnz); + +#endif diff --git a/src/include/access/datavec/vecindex.h b/src/include/access/datavec/vecindex.h new file mode 100644 index 0000000000..73ff5af5a1 --- /dev/null +++ b/src/include/access/datavec/vecindex.h @@ -0,0 +1,23 @@ +#ifndef VECINDEX_H +#define VECINDEX_H + +#define MIN(A, B) ((B) < (A) ? (B) : (A)) /* NOTE: evaluates the selected argument twice; do not pass expressions with side effects */ +#define MAX(A, B) ((B) > (A) ? (B) : (A)) /* NOTE: same double-evaluation caveat as MIN */ + +#define VecIndexTupleGetXid(itup) (((char *)(itup)) + HNSW_ELEMENT_TUPLE_SIZE(VARSIZE_ANY(&(itup)->data))) + +struct VectorScanData { + /* + * used in ustore only, indicate the last returned index tuple which is modified + * by current transaction. see VecVisibilityCheckCid() for more information. 
+ */ + char *lastSelfModifiedItup; + uint16 lastSelfModifiedItupBufferSize; + Buffer buf; +}; + +bool VecItupGetXminXmax(Page page, OffsetNumber offnum, TransactionId oldest_xmin, TransactionId *xmin, + TransactionId *xmax, bool *xminCommitted, bool *xmaxCommitted, bool isToast); +bool VecVisibilityCheck(IndexScanDesc scan, Page page, OffsetNumber offnum, bool *needRecheck); + +#endif // VECINDEX_H diff --git a/src/include/access/datavec/vector.h b/src/include/access/datavec/vector.h new file mode 100644 index 0000000000..74c16feae7 --- /dev/null +++ b/src/include/access/datavec/vector.h @@ -0,0 +1,71 @@ +#ifndef VECTOR_H +#define VECTOR_H + +#define VECTOR_MAX_DIM 16000 +#define MEM_INFO_NUM (1024 * 1024) + +#define VECTOR_SIZE(_dim) (offsetof(Vector, x) + sizeof(float) * (_dim)) +#define DatumGetVector(x) ((Vector *)PG_DETOAST_DATUM(x)) +#define PG_GETARG_VECTOR_P(x) DatumGetVector(PG_GETARG_DATUM(x)) +#define PG_RETURN_VECTOR_P(x) PG_RETURN_POINTER(x) +#define UpdateProgress(index, val) ((void)(val)) + +typedef struct Vector { + int32 vl_len_; /* varlena header (do not touch directly!) 
*/ + int16 dim; /* number of dimensions */ + int16 unused; /* reserved for future use, always zero */ + float x[FLEXIBLE_ARRAY_MEMBER]; +} Vector; + +Vector *InitVector(int dim); +void PrintVector(char *msg, Vector *vector); +int vector_cmp_internal(Vector *a, Vector *b); +void LogNewpageRange(Relation rel, ForkNumber forknum, BlockNumber startblk, BlockNumber endblk, bool page_std); +int PlanCreateIndexWorkers(Relation heapRelation, IndexInfo *indexInfo); + +Datum vector_in(PG_FUNCTION_ARGS); +Datum vector_out(PG_FUNCTION_ARGS); +Datum vector_typmod_in(PG_FUNCTION_ARGS); +Datum vector_recv(PG_FUNCTION_ARGS); +Datum vector_send(PG_FUNCTION_ARGS); +Datum vector(PG_FUNCTION_ARGS); +Datum array_to_vector(PG_FUNCTION_ARGS); +Datum vector_to_float4(PG_FUNCTION_ARGS); +Datum l2_distance(PG_FUNCTION_ARGS); +Datum vector_l2_squared_distance(PG_FUNCTION_ARGS); +Datum inner_product(PG_FUNCTION_ARGS); +Datum vector_negative_inner_product(PG_FUNCTION_ARGS); +Datum cosine_distance(PG_FUNCTION_ARGS); +Datum vector_spherical_distance(PG_FUNCTION_ARGS); +Datum vector_dims(PG_FUNCTION_ARGS); +Datum vector_norm(PG_FUNCTION_ARGS); +Datum vector_add(PG_FUNCTION_ARGS); +Datum vector_sub(PG_FUNCTION_ARGS); +Datum vector_le(PG_FUNCTION_ARGS); +Datum vector_lt(PG_FUNCTION_ARGS); +Datum vector_eq(PG_FUNCTION_ARGS); +Datum vector_ne(PG_FUNCTION_ARGS); +Datum vector_ge(PG_FUNCTION_ARGS); +Datum vector_gt(PG_FUNCTION_ARGS); +Datum vector_cmp(PG_FUNCTION_ARGS); +Datum vector_accum(PG_FUNCTION_ARGS); +Datum vector_combine(PG_FUNCTION_ARGS); +Datum vector_avg(PG_FUNCTION_ARGS); +Datum l1_distance(PG_FUNCTION_ARGS); +Datum l2_normalize(PG_FUNCTION_ARGS); +Datum binary_quantize(PG_FUNCTION_ARGS); +Datum subvector(PG_FUNCTION_ARGS); +Datum vector_mul(PG_FUNCTION_ARGS); +Datum vector_concat(PG_FUNCTION_ARGS); +void set_extension_index(uint32 index); +void init_session_vars(void); + +typedef struct datavec_session_context { + int hnsw_ef_search; + int ivfflat_probes; +} datavec_session_context; + 
+extern uint32 datavec_index; +extern datavec_session_context *get_session_context(); + +#endif diff --git a/src/include/catalog/pg_aggregate.h b/src/include/catalog/pg_aggregate.h index 6cd0082ebd..c362b96411 100644 --- a/src/include/catalog/pg_aggregate.h +++ b/src/include/catalog/pg_aggregate.h @@ -485,7 +485,9 @@ DATA(insert ( 9990 tdigest_merge tdigest_merge_to_one calculate_quantile_of DATA(insert ( 9986 tdigest_mergep tdigest_merge_to_one calculate_value_at 0 4406 _null_ _null_ n 0)); #define ADDTDIGESTMERGEPOID 9986 - +/*vector aggregate function*/ +DATA(insert ( 8241 vector_accum vector_combine vector_avg 0 1022 "{0}" "{0}" n 0)); +DATA(insert ( 8242 vector_add vector_add - 0 8305 _null_ _null_ n 0)); /* * prototypes for functions in pg_aggregate.c */ diff --git a/src/include/catalog/pg_am.h b/src/include/catalog/pg_am.h index 7e77b1eec6..4340bb5089 100644 --- a/src/include/catalog/pg_am.h +++ b/src/include/catalog/pg_am.h @@ -156,7 +156,13 @@ DATA(insert OID = 4439 ( ubtree 5 3 t f t t t t t t f t t 0 ubtinsert ubtbegin DESCR("ustore b-tree index access method"); #define UBTREE_AM_OID 4439 -#define HNSW_AM_OID 4446 +DATA(insert OID = 8300 ( hnsw 0 3 f t f f f t f f f f f 0 hnswinsert hnswbeginscan hnswgettuple - hnswrescan hnswendscan - - - hnswbuild hnswbuildempty hnswbulkdelete hnswvacuumcleanup - hnswcostestimate hnswoptions - -)); +DESCR("hnsw index access method"); +#define HNSW_AM_OID 8300 + +DATA(insert OID = 8301 ( ivfflat 0 5 f t f f f t f f f f f 0 ivfflatinsert ivfflatbeginscan ivfflatgettuple - ivfflatrescan ivfflatendscan - - - ivfflatbuild ivfflatbuildempty ivfflatbulkdelete ivfflatvacuumcleanup - ivfflatcostestimate ivfflatoptions - -)); +DESCR("ivfflat index access method"); +#define IVFFLAT_AM_OID 8301 #define OID_IS_BTREE(oid) ((oid) == BTREE_AM_OID || (oid) == UBTREE_AM_OID) diff --git a/src/include/catalog/pg_amop.data b/src/include/catalog/pg_amop.data index ae6fdb342e..183993ea5b 100644 --- a/src/include/catalog/pg_amop.data +++ 
b/src/include/catalog/pg_amop.data @@ -1594,3 +1594,27 @@ DATA(insert OID = 7272 ( 9570 9003 9003 2 s 5553 4439 0 )); DATA(insert OID = 7273 ( 9570 9003 9003 3 s 5550 4439 0 )); DATA(insert OID = 7274 ( 9570 9003 9003 4 s 5549 4439 0 )); DATA(insert OID = 7275 ( 9570 9003 9003 5 s 5554 4439 0 )); +DATA(insert OID = 6031 ( 8371 8305 8305 1 o 8311 8300 1970 )); +DATA(insert OID = 6041 ( 8372 8305 8305 1 o 8312 8300 1970 )); +DATA(insert OID = 6051 ( 8373 8305 8305 1 o 8313 8300 1970 )); +DATA(insert OID = 6061 ( 8374 8305 8305 1 o 8314 8300 1970 )); +DATA(insert OID = 6091 ( 8381 8307 8307 1 o 8319 8300 1970 )); +DATA(insert OID = 6095 ( 8382 8307 8307 1 o 8320 8300 1970 )); +DATA(insert OID = 6101 ( 8383 8307 8307 1 o 8321 8300 1970 )); +DATA(insert OID = 6103 ( 8384 8307 8307 1 o 8322 8300 1970 )); +DATA(insert OID = 6107 ( 8379 1560 1560 1 o 8324 8300 1970 )); +DATA(insert OID = 6109 ( 8380 1560 1560 1 o 8323 8300 1970 )); +DATA(insert OID = 6111 ( 8385 8305 8305 1 o 8311 8301 1970 )); +DATA(insert OID = 6115 ( 8386 8305 8305 1 o 8312 8301 1970 )); +DATA(insert OID = 6119 ( 8387 8305 8305 1 o 8313 8301 1970 )); +DATA(insert OID = 6147 ( 8394 1560 1560 1 o 8323 8301 1970 )); +DATA(insert OID = 8980 ( 8392 8305 8305 1 s 8327 403 0 )); +DATA(insert OID = 8981 ( 8392 8305 8305 2 s 8328 403 0 )); +DATA(insert OID = 8982 ( 8392 8305 8305 3 s 8331 403 0 )); +DATA(insert OID = 8983 ( 8392 8305 8305 4 s 8330 403 0 )); +DATA(insert OID = 8984 ( 8392 8305 8305 5 s 8329 403 0 )); +DATA(insert OID = 8993 ( 8397 8307 8307 1 s 8333 403 0 )); +DATA(insert OID = 8994 ( 8397 8307 8307 2 s 8334 403 0 )); +DATA(insert OID = 8995 ( 8397 8307 8307 3 s 8337 403 0 )); +DATA(insert OID = 8996 ( 8397 8307 8307 4 s 8336 403 0 )); +DATA(insert OID = 8997 ( 8397 8307 8307 5 s 8335 403 0 )); \ No newline at end of file diff --git a/src/include/catalog/pg_amproc.h b/src/include/catalog/pg_amproc.h index 138c7d54ea..73755d729c 100644 --- a/src/include/catalog/pg_amproc.h +++ 
b/src/include/catalog/pg_amproc.h @@ -655,4 +655,46 @@ DATA(insert ( 8901 3831 3831 1 3870 )); DATA(insert ( 8626 3614 3614 1 3622 )); DATA(insert ( 8683 3615 3615 1 3668 )); +DATA(insert OID = 8924 ( 8371 8305 8305 1 8431 )); +DATA(insert OID = 8925 ( 8372 8305 8305 1 8434 )); +DATA(insert OID = 8926 ( 8373 8305 8305 1 8434 )); +DATA(insert OID = 8947 ( 8373 8305 8305 2 8438 )); +DATA(insert OID = 8927 ( 8374 8305 8305 1 8436 )); + +DATA(insert OID = 8932 ( 8379 1560 1560 1 8468 )); +DATA(insert OID = 8975 ( 8379 1560 1560 3 8209 )); + +DATA(insert OID = 8933 ( 8380 1560 1560 1 8469 )); +DATA(insert OID = 8976 ( 8380 1560 1560 3 8209 )); + +DATA(insert OID = 8934 ( 8381 8307 8307 1 8470 )); +DATA(insert OID = 8954 ( 8381 8307 8307 3 8479 )); + +DATA(insert OID = 8935 ( 8382 8307 8307 1 8463 )); +DATA(insert OID = 8955 ( 8382 8307 8307 3 8479 )); + +DATA(insert OID = 8936 ( 8383 8307 8307 1 8463 )); +DATA(insert OID = 8956 ( 8383 8307 8307 2 8478 )); +DATA(insert OID = 8957 ( 8383 8307 8307 3 8479 )); + +DATA(insert OID = 8937 ( 8384 8307 8307 1 8467 )); +DATA(insert OID = 8958 ( 8384 8307 8307 3 8479 )); + +DATA(insert OID = 8938 ( 8385 8305 8305 1 8431 )); +DATA(insert OID = 8939 ( 8385 8305 8305 3 8433 )); + +DATA(insert OID = 8940 ( 8386 8305 8305 1 8434 )); +DATA(insert OID = 8941 ( 8386 8305 8305 3 8432 )); +DATA(insert OID = 8942 ( 8386 8305 8305 4 8438 )); + +DATA(insert OID = 8943 ( 8387 8305 8305 1 8434 )); +DATA(insert OID = 8944 ( 8387 8305 8305 2 8438 )); +DATA(insert OID = 8945 ( 8387 8305 8305 3 8432 )); +DATA(insert OID = 8946 ( 8387 8305 8305 4 8438 )); + +DATA(insert OID = 8953 ( 8394 1560 1560 1 8469 )); +DATA(insert OID = 8973 ( 8394 1560 1560 3 8469 )); +DATA(insert OID = 8974 ( 8394 1560 1560 5 8210 )); +DATA(insert OID = 8985 ( 8392 8305 8305 1 8450 )); +DATA(insert OID = 8998 ( 8397 8307 8307 1 8464 )); #endif /* PG_AMPROC_H */ diff --git a/src/include/catalog/pg_cast.h b/src/include/catalog/pg_cast.h index e2532011f3..40eef589a4 100644 --- 
a/src/include/catalog/pg_cast.h +++ b/src/include/catalog/pg_cast.h @@ -596,4 +596,16 @@ DATA(insert ( 1042 3272 3314 i f _null_)); DATA(insert ( 3272 3969 3323 i f _null_)); DATA(insert ( 3969 3272 3321 i f _null_)); +/* vector <-> int[],float4[],float8[],numeric[] */ +DATA(insert OID = 8299 ( 8305 8305 8214 i f _null_)); +DATA(insert OID = 8298 ( 1007 8305 8215 a f _null_)); +DATA(insert OID = 8297 ( 1021 8305 8216 a f _null_)); +DATA(insert OID = 8296 ( 1022 8305 8217 a f _null_)); +DATA(insert OID = 8295 ( 1231 8305 8218 a f _null_)); +DATA(insert OID = 8294 ( 8305 1021 8219 i f _null_)); + +/* sparsevec -> sparsevec, vector <-> sparsevec */ +DATA(insert OID = 8285 ( 8307 8307 8228 i f _null_)); +DATA(insert OID = 8284 ( 8305 8307 8229 i f _null_)); +DATA(insert OID = 8283 ( 8307 8305 8230 a f _null_)); #endif /* PG_CAST_H */ diff --git a/src/include/catalog/pg_opclass.h b/src/include/catalog/pg_opclass.h index 7f83a4c8e2..e566732b60 100644 --- a/src/include/catalog/pg_opclass.h +++ b/src/include/catalog/pg_opclass.h @@ -375,5 +375,28 @@ DATA(insert ( 405 settext_ops PGNSP PGUID 1995 3272 f 0 )); DATA(insert ( 4439 setasint_ops PGNSP PGUID 6976 3272 t 0 )); DATA(insert ( 405 set_ops PGNSP PGUID 8646 3272 t 0 )); +DATA(insert OID = 8900 (8300 vector_l2_ops PGNSP PGUID 8371 8305 f 0)); +DATA(insert OID = 8999 (8300 vector_ip_ops PGNSP PGUID 8372 8305 f 0)); +DATA(insert OID = 8902 (8300 vector_cosine_ops PGNSP PGUID 8373 8305 f 0)); +DATA(insert OID = 8903 (8300 vector_l1_ops PGNSP PGUID 8374 8305 f 0)); + +DATA(insert OID = 8908 (8300 bit_jaccard_ops PGNSP PGUID 8379 1560 f 0)); +DATA(insert OID = 8909 (8300 bit_hamming_ops PGNSP PGUID 8380 1560 f 0)); + +DATA(insert OID = 8910 (8300 sparsevec_l2_ops PGNSP PGUID 8381 8307 f 0)); +DATA(insert OID = 8911 (8300 sparsevec_ip_ops PGNSP PGUID 8382 8307 f 0)); +DATA(insert OID = 8912 (8300 sparsevec_cosine_ops PGNSP PGUID 8383 8307 f 0)); +DATA(insert OID = 8913 (8300 sparsevec_l1_ops PGNSP PGUID 
8384 8307 f 0)); + +DATA(insert OID = 8914 (8301 vector_l2_ops PGNSP PGUID 8385 8305 t 0)); +DATA(insert OID = 8915 (8301 vector_ip_ops PGNSP PGUID 8386 8305 f 0)); +DATA(insert OID = 8916 (8301 vector_cosine_ops PGNSP PGUID 8387 8305 f 0)); + +DATA(insert OID = 8923 (8301 bit_hamming_ops PGNSP PGUID 8394 1560 f 0)); + +DATA(insert OID = 8977 (403 vector_ops PGNSP PGUID 8392 8305 t 0)); + +DATA(insert OID = 8979 (403 sparsevec_ops PGNSP PGUID 8397 8307 t 0)); + #endif /* PG_OPCLASS_H */ diff --git a/src/include/catalog/pg_operator.data b/src/include/catalog/pg_operator.data index 67e260f310..9e17ea18e4 100644 --- a/src/include/catalog/pg_operator.data +++ b/src/include/catalog/pg_operator.data @@ -1918,6 +1918,63 @@ DESCR("greater than or equal"); DATA(insert OID = 6565 ("-" PGNSP PGUID l f f 0 16 16 0 0 boolum - -)); DESCR("negate"); +DATA(insert OID = 8311 ("<->" PGNSP PGUID b f f 8305 8305 701 8311 0 8433 - -)); +DESCR("l2_distance"); +DATA(insert OID = 8312 ("<#>" PGNSP PGUID b f f 8305 8305 701 8312 0 vector_negative_inner_product - -)); +DESCR("vector_negative_inner_product"); +DATA(insert OID = 8313 ("<=>" PGNSP PGUID b f f 8305 8305 701 8313 0 8435 - -)); +DESCR("cosine_distance"); +DATA(insert OID = 8314 ("<+>" PGNSP PGUID b f f 8305 8305 701 8314 0 8436 - -)); +DESCR("l1_distance"); +DATA(insert OID = 8339 ("||" PGNSP PGUID b f f 8305 8305 8305 0 0 vector_concat - -)); +DESCR("vector_concat"); + +DATA(insert OID = 8319 ("<->" PGNSP PGUID b f f 8307 8307 701 8319 0 8465 - -)); +DESCR("sparsevec_l2_distance"); +DATA(insert OID = 8320 ("<#>" PGNSP PGUID b f f 8307 8307 701 8320 0 sparsevec_negative_inner_product - -)); +DESCR("sparsevec_negative_inner_product"); +DATA(insert OID = 8321 ("<=>" PGNSP PGUID b f f 8307 8307 701 8321 0 8466 - -)); +DESCR("sparsevec_cosine_distance"); +DATA(insert OID = 8322 ("<+>" PGNSP PGUID b f f 8307 8307 701 8322 0 8467 - -)); +DESCR("sparsevec_l1_distance"); + +DATA(insert OID = 8323 ("<~>" PGNSP PGUID b f f 1560 1560 701 8323 
0 hamming_distance - -)); +DESCR("hamming_distance"); +DATA(insert OID = 8324 ("<%>" PGNSP PGUID b f f 1560 1560 701 8324 0 jaccard_distance - -)); +DESCR("jaccard_distance"); + +DATA(insert OID = 8325 ("+" PGNSP PGUID b f f 8305 8305 8305 8325 0 vector_add 0 0)); +DESCR("vector_add"); +DATA(insert OID = 8326 ("-" PGNSP PGUID b f f 8305 8305 8305 0 0 vector_sub 0 0)); +DESCR("vector_sub"); +DATA(insert OID = 8349 ("*" PGNSP PGUID b f f 8305 8305 8305 8349 0 8203 0 0)); +DESCR("vector_mul"); +DATA(insert OID = 8327 ("<" PGNSP PGUID b f f 8305 8305 16 8329 8330 vector_lt scalarltsel scalarltjoinsel)); +DESCR("vector less than"); +DATA(insert OID = 8328 ("<=" PGNSP PGUID b f f 8305 8305 16 8330 8329 vector_le scalarltsel scalarltjoinsel)); +DESCR("vector less than or equal"); +DATA(insert OID = 8329 (">" PGNSP PGUID b f f 8305 8305 16 8327 8328 vector_gt scalargtsel scalargtjoinsel)); +DESCR("vector greater than"); +DATA(insert OID = 8330 (">=" PGNSP PGUID b f f 8305 8305 16 8328 8327 vector_ge scalargtsel scalargtjoinsel)); +DESCR("vector greater than or equal"); +DATA(insert OID = 8331 ("=" PGNSP PGUID b f f 8305 8305 16 8331 8332 vector_eq eqsel eqjoinsel)); +DESCR("vector equal"); +DATA(insert OID = 8332 ("<>" PGNSP PGUID b f f 8305 8305 16 8332 8331 vector_ne neqsel neqjoinsel)); +DESCR("vector unequal"); + +DATA(insert OID = 8333 ("<" PGNSP PGUID b f f 8307 8307 16 8335 8336 sparsevec_lt scalarltsel scalarltjoinsel)); +DESCR("sparsevec less than"); +DATA(insert OID = 8334 ("<=" PGNSP PGUID b f f 8307 8307 16 8336 8335 sparsevec_le scalarltsel scalarltjoinsel)); +DESCR("sparsevec less than or equal"); +DATA(insert OID = 8335 (">" PGNSP PGUID b f f 8307 8307 16 8333 8334 sparsevec_gt scalargtsel scalargtjoinsel)); +DESCR("sparsevec greater than"); +DATA(insert OID = 8336 (">=" PGNSP PGUID b f f 8307 8307 16 8334 8333 sparsevec_ge scalargtsel scalargtjoinsel)); +DESCR("sparsevec greater than or equal"); +DATA(insert OID = 8337 ("=" PGNSP PGUID b f f 8307 8307 16 
8337 8338 sparsevec_eq eqsel eqjoinsel)); +DESCR("sparsevec equal"); +DATA(insert OID = 8338 ("<>" PGNSP PGUID b f f 8307 8307 16 8338 8337 sparsevec_ne neqsel neqjoinsel)); +DESCR("sparsevec unequal"); + /* * function prototypes */ \ No newline at end of file diff --git a/src/include/catalog/pg_opfamily.h b/src/include/catalog/pg_opfamily.h index 740f061d18..9f2013d839 100644 --- a/src/include/catalog/pg_opfamily.h +++ b/src/include/catalog/pg_opfamily.h @@ -199,6 +199,28 @@ DATA(insert OID = 4262 (4239 int1_ops PGNSP PGUID)); DATA(insert OID = 4263 (4239 bool_ops PGNSP PGUID)); DATA(insert OID = 4264 (4239 smalldatetime_ops PGNSP PGUID)); +/*datavec index ops*/ +DATA(insert OID = 8371 (8300 vector_l2_ops PGNSP PGUID)); +DATA(insert OID = 8372 (8300 vector_ip_ops PGNSP PGUID)); +DATA(insert OID = 8373 (8300 vector_cosine_ops PGNSP PGUID)); +DATA(insert OID = 8374 (8300 vector_l1_ops PGNSP PGUID)); + +DATA(insert OID = 8379 (8300 bit_jaccard_ops PGNSP PGUID)); +DATA(insert OID = 8380 (8300 bit_hamming_ops PGNSP PGUID)); + +DATA(insert OID = 8381 (8300 sparsevec_l2_ops PGNSP PGUID)); +DATA(insert OID = 8382 (8300 sparsevec_ip_ops PGNSP PGUID)); +DATA(insert OID = 8383 (8300 sparsevec_cosine_ops PGNSP PGUID)); +DATA(insert OID = 8384 (8300 sparsevec_l1_ops PGNSP PGUID)); + +DATA(insert OID = 8385 (8301 vector_l2_ops PGNSP PGUID)); +DATA(insert OID = 8386 (8301 vector_ip_ops PGNSP PGUID)); +DATA(insert OID = 8387 (8301 vector_cosine_ops PGNSP PGUID)); + +DATA(insert OID = 8394 (8301 bit_hamming_ops PGNSP PGUID)); +DATA(insert OID = 8392 (403 vector_ops PGNSP PGUID)); +DATA(insert OID = 8397 (403 sparsevec_ops PGNSP PGUID)); + /* ubtree index */ #define BTREE_UBTREE_FAM_OID_DIFF 5000 #define BTREE_UBTREE_FAM_OID_SPECIAL_DIFF 4000 diff --git a/src/include/catalog/pg_type.h b/src/include/catalog/pg_type.h index 513ea4d1fb..4a1e153063 100644 --- a/src/include/catalog/pg_type.h +++ b/src/include/catalog/pg_type.h @@ -822,6 +822,19 @@ DATA(insert OID = 3272 ( anyset PGNSP 
PGUID -1 f s H t t \054 0 0 0 anyset_in DATA(insert OID = 4408 ( undefined PGNSP PGUID -2 f u W f t \054 0 0 0 undefinedin undefinedout undefinedrecv undefinedsend - - - c p f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("undefined objects as PLSQL compilation time"); #define UNDEFINEDOID 4408 + +DATA(insert OID = 8305 (vector PGNSP PGUID -1 f b U f t \054 0 0 8308 vector_in vector_out vector_recv vector_send vector_typmod_in - - i e f 0 -1 0 0 _null_ _null_ _null_)); +#define VECTOROID 8305 + +DATA(insert OID = 8307 (sparsevec PGNSP PGUID -1 f b U f t \054 0 0 8310 sparsevec_in sparsevec_out sparsevec_recv sparsevec_send sparsevec_typmod_in - - i e f 0 -1 0 0 _null_ _null_ _null_)); +#define SPARSEVECTOROID 8307 + +DATA(insert OID = 8308 ( _vector PGNSP PGUID -1 f b A f t \054 0 8305 0 array_in array_out array_recv array_send vector_typmod_in - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); +#define VECTORARRAYOID 8308 + +DATA(insert OID = 8310 ( _sparsevec PGNSP PGUID -1 f b A f t \054 0 8307 0 array_in array_out array_recv array_send sparsevec_typmod_in - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); +#define SPARSEVECARRAYOID 8310 + /* * macros */ diff --git a/src/include/knl/knl_session.h b/src/include/knl/knl_session.h index 663efea7e1..41195ca7b0 100644 --- a/src/include/knl/knl_session.h +++ b/src/include/knl/knl_session.h @@ -3008,6 +3008,11 @@ typedef struct knl_u_ndp_context { char *crl_path; } knl_u_ndp_context; +typedef struct knl_u_datavec_context { + int hnsw_ef_search; + int ivfflat_probes; +} knl_u_datavec_context; + typedef struct knl_session_context { volatile knl_session_status status; /* used for threadworker, elem in m_readySessionList */ @@ -3162,6 +3167,8 @@ typedef struct knl_session_context { /* standby write. 
*/ knl_u_libsw_context libsw_cxt; + knl_u_datavec_context datavec_ctx; + } knl_session_context; enum stp_xact_err_type { diff --git a/src/test/regress/expected/opr_sanity_2.out b/src/test/regress/expected/opr_sanity_2.out index c065a44183..4c1896e175 100644 --- a/src/test/regress/expected/opr_sanity_2.out +++ b/src/test/regress/expected/opr_sanity_2.out @@ -261,7 +261,17 @@ ORDER BY 1, 2, 3; 4439 | 5 | ~>~ 4444 | 1 | @@ 4444 | 2 | @@@ -(80 rows) + 8300 | 1 | <#> + 8300 | 1 | <%> + 8300 | 1 | <+> + 8300 | 1 | <-> + 8300 | 1 | <=> + 8300 | 1 | <~> + 8301 | 1 | <#> + 8301 | 1 | <-> + 8301 | 1 | <=> + 8301 | 1 | <~> +(90 rows) -- Check that all opclass search operators have selectivity estimators. -- This is not absolutely required, but it seems a reasonable thing @@ -419,7 +429,8 @@ WHERE p2.opfmethod = p1.oid AND p3.amprocfamily = p2.oid AND p4.amprocrighttype = p3.amprocrighttype) NOT BETWEEN (CASE WHEN p1.amname IN ('gist', 'gin') THEN p1.amsupport - 1 - WHEN p1.amname IN ('btree', 'ubtree') THEN p1.amsupport - 2 + WHEN p1.amname IN ('btree', 'ubtree', 'hnsw') THEN p1.amsupport - 2 + WHEN p1.amname = 'ivfflat' THEN p1.amsupport - 3 ELSE p1.amsupport END) AND p1.amsupport; amname | opfname | amproclefttype | amprocrighttype @@ -434,6 +445,7 @@ FROM pg_am am JOIN pg_opclass op ON opcmethod = am.oid LEFT JOIN pg_amproc p ON amprocfamily = opcfamily AND amproclefttype = amprocrighttype AND amproclefttype = opcintype WHERE am.amname <> 'btree' AND am.amname <> 'gist' AND am.amname <> 'gin' AND am.amname <> 'ubtree' + AND am.amname <> 'hnsw' AND am.amname <> 'ivfflat' GROUP BY amname, amsupport, opcname, amprocfamily HAVING count(*) != amsupport OR amprocfamily IS NULL; amname | opcname | count diff --git a/src/test/regress/expected/single_node_test_null_operator.out b/src/test/regress/expected/single_node_test_null_operator.out index bc2b1f3404..6218306bf6 100644 --- a/src/test/regress/expected/single_node_test_null_operator.out +++ 
b/src/test/regress/expected/single_node_test_null_operator.out @@ -5,15 +5,15 @@ LINE 1: SELECT 1 <=> 1; ^ HINT: No operator matches the given name and argument type(s). You might need to add explicit type casts. SELECT '' <=> NULL; -ERROR: operator does not exist: unknown <=> unknown +ERROR: operator is not unique: unknown <=> unknown LINE 1: SELECT '' <=> NULL; ^ -HINT: No operator matches the given name and argument type(s). You might need to add explicit type casts. +HINT: Could not choose a best candidate operator. You might need to add explicit type casts. SELECT NULL <=> NULL; -ERROR: operator does not exist: unknown <=> unknown +ERROR: operator is not unique: unknown <=> unknown LINE 1: SELECT NULL <=> NULL; ^ -HINT: No operator matches the given name and argument type(s). You might need to add explicit type casts. +HINT: Could not choose a best candidate operator. You might need to add explicit type casts. SELECT (1,2) <=> (1,2); ERROR: operator does not exist: integer <=> integer LINE 1: SELECT (1,2) <=> (1,2); diff --git a/src/test/regress/pg_regress.cpp b/src/test/regress/pg_regress.cpp index 825c7ef19e..8701807d0d 100644 --- a/src/test/regress/pg_regress.cpp +++ b/src/test/regress/pg_regress.cpp @@ -5467,7 +5467,7 @@ static void CheckCleanCodeWarningInfo(const int baseNum, const int currentNum, return; } -#define BASE_GLOBAL_VARIABLE_NUM 237 +#define BASE_GLOBAL_VARIABLE_NUM 240 #define CMAKE_CMD_BUF_LEN 1000 diff --git a/src/test/regress/sql/opr_sanity_2.sql b/src/test/regress/sql/opr_sanity_2.sql index 8fc4d2a7aa..97edf33c2b 100644 --- a/src/test/regress/sql/opr_sanity_2.sql +++ b/src/test/regress/sql/opr_sanity_2.sql @@ -286,7 +286,8 @@ WHERE p2.opfmethod = p1.oid AND p3.amprocfamily = p2.oid AND p4.amprocrighttype = p3.amprocrighttype) NOT BETWEEN (CASE WHEN p1.amname IN ('gist', 'gin') THEN p1.amsupport - 1 - WHEN p1.amname IN ('btree', 'ubtree') THEN p1.amsupport - 2 + WHEN p1.amname IN ('btree', 'ubtree', 'hnsw') THEN p1.amsupport - 2 + WHEN 
p1.amname = 'ivfflat' THEN p1.amsupport - 3 ELSE p1.amsupport END) AND p1.amsupport; @@ -299,6 +300,7 @@ FROM pg_am am JOIN pg_opclass op ON opcmethod = am.oid LEFT JOIN pg_amproc p ON amprocfamily = opcfamily AND amproclefttype = amprocrighttype AND amproclefttype = opcintype WHERE am.amname <> 'btree' AND am.amname <> 'gist' AND am.amname <> 'gin' AND am.amname <> 'ubtree' + AND am.amname <> 'hnsw' AND am.amname <> 'ivfflat' GROUP BY amname, amsupport, opcname, amprocfamily HAVING count(*) != amsupport OR amprocfamily IS NULL; -- Gitee