diff --git a/GNUmakefile.in b/GNUmakefile.in index 47a30ae1ff7f56de9e5b61f90350c0ff6bc0c9b1..fd5bbbd06305de7a5e5c7a220ab7b3fa31a0cd2a 100644 --- a/GNUmakefile.in +++ b/GNUmakefile.in @@ -102,11 +102,19 @@ install: @if test -d contrib/spq_plugin; then $(MAKE) -C contrib/spq_plugin $@; fi @if test -d contrib/dolphin; then $(MAKE) -C contrib/dolphin $@; fi @if test -d contrib/age; then $(MAKE) -C contrib/age $@; fi + @if test -d contrib/datavec; then $(MAKE) -C contrib/datavec clean; fi @if test -d contrib/datavec; then $(MAKE) -C contrib/datavec $@; fi @if test -d contrib/gms_stats; then $(MAKE) -C contrib/gms_stats $@; fi @if test -d contrib/gms_profiler; then $(MAKE) -C contrib/gms_profiler $@; fi @if test -d contrib/gms_output; then $(MAKE) -C contrib/gms_output $@; fi @if test -d contrib/timescaledb; then (./contrib/timescaledb/run_to_build.sh && $(MAKE) -C contrib/timescaledb/build $@); fi + @if test -d contrib/chparser; then \ + if command -v scws >/dev/null 2>&1; then \ + $(MAKE) -C contrib/chparser $@; \ + else \ + echo "SCWS is not installed, skipping chparser build."; \ + fi \ + fi +@echo "openGauss installation complete." 
endif endif diff --git a/build/script/aarch64_lite_list b/build/script/aarch64_lite_list index b4260130c817101e96e21f3b2b34c75cf3cf406d..72d7f646f58a847b50e519aa927fc1bda426f6e0 100644 --- a/build/script/aarch64_lite_list +++ b/build/script/aarch64_lite_list @@ -33,6 +33,8 @@ ./share/postgresql/extension/hstore.control ./share/postgresql/extension/security_plugin.control ./share/postgresql/extension/security_plugin--1.0.sql +./share/postgresql/extension/chparser.control +./share/postgresql/extension/chparser--1.0.sql ./share/postgresql/extension/dolphin.control ./share/postgresql/extension/dolphin--4.1.sql ./share/postgresql/extension/dolphin--1.0--1.1.sql @@ -744,6 +746,7 @@ ./lib/postgresql/pg_plugin ./lib/postgresql/proc_srclib ./lib/postgresql/security_plugin.so +./lib/postgresql/chparser.so ./lib/postgresql/dolphin.so ./lib/postgresql/pg_upgrade_support.so ./lib/postgresql/latin2_and_win1250.so diff --git a/build/script/aarch64_opengauss_list b/build/script/aarch64_opengauss_list index bda43613041c22eb7d23aa52ca99a51c864c0f6f..25f729e90a5cea5e797eafcba36090760c2025ba 100644 --- a/build/script/aarch64_opengauss_list +++ b/build/script/aarch64_opengauss_list @@ -99,6 +99,8 @@ ./share/postgresql/extension/age.control ./share/postgresql/extension/datavec--0.4.4.sql ./share/postgresql/extension/datavec.control +./share/postgresql/extension/chparser--1.0.sql +./share/postgresql/extension/chparser.control ./share/postgresql/extension/assessment--1.0.sql ./share/postgresql/extension/assessment.control ./share/postgresql/extension/file_fdw--1.0.sql @@ -737,6 +739,7 @@ ./share/postgresql/tsearch_data/swedish.stop ./share/postgresql/tsearch_data/ispell_sample.dict ./share/postgresql/tsearch_data/italian.stop +./share/postgresql/tsearch_data/dict.utf8.xdb ./share/postgresql/information_schema.sql ./share/postgresql/timezonesets/Antarctica.txt ./share/postgresql/timezonesets/Australia.txt @@ -816,6 +819,7 @@ ./lib/postgresql/dolphin.so ./lib/postgresql/age.so 
./lib/postgresql/datavec.so +./lib/postgresql/chparser.so ./lib/postgresql/pg_upgrade_support.so ./lib/postgresql/java/pljava.jar ./lib/postgresql/postgres_fdw.so diff --git a/build/script/opengauss_release_list_ubuntu_single b/build/script/opengauss_release_list_ubuntu_single index 2cfa4b9522bc171c1ab6e5336f243e84ac02abab..96df59ece388a348b7abbbe5e3279fe004688f76 100644 --- a/build/script/opengauss_release_list_ubuntu_single +++ b/build/script/opengauss_release_list_ubuntu_single @@ -84,6 +84,8 @@ ./share/postgresql/extension/age.control ./share/postgresql/extension/datavec--0.4.4.sql ./share/postgresql/extension/datavec.control +./share/postgresql/extension/chparser--1.0.sql +./share/postgresql/extension/chparser.control ./share/postgresql/extension/file_fdw--1.0.sql ./share/postgresql/extension/plpgsql.control ./share/postgresql/extension/dist_fdw.control @@ -724,6 +726,7 @@ ./share/postgresql/tsearch_data/swedish.stop ./share/postgresql/tsearch_data/ispell_sample.dict ./share/postgresql/tsearch_data/italian.stop +./share/postgresql/tsearch_data/dict.utf8.xdb ./share/postgresql/information_schema.sql ./share/postgresql/timezonesets/Antarctica.txt ./share/postgresql/timezonesets/Australia.txt @@ -788,6 +791,7 @@ ./lib/postgresql/dolphin.so ./lib/postgresql/age.so ./lib/postgresql/datavec.so +./lib/postgresql/chparser.so ./lib/postgresql/pg_upgrade_support.so ./lib/postgresql/java/pljava.jar ./lib/postgresql/postgres_fdw.so diff --git a/build/script/x86_64_lite_list b/build/script/x86_64_lite_list index 4bc6148bc0d36649ee9e6bf9394a3e59de8c76a8..ee81412668a0c15ce2b3377c11869cb64fccab3f 100644 --- a/build/script/x86_64_lite_list +++ b/build/script/x86_64_lite_list @@ -33,6 +33,8 @@ ./share/postgresql/extension/hstore.control ./share/postgresql/extension/security_plugin.control ./share/postgresql/extension/security_plugin--1.0.sql +./share/postgresql/extension/chparser.control +./share/postgresql/extension/chparser--1.0.sql 
./share/postgresql/extension/dolphin.control ./share/postgresql/extension/dolphin--4.1.sql ./share/postgresql/extension/dolphin--1.0--1.1.sql @@ -743,6 +745,7 @@ ./lib/postgresql/pg_plugin ./lib/postgresql/proc_srclib ./lib/postgresql/security_plugin.so +./lib/postgresql/chparser.so ./lib/postgresql/dolphin.so ./lib/postgresql/pg_upgrade_support.so ./lib/postgresql/latin2_and_win1250.so diff --git a/build/script/x86_64_opengauss_list b/build/script/x86_64_opengauss_list index 757ea56f3f8bfc3bad24156e7f3a0ef090c3edc7..3caff580b211ea8c0dfc7a3fba18a0e98ba35b8f 100644 --- a/build/script/x86_64_opengauss_list +++ b/build/script/x86_64_opengauss_list @@ -99,6 +99,8 @@ ./share/postgresql/extension/timescaledb--1.7.4.sql ./share/postgresql/extension/datavec--0.4.4.sql ./share/postgresql/extension/datavec.control +./share/postgresql/extension/chparser--1.0.sql +./share/postgresql/extension/chparser.control ./share/postgresql/extension/assessment--1.0.sql ./share/postgresql/extension/assessment.control ./share/postgresql/extension/file_fdw--1.0.sql @@ -737,6 +739,7 @@ ./share/postgresql/tsearch_data/swedish.stop ./share/postgresql/tsearch_data/ispell_sample.dict ./share/postgresql/tsearch_data/italian.stop +./share/postgresql/tsearch_data/dict.utf8.xdb ./share/postgresql/information_schema.sql ./share/postgresql/timezonesets/Antarctica.txt ./share/postgresql/timezonesets/Australia.txt @@ -816,6 +819,7 @@ ./lib/postgresql/dolphin.so ./lib/postgresql/age.so ./lib/postgresql/datavec.so +./lib/postgresql/chparser.so ./lib/postgresql/pg_upgrade_support.so ./lib/postgresql/java/pljava.jar ./lib/postgresql/postgres_fdw.so diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 7777f0625a5c0d7b5c10150a7360210be11069c6..ace80fcfe259768a1afecab9eca61a2fc424327c 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -26,6 +26,7 @@ set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/ndpplugin ${CMAKE_CURRENT_SOURCE_DIR}/spq_plugin 
${CMAKE_CURRENT_SOURCE_DIR}/datavec + ${CMAKE_CURRENT_SOURCE_DIR}/chparser ${CMAKE_CURRENT_SOURCE_DIR}/gms_stats ${CMAKE_CURRENT_SOURCE_DIR}/gms_profiler ) @@ -57,4 +58,7 @@ endif() if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/datavec) add_subdirectory(datavec) endif() +if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/chparser) + add_subdirectory(chparser) +endif() add_subdirectory(gms_profiler) diff --git a/docker/dockerfiles/buildDockerImage.sh b/docker/dockerfiles/buildDockerImage.sh index 2dc05642d9f996025feba48fe53e0d2d003c10a8..5ea160c087879a5b7b5c2d80912d80388a7f12a5 100644 --- a/docker/dockerfiles/buildDockerImage.sh +++ b/docker/dockerfiles/buildDockerImage.sh @@ -91,6 +91,17 @@ MIN_DOCKER_VERSION_MAJOR="17" MIN_DOCKER_VERSION_MINOR="09" arch=$(case $(uname -m) in i386) echo "386" ;; i686) echo "386" ;; x86_64) echo "amd64";; aarch64)echo "arm64";; esac) file_arch="" +if [ "${arch}" = "amd64" ]; then + file_arch="x86_64" + if [ -f "/etc/openEuler-release" ];then + DOCKERFILE="dockerfile_x86" + else + DOCKERFILE="dockerfile_amd" + fi +else + file_arch="aarch64" + DOCKERFILE="dockerfile_arm" +fi if [ "$#" -eq 0 ]; then usage; diff --git a/src/bin/gs_guc/cluster_guc.conf b/src/bin/gs_guc/cluster_guc.conf index 157fdd424920423e1182c240ed3e4241bcc76d75..b72bf61c4075a6fd2cc8bb21e8d1e063efdc35fd 100755 --- a/src/bin/gs_guc/cluster_guc.conf +++ b/src/bin/gs_guc/cluster_guc.conf @@ -832,6 +832,8 @@ uwal_rpc_flowcontrol_value|int|8,2048|NULL|NULL| uwal_truncate_interval|int|0,7200|NULL|NULL| uwal_async_append_switch|bool|0,0|NULL|NULL| enable_gazelle_performance_mode|bool|0,0|NULL|NULL| +enable_pq|bool|0,0|NULL|NULL| +ivfpq_kreorder|int|0,2147483647|NULL|NULL| [cmserver] log_dir|string|0,0|NULL|NULL| log_file_size|int|0,2047|MB|NULL| diff --git a/src/common/backend/catalog/builtin_funcs.ini b/src/common/backend/catalog/builtin_funcs.ini index 4f538bb3055d666efe6d01e9a54b0ebdcae7c4cc..37bad0bc6a51cd0e3cb12547d6ea5d3b033f8c01 100644 --- a/src/common/backend/catalog/builtin_funcs.ini 
+++ b/src/common/backend/catalog/builtin_funcs.ini @@ -487,7 +487,7 @@ AddBuiltinFunc(_0(1504), _1("attach"), _2(2), _3(true), _4(true), _5(debug_client_attatch), _6(2249), _7(PG_PLDEBUG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(1), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(2, 25, 23), _21(6, 25, 23, 26, 25, 23, 25), _22(6, 'i', 'i', 'o', 'o', 'o', 'o'), _23(6, "nodename", "port", "funcoid", "funcname", "lineno", "query"), _24(NULL), _25("debug_client_attatch"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33(NULL), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)), ), AddFuncGroup( - "avg", 8, + "avg", 9, AddBuiltinFunc(_0(2100), _1("avg"), _2(1), _3(false), _4(false), _5(aggregate_dummy), _6(1700), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(true), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 20), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("aggregate_dummy"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("concatenate aggregate input into an array"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)), AddBuiltinFunc(_0(2101), _1("avg"), _2(1), _3(false), _4(false), _5(aggregate_dummy), _6(1700), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(true), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 23), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("aggregate_dummy"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("concatenate aggregate input into an array"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)), AddBuiltinFunc(_0(2102), _1("avg"), _2(1), _3(false), _4(false), _5(aggregate_dummy), _6(1700), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), 
_9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(true), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 21), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("aggregate_dummy"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("concatenate aggregate input into an array"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)), @@ -495,7 +495,8 @@ AddBuiltinFunc(_0(2104), _1("avg"), _2(1), _3(false), _4(false), _5(aggregate_dummy), _6(701), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(true), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 700), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("aggregate_dummy"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("concatenate aggregate input into an array"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)), AddBuiltinFunc(_0(2105), _1("avg"), _2(1), _3(false), _4(false), _5(aggregate_dummy), _6(701), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(true), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 701), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("aggregate_dummy"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("concatenate aggregate input into an array"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)), AddBuiltinFunc(_0(2106), _1("avg"), _2(1), _3(false), _4(false), _5(aggregate_dummy), _6(1186), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(true), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 1186), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("aggregate_dummy"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("concatenate aggregate input into an array"), 
_34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)), - AddBuiltinFunc(_0(5537), _1("avg"), _2(1), _3(false), _4(false), _5(aggregate_dummy), _6(1700), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(true), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 5545), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("aggregate_dummy"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("concatenate aggregate input into an array"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + AddBuiltinFunc(_0(5537), _1("avg"), _2(1), _3(false), _4(false), _5(aggregate_dummy), _6(1700), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(true), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 5545), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("aggregate_dummy"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("concatenate aggregate input into an array"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)), + AddBuiltinFunc(_0(8241), _1("avg"), _2(1), _3(false), _4(false), _5(aggregate_dummy), _6(8305), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(true), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("aggregate_dummy"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("concatenate aggregate input into an array"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) ), AddFuncGroup( "backtrace", 1, @@ -11203,7 +11204,7 @@ AddFuncGroup( AddBuiltinFunc(_0(SUBSTRINNNOLENFUNCOID), _1("substring_inner"), _2(2), _3(true), _4(false), _5(text_substr_no_len), _6(25), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), 
_10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(2, 25, 23), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("text_substr_no_len"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("extract portion of string"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) ), AddFuncGroup( - "sum", 8, + "sum", 9, AddBuiltinFunc(_0(2107), _1("sum"), _2(1), _3(false), _4(false), _5(aggregate_dummy), _6(1700), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(true), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 20), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("aggregate_dummy"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("the average (arithmetic mean) as numeric of all bigint values"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)), AddBuiltinFunc(_0(2108), _1("sum"), _2(1), _3(false), _4(false), _5(aggregate_dummy), _6(20), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(true), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 23), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("aggregate_dummy"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("the average (arithmetic mean) as numeric of all bigint values"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)), AddBuiltinFunc(_0(2109), _1("sum"), _2(1), _3(false), _4(false), _5(aggregate_dummy), _6(20), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(true), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 21), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("aggregate_dummy"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("the average (arithmetic mean) 
as numeric of all bigint values"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)), @@ -11211,7 +11212,8 @@ AddFuncGroup( AddBuiltinFunc(_0(2111), _1("sum"), _2(1), _3(false), _4(false), _5(aggregate_dummy), _6(701), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(true), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 701), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("aggregate_dummy"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("the average (arithmetic mean) as numeric of all bigint values"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)), AddBuiltinFunc(_0(2112), _1("sum"), _2(1), _3(false), _4(false), _5(aggregate_dummy), _6(790), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(true), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 790), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("aggregate_dummy"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("the average (arithmetic mean) as numeric of all bigint values"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)), AddBuiltinFunc(_0(2113), _1("sum"), _2(1), _3(false), _4(false), _5(aggregate_dummy), _6(1186), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(true), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 1186), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("aggregate_dummy"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("the average (arithmetic mean) as numeric of all bigint values"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)), - AddBuiltinFunc(_0(2114), _1("sum"), _2(1), _3(false), _4(false), _5(aggregate_dummy), _6(1700), _7(PG_CATALOG_NAMESPACE), 
_8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(true), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 1700), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("aggregate_dummy"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("the average (arithmetic mean) as numeric of all bigint values"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + AddBuiltinFunc(_0(2114), _1("sum"), _2(1), _3(false), _4(false), _5(aggregate_dummy), _6(1700), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(true), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 1700), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("aggregate_dummy"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33("the average (arithmetic mean) as numeric of all bigint values"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)), + AddBuiltinFunc(_0(8242), _1("sum"), _2(1), _3(false), _4(false), _5(aggregate_dummy), _6(8305), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(true), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("aggregate_dummy"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("the average (arithmetic mean) as numeric of all bigint values"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) ), AddFuncGroup( "suppress_redundant_updates_trigger", 1, @@ -13062,3 +13064,370 @@ AddFuncGroup( "gs_get_hba_conf", 1, AddBuiltinFunc(_0(2873), _1("gs_get_hba_conf"), _2(0), _3(true), _4(true), _5(gs_get_hba_conf), _6(2249), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(10), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), 
_20(0), _21(5, 25, 25, 25, 25, 25), _22(5, 'o', 'o', 'o', 'o','o'), _23(5, "type", "database", "users", "address", "method"), _24(NULL), _25("gs_get_hba_conf"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("config: information about pg_hba conf file"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) ), + AddFuncGroup( + "hnswinsert", 1, + AddBuiltinFunc(_0(8401), _1("hnswinsert"), _2(6), _3(true), _4(false), _5(hnswinsert), _6(16), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(6, 2281, 2281, 2281, 2281, 2281, 2281), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("hnswinsert"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "hnswbeginscan", 1, + AddBuiltinFunc(_0(8402), _1("hnswbeginscan"), _2(3), _3(true), _4(false), _5(hnswbeginscan), _6(2281), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(3, 2281, 2281, 2281), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("hnswbeginscan"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "hnswgettuple", 1, + AddBuiltinFunc(_0(8403), _1("hnswgettuple"), _2(2), _3(true), _4(false), _5(hnswgettuple), _6(16), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(2, 2281, 2281), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("hnswgettuple"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), 
_33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "hnswrescan", 1, + AddBuiltinFunc(_0(8404), _1("hnswrescan"), _2(5), _3(true), _4(false), _5(hnswrescan), _6(2278), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(5, 2281, 2281, 2281, 2281, 2281), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("hnswrescan"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "hnswdelete", 1, + AddBuiltinFunc(_0(8429), _1("hnswdelete"), _2(5), _3(true), _4(false), _5(hnswdelete), _6(16), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(5, 2281, 2281, 2281, 2281, 2281), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("hnswdelete"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "hnswendscan", 1, + AddBuiltinFunc(_0(8405), _1("hnswendscan"), _2(1), _3(true), _4(false), _5(hnswendscan), _6(2278), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(1, 2281), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("hnswendscan"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "hnswbuild", 1, + AddBuiltinFunc(_0(8406), _1("hnswbuild"), _2(3), _3(true), _4(false), _5(hnswbuild), _6(2281), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), 
_9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(3, 2281, 2281, 2281), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("hnswbuild"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "hnswbuildempty", 1, + AddBuiltinFunc(_0(8407), _1("hnswbuildempty"), _2(1), _3(true), _4(false), _5(hnswbuildempty), _6(2278), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(1, 2281), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("hnswbuildempty"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "hnswbulkdelete", 1, + AddBuiltinFunc(_0(8408), _1("hnswbulkdelete"), _2(4), _3(true), _4(false), _5(hnswbulkdelete), _6(2281), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(4, 2281, 2281, 2281, 2281), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("hnswbulkdelete"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "hnswvacuumcleanup", 1, + AddBuiltinFunc(_0(8409), _1("hnswvacuumcleanup"), _2(2), _3(true), _4(false), _5(hnswvacuumcleanup), _6(2281), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(2, 2281, 2281), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("hnswvacuumcleanup"), _26(NULL), _27(NULL), _28(NULL), _29(0), 
_30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "hnswcostestimate", 1, + AddBuiltinFunc(_0(8410), _1("hnswcostestimate"), _2(7), _3(true), _4(false), _5(hnswcostestimate), _6(2278), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(7, 2281, 2281, 2281, 2281, 2281, 2281, 2281), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("hnswcostestimate"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "hnswoptions", 1, + AddBuiltinFunc(_0(8411), _1("hnswoptions"), _2(2), _3(true), _4(false), _5(hnswoptions), _6(2281), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(2, 2281, 2281), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("hnswoptions"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "ivfflatinsert", 1, + AddBuiltinFunc(_0(8412), _1("ivfflatinsert"), _2(6), _3(true), _4(false), _5(ivfflatinsert), _6(16), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(6, 2281, 2281, 2281, 2281, 2281, 2281), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("ivfflatinsert"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "ivfflatbeginscan", 1, + AddBuiltinFunc(_0(8413), _1("ivfflatbeginscan"), _2(3), _3(true), 
_4(false), _5(ivfflatbeginscan), _6(2281), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(3, 2281, 2281, 2281), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("ivfflatbeginscan"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "ivfflatgettuple", 1, + AddBuiltinFunc(_0(8414), _1("ivfflatgettuple"), _2(2), _3(true), _4(false), _5(ivfflatgettuple), _6(16), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(2, 2281, 2281), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("ivfflatgettuple"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "ivfflatrescan", 1, + AddBuiltinFunc(_0(8415), _1("ivfflatrescan"), _2(5), _3(true), _4(false), _5(ivfflatrescan), _6(2278), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(5, 2281, 2281, 2281, 2281, 2281), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("ivfflatrescan"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "ivfflatendscan", 1, + AddBuiltinFunc(_0(8416), _1("ivfflatendscan"), _2(1), _3(true), _4(false), _5(ivfflatendscan), _6(2278), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(1, 2281), _21(NULL), _22(NULL), 
_23(NULL), _24(NULL), _25("ivfflatendscan"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "ivfflatbuild", 1, + AddBuiltinFunc(_0(8417), _1("ivfflatbuild"), _2(3), _3(true), _4(false), _5(ivfflatbuild), _6(2281), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(3, 2281, 2281, 2281), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("ivfflatbuild"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "ivfflatbuildempty", 1, + AddBuiltinFunc(_0(8418), _1("ivfflatbuildempty"), _2(1), _3(true), _4(false), _5(ivfflatbuildempty), _6(2278), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(1, 2281), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("ivfflatbuildempty"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "ivfflatbulkdelete", 1, + AddBuiltinFunc( _0(8419), _1("ivfflatbulkdelete"), _2(4), _3(true), _4(false), _5(ivfflatbulkdelete), _6(2281), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(4, 2281, 2281, 2281, 2281), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("ivfflatbulkdelete"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + 
"ivfflatvacuumcleanup", 1, + AddBuiltinFunc(_0(8420), _1("ivfflatvacuumcleanup"), _2(2), _3(true), _4(false), _5(ivfflatvacuumcleanup), _6(2281), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(2, 2281, 2281), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("ivfflatvacuumcleanup"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "ivfflatcostestimate", 1, + AddBuiltinFunc(_0(8421), _1("ivfflatcostestimate"), _2(7), _3(true), _4(false), _5(ivfflatcostestimate), _6(2278), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(7, 2281, 2281, 2281, 2281, 2281, 2281, 2281), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("ivfflatcostestimate"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "ivfflatoptions", 1, + AddBuiltinFunc(_0(8422), _1("ivfflatoptions"), _2(2), _3(true), _4(false), _5(ivfflatoptions), _6(2281), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(2, 2281, 2281), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("ivfflatoptions"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_in", 1, + AddBuiltinFunc(_0(8423), _1("vector_in"), _2(3), _3(true), _4(false), _5(vector_in), _6(8305), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), 
_11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(3, 2275, 26, 23), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_in"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("I/O"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_out", 1, + AddBuiltinFunc(_0(8424), _1("vector_out"), _2(1), _3(true), _4(false), _5(vector_out), _6(2275), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(1, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_out"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("I/O"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_typmod_in", 1, + AddBuiltinFunc(_0(8425), _1("vector_typmod_in"), _2(1), _3(true), _4(false), _5(vector_typmod_in), _6(23), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 1263), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_typmod_in"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false),_33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_recv", 1, + AddBuiltinFunc(_0(8426), _1("vector_recv"), _2(3), _3(true), _4(false), _5(vector_recv), _6(8305), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(3, 2281, 26, 23), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_recv"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("I/O"), _34('f'), _35(NULL), _36(0), _37(false), 
_38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_send", 1, + AddBuiltinFunc(_0(8427), _1("vector_send"), _2(1), _3(true), _4(false), _5(vector_send), _6(17), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(1, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_send"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("I/O"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_dims", 1, + AddBuiltinFunc(_0(8428), _1("vector_dims"), _2(1), _3(true), _4(false), _5(vector_dims), _6(23), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(1, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_dims"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)), + ), + + AddFuncGroup( + "vector_l2_squared_distance", 1, + AddBuiltinFunc(_0(8431), _1("vector_l2_squared_distance"), _2(1), _3(true), _4(false), _5(vector_l2_squared_distance), _6(701), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(2, 8305, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_l2_squared_distance"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_spherical_distance", 1, + AddBuiltinFunc(_0(8432), _1("vector_spherical_distance"), _2(1), _3(true), _4(false), _5(vector_spherical_distance), _6(701), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), 
_9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(2, 8305, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL),_25("vector_spherical_distance"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "l2_distance", 2, + AddBuiltinFunc(_0(8433), _1("l2_distance"), _2(1), _3(true), _4(false), _5(l2_distance), _6(701), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(2, 8305, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("l2_distance"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)), + AddBuiltinFunc(_0(8465), _1("l2_distance"), _2(1), _3(true), _4(false), _5(sparsevec_l2_distance), _6(701), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(2, 8307, 8307), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("sparsevec_l2_distance"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_negative_inner_product", 1, + AddBuiltinFunc(_0(8434), _1("vector_negative_inner_product"), _2(1), _3(true), _4(false), _5(vector_negative_inner_product), _6(701), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(2, 8305, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_negative_inner_product"), _26(NULL), _27(NULL), _28(NULL), _29(0), 
_30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "cosine_distance", 2, + AddBuiltinFunc(_0(8435), _1("cosine_distance"), _2(1), _3(true), _4(false), _5(cosine_distance), _6(701), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(2, 8305, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("cosine_distance"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)), + AddBuiltinFunc(_0(8466), _1("cosine_distance"), _2(1), _3(true), _4(false), _5(sparsevec_cosine_distance), _6(701), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(2, 8307, 8307), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("sparsevec_cosine_distance"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "l1_distance", 2, + AddBuiltinFunc(_0(8436), _1("l1_distance"), _2(1), _3(true), _4(false), _5(l1_distance), _6(701), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(2, 8305, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("l1_distance"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)), + AddBuiltinFunc(_0(8467), _1("l1_distance"), _2(1), _3(true), _4(false), _5(sparsevec_l1_distance), _6(701), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), 
_10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(2, 8307, 8307), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("sparsevec_l1_distance"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "inner_product", 2, + AddBuiltinFunc(_0(8437), _1("inner_product"), _2(1), _3(true), _4(false), _5(inner_product), _6(701), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(2, 8305, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("inner_product"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)), + AddBuiltinFunc(_0(8471), _1("inner_product"), _2(1), _3(true), _4(false), _5(sparsevec_inner_product), _6(701), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(2, 8307, 8307), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("sparsevec_inner_product"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_norm", 1, + AddBuiltinFunc(_0(8438), _1("vector_norm"), _2(1), _3(true), _4(false), _5(vector_norm), _6(701), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_norm"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), 
_38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_add", 1, + AddBuiltinFunc(_0(8439), _1("vector_add"), _2(1), _3(true), _4(false), _5(vector_add), _6(8305), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(2, 8305, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_add"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_sub", 1, + AddBuiltinFunc(_0(8440), _1("vector_sub"), _2(1), _3(true), _4(false), _5(vector_sub), _6(8305), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(2, 8305, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_sub"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_lt", 1, + AddBuiltinFunc(_0(8441), _1("vector_lt"), _2(1), _3(true), _4(false), _5(vector_lt), _6(16), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(2, 8305, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_lt"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_le", 1, + AddBuiltinFunc(_0(8442), _1("vector_le"), _2(1), _3(true), _4(false), _5(vector_le), _6(16), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), 
_19(0), _20(2, 8305, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_le"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_eq", 1, + AddBuiltinFunc(_0(8443), _1("vector_eq"), _2(1), _3(true), _4(false), _5(vector_eq), _6(16), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(2, 8305, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_eq"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_ne", 1, + AddBuiltinFunc(_0(8444), _1("vector_ne"), _2(1), _3(true), _4(false), _5(vector_ne), _6(16), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(2, 8305, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_ne"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_ge", 1, + AddBuiltinFunc(_0(8445), _1("vector_ge"), _2(1), _3(true), _4(false), _5(vector_ge), _6(16), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(2, 8305, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_ge"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_gt", 1, + AddBuiltinFunc(_0(8446), _1("vector_gt"), _2(1), 
_3(true), _4(false), _5(vector_gt), _6(16), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(2, 8305, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_gt"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_accum", 1, + AddBuiltinFunc(_0(8447), _1("vector_accum"), _2(1), _3(true), _4(false), _5(vector_accum), _6(1022), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(2, 1022, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_accum"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_combine", 1, + AddBuiltinFunc(_0(8448), _1("vector_combine"), _2(1), _3(true), _4(false), _5(vector_combine), _6(1022), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(2, 1022, 1022), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_combine"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_avg", 1, + AddBuiltinFunc(_0(8449), _1("vector_avg"), _2(1), _3(true), _4(false), _5(vector_avg), _6(8305), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(1, 1022), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_avg"), 
_26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_cmp", 1, + AddBuiltinFunc(_0(8450), _1("vector_cmp"), _2(1), _3(true), _4(false), _5(vector_cmp), _6(23), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(2, 8305, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_cmp"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "sparsevec_in", 1, + AddBuiltinFunc(_0(8458), _1("sparsevec_in"), _2(3), _3(true), _4(false), _5(sparsevec_in), _6(8307), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(3, 2275, 26, 23), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("sparsevec_in"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("I/O"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "sparsevec_out", 1, + AddBuiltinFunc(_0(8459), _1("sparsevec_out"), _2(1), _3(true), _4(false), _5(sparsevec_out), _6(2275), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(1, 8307), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("sparsevec_out"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("I/O"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "sparsevec_typmod_in", 1, + AddBuiltinFunc(_0(8460), _1("sparsevec_typmod_in"), _2(1), _3(true), _4(false), 
_5(sparsevec_typmod_in), _6(23), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 1263), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("sparsevec_typmod_in"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false),_33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "sparsevec_recv", 1, + AddBuiltinFunc(_0(8461), _1("sparsevec_recv"), _2(3), _3(true), _4(false), _5(sparsevec_recv), _6(8307), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(3, 2281, 26, 23), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("sparsevec_recv"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("I/O"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "sparsevec_send", 1, + AddBuiltinFunc(_0(8462), _1("sparsevec_send"), _2(1), _3(true), _4(false), _5(sparsevec_send), _6(17), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(1, 8307), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("sparsevec_send"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("I/O"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "sparsevec_negative_inner_product", 1, + AddBuiltinFunc(_0(8463), _1("sparsevec_negative_inner_product"), _2(1), _3(true), _4(false), _5(sparsevec_negative_inner_product), _6(701), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(2, 8307, 8307), 
_21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("sparsevec_negative_inner_product"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "sparsevec_cmp", 1, + AddBuiltinFunc(_0(8464), _1("sparsevec_cmp"), _2(1), _3(true), _4(false), _5(sparsevec_cmp), _6(23), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(2, 8307, 8307), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("sparsevec_cmp"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + + AddFuncGroup( + "jaccard_distance", 1, + AddBuiltinFunc(_0(8468), _1("jaccard_distance"), _2(2), _3(true), _4(false), _5(jaccard_distance), _6(701), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(2, 1560, 1560), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("jaccard_distance"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "hamming_distance", 1, + AddBuiltinFunc(_0(8469), _1("hamming_distance"), _2(2), _3(true), _4(false), _5(hamming_distance), _6(701), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(2, 1560, 1560), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("hamming_distance"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( 
+ "sparsevec_l2_squared_distance", 1, + AddBuiltinFunc(_0(8470), _1("sparsevec_l2_squared_distance"), _2(1), _3(true), _4(false), _5(sparsevec_l2_squared_distance), _6(701), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(2, 8307, 8307), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("sparsevec_l2_squared_distance"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "sparsevec_lt", 1, + AddBuiltinFunc(_0(8472), _1("sparsevec_lt"), _2(1), _3(true), _4(false), _5(sparsevec_lt), _6(16), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(2, 8307, 8307), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("sparsevec_lt"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "sparsevec_le", 1, + AddBuiltinFunc(_0(8473), _1("sparsevec_le"), _2(1), _3(true), _4(false), _5(sparsevec_le), _6(16), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(2, 8307, 8307), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("sparsevec_le"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "sparsevec_eq", 1, + AddBuiltinFunc(_0(8474), _1("sparsevec_eq"), _2(1), _3(true), _4(false), _5(sparsevec_eq), _6(16), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), 
_14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(2, 8307, 8307), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("sparsevec_eq"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "sparsevec_ne", 1, + AddBuiltinFunc(_0(8475), _1("sparsevec_ne"), _2(1), _3(true), _4(false), _5(sparsevec_ne), _6(16), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(2, 8307, 8307), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("sparsevec_ne"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "sparsevec_ge", 1, + AddBuiltinFunc(_0(8476), _1("sparsevec_ge"), _2(1), _3(true), _4(false), _5(sparsevec_ge), _6(16), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(2, 8307, 8307), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("sparsevec_ge"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "sparsevec_gt", 1, + AddBuiltinFunc(_0(8477), _1("sparsevec_gt"), _2(1), _3(true), _4(false), _5(sparsevec_gt), _6(16), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(2, 8307, 8307), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("sparsevec_gt"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), 
_40(0)) + ), + + AddFuncGroup( + "hnsw_sparsevec_support", 1, + AddBuiltinFunc(_0(8479), _1("hnsw_sparsevec_support"), _2(0), _3(true), _4(false), _5(hnsw_sparsevec_support), _6(2281), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 2281), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("hnsw_sparsevec_support"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "l2_norm", 1, + AddBuiltinFunc(_0(8478), _1("l2_norm"), _2(1), _3(true), _4(false), _5(sparsevec_l2_norm), _6(701), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 8307), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("sparsevec_l2_norm"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "l2_normalize", 2, + AddBuiltinFunc(_0(8200), _1("l2_normalize"), _2(1), _3(true), _4(false), _5(l2_normalize), _6(8305), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("l2_normalize"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)), + AddBuiltinFunc(_0(8211), _1("l2_normalize"), _2(1), _3(true), _4(false), _5(sparsevec_l2_normalize), _6(8307), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), 
_18('i'), _19(0), _20(1, 8307), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("sparsevec_l2_normalize"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "binary_quantize", 1, + AddBuiltinFunc(_0(8201), _1("binary_quantize"), _2(1), _3(true), _4(false), _5(binary_quantize), _6(1562), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("binary_quantize"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)), + ), + AddFuncGroup( + "subvector", 1, + AddBuiltinFunc(_0(8202), _1("subvector"), _2(1), _3(true), _4(false), _5(subvector), _6(8305), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(3, 8305, 23, 23), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("subvector"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)), + ), + AddFuncGroup( + "vector_mul", 1, + AddBuiltinFunc(_0(8203), _1("vector_mul"), _2(1), _3(true), _4(false), _5(vector_mul), _6(8305), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(2, 8305, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_mul"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_concat", 1, 
+ AddBuiltinFunc(_0(8204), _1("vector_concat"), _2(1), _3(true), _4(false), _5(vector_concat), _6(8305), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('s'), _19(0), _20(2, 8305, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_concat"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "ivfflatvalidate", 1, + AddBuiltinFunc(_0(8205), _1("ivfflatvalidate"), _2(1), _3(true), _4(false), _5(ivfflatvalidate), _6(16), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(1, 2281), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("ivfflatvalidate"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "ivfflathandler", 1, + AddBuiltinFunc(_0(8206), _1("ivfflathandler"), _2(1), _3(true), _4(false), _5(ivfflathandler), _6(2281), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(1, 2281), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("ivfflathandler"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "hnswvalidate", 1, + AddBuiltinFunc(_0(8207), _1("hnswvalidate"), _2(1), _3(true), _4(false), _5(hnswvalidate), _6(16), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(1, 2281), 
_21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("hnswvalidate"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "hnswhandler", 1, + AddBuiltinFunc(_0(8208), _1("hnswhandler"), _2(1), _3(true), _4(false), _5(hnswhandler), _6(2281), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(1, 2281), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("hnswhandler"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "hnsw_bit_support", 1, + AddBuiltinFunc(_0(8209), _1("hnsw_bit_support"), _2(0), _3(true), _4(false), _5(hnsw_bit_support), _6(2281), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 2281), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("hnsw_bit_support"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "ivfflat_bit_support", 1, + AddBuiltinFunc(_0(8210), _1("ivfflat_bit_support"), _2(0), _3(true), _4(false), _5(ivfflat_bit_support), _6(2281), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 2281), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("ivfflat_bit_support"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector", 1, + 
AddBuiltinFunc(_0(8214), _1("vector"), _2(3), _3(true), _4(false), _5(vector), _6(8305), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(3, 8305, 23, 16), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "array_to_vector", 4, + AddBuiltinFunc(_0(8215), _1("array_to_vector"), _2(3), _3(true), _4(false), _5(array_to_vector), _6(8305), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(3, 1007, 23, 16), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("array_to_vector"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)), + AddBuiltinFunc(_0(8216), _1("array_to_vector"), _2(3), _3(true), _4(false), _5(array_to_vector), _6(8305), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(3, 1021, 23, 16), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("array_to_vector"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)), + AddBuiltinFunc(_0(8217), _1("array_to_vector"), _2(3), _3(true), _4(false), _5(array_to_vector), _6(8305), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(3, 1022, 23, 16), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("array_to_vector"), 
_26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)), + AddBuiltinFunc(_0(8218), _1("array_to_vector"), _2(3), _3(true), _4(false), _5(array_to_vector), _6(8305), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(3, 1231, 23, 16), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("array_to_vector"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_to_float4", 1, + AddBuiltinFunc(_0(8219), _1("vector_to_float4"), _2(3), _3(true), _4(false), _5(vector_to_float4), _6(1021), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(3, 8305, 23, 16), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_to_float4"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_to_int4", 1, + AddBuiltinFunc(_0(8212), _1("vector_to_int4"), _2(1), _3(true), _4(false), _5(vector_to_int4), _6(1007), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_to_int4"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_to_float8", 1, + AddBuiltinFunc(_0(8213), _1("vector_to_float8"), _2(1), _3(true), _4(false), _5(vector_to_float8), 
_6(1022), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_to_float8"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_to_numeric", 1, + AddBuiltinFunc(_0(8221), _1("vector_to_numeric"), _2(1), _3(true), _4(false), _5(vector_to_numeric), _6(1231), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_to_numeric"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_to_text", 1, + AddBuiltinFunc(_0(8222), _1("vector_to_text"), _2(1), _3(true), _4(false), _5(vector_to_text), _6(1009), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_to_text"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_to_varchar", 1, + AddBuiltinFunc(_0(8223), _1("vector_to_varchar"), _2(1), _3(true), _4(false), _5(vector_to_varchar), _6(1015), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(1, 8305), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_to_varchar"), 
_26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "sparsevec", 1, + AddBuiltinFunc(_0(8228), _1("sparsevec"), _2(3), _3(true), _4(false), _5(sparsevec), _6(8307), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(3, 8307, 23, 16), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("sparsevec"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "vector_to_sparsevec", 1, + AddBuiltinFunc(_0(8229), _1("vector_to_sparsevec"), _2(3), _3(true), _4(false), _5(vector_to_sparsevec), _6(8307), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(3, 8305, 23, 16), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("vector_to_sparsevec"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), + AddFuncGroup( + "sparsevec_to_vector", 1, + AddBuiltinFunc(_0(8230), _1("sparsevec_to_vector"), _2(3), _3(true), _4(false), _5(sparsevec_to_vector), _6(8305), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(3, 8307, 23, 16), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("sparsevec_to_vector"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(false), _32(false), _33("NULL"), _34('f'), _35(NULL), _36(0), _37(false), _38(NULL), _39(NULL), _40(0)) + ), diff --git a/src/common/backend/catalog/index.cpp 
b/src/common/backend/catalog/index.cpp index e14d9887b3b1f94fcaf006bf831cbccdc34f80f5..fc9d4497735af44d035621e731026207ef36addc 100644 --- a/src/common/backend/catalog/index.cpp +++ b/src/common/backend/catalog/index.cpp @@ -416,7 +416,7 @@ static TupleDesc ConstructTupleDescriptor(Relation heapRelation, IndexInfo* inde to->attalign = typeTup->typalign; to->attstattarget = -1; to->attcacheoff = -1; - to->atttypmod = -1; + to->atttypmod = exprTypmod(indexkey); to->attislocal = true; to->attcollation = (i < numkeyatts) ? collationObjectId[i] : InvalidOid; @@ -3816,7 +3816,7 @@ static TransactionId GetCatalogOldestXmin(Relation heapRelation) * chain tip. */ double IndexBuildHeapScan(Relation heapRelation, Relation indexRelation, IndexInfo* indexInfo, bool allow_sync, - IndexBuildCallback callback, void* callbackState, TableScanDesc scan) + IndexBuildCallback callback, void* callbackState, TableScanDesc scan, BlockNumber startBlkno, BlockNumber numblocks) { bool is_system_catalog = false; bool checking_uniqueness = false; @@ -3904,6 +3904,22 @@ double IndexBuildHeapScan(Relation heapRelation, Relation indexRelation, IndexIn snapshot = SnapshotAny; OldestXmin = GetOldestXmin(heapRelation); } + + /* set our scan endpoints */ + if (!allow_sync && BlockNumberIsValid(numblocks)) { + Assert(!scan->rs_inited); + Assert(!(scan->rs_flags & SO_ALLOW_SYNC)); + Assert(startBlkno == 0 || startBlkno < scan->rs_nblocks); + + scan->rs_rangeScanInRedis.isRangeScanInRedis = true; + scan->rs_startblock = startBlkno; + scan->rs_nblocks = numblocks; + } else { + /* synscan can only be requested on whole relation */ + Assert(startBlkno == 0); + Assert(numblocks == InvalidBlockNumber); + } + reltuples = 0; /* diff --git a/src/common/backend/nodes/copyfuncs.cpp b/src/common/backend/nodes/copyfuncs.cpp index d05b4de95359ca057ac1dc09ebf624e664df79c2..749d13457fc564721d9e34b9b5d0eeb94e612a22 100644 --- a/src/common/backend/nodes/copyfuncs.cpp +++ b/src/common/backend/nodes/copyfuncs.cpp @@ 
-896,6 +896,34 @@ static CStoreIndexHeapScan* _copyCStoreIndexHeapScan(const CStoreIndexHeapScan* return newnode; } +/* + * _copyAnnIndexScan + */ +static AnnIndexScan* _copyAnnIndexScan(const AnnIndexScan* from) +{ + AnnIndexScan* newnode = makeNode(AnnIndexScan); + + /* + * copy node superclass fields + */ + CopyScanFields((const Scan*)from, (Scan*)newnode); + + /* + * copy remainder of node + */ + COPY_SCALAR_FIELD(indexid); + COPY_NODE_FIELD(indexqual); + COPY_NODE_FIELD(indexqualorig); + COPY_NODE_FIELD(indexorderby); + COPY_NODE_FIELD(indexorderbyorig); + COPY_SCALAR_FIELD(indexorderdir); + COPY_SCALAR_FIELD(is_ustore); + COPY_SCALAR_FIELD(selectivity); + COPY_SCALAR_FIELD(is_partial); + + return newnode; +} + /* * _copyTidScan */ @@ -7459,6 +7487,7 @@ static IndexOptInfo *_copyPartialIndexOptInfo(const IndexOptInfo *from) COPY_NODE_FIELD(indextlist); COPY_SCALAR_FIELD(isGlobal); + COPY_SCALAR_FIELD(isAnnIndex); COPY_SCALAR_FIELD(crossbucket); COPY_SCALAR_FIELD(predOK); COPY_SCALAR_FIELD(unique); @@ -8005,6 +8034,9 @@ void* copyObject(const void* from) case T_BitmapHeapScan: retval = _copyBitmapHeapScan((BitmapHeapScan*)from); break; + case T_AnnIndexScan: + retval = _copyAnnIndexScan((AnnIndexScan*)from); + break; case T_TidScan: retval = _copyTidScan((TidScan*)from); break; diff --git a/src/common/backend/nodes/nodes.cpp b/src/common/backend/nodes/nodes.cpp index 538b1887f6337eac2be0188f10466ca593b6f189..0214991157be6aa198675ba672ad254a85fe4569 100755 --- a/src/common/backend/nodes/nodes.cpp +++ b/src/common/backend/nodes/nodes.cpp @@ -60,6 +60,7 @@ static const TagStr g_tagStrArr[] = {{T_Invalid, "Invalid"}, {T_IndexOnlyScan, "IndexOnlyScan"}, {T_BitmapIndexScan, "BitmapIndexScan"}, {T_BitmapHeapScan, "BitmapHeapScan"}, + {T_AnnIndexScan, "AnnIndexScan"}, {T_TidScan, "TidScan"}, {T_SubqueryScan, "SubqueryScan"}, {T_FunctionScan, "FunctionScan"}, @@ -134,6 +135,7 @@ static const TagStr g_tagStrArr[] = {{T_Invalid, "Invalid"}, {T_IndexOnlyScanState, 
"IndexOnlyScanState"}, {T_BitmapIndexScanState, "BitmapIndexScanState"}, {T_BitmapHeapScanState, "BitmapHeapScanState"}, + {T_AnnIndexScanState, "AnnIndexScanState"}, {T_TidScanState, "TidScanState"}, {T_SubqueryScanState, "SubqueryScanState"}, {T_FunctionScanState, "FunctionScanState"}, diff --git a/src/common/backend/nodes/outfuncs.cpp b/src/common/backend/nodes/outfuncs.cpp index c0f30f9d3be94872f794e329276fbc560a96cfb7..bc84c56d4b7dd2ecb4dd4aee213f922d9abbcb71 100755 --- a/src/common/backend/nodes/outfuncs.cpp +++ b/src/common/backend/nodes/outfuncs.cpp @@ -855,6 +855,20 @@ static void _outBitmapHeapScanInfo(StringInfo str, BitmapHeapScan* node) WRITE_NODE_FIELD(bitmapqualorig); } +static void _outAnnIndexScanInfo(StringInfo str, AnnIndexScan* node) +{ + _outCommonIndexScanPart(str, node); + if (t_thrd.proc->workingVersionNum >= INPLACE_UPDATE_VERSION_NUM) { + WRITE_BOOL_FIELD(is_ustore); + } + if (t_thrd.proc->workingVersionNum >= PLAN_SELECT_VERSION_NUM) { + if (u_sess->opt_cxt.out_plan_stat) { + WRITE_FLOAT_FIELD(selectivity, "%.4f"); + } + WRITE_BOOL_FIELD(is_partial); + } +} + /* * print the basic stuff of all nodes that inherit from Join */ @@ -1192,6 +1206,12 @@ static void _outCStoreIndexScan(StringInfo str, CStoreIndexScan* node) WRITE_BOOL_FIELD(indexonly); } +static void _outAnnIndexScan(StringInfo str, AnnIndexScan* node) +{ + WRITE_NODE_TYPE("ANNINDEXSCAN"); + _outAnnIndexScanInfo(str, node); +} + static void _outStream(StringInfo str, Stream* node) { WRITE_NODE_TYPE("STREAM"); @@ -3323,6 +3343,12 @@ static void _outIndexPath(StringInfo str, IndexPath* node) WRITE_ENUM_FIELD(indexscandir, ScanDirection); WRITE_FLOAT_FIELD(indextotalcost, "%.2f"); WRITE_FLOAT_FIELD(indexselectivity, "%.4f"); + WRITE_BOOL_FIELD(isAnnIndex); + WRITE_NODE_FIELD(annQuals); + WRITE_NODE_FIELD(annQualCols); + WRITE_FLOAT_FIELD(annQualTotalCost, "%.2f"); + WRITE_FLOAT_FIELD(annQualSelectivity, "%.4f"); + WRITE_FLOAT_FIELD(allcost, "%.2f"); if 
(t_thrd.proc->workingVersionNum >= INPLACE_UPDATE_VERSION_NUM) { WRITE_BOOL_FIELD(is_ustore); } @@ -6530,6 +6556,9 @@ static void _outNode(StringInfo str, const void* obj) case T_BitmapHeapScan: _outBitmapHeapScan(str, (BitmapHeapScan*)obj); break; + case T_AnnIndexScan: + _outAnnIndexScan(str, (AnnIndexScan*)obj); + break; case T_CStoreIndexCtidScan: _outCStoreIndexCtidScan(str, (CStoreIndexCtidScan*)obj); break; diff --git a/src/common/backend/nodes/readfuncs.cpp b/src/common/backend/nodes/readfuncs.cpp index 4b1844e28b8a77299b7acb6ab40638bea3084a52..885155469bb98a998dbea60eeb5a30e33726ad61 100755 --- a/src/common/backend/nodes/readfuncs.cpp +++ b/src/common/backend/nodes/readfuncs.cpp @@ -4153,6 +4153,54 @@ static CStoreIndexScan* _readCStoreIndexScan(CStoreIndexScan* local_node) READ_DONE(); } +static AnnIndexScan* _readAnnIndexScan(AnnIndexScan* local_node) +{ + READ_LOCALS_NULL(AnnIndexScan); + READ_TEMP_LOCALS(); + + // Read Scan + _readScan(&local_node->scan); + + READ_OID_FIELD(indexid); +#ifdef STREAMPLAN + // Note: The Oid shipped(in plan) is invalid here + // We need to get the Oid on this node + if (local_node->indexid >= FirstBootstrapObjectId) { + IF_EXIST(indexname) { + char *indexname, *indexnamespace; + + token = pg_strtok(&length); + token = pg_strtok(&length); + indexname = nullable_string(token, length); + token = pg_strtok(&length); + token = pg_strtok(&length); + indexnamespace = nullable_string(token, length); + if (!IS_PGXC_COORDINATOR) + local_node->indexid = get_valid_relname_relid(indexnamespace, indexname); + + pfree_ext(indexname); + pfree_ext(indexnamespace); + } + } +#endif // STREAMPLAN + + READ_NODE_FIELD(indexqual); + READ_NODE_FIELD(indexqualorig); + READ_NODE_FIELD(indexorderby); + READ_NODE_FIELD(indexorderbyorig); + READ_ENUM_FIELD(indexorderdir, ScanDirection); + IF_EXIST(is_ustore) { + READ_BOOL_FIELD(is_ustore); + } + IF_EXIST(selectivity) { + READ_FLOAT_FIELD(selectivity); + } + IF_EXIST(is_partial) { + 
READ_BOOL_FIELD(is_partial); + } + READ_DONE(); +} + static Sort* _readSort(Sort* local_node) { READ_LOCALS_NULL(Sort); @@ -6795,7 +6843,9 @@ Node* parseNodeString(void) return_value = _readSubqueryScan(NULL); } else if (MATCH("INDEXSCAN", 9)) { return_value = _readIndexScan(NULL); - } else if (MATCH("JOIN", 4)) { + } else if (MATCH("ANNINDEXSCAN", 12)) { + return_value = _readAnnIndexScan(NULL); + } else if (MATCH("JOIN", 4)) { return_value = _readJoin(NULL); } else if (MATCH("HASH", 4)) { return_value = _readHash(NULL); diff --git a/src/common/backend/parser/gram.y b/src/common/backend/parser/gram.y index 55dcdab2195b1cb417b7edabf07376ab18139f0f..259403a5b378d2fe4ab1f50df0c3c1a6209863d5 100644 --- a/src/common/backend/parser/gram.y +++ b/src/common/backend/parser/gram.y @@ -22967,14 +22967,6 @@ InsertStmt: opt_with_clause INSERT hint_string INTO insert_target insert_rest re } | opt_with_clause INSERT hint_string INTO insert_target insert_rest upsert_clause returning_clause { - if ($8 != NIL) { - const char* message = "RETURNING clause is not yet supported whithin INSERT ON DUPLICATE KEY UPDATE statement."; - InsertErrorMessage(message, u_sess->plsql_cxt.plpgsql_yylloc); - ereport(errstate, - (errmodule(MOD_PARSER), - errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("RETURNING clause is not yet supported whithin INSERT ON DUPLICATE KEY UPDATE statement."))); - } if ($1 != NULL) { const char* message = "WITH clause is not yet supported whithin INSERT ON DUPLICATE KEY UPDATE statement."; InsertErrorMessage(message, u_sess->plsql_cxt.plpgsql_yylloc); diff --git a/src/common/backend/utils/adt/CMakeLists.txt b/src/common/backend/utils/adt/CMakeLists.txt index b33909070e36b7596f102993986a4425d79055a6..5a8b7baa95c162f28f2d5d1d7ff1d47d2e0fd02b 100755 --- a/src/common/backend/utils/adt/CMakeLists.txt +++ b/src/common/backend/utils/adt/CMakeLists.txt @@ -13,6 +13,20 @@ set(TGT_adt_INC ${LIBOPENSSL_INCLUDE_PATH} ) +if ((APPLE AND CMAKE_SYSTEM_PROCESSOR MATCHES "arm") OR 
CMAKE_SYSTEM_PROCESSOR MATCHES "^ppc64") # MATCHES takes a regular expression, not a glob + set(datavec_OPTFLAGS "") +else () + set(datavec_OPTFLAGS "-march=native") +endif () + +set(datavec_OPTFLAGS ${datavec_OPTFLAGS} -ftree-vectorize -fassociative-math -fno-signed-zeros -fno-trapping-math) + +set_property( # SOURCE/APPEND are set_property() keywords; set_source_files_properties() would read them as file names and overwrite instead of append + SOURCE vector.cpp f2s.cpp halfvec.cpp halfutils.cpp sparsevec.cpp bitvec.cpp + APPEND + PROPERTY COMPILE_OPTIONS "${datavec_OPTFLAGS}" +) + set(adt_DEF_OPTIONS ${MACRO_OPTIONS}) set(adt_COMPILE_OPTIONS ${OPTIMIZE_OPTIONS} ${OS_OPTIONS} ${PROTECT_OPTIONS} ${WARNING_OPTIONS} ${BIN_SECURE_OPTIONS} ${CHECK_OPTIONS}) set(adt_LINK_OPTIONS ${BIN_LINK_OPTIONS}) diff --git a/src/common/backend/utils/adt/Makefile b/src/common/backend/utils/adt/Makefile index a9bd1b0cd56ba75e8ac10eda0e6d76e0acacab52..c95946451087e3378d11a6d2e812327d5d3bca6d 100644 --- a/src/common/backend/utils/adt/Makefile +++ b/src/common/backend/utils/adt/Makefile @@ -39,7 +39,26 @@ OBJS = acl.o arrayfuncs.o array_selfuncs.o array_typanalyze.o \ tsquery_op.o tsquery_rewrite.o tsquery_util.o tsrank.o \ tsvector.o tsvector_op.o tsvector_parser.o \ txid.o uuid.o windowfuncs.o xml.o extended_statistics.o clientlogic_bytea.o clientlogicsettings.o \ - median_aggs.o expr_distinct.o nlssort.o memory_func.o first_last_agg.o encrypt_decrypt.o expandeddatum.o + median_aggs.o expr_distinct.o nlssort.o memory_func.o first_last_agg.o encrypt_decrypt.o expandeddatum.o \ + bitvec.o f2s.o halfutils.o halfvec.o sparsevec.o vector.o + +VECTOR_OPT = -march=native + +ifeq ($(shell uname -s), Darwin) + ifeq ($(shell uname -p), arm) + VECTOR_OPT = + endif +endif + +ifneq ($(filter ppc64%, $(shell uname -m)), ) + VECTOR_OPT = +endif + +VECTOR_CFLAGS += $(VECTOR_OPT) -ftree-vectorize -fassociative-math -fno-signed-zeros -fno-trapping-math + +VECTOR_SRC = bitvec.o f2s.o halfutils.o halfvec.o sparsevec.o vector.o + +$(VECTOR_SRC): CPPFLAGS +=$(filter-out -fstack-protector, $(CFLAGS)) -fstack-protector-all -Wl,-z,relro,-z,now -fPIC $(VECTOR_CFLAGS) like.o: 
like.cpp like_match.cpp diff --git a/src/common/backend/utils/adt/bitvec.cpp b/src/common/backend/utils/adt/bitvec.cpp new file mode 100644 index 0000000000000000000000000000000000000000..71f2cae79aab44726f978ddec361aabba0cab689 --- /dev/null +++ b/src/common/backend/utils/adt/bitvec.cpp @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2024 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + * ------------------------------------------------------------------------- + * + * bitvec.cpp + * + * IDENTIFICATION + * src/common/backend/utils/adt/bitvec.cpp + * + * ------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/datavec/bitvec.h" +#include "utils/varbit.h" + +uint64 (*BitHammingDistance)(uint32 bytes, unsigned char *ax, unsigned char *bx, uint64 distance); /* Hamming kernel pointer; never assigned in this file - presumably installed elsewhere before first use, TODO confirm */ +double (*BitJaccardDistance)(uint32 bytes, unsigned char *ax, unsigned char *bx, uint64 ab, uint64 aa, uint64 bb); /* Jaccard kernel pointer; never assigned in this file - presumably installed elsewhere before first use, TODO confirm */ + +/* + * Allocate (palloc0) a VarBit of VARBITTOTALLEN(dim) bytes, set its varlena size, and record dim as its bit length + */ +VarBit *InitBitVector(int dim) +{ + VarBit *result; + int size; + + size = VARBITTOTALLEN(dim); + result = (VarBit *)palloc0(size); + SET_VARSIZE(result, size); + VARBITLEN(result) = dim; + + return result; +} + +/* + * Ensure same dimensions: reports ERROR with ERRCODE_DATA_EXCEPTION when VARBITLEN(a) != VARBITLEN(b) + */ +static inline void CheckDims(VarBit *a, VarBit *b) +{ + if (VARBITLEN(a) != VARBITLEN(b)) + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), + errmsg("different bit lengths %u and %u", VARBITLEN(a), VARBITLEN(b)))); +} + +/* + * Get the Hamming distance between two bit vectors, returned as float8; lengths are checked with CheckDims first and 0 is passed as the kernel's distance argument + 
*/ +PGDLLEXPORT PG_FUNCTION_INFO_V1(hamming_distance); +Datum hamming_distance(PG_FUNCTION_ARGS) +{ + VarBit *a = PG_GETARG_VARBIT_P(0); + VarBit *b = PG_GETARG_VARBIT_P(1); + + CheckDims(a, b); + + PG_RETURN_FLOAT8((double)BitHammingDistance(VARBITBYTES(a), VARBITS(a), VARBITS(b), 0)); +} + +/* + * Get the Jaccard distance between two bit vectors, returned as float8; lengths are checked with CheckDims first and 0 is passed for each of the kernel's ab, aa and bb arguments + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(jaccard_distance); +Datum jaccard_distance(PG_FUNCTION_ARGS) +{ + VarBit *a = PG_GETARG_VARBIT_P(0); + VarBit *b = PG_GETARG_VARBIT_P(1); + + CheckDims(a, b); + + PG_RETURN_FLOAT8(BitJaccardDistance(VARBITBYTES(a), VARBITS(a), VARBITS(b), 0, 0, 0)); +} diff --git a/src/common/backend/utils/adt/f2s.cpp b/src/common/backend/utils/adt/f2s.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7fc9677b0f8fa2c084e62aba1680b0c4a76d43aa --- /dev/null +++ b/src/common/backend/utils/adt/f2s.cpp @@ -0,0 +1,721 @@ +/* + * Copyright (c) 2024 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * ------------------------------------------------------------------------- + * + * f2s.cpp + * + * IDENTIFICATION + * src/common/backend/utils/adt/f2s.cpp + * + * ------------------------------------------------------------------------- + */ + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#include "access/datavec/shortest_dec.h" + +#include "access/datavec/ryu_common.h" + +#define FLOAT_MANTISSA_BITS 23 +#define FLOAT_EXPONENT_BITS 8 +#define FLOAT_BIAS 127 + +/* + * This table is generated (by the upstream) by PrintFloatLookupTable, + * and modified (by us) to add UINT64CONST. + */ +#define FLOAT_POW5_INV_BITCOUNT 59 +static const uint64 FLOAT_POW5_INV_SPLIT[31] = { + UINT64CONST(576460752303423489), UINT64CONST(461168601842738791), UINT64CONST(368934881474191033), + UINT64CONST(295147905179352826), UINT64CONST(472236648286964522), UINT64CONST(377789318629571618), + UINT64CONST(302231454903657294), UINT64CONST(483570327845851670), UINT64CONST(386856262276681336), + UINT64CONST(309485009821345069), UINT64CONST(495176015714152110), UINT64CONST(396140812571321688), + UINT64CONST(316912650057057351), UINT64CONST(507060240091291761), UINT64CONST(405648192073033409), + UINT64CONST(324518553658426727), UINT64CONST(519229685853482763), UINT64CONST(415383748682786211), + UINT64CONST(332306998946228969), UINT64CONST(531691198313966350), UINT64CONST(425352958651173080), + UINT64CONST(340282366920938464), UINT64CONST(544451787073501542), UINT64CONST(435561429658801234), + UINT64CONST(348449143727040987), UINT64CONST(557518629963265579), UINT64CONST(446014903970612463), + UINT64CONST(356811923176489971), UINT64CONST(570899077082383953), UINT64CONST(456719261665907162), + UINT64CONST(365375409332725730)}; +#define FLOAT_POW5_BITCOUNT 61 +static const uint64 FLOAT_POW5_SPLIT[47] = { + UINT64CONST(1152921504606846976), UINT64CONST(1441151880758558720), UINT64CONST(1801439850948198400), + UINT64CONST(2251799813685248000), 
UINT64CONST(1407374883553280000), UINT64CONST(1759218604441600000), + UINT64CONST(2199023255552000000), UINT64CONST(1374389534720000000), UINT64CONST(1717986918400000000), + UINT64CONST(2147483648000000000), UINT64CONST(1342177280000000000), UINT64CONST(1677721600000000000), + UINT64CONST(2097152000000000000), UINT64CONST(1310720000000000000), UINT64CONST(1638400000000000000), + UINT64CONST(2048000000000000000), UINT64CONST(1280000000000000000), UINT64CONST(1600000000000000000), + UINT64CONST(2000000000000000000), UINT64CONST(1250000000000000000), UINT64CONST(1562500000000000000), + UINT64CONST(1953125000000000000), UINT64CONST(1220703125000000000), UINT64CONST(1525878906250000000), + UINT64CONST(1907348632812500000), UINT64CONST(1192092895507812500), UINT64CONST(1490116119384765625), + UINT64CONST(1862645149230957031), UINT64CONST(1164153218269348144), UINT64CONST(1455191522836685180), + UINT64CONST(1818989403545856475), UINT64CONST(2273736754432320594), UINT64CONST(1421085471520200371), + UINT64CONST(1776356839400250464), UINT64CONST(2220446049250313080), UINT64CONST(1387778780781445675), + UINT64CONST(1734723475976807094), UINT64CONST(2168404344971008868), UINT64CONST(1355252715606880542), + UINT64CONST(1694065894508600678), UINT64CONST(2117582368135750847), UINT64CONST(1323488980084844279), + UINT64CONST(1654361225106055349), UINT64CONST(2067951531382569187), UINT64CONST(1292469707114105741), + UINT64CONST(1615587133892632177), UINT64CONST(2019483917365790221)}; + +static inline uint32 pow5Factor(uint32 value) +{ + uint32 count = 0; + + for (;;) { + Assert(value != 0); + const uint32 q = value / 5; + const uint32 r = value % 5; + + if (r != 0) { + break; + } + + value = q; + ++count; + } + return count; +} + +/* Returns true if value is divisible by 5^p. */ +static inline bool multipleOfPowerOf5(const uint32 value, const uint32 p) +{ + return pow5Factor(value) >= p; +} + +/* Returns true if value is divisible by 2^p. 
*/ +static inline bool multipleOfPowerOf2(const uint32 value, const uint32 p) +{ + return (value & ((1u << p) - 1)) == 0; +} + +/* + * It seems to be slightly faster to avoid uint128_t here, although the + * generated code for uint128_t looks slightly nicer. + */ +static inline uint32 mulShift(const uint32 m, const uint64 factor, const int32 shift) +{ + /* + * The casts here help MSVC to avoid calls to the __allmul library + * function. + */ + const uint32 factorLo = (uint32)(factor); + const uint32 factorHi = (uint32)(factor >> 32); + const uint64 bits0 = (uint64)m * factorLo; + const uint64 bits1 = (uint64)m * factorHi; + + Assert(shift > 32); + +#ifdef RYU_32_BIT_PLATFORM + + /* + * On 32-bit platforms we can avoid a 64-bit shift-right since we only + * need the upper 32 bits of the result and the shift value is > 32. + */ + const uint32 bits0Hi = (uint32)(bits0 >> 32); + uint32 bits1Lo = (uint32)(bits1); + uint32 bits1Hi = (uint32)(bits1 >> 32); + + bits1Lo += bits0Hi; + bits1Hi += (bits1Lo < bits0Hi); + + const int32 s = shift - 32; + + return (bits1Hi << (32 - s)) | (bits1Lo >> s); + +#else /* RYU_32_BIT_PLATFORM */ + + const uint64 sum = (bits0 >> 32) + bits1; + const uint64 shiftedSum = sum >> (shift - 32); + + Assert(shiftedSum <= UINT32_MAX); + return (uint32)shiftedSum; + +#endif /* RYU_32_BIT_PLATFORM */ +} + +static inline uint32 mulPow5InvDivPow2(const uint32 m, const uint32 q, const int32 j) +{ + return mulShift(m, FLOAT_POW5_INV_SPLIT[q], j); +} + +static inline uint32 mulPow5divPow2(const uint32 m, const uint32 i, const int32 j) +{ + return mulShift(m, FLOAT_POW5_SPLIT[i], j); +} + +static inline uint32 decimalLength(const uint32 v) +{ + /* Function precondition: v is not a 10-digit number. */ + /* (9 digits are sufficient for round-tripping.) 
*/ + Assert(v < 1000000000); + if (v >= 100000000) { + return 9; + } + if (v >= 10000000) { + return 8; + } + if (v >= 1000000) { + return 7; + } + if (v >= 100000) { + return 6; + } + if (v >= 10000) { + return 5; + } + if (v >= 1000) { + return 4; + } + if (v >= 100) { + return 3; + } + if (v >= 10) { + return 2; + } + return 1; +} + +/* A floating decimal representing m * 10^e. */ +typedef struct floating_decimal_32 { + uint32 mantissa; + int32 exponent; +} floating_decimal_32; + +static inline floating_decimal_32 f2d(const uint32 ieeeMantissa, const uint32 ieeeExponent) +{ + int32 e2; + uint32 m2; + + if (ieeeExponent == 0) { + /* We subtract 2 so that the bounds computation has 2 additional bits. */ + e2 = 1 - FLOAT_BIAS - FLOAT_MANTISSA_BITS - 2; + m2 = ieeeMantissa; + } else { + e2 = ieeeExponent - FLOAT_BIAS - FLOAT_MANTISSA_BITS - 2; + m2 = (1u << FLOAT_MANTISSA_BITS) | ieeeMantissa; + } + +#if STRICTLY_SHORTEST + const bool even = (m2 & 1) == 0; + const bool acceptBounds = even; +#else + const bool acceptBounds = false; +#endif + + /* Step 2: Determine the interval of legal decimal representations. */ + const uint32 mv = 4 * m2; + const uint32 mp = 4 * m2 + 2; + + /* Implicit bool -> int conversion. True is 1, false is 0. */ + const uint32 mmShift = ieeeMantissa != 0 || ieeeExponent <= 1; + const uint32 mm = 4 * m2 - 1 - mmShift; + + /* Step 3: Convert to a decimal power base using 64-bit arithmetic. */ + uint32 vr, vp, vm; + int32 e10; + bool vmIsTrailingZeros = false; + bool vrIsTrailingZeros = false; + uint8 lastRemovedDigit = 0; + + if (e2 >= 0) { + const uint32 q = log10Pow2(e2); + + e10 = q; + + const int32 k = FLOAT_POW5_INV_BITCOUNT + pow5bits(q) - 1; + const int32 i = -e2 + q + k; + + vr = mulPow5InvDivPow2(mv, q, i); + vp = mulPow5InvDivPow2(mp, q, i); + vm = mulPow5InvDivPow2(mm, q, i); + if (q != 0 && (vp - 1) / 10 <= vm / 10) { + /* + * We need to know one removed digit even if we are not going to + * loop below. 
We could use q = X - 1 above, except that would + * require 33 bits for the result, and we've found that 32-bit + * arithmetic is faster even on 64-bit machines. + */ + const int32 l = FLOAT_POW5_INV_BITCOUNT + pow5bits(q - 1) - 1; + + lastRemovedDigit = (uint8)(mulPow5InvDivPow2(mv, q - 1, -e2 + q - 1 + l) % 10); + } + if (q <= 9) { + /* + * The largest power of 5 that fits in 24 bits is 5^10, but q <= 9 + * seems to be safe as well. + * + * Only one of mp, mv, and mm can be a multiple of 5, if any. + */ + if (mv % 5 == 0) { + vrIsTrailingZeros = multipleOfPowerOf5(mv, q); + } else if (acceptBounds) { + vmIsTrailingZeros = multipleOfPowerOf5(mm, q); + } else { + vp -= multipleOfPowerOf5(mp, q); + } + } + } else { + const uint32 q = log10Pow5(-e2); + + e10 = q + e2; + + const int32 i = -e2 - q; + const int32 k = pow5bits(i) - FLOAT_POW5_BITCOUNT; + int32 j = q - k; + + vr = mulPow5divPow2(mv, i, j); + vp = mulPow5divPow2(mp, i, j); + vm = mulPow5divPow2(mm, i, j); + if (q != 0 && (vp - 1) / 10 <= vm / 10) { + j = q - 1 - (pow5bits(i + 1) - FLOAT_POW5_BITCOUNT); + lastRemovedDigit = (uint8)(mulPow5divPow2(mv, i + 1, j) % 10); + } + if (q <= 1) { + /* + * {vr,vp,vm} is trailing zeros if {mv,mp,mm} has at least q + * trailing 0 bits. + */ + /* mv = 4 * m2, so it always has at least two trailing 0 bits. */ + vrIsTrailingZeros = true; + if (acceptBounds) { + /* + * mm = mv - 1 - mmShift, so it has 1 trailing 0 bit iff + * mmShift == 1. + */ + vmIsTrailingZeros = mmShift == 1; + } else { + /* + * mp = mv + 2, so it always has at least one trailing 0 bit. + */ + --vp; + } + } else if (q < 31) { + vrIsTrailingZeros = multipleOfPowerOf2(mv, q - 1); + } + } + + /* + * Step 4: Find the shortest decimal representation in the interval of + * legal representations. + */ + uint32 removed = 0; + uint32 output; + + if (vmIsTrailingZeros || vrIsTrailingZeros) { + /* General case, which happens rarely (~4.0%). 
*/ + while (vp / 10 > vm / 10) { + vmIsTrailingZeros &= vm - (vm / 10) * 10 == 0; + vrIsTrailingZeros &= lastRemovedDigit == 0; + lastRemovedDigit = (uint8)(vr % 10); + vr /= 10; + vp /= 10; + vm /= 10; + ++removed; + } + if (vmIsTrailingZeros) { + while (vm % 10 == 0) { + vrIsTrailingZeros &= lastRemovedDigit == 0; + lastRemovedDigit = (uint8)(vr % 10); + vr /= 10; + vp /= 10; + vm /= 10; + ++removed; + } + } + + if (vrIsTrailingZeros && lastRemovedDigit == 5 && vr % 2 == 0) { + /* Round even if the exact number is .....50..0. */ + lastRemovedDigit = 4; + } + + /* + * We need to take vr + 1 if vr is outside bounds or we need to round + * up. + */ + output = vr + ((vr == vm && (!acceptBounds || !vmIsTrailingZeros)) || lastRemovedDigit >= 5); + } else { + /* + * Specialized for the common case (~96.0%). Percentages below are + * relative to this. + * + * Loop iterations below (approximately): 0: 13.6%, 1: 70.7%, 2: + * 14.1%, 3: 1.39%, 4: 0.14%, 5+: 0.01% + */ + while (vp / 10 > vm / 10) { + lastRemovedDigit = (uint8)(vr % 10); + vr /= 10; + vp /= 10; + vm /= 10; + ++removed; + } + + /* + * We need to take vr + 1 if vr is outside bounds or we need to round + * up. + */ + output = vr + (vr == vm || lastRemovedDigit >= 5); + } + + const int32 exp = e10 + removed; + + floating_decimal_32 fd; + + fd.exponent = exp; + fd.mantissa = output; + return fd; +} + +static inline int to_chars_f(const floating_decimal_32 v, const uint32 olength, char *const result) +{ + /* Step 5: Print the decimal representation. */ + int index = 0; + + uint32 output = v.mantissa; + int32 exp = v.exponent; + errno_t rc = EOK; + + /*---- + * On entry, mantissa * 10^exp is the result to be output. + * Caller has already done the - sign if needed. + * + * We want to insert the point somewhere depending on the output length + * and exponent, which might mean adding zeros: + * + * exp | format + * 1+ | ddddddddd000000 + * 0 | ddddddddd + * -1 .. -len+1 | dddddddd.d to d.ddddddddd + * -len ... 
| 0.ddddddddd to 0.000dddddd + */ + uint32 i = 0; + int32 nexp = exp + olength; + + if (nexp <= 0) { + /* -nexp is number of 0s to add after '.' */ + Assert(nexp >= -3); + /* 0.000ddddd */ + index = 2 - nexp; + /* copy 8 bytes rather than 5 to let compiler optimize */ + rc = memcpy_s(result, 8, "0.000000", 8); + securec_check(rc, "\0", "\0"); + } else if (exp < 0) { + /* + * dddd.dddd; leave space at the start and move the '.' in after + */ + index = 1; + } else { + /* + * We can save some code later by pre-filling with zeros. We know + * that there can be no more than 6 output digits in this form, + * otherwise we would not choose fixed-point output. memset 8 + * rather than 6 bytes to let the compiler optimize it. + */ + Assert(exp < 6 && exp + olength <= 6); + rc = memset_s(result, 8, '0', 8); + securec_check(rc, "\0", "\0"); + } + + while (output >= 10000) { + const uint32 c = output - 10000 * (output / 10000); + const uint32 c0 = (c % 100) << 1; + const uint32 c1 = (c / 100) << 1; + + output /= 10000; + + rc = memcpy_s(result + index + olength - i - 2, 2, DIGIT_TABLE + c0, 2); + securec_check(rc, "\0", "\0"); + rc = memcpy_s(result + index + olength - i - 4, 2, DIGIT_TABLE + c1, 2); + securec_check(rc, "\0", "\0"); + i += 4; + } + if (output >= 100) { + const uint32 c = (output % 100) << 1; + + output /= 100; + rc = memcpy_s(result + index + olength - i - 2, 2, DIGIT_TABLE + c, 2); + securec_check(rc, "\0", "\0"); + i += 2; + } + if (output >= 10) { + const uint32 c = output << 1; + + rc = memcpy_s(result + index + olength - i - 2, 2, DIGIT_TABLE + c, 2); + securec_check(rc, "\0", "\0"); + } else { + result[index] = (char)('0' + output); + } + + if (index == 1) { + /* + * nexp is 1..6 here, representing the number of digits before the + * point. A value of 7+ is not possible because we switch to + * scientific notation when the display exponent reaches 6. 
+ */ + Assert(nexp < 7); + /* gcc only seems to want to optimize memmove for small 2^n */ + if (nexp & 4) { + rc = memmove_s(result + index - 1, 4, result + index, 4); + securec_check(rc, "\0", "\0"); + index += 4; + } + if (nexp & 2) { + rc = memmove_s(result + index - 1, 2, result + index, 2); + securec_check(rc, "\0", "\0"); + index += 2; + } + if (nexp & 1) { + result[index - 1] = result[index]; + } + result[nexp] = '.'; + index = olength + 1; + } else if (exp >= 0) { + /* we supplied the trailing zeros earlier, now just set the length. */ + index = olength + exp; + } else { + index = olength + (2 - nexp); + } + + return index; +} + +static inline int ToChars(const floating_decimal_32 v, const bool sign, char *const result) +{ + /* Step 5: Print the decimal representation. */ + int index = 0; + + uint32 output = v.mantissa; + uint32 olength = decimalLength(output); + int32 exp = v.exponent + olength - 1; + errno_t rc = EOK; + + if (sign) { + result[index++] = '-'; + } + + /* + * The thresholds for fixed-point output are chosen to match printf + * defaults. Beware that both the code of to_chars_f and the value + * of FLOAT_SHORTEST_DECIMAL_LEN are sensitive to these thresholds. + */ + if (exp >= -4 && exp < 6) { + return to_chars_f(v, olength, result + index) + sign; + } + + /* + * If v.exponent is exactly 0, we might have reached here via the small + * integer fast path, in which case v.mantissa might contain trailing + * (decimal) zeros. For scientific notation we need to move these zeros + * into the exponent. (For fixed point this doesn't matter, which is why + * we do this here rather than above.) + * + * Since we already calculated the display exponent (exp) above based on + * the old decimal length, that value does not change here. Instead, we + * just reduce the display length for each digit removed. 
+ * + * If we didn't get here via the fast path, the raw exponent will not + * usually be 0, and there will be no trailing zeros, so we pay no more + * than one div10/multiply extra cost. We claw back half of that by + * checking for divisibility by 2 before dividing by 10. + */ + if (v.exponent == 0) { + while ((output & 1) == 0) { + const uint32 q = output / 10; + const uint32 r = output - 10 * q; + + if (r != 0) { + break; + } + output = q; + --olength; + } + } + + /*---- + * Print the decimal digits. + * The following code is equivalent to: + * + * for (uint32 i = 0; i < olength - 1; ++i) { + * const uint32 c = output % 10; output /= 10; + * result[index + olength - i] = (char) ('0' + c); + * } + * result[index] = '0' + output % 10; + */ + uint32 i = 0; + + while (output >= 10000) { + const uint32 c = output - 10000 * (output / 10000); + const uint32 c0 = (c % 100) << 1; + const uint32 c1 = (c / 100) << 1; + + output /= 10000; + + rc = memcpy_s(result + index + olength - i - 1, 2, DIGIT_TABLE + c0, 2); + securec_check(rc, "\0", "\0"); + rc = memcpy_s(result + index + olength - i - 3, 2, DIGIT_TABLE + c1, 2); + securec_check(rc, "\0", "\0"); + i += 4; + } + if (output >= 100) { + const uint32 c = (output % 100) << 1; + + output /= 100; + rc = memcpy_s(result + index + olength - i - 1, 2, DIGIT_TABLE + c, 2); + securec_check(rc, "\0", "\0"); + i += 2; + } + if (output >= 10) { + const uint32 c = output << 1; + + /* + * We can't use memcpy here: the decimal dot goes between these two + * digits. + */ + result[index + olength - i] = DIGIT_TABLE[c + 1]; + result[index] = DIGIT_TABLE[c]; + } else { + result[index] = (char)('0' + output); + } + + /* Print decimal point if needed. */ + if (olength > 1) { + result[index + 1] = '.'; + index += olength + 1; + } else { + ++index; + } + + /* Print the exponent. 
*/ + result[index++] = 'e'; + if (exp < 0) { + result[index++] = '-'; + exp = -exp; + } else { + result[index++] = '+'; + } + + rc = memcpy_s(result + index, 2, DIGIT_TABLE + 2 * exp, 2); + securec_check(rc, "\0", "\0"); + index += 2; + + return index; +} + +static inline bool f2d_small_int(const uint32 ieeeMantissa, const uint32 ieeeExponent, floating_decimal_32 *v) +{ + const int32 e2 = (int32)ieeeExponent - FLOAT_BIAS - FLOAT_MANTISSA_BITS; + + /* + * Avoid using multiple "return false;" here since it tends to provoke the + * compiler into inlining multiple copies of f2d, which is undesirable. + */ + + if (e2 >= -FLOAT_MANTISSA_BITS && e2 <= 0) { + /*---- + * Since 2^23 <= m2 < 2^24 and 0 <= -e2 <= 23: + * 1 <= f = m2 / 2^-e2 < 2^24. + * + * Test if the lower -e2 bits of the significand are 0, i.e. whether + * the fraction is 0. We can use ieeeMantissa here, since the implied + * 1 bit can never be tested by this; the implied 1 can only be part + * of a fraction if e2 < -FLOAT_MANTISSA_BITS which we already + * checked. (e.g. 0.5 gives ieeeMantissa == 0 and e2 == -24) + */ + const uint32 mask = (1U << -e2) - 1; + const uint32 fraction = ieeeMantissa & mask; + + if (fraction == 0) { + /*---- + * f is an integer in the range [1, 2^24). + * Note: mantissa might contain trailing (decimal) 0's. + * Note: since 2^24 < 10^9, there is no need to adjust + * decimalLength(). + */ + const uint32 m2 = (1U << FLOAT_MANTISSA_BITS) | ieeeMantissa; + + v->mantissa = m2 >> -e2; + v->exponent = 0; + return true; + } + } + + return false; +} + +/* + * Store the shortest decimal representation of the given float as an + * UNTERMINATED string in the caller's supplied buffer (which must be at least + * FLOAT_SHORTEST_DECIMAL_LEN-1 bytes long). + * + * Returns the number of bytes stored. + */ +int FloatToShortestDecimalBufn(float f, char *result) +{ + /* + * Step 1: Decode the floating-point number, and unify normalized and + * subnormal cases. 
+ */ + const uint32 bits = float_to_bits(f); + + /* Decode bits into sign, mantissa, and exponent. */ + const bool ieeeSign = ((bits >> (FLOAT_MANTISSA_BITS + FLOAT_EXPONENT_BITS)) & 1) != 0; + const uint32 ieeeMantissa = bits & ((1u << FLOAT_MANTISSA_BITS) - 1); + const uint32 ieeeExponent = (bits >> FLOAT_MANTISSA_BITS) & ((1u << FLOAT_EXPONENT_BITS) - 1); + + /* Case distinction; exit early for the easy cases. */ + if (ieeeExponent == ((1u << FLOAT_EXPONENT_BITS) - 1u) || (ieeeExponent == 0 && ieeeMantissa == 0)) { + return copy_special_str(result, ieeeSign, ieeeExponent, ieeeMantissa); + } + + floating_decimal_32 v; + const bool isSmallInt = f2d_small_int(ieeeMantissa, ieeeExponent, &v); + if (!isSmallInt) { + v = f2d(ieeeMantissa, ieeeExponent); + } + + return ToChars(v, ieeeSign, result); +} + +/* + * Store the shortest decimal representation of the given float as a + * null-terminated string in the caller's supplied buffer (which must be at + * least FLOAT_SHORTEST_DECIMAL_LEN bytes long). + * + * Returns the string length. + */ +int FloatToShortestDecimalBuf(float f, char *result) +{ + const int index = FloatToShortestDecimalBufn(f, result); + + /* Terminate the string. */ + Assert(index < FLOAT_SHORTEST_DECIMAL_LEN); + result[index] = '\0'; + return index; +} + +/* + * Return the shortest decimal representation as a null-terminated palloc'd + * string (outside the backend, uses malloc() instead). + * + * Caller is responsible for freeing the result. 
+ */ +char *FloatToShortestDecimal(float f) +{ + char *const result = (char *)palloc(FLOAT_SHORTEST_DECIMAL_LEN); + + FloatToShortestDecimalBuf(f, result); + return result; +} diff --git a/src/common/backend/utils/adt/halfutils.cpp b/src/common/backend/utils/adt/halfutils.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e132ad6eb99cfa6cafcf33d5689a1849cd213561 --- /dev/null +++ b/src/common/backend/utils/adt/halfutils.cpp @@ -0,0 +1,298 @@ +/* + * Copyright (c) 2024 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * ------------------------------------------------------------------------- + * + * halfutils.cpp + * + * IDENTIFICATION + * src/common/backend/utils/adt/halfutils.cpp + * + * ------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/datavec/halfutils.h" +#include "access/datavec/halfvec.h" + +#ifdef HALFVEC_DISPATCH +#include + +#if defined(USE__GET_CPUID) +#include +#else +#include +#endif + +#define TARGET_F16C +#endif + +float (*HalfvecL2SquaredDistance)(int dim, half *ax, half *bx); +float (*HalfvecInnerProduct)(int dim, half *ax, half *bx); +double (*HalfvecCosineSimilarity)(int dim, half *ax, half *bx); +float (*HalfvecL1Distance)(int dim, half *ax, half *bx); + +static float HalfvecL2SquaredDistanceDefault(int dim, half *ax, half *bx) +{ + float distance = 0.0; + + /* Auto-vectorized */ + for (int i = 0; i < dim; i++) { + float diff = HalfToFloat4(ax[i]) - HalfToFloat4(bx[i]); + + distance += diff * diff; + } + + return distance; +} + +#ifdef HALFVEC_DISPATCH +TARGET_F16C static float HalfvecL2SquaredDistanceF16c(int dim, half *ax, half *bx) +{ + float distance; + int i; + float s[8]; + int count = (dim / 8) * 8; + __m256 dist = _mm256_setzero_ps(); + + for (i = 0; i < count; i += 8) { + __m128i axi = _mm_loadu_si128((__m128i *)(ax + i)); + __m128i bxi = _mm_loadu_si128((__m128i *)(bx + i)); + __m256 axs = _mm256_cvtph_ps(axi); + __m256 bxs = _mm256_cvtph_ps(bxi); + __m256 diff = _mm256_sub_ps(axs, bxs); + + dist = _mm256_fmadd_ps(diff, diff, dist); + } + + _mm256_storeu_ps(s, dist); + + distance = s[0] + s[1] + s[2] + s[3] + s[4] + s[5] + s[6] + s[7]; + + for (; i < dim; i++) { + float diff = HalfToFloat4(ax[i]) - HalfToFloat4(bx[i]); + + distance += diff * diff; + } + + return distance; +} +#endif + +static float HalfvecInnerProductDefault(int dim, half *ax, half *bx) +{ + float distance = 0.0; + + /* Auto-vectorized */ + for (int i = 0; i < dim; i++) + distance += HalfToFloat4(ax[i]) * 
HalfToFloat4(bx[i]); + + return distance; +} + +#ifdef HALFVEC_DISPATCH +TARGET_F16C static float HalfvecInnerProductF16c(int dim, half *ax, half *bx) +{ + float distance; + int i; + float s[8]; + int count = (dim / 8) * 8; + __m256 dist = _mm256_setzero_ps(); + + for (i = 0; i < count; i += 8) { + __m128i axi = _mm_loadu_si128((__m128i *)(ax + i)); + __m128i bxi = _mm_loadu_si128((__m128i *)(bx + i)); + __m256 axs = _mm256_cvtph_ps(axi); + __m256 bxs = _mm256_cvtph_ps(bxi); + + dist = _mm256_fmadd_ps(axs, bxs, dist); + } + + _mm256_storeu_ps(s, dist); + + distance = s[0] + s[1] + s[2] + s[3] + s[4] + s[5] + s[6] + s[7]; + + for (; i < dim; i++) + distance += HalfToFloat4(ax[i]) * HalfToFloat4(bx[i]); + + return distance; +} +#endif + +static double HalfvecCosineSimilarityDefault(int dim, half *ax, half *bx) +{ + float similarity = 0.0; + float norma = 0.0; + float normb = 0.0; + + /* Auto-vectorized */ + for (int i = 0; i < dim; i++) { + float axi = HalfToFloat4(ax[i]); + float bxi = HalfToFloat4(bx[i]); + + similarity += axi * bxi; + norma += axi * axi; + normb += bxi * bxi; + } + + /* Use sqrt(a * b) over sqrt(a) * sqrt(b) */ + return static_cast(similarity) / sqrt(static_cast(norma) * static_cast(normb)); +} + +#ifdef HALFVEC_DISPATCH +TARGET_F16C static double HalfvecCosineSimilarityF16c(int dim, half *ax, half *bx) +{ + float similarity; + float norma; + float normb; + int i; + float s[8]; + int count = (dim / 8) * 8; + __m256 sim = _mm256_setzero_ps(); + __m256 na = _mm256_setzero_ps(); + __m256 nb = _mm256_setzero_ps(); + + for (i = 0; i < count; i += 8) { + __m128i axi = _mm_loadu_si128((__m128i *)(ax + i)); + __m128i bxi = _mm_loadu_si128((__m128i *)(bx + i)); + __m256 axs = _mm256_cvtph_ps(axi); + __m256 bxs = _mm256_cvtph_ps(bxi); + + sim = _mm256_fmadd_ps(axs, bxs, sim); + na = _mm256_fmadd_ps(axs, axs, na); + nb = _mm256_fmadd_ps(bxs, bxs, nb); + } + + _mm256_storeu_ps(s, sim); + similarity = s[0] + s[1] + s[2] + s[3] + s[4] + s[5] + s[6] + s[7]; + + 
_mm256_storeu_ps(s, na); + norma = s[0] + s[1] + s[2] + s[3] + s[4] + s[5] + s[6] + s[7]; + + _mm256_storeu_ps(s, nb); + normb = s[0] + s[1] + s[2] + s[3] + s[4] + s[5] + s[6] + s[7]; + + /* Auto-vectorized */ + for (; i < dim; i++) { + float axi = HalfToFloat4(ax[i]); + float bxi = HalfToFloat4(bx[i]); + + similarity += axi * bxi; + norma += axi * axi; + normb += bxi * bxi; + } + + /* Use sqrt(a * b) over sqrt(a) * sqrt(b) */ + return (double)similarity / sqrt((double)norma * (double)normb); +} +#endif + +static float HalfvecL1DistanceDefault(int dim, half *ax, half *bx) +{ + float distance = 0.0; + + /* Auto-vectorized */ + for (int i = 0; i < dim; i++) + distance += fabsf(HalfToFloat4(ax[i]) - HalfToFloat4(bx[i])); + + return distance; +} + +#ifdef HALFVEC_DISPATCH +/* Does not require FMA, but keep logic simple */ +TARGET_F16C static float HalfvecL1DistanceF16c(int dim, half *ax, half *bx) +{ + float distance; + int i; + float s[8]; + int count = (dim / 8) * 8; + __m256 dist = _mm256_setzero_ps(); + __m256 sign = _mm256_set1_ps(-0.0); + + for (i = 0; i < count; i += 8) { + __m128i axi = _mm_loadu_si128((__m128i *)(ax + i)); + __m128i bxi = _mm_loadu_si128((__m128i *)(bx + i)); + __m256 axs = _mm256_cvtph_ps(axi); + __m256 bxs = _mm256_cvtph_ps(bxi); + + dist = _mm256_add_ps(dist, _mm256_andnot_ps(sign, _mm256_sub_ps(axs, bxs))); + } + + _mm256_storeu_ps(s, dist); + + distance = s[0] + s[1] + s[2] + s[3] + s[4] + s[5] + s[6] + s[7]; + + for (; i < dim; i++) + distance += fabsf(HalfToFloat4(ax[i]) - HalfToFloat4(bx[i])); + + return distance; +} +#endif + +#ifdef HALFVEC_DISPATCH +#define CPU_FEATURE_FMA (1 << 12) +#define CPU_FEATURE_OSXSAVE (1 << 27) +#define CPU_FEATURE_AVX (1 << 28) +#define CPU_FEATURE_F16C (1 << 29) + +#ifdef _MSC_VER +#define TARGET_XSAVE +#else +#define TARGET_XSAVE __attribute__((target("xsave"))) +#endif + +TARGET_XSAVE static bool SupportsCpuFeature(unsigned int feature) +{ + unsigned int exx[4] = {0, 0, 0, 0}; + +#if 
defined(USE__GET_CPUID) + __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]); +#else + __cpuid(exx, 1); +#endif + + /* Check OS supports XSAVE */ + if ((exx[2] & CPU_FEATURE_OSXSAVE) != CPU_FEATURE_OSXSAVE) + return false; + + /* Check XMM and YMM registers are enabled */ + if ((_xgetbv(0) & 6) != 6) + return false; + + /* Now check features */ + return (exx[2] & feature) == feature; +} +#endif + +void HalfvecInit(void) +{ + /* + * Could skip pointer when single function, but no difference in + * performance + */ + HalfvecL2SquaredDistance = HalfvecL2SquaredDistanceDefault; + HalfvecInnerProduct = HalfvecInnerProductDefault; + HalfvecCosineSimilarity = HalfvecCosineSimilarityDefault; + HalfvecL1Distance = HalfvecL1DistanceDefault; + +#ifdef HALFVEC_DISPATCH + if (SupportsCpuFeature(CPU_FEATURE_AVX | CPU_FEATURE_F16C | CPU_FEATURE_FMA)) { + HalfvecL2SquaredDistance = HalfvecL2SquaredDistanceF16c; + HalfvecInnerProduct = HalfvecInnerProductF16c; + HalfvecCosineSimilarity = HalfvecCosineSimilarityF16c; + /* Does not require FMA, but keep logic simple */ + HalfvecL1Distance = HalfvecL1DistanceF16c; + } +#endif +} diff --git a/src/common/backend/utils/adt/halfvec.cpp b/src/common/backend/utils/adt/halfvec.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b97de6070217658558f4c17e4022ecafcb51107f --- /dev/null +++ b/src/common/backend/utils/adt/halfvec.cpp @@ -0,0 +1,1141 @@ +/* + * Copyright (c) 2024 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * ------------------------------------------------------------------------- + * + * halfvec.cpp + * + * IDENTIFICATION + * src/common/backend/utils/adt/halfvec.cpp + * + * ------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include + +#include "access/datavec/bitvec.h" +#include "catalog/pg_type.h" +#include "fmgr.h" +#include "access/datavec/halfutils.h" +#include "access/datavec/halfvec.h" +#include "lib/stringinfo.h" +#include "libpq/pqformat.h" +#include "port.h" /* for strtof() */ +#include "access/datavec/shortest_dec.h" +#include "access/datavec/sparsevec.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/numeric.h" +#include "access/datavec/vector.h" + +#define TYPALIGN_DOUBLE 'd' +#define TYPALIGN_INT 'i' + +#define STATE_DIMS(x) (ARR_DIMS(x)[0] - 1) +#define CreateStateDatums(dim) palloc(sizeof(Datum) * ((dim) + 1)) + +/* + * Get a half from a message buffer + */ +static half pq_getmsghalf(StringInfo msg) +{ + union { + half h; + uint16 i; + } swap; + + swap.i = pq_getmsgint(msg, 2); + return swap.h; +} + +/* + * Append a half to a StringInfo buffer + */ +static void pq_sendhalf(StringInfo buf, half h) +{ + union { + half h; + uint16 i; + } swap; + + swap.h = h; + pq_sendint16(buf, swap.i); +} + +/* + * Ensure same dimensions + */ +static inline void CheckDims(HalfVector *a, HalfVector *b) +{ + if (a->dim != b->dim) { + ereport(ERROR, + (errcode(ERRCODE_DATA_EXCEPTION), errmsg("different halfvec dimensions %d and %d", a->dim, b->dim))); + } +} + +/* + * Ensure expected dimensions + */ +static inline void CheckExpectedDim(int32 typmod, int dim) +{ + if (typmod != -1 && typmod != dim) { + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("expected %d dimensions, not %d", typmod, dim))); + } +} + +/* + * Ensure valid dimensions + */ +static inline void CheckDim(int dim) +{ + if (dim < 1) + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), 
errmsg("halfvec must have at least 1 dimension"))); + + if (dim > HALFVEC_MAX_DIM) + ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("halfvec cannot have more than %d dimensions", HALFVEC_MAX_DIM))); +} + +/* + * Ensure finite element + */ +static inline void CheckElement(half value) +{ + if (HalfIsNan(value)) + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("NaN not allowed in halfvec"))); + + if (HalfIsInf(value)) + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("infinite value not allowed in halfvec"))); +} + +/* + * Allocate and initialize a new half vector + */ +HalfVector *InitHalfVector(int dim) +{ + HalfVector *result; + int size; + + size = HALFVEC_SIZE(dim); + result = (HalfVector *)palloc0(size); + SET_VARSIZE(result, size); + result->dim = dim; + + return result; +} + +/* + * Check for whitespace, since array_isspace() is static + */ +static inline bool HalfvecIsspace(char ch) +{ + if (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' || ch == '\v' || ch == '\f') { + return true; + } + return false; +} + +/* + * Check state array + */ +static float8 *CheckStateArray(ArrayType *statearray, const char *caller) +{ + if (ARR_NDIM(statearray) != 1 || ARR_DIMS(statearray)[0] < 1 || ARR_HASNULL(statearray)) + elog(ERROR, "%s: expected state array", caller); + return (float8 *)ARR_DATA_PTR(statearray); +} + +static pg_noinline void float_overflow_error(void) +{ + ereport(ERROR, (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), errmsg("value out of range: overflow"))); +} + +static pg_noinline void float_underflow_error(void) +{ + ereport(ERROR, (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), errmsg("value out of range: underflow"))); +} + +/* + * Convert textual representation to internal representation + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_in); +Datum halfvec_in(PG_FUNCTION_ARGS) +{ + char *lit = PG_GETARG_CSTRING(0); + int32 typmod = PG_GETARG_INT32(2); + half x[HALFVEC_MAX_DIM]; + int dim = 0; + char *pt = lit; + 
HalfVector *result; + + while (HalfvecIsspace(*pt)) { + pt++; + } + + if (*pt != '[') + ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type halfvec: \"%s\"", lit), + errdetail("Vector contents must start with \"[\"."))); + + pt++; + + while (HalfvecIsspace(*pt)) { + pt++; + } + + if (*pt == ']') + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("halfvec must have at least 1 dimension"))); + + for (;;) { + float val; + char *stringEnd; + + if (dim == HALFVEC_MAX_DIM) + ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("halfvec cannot have more than %d dimensions", HALFVEC_MAX_DIM))); + + while (HalfvecIsspace(*pt)) { + pt++; + } + + /* Check for empty string like float4in */ + if (*pt == '\0') + ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type halfvec: \"%s\"", lit))); + + errno = 0; + + /* Postgres sets LC_NUMERIC to C on startup */ + val = strtof(pt, &stringEnd); + + if (stringEnd == pt) { + ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type halfvec: \"%s\"", lit))); + } + + x[dim] = Float4ToHalfUnchecked(val); + + /* Check for range error like float4in */ + if ((errno == ERANGE && isinf(val)) || (HalfIsInf(x[dim]) && !isinf(val))) { + ereport(ERROR, (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("\"%s\" is out of range for type halfvec", pnstrdup(pt, stringEnd - pt)))); + } + + CheckElement(x[dim]); + dim++; + + pt = stringEnd; + + while (HalfvecIsspace(*pt)) { + pt++; + } + + if (*pt == ',') { + pt++; + } else if (*pt == ']') { + pt++; + break; + } else { + ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type halfvec: \"%s\"", lit))); + } + } + + /* Only whitespace is allowed after the closing brace */ + while (HalfvecIsspace(*pt)) { + pt++; + } + + if (*pt != '\0') { + ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), 
+ errmsg("invalid input syntax for type halfvec: \"%s\"", lit), + errdetail("Junk after closing right brace."))); + } + + CheckDim(dim); + CheckExpectedDim(typmod, dim); + + result = InitHalfVector(dim); + for (int i = 0; i < dim; i++) { + result->x[i] = x[i]; + } + + PG_RETURN_POINTER(result); +} + +#define AppendChar(ptr, c) (*(ptr)++ = (c)) +#define AppendFloat(ptr, f) ((ptr) += FloatToShortestDecimalBufn((f), (ptr))) + +/* + * Convert internal representation to textual representation + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_out); +Datum halfvec_out(PG_FUNCTION_ARGS) +{ + HalfVector *vector = PG_GETARG_HALFVEC_P(0); + int dim = vector->dim; + char *buf; + char *ptr; + + /* + * Need: + * + * dim * (FLOAT_SHORTEST_DECIMAL_LEN - 1) bytes for + * FloatToShortestDecimalBufn + * + * dim - 1 bytes for separator + * + * 3 bytes for [, ], and \0 + */ + buf = (char *)palloc(FLOAT_SHORTEST_DECIMAL_LEN * dim + 2); + ptr = buf; + + AppendChar(ptr, '['); + + for (int i = 0; i < dim; i++) { + if (i > 0) { + AppendChar(ptr, ','); + } + + /* + * Use shortest decimal representation of single-precision float for + * simplicity + */ + AppendFloat(ptr, HalfToFloat4(vector->x[i])); + } + + AppendChar(ptr, ']'); + *ptr = '\0'; + + PG_FREE_IF_COPY(vector, 0); + PG_RETURN_CSTRING(buf); +} + +/* + * Convert type modifier + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_typmod_in); +Datum halfvec_typmod_in(PG_FUNCTION_ARGS) +{ + ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0); + int32 *tl; + int n; + + tl = ArrayGetIntegerTypmods(ta, &n); + + if (n != 1) { + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid type modifier"))); + } + + if (*tl < 1) { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("dimensions for type halfvec must be at least 1"))); + } + + if (*tl > HALFVEC_MAX_DIM) { + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("dimensions for type halfvec cannot exceed %d", HALFVEC_MAX_DIM))); + } + + PG_RETURN_INT32(*tl); 
+} + +/* + * Convert external binary representation to internal representation + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_recv); +Datum halfvec_recv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo)PG_GETARG_POINTER(0); + int32 typmod = PG_GETARG_INT32(2); + HalfVector *result; + int16 dim; + int16 unused; + + dim = pq_getmsgint(buf, sizeof(int16)); + unused = pq_getmsgint(buf, sizeof(int16)); + + CheckDim(dim); + CheckExpectedDim(typmod, dim); + + if (unused != 0) { + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("expected unused to be 0, not %d", unused))); + } + + result = InitHalfVector(dim); + for (int i = 0; i < dim; i++) { + result->x[i] = pq_getmsghalf(buf); + CheckElement(result->x[i]); + } + + PG_RETURN_POINTER(result); +} + +/* + * Convert internal representation to the external binary representation + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_send); +Datum halfvec_send(PG_FUNCTION_ARGS) +{ + HalfVector *vec = PG_GETARG_HALFVEC_P(0); + StringInfoData buf; + + pq_begintypsend(&buf); + pq_sendint(&buf, vec->dim, sizeof(int16)); + pq_sendint(&buf, vec->unused, sizeof(int16)); + for (int i = 0; i < vec->dim; i++) { + pq_sendhalf(&buf, vec->x[i]); + } + + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + +/* + * Convert half vector to half vector + * This is needed to check the type modifier + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec); +Datum halfvec(PG_FUNCTION_ARGS) +{ + HalfVector *vec = PG_GETARG_HALFVEC_P(0); + int32 typmod = PG_GETARG_INT32(1); + + CheckExpectedDim(typmod, vec->dim); + + PG_RETURN_POINTER(vec); +} + +/* + * Convert array to half vector + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(array_to_halfvec); +Datum array_to_halfvec(PG_FUNCTION_ARGS) +{ + ArrayType *array = PG_GETARG_ARRAYTYPE_P(0); + int32 typmod = PG_GETARG_INT32(1); + HalfVector *result; + int16 typlen; + bool typbyval; + char typalign; + Datum *elemsp; + int nelemsp; + + if (ARR_NDIM(array) > 1) { + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("array must 
be 1-D"))); + } + + if (ARR_HASNULL(array) && array_contains_nulls(array)) { + ereport(ERROR, (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), errmsg("array must not contain nulls"))); + } + + get_typlenbyvalalign(ARR_ELEMTYPE(array), &typlen, &typbyval, &typalign); + deconstruct_array(array, ARR_ELEMTYPE(array), typlen, typbyval, typalign, &elemsp, NULL, &nelemsp); + + CheckDim(nelemsp); + CheckExpectedDim(typmod, nelemsp); + + result = InitHalfVector(nelemsp); + + if (ARR_ELEMTYPE(array) == INT4OID) { + for (int i = 0; i < nelemsp; i++) + result->x[i] = Float4ToHalf(DatumGetInt32(elemsp[i])); + } else if (ARR_ELEMTYPE(array) == FLOAT8OID) { + for (int i = 0; i < nelemsp; i++) + result->x[i] = Float4ToHalf(DatumGetFloat8(elemsp[i])); + } else if (ARR_ELEMTYPE(array) == FLOAT4OID) { + for (int i = 0; i < nelemsp; i++) + result->x[i] = Float4ToHalf(DatumGetFloat4(elemsp[i])); + } else if (ARR_ELEMTYPE(array) == NUMERICOID) { + for (int i = 0; i < nelemsp; i++) + result->x[i] = Float4ToHalf(DatumGetFloat4(DirectFunctionCall1(numeric_float4, elemsp[i]))); + } else { + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("unsupported array type"))); + } + + /* + * Free allocation from deconstruct_array. Do not free individual elements + * when pass-by-reference since they point to original array. 
+ */ + pfree(elemsp); + + /* Check elements */ + for (int i = 0; i < result->dim; i++) { + CheckElement(result->x[i]); + } + + PG_RETURN_POINTER(result); +} + +/* + * Convert half vector to float4[] + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_to_float4); +Datum halfvec_to_float4(PG_FUNCTION_ARGS) +{ + HalfVector *vec = PG_GETARG_HALFVEC_P(0); + Datum *datums; + ArrayType *result; + + datums = (Datum *)palloc(sizeof(Datum) * vec->dim); + + for (int i = 0; i < vec->dim; i++) { + datums[i] = Float4GetDatum(HalfToFloat4(vec->x[i])); + } + + /* Use TYPALIGN_INT for float4 */ + result = construct_array(datums, vec->dim, FLOAT4OID, sizeof(float4), true, TYPALIGN_INT); + + pfree(datums); + + PG_RETURN_POINTER(result); +} + +/* + * Convert vector to half vec + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_to_halfvec); +Datum vector_to_halfvec(PG_FUNCTION_ARGS) +{ + Vector *vec = PG_GETARG_VECTOR_P(0); + int32 typmod = PG_GETARG_INT32(1); + HalfVector *result; + + CheckDim(vec->dim); + CheckExpectedDim(typmod, vec->dim); + + result = InitHalfVector(vec->dim); + + for (int i = 0; i < vec->dim; i++) + result->x[i] = Float4ToHalf(vec->x[i]); + + PG_RETURN_POINTER(result); +} + +/* + * Get the L2 distance between half vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_l2_distance); +Datum halfvec_l2_distance(PG_FUNCTION_ARGS) +{ + HalfVector *a = PG_GETARG_HALFVEC_P(0); + HalfVector *b = PG_GETARG_HALFVEC_P(1); + + CheckDims(a, b); + + PG_RETURN_FLOAT8(sqrt((double)HalfvecL2SquaredDistance(a->dim, a->x, b->x))); +} + +/* + * Get the L2 squared distance between half vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_l2_squared_distance); +Datum halfvec_l2_squared_distance(PG_FUNCTION_ARGS) +{ + HalfVector *a = PG_GETARG_HALFVEC_P(0); + HalfVector *b = PG_GETARG_HALFVEC_P(1); + + CheckDims(a, b); + + PG_RETURN_FLOAT8((double)HalfvecL2SquaredDistance(a->dim, a->x, b->x)); +} + +/* + * Get the inner product of two half vectors + */ +PGDLLEXPORT 
PG_FUNCTION_INFO_V1(halfvec_inner_product); +Datum halfvec_inner_product(PG_FUNCTION_ARGS) +{ + HalfVector *a = PG_GETARG_HALFVEC_P(0); + HalfVector *b = PG_GETARG_HALFVEC_P(1); + + CheckDims(a, b); + + PG_RETURN_FLOAT8((double)HalfvecInnerProduct(a->dim, a->x, b->x)); +} + +/* + * Get the negative inner product of two half vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_negative_inner_product); +Datum halfvec_negative_inner_product(PG_FUNCTION_ARGS) +{ + HalfVector *a = PG_GETARG_HALFVEC_P(0); + HalfVector *b = PG_GETARG_HALFVEC_P(1); + + CheckDims(a, b); + + PG_RETURN_FLOAT8((double)-HalfvecInnerProduct(a->dim, a->x, b->x)); +} + +/* + * Get the cosine distance between two half vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_cosine_distance); +Datum halfvec_cosine_distance(PG_FUNCTION_ARGS) +{ + HalfVector *a = PG_GETARG_HALFVEC_P(0); + HalfVector *b = PG_GETARG_HALFVEC_P(1); + double similarity; + + CheckDims(a, b); + + similarity = HalfvecCosineSimilarity(a->dim, a->x, b->x); +#ifdef _MSC_VER + /* /fp:fast may not propagate NaN */ + if (isnan(similarity)) { + PG_RETURN_FLOAT8(NAN); + } +#endif + + /* Keep in range */ + if (similarity > 1) { + similarity = 1; + } else if (similarity < -1) { + similarity = -1; + } + + PG_RETURN_FLOAT8(1 - similarity); +} + +/* + * Get the distance for spherical k-means + * Currently uses angular distance since needs to satisfy triangle inequality + * Assumes inputs are unit vectors (skips norm) + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_spherical_distance); +Datum halfvec_spherical_distance(PG_FUNCTION_ARGS) +{ + HalfVector *a = PG_GETARG_HALFVEC_P(0); + HalfVector *b = PG_GETARG_HALFVEC_P(1); + double distance; + + CheckDims(a, b); + + distance = (double)HalfvecInnerProduct(a->dim, a->x, b->x); + /* Prevent NaN with acos with loss of precision */ + if (distance > 1) { + distance = 1; + } else if (distance < -1) { + distance = -1; + } + + PG_RETURN_FLOAT8(acos(distance) / M_PI); +} + +/* + * Get the L1 distance 
between two half vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_l1_distance); +Datum halfvec_l1_distance(PG_FUNCTION_ARGS) +{ + HalfVector *a = PG_GETARG_HALFVEC_P(0); + HalfVector *b = PG_GETARG_HALFVEC_P(1); + + CheckDims(a, b); + + PG_RETURN_FLOAT8((double)HalfvecL1Distance(a->dim, a->x, b->x)); +} + +/* + * Get the dimensions of a half vector + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_vector_dims); +Datum halfvec_vector_dims(PG_FUNCTION_ARGS) +{ + HalfVector *a = PG_GETARG_HALFVEC_P(0); + + PG_RETURN_INT32(a->dim); +} + +/* + * Get the L2 norm of a half vector + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_l2_norm); +Datum halfvec_l2_norm(PG_FUNCTION_ARGS) +{ + HalfVector *a = PG_GETARG_HALFVEC_P(0); + half *ax = a->x; + double norm = 0.0; + + /* Auto-vectorized */ + for (int i = 0; i < a->dim; i++) { + double axi = (double)HalfToFloat4(ax[i]); + + norm += axi * axi; + } + + PG_RETURN_FLOAT8(sqrt(norm)); +} + +/* + * Normalize a half vector with the L2 norm + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_l2_normalize); +Datum halfvec_l2_normalize(PG_FUNCTION_ARGS) +{ + HalfVector *a = PG_GETARG_HALFVEC_P(0); + half *ax = a->x; + double norm = 0; + HalfVector *result; + half *rx; + + result = InitHalfVector(a->dim); + rx = result->x; + + /* Auto-vectorized */ + for (int i = 0; i < a->dim; i++) + norm += (double)HalfToFloat4(ax[i]) * (double)HalfToFloat4(ax[i]); + + norm = sqrt(norm); + /* Return zero vector for zero norm */ + if (norm > 0) { + for (int i = 0; i < a->dim; i++) + rx[i] = Float4ToHalfUnchecked(HalfToFloat4(ax[i]) / norm); + + /* Check for overflow */ + for (int i = 0; i < a->dim; i++) { + if (HalfIsInf(rx[i])) + float_overflow_error(); + } + } + + PG_RETURN_POINTER(result); +} + +/* + * Add half vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_add); +Datum halfvec_add(PG_FUNCTION_ARGS) +{ + HalfVector *a = PG_GETARG_HALFVEC_P(0); + HalfVector *b = PG_GETARG_HALFVEC_P(1); + half *ax = a->x; + half *bx = b->x; + HalfVector *result; + half 
*rx; + + CheckDims(a, b); + + result = InitHalfVector(a->dim); + rx = result->x; + + /* Auto-vectorized */ + for (int i = 0, imax = a->dim; i < imax; i++) { +#ifdef FLT16_SUPPORT + rx[i] = ax[i] + bx[i]; +#else + rx[i] = Float4ToHalfUnchecked(HalfToFloat4(ax[i]) + HalfToFloat4(bx[i])); +#endif + } + + /* Check for overflow */ + for (int i = 0, imax = a->dim; i < imax; i++) { + if (HalfIsInf(rx[i])) { + float_overflow_error(); + } + } + + PG_RETURN_POINTER(result); +} + +/* + * Subtract half vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_sub); +Datum halfvec_sub(PG_FUNCTION_ARGS) +{ + HalfVector *a = PG_GETARG_HALFVEC_P(0); + HalfVector *b = PG_GETARG_HALFVEC_P(1); + half *ax = a->x; + half *bx = b->x; + HalfVector *result; + half *rx; + + CheckDims(a, b); + + result = InitHalfVector(a->dim); + rx = result->x; + + /* Auto-vectorized */ + for (int i = 0, imax = a->dim; i < imax; i++) { +#ifdef FLT16_SUPPORT + rx[i] = ax[i] - bx[i]; +#else + rx[i] = Float4ToHalfUnchecked(HalfToFloat4(ax[i]) - HalfToFloat4(bx[i])); +#endif + } + + /* Check for overflow */ + for (int i = 0, imax = a->dim; i < imax; i++) { + if (HalfIsInf(rx[i])) { + float_overflow_error(); + } + } + + PG_RETURN_POINTER(result); +} + +/* + * Multiply half vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_mul); +Datum halfvec_mul(PG_FUNCTION_ARGS) +{ + HalfVector *a = PG_GETARG_HALFVEC_P(0); + HalfVector *b = PG_GETARG_HALFVEC_P(1); + half *ax = a->x; + half *bx = b->x; + HalfVector *result; + half *rx; + + CheckDims(a, b); + + result = InitHalfVector(a->dim); + rx = result->x; + + /* Auto-vectorized */ + for (int i = 0, imax = a->dim; i < imax; i++) { +#ifdef FLT16_SUPPORT + rx[i] = ax[i] * bx[i]; +#else + rx[i] = Float4ToHalfUnchecked(HalfToFloat4(ax[i]) * HalfToFloat4(bx[i])); +#endif + } + + /* Check for overflow and underflow */ + for (int i = 0, imax = a->dim; i < imax; i++) { + if (HalfIsInf(rx[i])) { + float_overflow_error(); + } + + if (HalfIsZero(rx[i]) && !(HalfIsZero(ax[i]) || 
HalfIsZero(bx[i]))) { + float_underflow_error(); + } + } + + PG_RETURN_POINTER(result); +} + +/* + * Concatenate half vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_concat); +Datum halfvec_concat(PG_FUNCTION_ARGS) +{ + HalfVector *a = PG_GETARG_HALFVEC_P(0); + HalfVector *b = PG_GETARG_HALFVEC_P(1); + HalfVector *result; + int dim = a->dim + b->dim; + + CheckDim(dim); + result = InitHalfVector(dim); + + for (int i = 0; i < a->dim; i++) { + result->x[i] = a->x[i]; + } + + for (int i = 0; i < b->dim; i++) { + result->x[i + a->dim] = b->x[i]; + } + + PG_RETURN_POINTER(result); +} + +/* + * Quantize a half vector + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_binary_quantize); +Datum halfvec_binary_quantize(PG_FUNCTION_ARGS) +{ + HalfVector *a = PG_GETARG_HALFVEC_P(0); + half *ax = a->x; + VarBit *result = InitBitVector(a->dim); + unsigned char *rx = VARBITS(result); + + for (int i = 0; i < a->dim; i++) { + rx[i / 8] |= (HalfToFloat4(ax[i]) > 0) << (7 - (i % 8)); + } + + PG_RETURN_VARBIT_P(result); +} + +/* + * Get a subvector + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_subvector); +Datum halfvec_subvector(PG_FUNCTION_ARGS) +{ + HalfVector *a = PG_GETARG_HALFVEC_P(0); + int32 start = PG_GETARG_INT32(1); + int32 count = PG_GETARG_INT32(2); + int32 end; + half *ax = a->x; + HalfVector *result; + int32 dim; + + if (count < 1) { + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("halfvec must have at least 1 dimension"))); + } + + /* + * Check if (start + count > a->dim), avoiding integer overflow. a->dim + * and count are both positive, so a->dim - count won't overflow. 
+ */ + if (start > a->dim - count) { + end = a->dim + 1; + } else { + end = start + count; + } + + /* Indexing starts at 1, like substring */ + if (start < 1) { + start = 1; + } else if (start > a->dim) { + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("halfvec must have at least 1 dimension"))); + } + + dim = end - start; + CheckDim(dim); + result = InitHalfVector(dim); + + for (int i = 0; i < dim; i++) { + result->x[i] = ax[start - 1 + i]; + } + + PG_RETURN_POINTER(result); +} + +/* + * Internal helper to compare half vectors + */ +static int halfvec_cmp_internal(HalfVector *a, HalfVector *b) +{ + int dim = Min(a->dim, b->dim); + + /* Check values before dimensions to be consistent with Postgres arrays */ + for (int i = 0; i < dim; i++) { + if (HalfToFloat4(a->x[i]) < HalfToFloat4(b->x[i])) { + return -1; + } + + if (HalfToFloat4(a->x[i]) > HalfToFloat4(b->x[i])) { + return 1; + } + } + + if (a->dim < b->dim) { + return -1; + } + + if (a->dim > b->dim) { + return 1; + } + + return 0; +} + +/* + * Less than + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_lt); +Datum halfvec_lt(PG_FUNCTION_ARGS) +{ + HalfVector *a = PG_GETARG_HALFVEC_P(0); + HalfVector *b = PG_GETARG_HALFVEC_P(1); + + PG_RETURN_BOOL(halfvec_cmp_internal(a, b) < 0); +} + +/* + * Less than or equal + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_le); +Datum halfvec_le(PG_FUNCTION_ARGS) +{ + HalfVector *a = PG_GETARG_HALFVEC_P(0); + HalfVector *b = PG_GETARG_HALFVEC_P(1); + + PG_RETURN_BOOL(halfvec_cmp_internal(a, b) <= 0); +} + +/* + * Equal + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_eq); +Datum halfvec_eq(PG_FUNCTION_ARGS) +{ + HalfVector *a = PG_GETARG_HALFVEC_P(0); + HalfVector *b = PG_GETARG_HALFVEC_P(1); + + PG_RETURN_BOOL(halfvec_cmp_internal(a, b) == 0); +} + +/* + * Not equal + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_ne); +Datum halfvec_ne(PG_FUNCTION_ARGS) +{ + HalfVector *a = PG_GETARG_HALFVEC_P(0); + HalfVector *b = PG_GETARG_HALFVEC_P(1); + + 
PG_RETURN_BOOL(halfvec_cmp_internal(a, b) != 0); +} + +/* + * Greater than or equal + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_ge); +Datum halfvec_ge(PG_FUNCTION_ARGS) +{ + HalfVector *a = PG_GETARG_HALFVEC_P(0); + HalfVector *b = PG_GETARG_HALFVEC_P(1); + + PG_RETURN_BOOL(halfvec_cmp_internal(a, b) >= 0); +} + +/* + * Greater than + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_gt); +Datum halfvec_gt(PG_FUNCTION_ARGS) +{ + HalfVector *a = PG_GETARG_HALFVEC_P(0); + HalfVector *b = PG_GETARG_HALFVEC_P(1); + + PG_RETURN_BOOL(halfvec_cmp_internal(a, b) > 0); +} + +/* + * Compare half vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_cmp); +Datum halfvec_cmp(PG_FUNCTION_ARGS) +{ + HalfVector *a = PG_GETARG_HALFVEC_P(0); + HalfVector *b = PG_GETARG_HALFVEC_P(1); + + PG_RETURN_INT32(halfvec_cmp_internal(a, b)); +} + +/* + * Accumulate half vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_accum); +Datum halfvec_accum(PG_FUNCTION_ARGS) +{ + ArrayType *statearray = PG_GETARG_ARRAYTYPE_P(0); + HalfVector *newval = PG_GETARG_HALFVEC_P(1); + float8 *statevalues; + int16 dim; + bool newarr; + float8 n; + Datum *statedatums; + half *x = newval->x; + ArrayType *result; + + /* Check array before using */ + statevalues = CheckStateArray(statearray, "halfvec_accum"); + dim = STATE_DIMS(statearray); + newarr = dim == 0; + + if (newarr) + dim = newval->dim; + else + CheckExpectedDim(dim, newval->dim); + + n = statevalues[0] + 1.0; + + statedatums = (Datum *)CreateStateDatums(dim); + statedatums[0] = Float8GetDatum(n); + + if (newarr) { + for (int i = 0; i < dim; i++) + statedatums[i + 1] = Float8GetDatum((double)HalfToFloat4(x[i])); + } else { + for (int i = 0; i < dim; i++) { + double v = statevalues[i + 1] + (double)HalfToFloat4(x[i]); + /* Check for overflow */ + if (isinf(v)) + float_overflow_error(); + + statedatums[i + 1] = Float8GetDatum(v); + } + } + + /* Use float8 array like float4_accum */ + result = construct_array(statedatums, dim + 1, FLOAT8OID, sizeof(float8), 
FLOAT8PASSBYVAL, TYPALIGN_DOUBLE); + + pfree(statedatums); + + PG_RETURN_ARRAYTYPE_P(result); +} + +/* + * Average half vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_avg); +Datum halfvec_avg(PG_FUNCTION_ARGS) +{ + ArrayType *statearray = PG_GETARG_ARRAYTYPE_P(0); + float8 *statevalues; + float8 n; + uint16 dim; + HalfVector *result; + + /* Check array before using */ + statevalues = CheckStateArray(statearray, "halfvec_avg"); + n = statevalues[0]; + + /* SQL defines AVG of no values to be NULL */ + if (n == 0.0) { + PG_RETURN_NULL(); + } + + /* Create half vector */ + dim = STATE_DIMS(statearray); + CheckDim(dim); + result = InitHalfVector(dim); + for (int i = 0; i < dim; i++) { + result->x[i] = Float4ToHalf(statevalues[i + 1] / n); + CheckElement(result->x[i]); + } + + PG_RETURN_POINTER(result); +} + +/* + * Convert sparse vector to half vector + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_to_halfvec); +Datum sparsevec_to_halfvec(PG_FUNCTION_ARGS) +{ + SparseVector *svec = PG_GETARG_SPARSEVEC_P(0); + int32 typmod = PG_GETARG_INT32(1); + HalfVector *result; + int dim = svec->dim; + float *values = SPARSEVEC_VALUES(svec); + + CheckDim(dim); + CheckExpectedDim(typmod, dim); + + result = InitHalfVector(dim); + for (int i = 0; i < svec->nnz; i++) { + result->x[svec->indices[i]] = Float4ToHalf(values[i]); + } + + PG_RETURN_POINTER(result); +} diff --git a/src/common/backend/utils/adt/sparsevec.cpp b/src/common/backend/utils/adt/sparsevec.cpp new file mode 100644 index 0000000000000000000000000000000000000000..28fc55f66a2fa43785069632632a9a97822ed5ff --- /dev/null +++ b/src/common/backend/utils/adt/sparsevec.cpp @@ -0,0 +1,1064 @@ +/* + * Copyright (c) 2024 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. 
+ * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + * ------------------------------------------------------------------------- + * + * sparsevec.cpp + * + * IDENTIFICATION + * src/common/backend/utils/adt/sparsevec.cpp + * + * ------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include +#include + +#include "fmgr.h" +#include "access/datavec/halfutils.h" +#include "access/datavec/halfvec.h" +#include "libpq/pqformat.h" +#include "access/datavec/shortest_dec.h" +#include "access/datavec/sparsevec.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "access/datavec/vector.h" + +#include +#include "utils/builtins.h" + +typedef struct SparseInputElement { + int32 index; + float value; +} SparseInputElement; + +/* + * Ensure same dimensions + */ +static inline void CheckDims(SparseVector *a, SparseVector *b) +{ + if (a->dim != b->dim) { + ereport(ERROR, + (errcode(ERRCODE_DATA_EXCEPTION), errmsg("different sparsevec dimensions %d and %d", a->dim, b->dim))); + } +} + +/* + * Ensure expected dimensions + */ +static inline void CheckExpectedDim(int32 typmod, int dim) +{ + if (typmod != -1 && typmod != dim) { + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("expected %d dimensions, not %d", typmod, dim))); + } +} + +/* + * Ensure valid dimensions + */ +static inline void CheckDim(int dim) +{ + if (dim < 1) { + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("sparsevec must have at least 1 dimension"))); + } + + if (dim > SPARSEVEC_MAX_DIM) { + ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("sparsevec cannot have more than %d dimensions", SPARSEVEC_MAX_DIM))); + } +} + +/* 
+ * Ensure valid nnz + */ +static inline void CheckNnz(int nnz, int dim) +{ + if (nnz < 0) + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("sparsevec cannot have negative number of elements"))); + + if (nnz > SPARSEVEC_MAX_NNZ) + ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("sparsevec cannot have more than %d non-zero elements", SPARSEVEC_MAX_NNZ))); + + if (nnz > dim) + ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("sparsevec cannot have more elements than dimensions"))); +} + +/* + * Ensure valid index + */ +static inline void CheckIndex(int32 *indices, int i, int dim) +{ + int32 index = indices[i]; + + if (index < 0 || index >= dim) { + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("sparsevec index out of bounds"))); + } + + if (i > 0) { + if (index < indices[i - 1]) + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("sparsevec indices must be in ascending order"))); + + if (index == indices[i - 1]) + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("sparsevec indices must not contain duplicates"))); + } +} + +/* + * Ensure finite element + */ +static inline void CheckElement(float value) +{ + if (isnan(value)) + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("NaN not allowed in sparsevec"))); + + if (isinf(value)) + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("infinite value not allowed in sparsevec"))); +} + +/* + * Allocate and initialize a new sparse vector + */ +SparseVector *InitSparseVector(int dim, int nnz) +{ + SparseVector *result; + int size; + + size = SPARSEVEC_SIZE(nnz); + result = (SparseVector *)palloc0(size); + SET_VARSIZE(result, size); + result->dim = dim; + result->nnz = nnz; + + return result; +} + +/* + * Check for whitespace, since array_isspace() is static + */ +static inline bool SparsevecIsspace(char ch) +{ + if (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' || ch == '\v' || ch == '\f') { + return true; + } + return false; +} + +/* + * 
Compare indices + */ +static int CompareIndices(const void *a, const void *b) +{ + if (((SparseInputElement *)a)->index < ((SparseInputElement *)b)->index) { + return -1; + } + + if (((SparseInputElement *)a)->index > ((SparseInputElement *)b)->index) { + return 1; + } + + return 0; +} + +/* + * Convert textual representation to internal representation + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_in); +Datum sparsevec_in(PG_FUNCTION_ARGS) +{ + char *lit = PG_GETARG_CSTRING(0); + int32 typmod = PG_GETARG_INT32(2); + long dim; + char *pt = lit; + char *stringEnd; + SparseVector *result; + float *rvalues; + SparseInputElement *elements; + int maxNnz; + int nnz = 0; + + maxNnz = 1; + while (*pt != '\0') { + if (*pt == ',') { + maxNnz++; + } + + pt++; + } + + elements = (SparseInputElement *)palloc(Min(maxNnz, SPARSEVEC_MAX_NNZ) * sizeof(SparseInputElement)); + + pt = lit; + + while (SparsevecIsspace(*pt)) { + pt++; + } + + if (*pt != '{') + ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type sparsevec: \"%s\"", lit), + errdetail("Vector contents must start with \"{\"."))); + + pt++; + + while (SparsevecIsspace(*pt)) { + pt++; + } + + if (*pt == '}') { + pt++; + } else { + for (;;) { + long index; + float value; + + if (nnz == maxNnz) { + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("the current nnz value of %d ran out of buffer: \"%s\"", nnz, lit))); + } + while (SparsevecIsspace(*pt)) { + pt++; + } + + /* Check for empty string like float4in */ + if (*pt == '\0') + ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type sparsevec: \"%s\"", lit))); + + /* Use similar logic as int2vectorin */ + index = strtol(pt, &stringEnd, 10); + + if (stringEnd == pt) + ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type sparsevec: \"%s\"", lit))); + + /* Keep in int range for correct error message later */ + 
if (index > INT_MAX) { + index = INT_MAX; + } else if (index < INT_MIN + 1) { + index = INT_MIN + 1; + } + + pt = stringEnd; + + while (SparsevecIsspace(*pt)) { + pt++; + } + + if (*pt != ':') + ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type sparsevec: \"%s\"", lit))); + + pt++; + + while (SparsevecIsspace(*pt)) { + pt++; + } + + errno = 0; + + /* Use strtof like float4in to avoid a double-rounding problem */ + /* Postgres sets LC_NUMERIC to C on startup */ + value = strtof(pt, &stringEnd); + + if (stringEnd == pt) + ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type sparsevec: \"%s\"", lit))); + + /* Check for range error like float4in */ + if (errno == ERANGE && (value == 0 || isinf(value))) + ereport(ERROR, (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("\"%s\" is out of range for type sparsevec", pnstrdup(pt, stringEnd - pt)))); + + CheckElement(value); + + /* Do not store zero values */ + if (value != 0) { + /* Convert 1-based numbering (SQL) to 0-based (C) */ + elements[nnz].index = index - 1; + elements[nnz].value = value; + nnz++; + } + + pt = stringEnd; + + while (SparsevecIsspace(*pt)) { + pt++; + } + + if (*pt == ',') { + pt++; + } else if (*pt == '}') { + pt++; + break; + } else { + ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type sparsevec: \"%s\"", lit))); + } + } + } + + while (SparsevecIsspace(*pt)) { + pt++; + } + + if (*pt != '/') { + ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type sparsevec: \"%s\"", lit), + errdetail("Unexpected end of input."))); + } + + pt++; + + while (SparsevecIsspace(*pt)) { + pt++; + } + + /* Use similar logic as int2vectorin */ + dim = strtol(pt, &stringEnd, 10); + + if (stringEnd == pt) + ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type sparsevec: 
\"%s\"", lit))); + + /* Keep in int range for correct error message later */ + if (dim > INT_MAX) { + dim = INT_MAX; + } else if (dim < INT_MIN) { + dim = INT_MIN; + } + + pt = stringEnd; + + /* Only whitespace is allowed after the closing brace */ + while (SparsevecIsspace(*pt)) { + pt++; + } + + if (*pt != '\0') + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type sparsevec: \"%s\"", lit), errdetail("Junk after closing."))); + + CheckDim(dim); + CheckNnz(nnz, dim); + CheckExpectedDim(typmod, dim); + + qsort(elements, nnz, sizeof(SparseInputElement), CompareIndices); + + result = InitSparseVector(dim, nnz); + rvalues = SPARSEVEC_VALUES(result); + for (int i = 0; i < nnz; i++) { + result->indices[i] = elements[i].index; + rvalues[i] = elements[i].value; + + CheckIndex(result->indices, i, dim); + } + + PG_RETURN_POINTER(result); +} + +#define AppendChar(ptr, c) (*(ptr)++ = (c)) +#define AppendFloat(ptr, f) ((ptr) += FloatToShortestDecimalBufn((f), (ptr))) + +#define AppendInt(ptr, i) \ + do { \ + pg_ltoa(i, ptr); \ + while (*ptr != '\0') \ + ptr++; \ + } while (0) + +/* + * Convert internal representation to textual representation + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_out); +Datum sparsevec_out(PG_FUNCTION_ARGS) +{ + SparseVector *sparsevec = PG_GETARG_SPARSEVEC_P(0); + float *values = SPARSEVEC_VALUES(sparsevec); + char *buf; + char *ptr; + + /* + * Need: + * + * nnz * 10 bytes for index (positive integer) + * + * nnz bytes for : + * + * nnz * (FLOAT_SHORTEST_DECIMAL_LEN - 1) bytes for + * FloatToShortestDecimalBufn + * + * nnz - 1 bytes for , + * + * 10 bytes for dimensions + * + * 4 bytes for {, }, /, and \0 + */ + buf = (char *)palloc((11 + FLOAT_SHORTEST_DECIMAL_LEN) * sparsevec->nnz + 13); + ptr = buf; + + AppendChar(ptr, '{'); + + for (int i = 0; i < sparsevec->nnz; i++) { + if (i > 0) + AppendChar(ptr, ','); + + /* Convert 0-based numbering (C) to 1-based (SQL) */ + AppendInt(ptr, 
sparsevec->indices[i] + 1); + AppendChar(ptr, ':'); + AppendFloat(ptr, values[i]); + } + + AppendChar(ptr, '}'); + AppendChar(ptr, '/'); + AppendInt(ptr, sparsevec->dim); + *ptr = '\0'; + + PG_FREE_IF_COPY(sparsevec, 0); + PG_RETURN_CSTRING(buf); +} + +/* + * Convert type modifier + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_typmod_in); +Datum sparsevec_typmod_in(PG_FUNCTION_ARGS) +{ + ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0); + int32 *tl; + int n; + + tl = ArrayGetIntegerTypmods(ta, &n); + + if (n != 1) + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid type modifier"))); + + if (*tl < 1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("dimensions for type sparsevec must be at least 1"))); + + if (*tl > SPARSEVEC_MAX_DIM) + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("dimensions for type sparsevec cannot exceed %d", SPARSEVEC_MAX_DIM))); + + PG_RETURN_INT32(*tl); +} + +/* + * Convert external binary representation to internal representation + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_recv); +Datum sparsevec_recv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo)PG_GETARG_POINTER(0); + int32 typmod = PG_GETARG_INT32(2); + SparseVector *result; + int32 dim; + int32 nnz; + int32 unused; + float *values; + + dim = pq_getmsgint(buf, sizeof(int32)); + nnz = pq_getmsgint(buf, sizeof(int32)); + unused = pq_getmsgint(buf, sizeof(int32)); + + CheckDim(dim); + CheckNnz(nnz, dim); + CheckExpectedDim(typmod, dim); + + if (unused != 0) + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("expected unused to be 0, not %d", unused))); + + result = InitSparseVector(dim, nnz); + values = SPARSEVEC_VALUES(result); + + /* Binary representation uses zero-based numbering for indices */ + for (int i = 0; i < nnz; i++) { + result->indices[i] = pq_getmsgint(buf, sizeof(int32)); + CheckIndex(result->indices, i, dim); + } + + for (int i = 0; i < nnz; i++) { + values[i] = pq_getmsgfloat4(buf); + 
CheckElement(values[i]); + + if (values[i] == 0) + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), + errmsg("binary representation of sparsevec cannot contain zero values"))); + } + + PG_RETURN_POINTER(result); +} + +/* + * Convert internal representation to the external binary representation + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_send); +Datum sparsevec_send(PG_FUNCTION_ARGS) +{ + SparseVector *svec = PG_GETARG_SPARSEVEC_P(0); + float *values = SPARSEVEC_VALUES(svec); + StringInfoData buf; + + pq_begintypsend(&buf); + pq_sendint(&buf, svec->dim, sizeof(int32)); + pq_sendint(&buf, svec->nnz, sizeof(int32)); + pq_sendint(&buf, svec->unused, sizeof(int32)); + + /* Binary representation uses zero-based numbering for indices */ + for (int i = 0; i < svec->nnz; i++) + pq_sendint(&buf, svec->indices[i], sizeof(int32)); + + for (int i = 0; i < svec->nnz; i++) + pq_sendfloat4(&buf, values[i]); + + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + +/* + * Convert sparse vector to sparse vector + * This is needed to check the type modifier + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec); +Datum sparsevec(PG_FUNCTION_ARGS) +{ + SparseVector *svec = PG_GETARG_SPARSEVEC_P(0); + int32 typmod = PG_GETARG_INT32(1); + + CheckExpectedDim(typmod, svec->dim); + + PG_RETURN_POINTER(svec); +} + +/* + * Convert dense vector to sparse vector + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_to_sparsevec); +Datum vector_to_sparsevec(PG_FUNCTION_ARGS) +{ + Vector *vec = PG_GETARG_VECTOR_P(0); + int32 typmod = PG_GETARG_INT32(1); + SparseVector *result; + int dim = vec->dim; + int nnz = 0; + float *values; + int j = 0; + + CheckDim(dim); + CheckExpectedDim(typmod, dim); + + for (int i = 0; i < dim; i++) { + if (vec->x[i] != 0) { + nnz++; + } + } + + result = InitSparseVector(dim, nnz); + values = SPARSEVEC_VALUES(result); + for (int i = 0; i < dim; i++) { + if (vec->x[i] != 0) { + /* Safety check */ + if (j >= result->nnz) + elog(ERROR, "safety check failed"); + + result->indices[j] = i; + 
/*
 * Get the L2 squared distance between sparse vectors
 *
 * Walks the stored entries of a and b together. An index present in both
 * contributes the squared difference of the two values; an index present in
 * only one contributes the square of that value. The sum is accumulated and
 * returned as a float.
 *
 * The scan of b resumes from bpos on every outer iteration and never moves
 * backwards, so it relies on both index arrays being in ascending order
 * (CheckIndex rejects anything else on the input paths in this file).
 * Callers are expected to have checked that a and b have equal dimensions.
 */
static float SparsevecL2SquaredDistance(SparseVector *a, SparseVector *b)
{
    float *ax = SPARSEVEC_VALUES(a);
    float *bx = SPARSEVEC_VALUES(b);
    float distance = 0.0;
    int bpos = 0; /* first entry of b not yet consumed */

    for (int i = 0; i < a->nnz; i++) {
        int ai = a->indices[i];
        int bi = -1; /* stays -1 when b has no entries left to examine */

        for (int j = bpos; j < b->nnz; j++) {
            bi = b->indices[j];

            if (ai == bi) {
                /* Index stored in both vectors */
                float diff = ax[i] - bx[j];

                distance += diff * diff;
            } else if (ai > bi)
                /* Index stored only in b (a is implicitly zero there) */
                distance += bx[j] * bx[j];

            /* Update start for next iteration: entry j of b is consumed */
            if (ai >= bi)
                bpos = j + 1;

            /* Found or passed it */
            if (bi >= ai)
                break;
        }

        /* No matching index in b, so a's value stands alone */
        if (ai != bi)
            distance += ax[i] * ax[i];
    }

    /* Entries of b beyond the last index of a */
    for (int j = bpos; j < b->nnz; j++)
        distance += bx[j] * bx[j];

    return distance;
}
PG_RETURN_FLOAT8(sqrt((double)SparsevecL2SquaredDistance(a, b))); +} + +/* + * Get the L2 squared distance between sparse vectors + * This saves a sqrt calculation + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_l2_squared_distance); +Datum sparsevec_l2_squared_distance(PG_FUNCTION_ARGS) +{ + SparseVector *a = PG_GETARG_SPARSEVEC_P(0); + SparseVector *b = PG_GETARG_SPARSEVEC_P(1); + + CheckDims(a, b); + + PG_RETURN_FLOAT8((double)SparsevecL2SquaredDistance(a, b)); +} + +/* + * Get the inner product of two sparse vectors + */ +static float SparsevecInnerProduct(SparseVector *a, SparseVector *b) +{ + float *ax = SPARSEVEC_VALUES(a); + float *bx = SPARSEVEC_VALUES(b); + float distance = 0.0; + int bpos = 0; + + for (int i = 0; i < a->nnz; i++) { + int ai = a->indices[i]; + + for (int j = bpos; j < b->nnz; j++) { + int bi = b->indices[j]; + + /* Only update when the same index */ + if (ai == bi) + distance += ax[i] * bx[j]; + + /* Update start for next iteration */ + if (ai >= bi) + bpos = j + 1; + + /* Found or passed it */ + if (bi >= ai) + break; + } + } + + return distance; +} + +/* + * Get the inner product of two sparse vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_inner_product); +Datum sparsevec_inner_product(PG_FUNCTION_ARGS) +{ + SparseVector *a = PG_GETARG_SPARSEVEC_P(0); + SparseVector *b = PG_GETARG_SPARSEVEC_P(1); + + CheckDims(a, b); + + PG_RETURN_FLOAT8((double)SparsevecInnerProduct(a, b)); +} + +/* + * Get the negative inner product of two sparse vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_negative_inner_product); +Datum sparsevec_negative_inner_product(PG_FUNCTION_ARGS) +{ + SparseVector *a = PG_GETARG_SPARSEVEC_P(0); + SparseVector *b = PG_GETARG_SPARSEVEC_P(1); + + CheckDims(a, b); + + PG_RETURN_FLOAT8((double)-SparsevecInnerProduct(a, b)); +} + +/* + * Get the cosine distance between two sparse vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_cosine_distance); +Datum sparsevec_cosine_distance(PG_FUNCTION_ARGS) +{ + 
SparseVector *a = PG_GETARG_SPARSEVEC_P(0); + SparseVector *b = PG_GETARG_SPARSEVEC_P(1); + float *ax = SPARSEVEC_VALUES(a); + float *bx = SPARSEVEC_VALUES(b); + float norma = 0.0; + float normb = 0.0; + double similarity; + + CheckDims(a, b); + + similarity = SparsevecInnerProduct(a, b); + + /* Auto-vectorized */ + for (int i = 0; i < a->nnz; i++) + norma += ax[i] * ax[i]; + + /* Auto-vectorized */ + for (int i = 0; i < b->nnz; i++) + normb += bx[i] * bx[i]; + + /* Use sqrt(a * b) over sqrt(a) * sqrt(b); widen to double before multiplying */ + similarity /= sqrt(static_cast<double>(norma) * static_cast<double>(normb)); +#ifdef _MSC_VER + /* /fp:fast may not propagate NaN */ + if (isnan(similarity)) + PG_RETURN_FLOAT8(NAN); +#endif + + /* Keep in range */ + if (similarity > 1) { + similarity = 1.0; + } else if (similarity < -1) { + similarity = -1.0; + } + + PG_RETURN_FLOAT8(1.0 - similarity); +} + +/* + * Get the L1 distance between two sparse vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_l1_distance); +Datum sparsevec_l1_distance(PG_FUNCTION_ARGS) +{ + SparseVector *a = PG_GETARG_SPARSEVEC_P(0); + SparseVector *b = PG_GETARG_SPARSEVEC_P(1); + float *ax = SPARSEVEC_VALUES(a); + float *bx = SPARSEVEC_VALUES(b); + float distance = 0.0; + int bpos = 0; + + CheckDims(a, b); + + for (int i = 0; i < a->nnz; i++) { + int ai = a->indices[i]; + int bi = -1; + + for (int j = bpos; j < b->nnz; j++) { + bi = b->indices[j]; + + if (ai == bi) + distance += fabsf(ax[i] - bx[j]); + else if (ai > bi) + distance += fabsf(bx[j]); + + /* Update start for next iteration */ + if (ai >= bi) + bpos = j + 1; + + /* Found or passed it */ + if (bi >= ai) + break; + } + + if (ai != bi) + distance += fabsf(ax[i]); + } + + for (int j = bpos; j < b->nnz; j++) + distance += fabsf(bx[j]); + + PG_RETURN_FLOAT8(static_cast<double>(distance)); +} + +/* + * Get the L2 norm of a sparse vector + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_l2_norm); +Datum sparsevec_l2_norm(PG_FUNCTION_ARGS) +{ + SparseVector *a = PG_GETARG_SPARSEVEC_P(0); + float 
*ax = SPARSEVEC_VALUES(a); + double norm = 0.0; + + /* Auto-vectorized */ + for (int i = 0; i < a->nnz; i++) + norm += (double)ax[i] * (double)ax[i]; + + PG_RETURN_FLOAT8(sqrt(norm)); +} + +static pg_noinline void float_overflow_error(void) +{ + ereport(ERROR, (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), errmsg("value out of range: overflow"))); +} + +/* + * Normalize a sparse vector with the L2 norm + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_l2_normalize); +Datum sparsevec_l2_normalize(PG_FUNCTION_ARGS) +{ + SparseVector *a = PG_GETARG_SPARSEVEC_P(0); + float *ax = SPARSEVEC_VALUES(a); + double norm = 0; + SparseVector *result; + float *rx; + + result = InitSparseVector(a->dim, a->nnz); + rx = SPARSEVEC_VALUES(result); + + /* Auto-vectorized */ + for (int i = 0; i < a->nnz; i++) + norm += (double)ax[i] * (double)ax[i]; + + norm = sqrt(norm); + /* Return zero vector for zero norm */ + if (norm > 0) { + int zeros = 0; + + for (int i = 0; i < a->nnz; i++) { + result->indices[i] = a->indices[i]; + rx[i] = ax[i] / norm; + + if (isinf(rx[i])) + float_overflow_error(); + + if (rx[i] == 0) + zeros++; + } + + /* Allocate a new vector in the unlikely event there are zeros */ + if (zeros > 0) { + SparseVector *newResult = InitSparseVector(result->dim, result->nnz - zeros); + float *nx = SPARSEVEC_VALUES(newResult); + int j = 0; + + for (int i = 0; i < result->nnz; i++) { + if (rx[i] == 0) + continue; + + /* Safety check */ + if (j >= newResult->nnz) + elog(ERROR, "safety check failed"); + + newResult->indices[j] = result->indices[i]; + nx[j] = rx[i]; + j++; + } + + pfree(result); + + PG_RETURN_POINTER(newResult); + } + } + + PG_RETURN_POINTER(result); +} + +/* + * Internal helper to compare sparse vectors + */ +static int sparsevec_cmp_internal(SparseVector *a, SparseVector *b) +{ + float *ax = SPARSEVEC_VALUES(a); + float *bx = SPARSEVEC_VALUES(b); + int nnz = Min(a->nnz, b->nnz); + + /* Check values before dimensions to be consistent with Postgres arrays */ + for 
(int i = 0; i < nnz; i++) { + if (a->indices[i] < b->indices[i]) { + return ax[i] < 0 ? -1 : 1; + } + + if (a->indices[i] > b->indices[i]) { + return bx[i] < 0 ? 1 : -1; + } + + if (ax[i] < bx[i]) { + return -1; + } + + if (ax[i] > bx[i]) { + return 1; + } + } + + if (a->nnz < b->nnz && b->indices[nnz] < a->dim) { + return bx[nnz] < 0 ? 1 : -1; + } + + if (a->nnz > b->nnz && a->indices[nnz] < b->dim) { + return ax[nnz] < 0 ? -1 : 1; + } + + if (a->dim < b->dim) { + return -1; + } + + if (a->dim > b->dim) { + return 1; + } + + return 0; +} + +/* + * Less than + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_lt); +Datum sparsevec_lt(PG_FUNCTION_ARGS) +{ + SparseVector *a = PG_GETARG_SPARSEVEC_P(0); + SparseVector *b = PG_GETARG_SPARSEVEC_P(1); + + PG_RETURN_BOOL(sparsevec_cmp_internal(a, b) < 0); +} + +/* + * Less than or equal + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_le); +Datum sparsevec_le(PG_FUNCTION_ARGS) +{ + SparseVector *a = PG_GETARG_SPARSEVEC_P(0); + SparseVector *b = PG_GETARG_SPARSEVEC_P(1); + + PG_RETURN_BOOL(sparsevec_cmp_internal(a, b) <= 0); +} + +/* + * Equal + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_eq); +Datum sparsevec_eq(PG_FUNCTION_ARGS) +{ + SparseVector *a = PG_GETARG_SPARSEVEC_P(0); + SparseVector *b = PG_GETARG_SPARSEVEC_P(1); + + PG_RETURN_BOOL(sparsevec_cmp_internal(a, b) == 0); +} + +/* + * Not equal + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_ne); +Datum sparsevec_ne(PG_FUNCTION_ARGS) +{ + SparseVector *a = PG_GETARG_SPARSEVEC_P(0); + SparseVector *b = PG_GETARG_SPARSEVEC_P(1); + + PG_RETURN_BOOL(sparsevec_cmp_internal(a, b) != 0); +} + +/* + * Greater than or equal + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_ge); +Datum sparsevec_ge(PG_FUNCTION_ARGS) +{ + SparseVector *a = PG_GETARG_SPARSEVEC_P(0); + SparseVector *b = PG_GETARG_SPARSEVEC_P(1); + + PG_RETURN_BOOL(sparsevec_cmp_internal(a, b) >= 0); +} + +/* + * Greater than + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_gt); +Datum sparsevec_gt(PG_FUNCTION_ARGS) +{ + 
SparseVector *a = PG_GETARG_SPARSEVEC_P(0); + SparseVector *b = PG_GETARG_SPARSEVEC_P(1); + + PG_RETURN_BOOL(sparsevec_cmp_internal(a, b) > 0); +} + +/* + * Compare sparse vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_cmp); +Datum sparsevec_cmp(PG_FUNCTION_ARGS) +{ + SparseVector *a = PG_GETARG_SPARSEVEC_P(0); + SparseVector *b = PG_GETARG_SPARSEVEC_P(1); + + PG_RETURN_INT32(sparsevec_cmp_internal(a, b)); +} diff --git a/src/common/backend/utils/adt/vector.cpp b/src/common/backend/utils/adt/vector.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a65160f2078f87024f452c77090262cae138dfd5 --- /dev/null +++ b/src/common/backend/utils/adt/vector.cpp @@ -0,0 +1,1823 @@ +/* + * Copyright (c) 2024 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * ------------------------------------------------------------------------- + * + * vector.cpp + * + * IDENTIFICATION + * src/common/backend/utils/adt/vector.cpp + * + * ------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include + +#ifdef __aarch64__ +#include +#else +#include +#endif + +#include "access/datavec/bitvec.h" +#include "catalog/pg_type.h" +#include "fmgr.h" +#include "access/datavec/halfutils.h" +#include "access/datavec/halfvec.h" +#include "access/datavec/hnsw.h" +#include "access/datavec/ivfflat.h" +#include "lib/stringinfo.h" +#include "libpq/pqformat.h" +#include "port.h" /* for strtof() */ +#include "access/datavec/shortest_dec.h" +#include "access/datavec/sparsevec.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/numeric.h" +#include "commands/extension.h" +#include "knl/knl_session.h" +#include "access/datavec/vector.h" + +#define TYPALIGN_DOUBLE 'd' +#define TYPALIGN_INT 'i' + +#define STATE_DIMS(x) (ARR_DIMS(x)[0] - 1) +#define CreateStateDatums(dim) palloc(sizeof(Datum) * ((dim) + 1)) + +#define MarkGUCPrefixReserved(x) EmitWarningsOnPlaceholders(x) + +/* + * Ensure same dimensions + */ +static inline void CheckDims(Vector *a, Vector *b) +{ + if (a->dim != b->dim) + ereport(ERROR, + (errcode(ERRCODE_DATA_EXCEPTION), errmsg("different vector dimensions %d and %d", a->dim, b->dim))); +} + +/* + * Ensure expected dimensions + */ +static inline void CheckExpectedDim(int32 typmod, int dim) +{ + if (typmod != -1 && typmod != dim) + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("expected %d dimensions, not %d", typmod, dim))); +} + +/* + * Ensure valid dimensions + */ +static inline void CheckDim(int dim) +{ + if (dim < 1) + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("vector must have at least 1 dimension"))); + + if (dim > VECTOR_MAX_DIM) + ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("vector 
cannot have more than %d dimensions", VECTOR_MAX_DIM))); +} + +/* + * Ensure finite element + */ +static inline void CheckElement(float value) +{ + if (isnan(value)) + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("NaN not allowed in vector"))); + + if (isinf(value)) + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("infinite value not allowed in vector"))); +} + +/* + * Allocate and initialize a new vector + */ +Vector *InitVector(int dim) +{ + Vector *result; + int size; + + size = VECTOR_SIZE(dim); + result = (Vector *)palloc0(size); + SET_VARSIZE(result, size); + result->dim = dim; + + return result; +} + +/* + * Check for whitespace, since array_isspace() is static + */ +static inline bool VectorIsspace(char ch) +{ + if (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' || ch == '\v' || ch == '\f') { + return true; + } + return false; +} + +/* + * Check state array + */ +static float8 *CheckStateArray(ArrayType *statearray, const char *caller) +{ + if (ARR_NDIM(statearray) != 1 || ARR_DIMS(statearray)[0] < 1 || ARR_HASNULL(statearray)) + elog(ERROR, "%s: expected state array", caller); + return (float8 *)ARR_DATA_PTR(statearray); +} + +static pg_noinline void float_overflow_error(void) +{ + ereport(ERROR, (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), errmsg("value out of range: overflow"))); +} + +static pg_noinline void float_underflow_error(void) +{ + ereport(ERROR, (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), errmsg("value out of range: underflow"))); +} + +/* + * Convert textual representation to internal representation + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_in); +Datum vector_in(PG_FUNCTION_ARGS) +{ + char *lit = PG_GETARG_CSTRING(0); + int32 typmod = PG_GETARG_INT32(2); + float x[VECTOR_MAX_DIM]; + int dim = 0; + char *pt = lit; + Vector *result; + + while (VectorIsspace(*pt)) { + pt++; + } + + if (*pt != '[') + ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type vector: 
\"%s\"", lit), + errdetail("Vector contents must start with \"[\"."))); + + pt++; + + while (VectorIsspace(*pt)) { + pt++; + } + + if (*pt == ']') { + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("vector must have at least 1 dimension"))); + } + + for (;;) { + float val; + char *stringEnd; + + if (dim == VECTOR_MAX_DIM) + ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("vector cannot have more than %d dimensions", VECTOR_MAX_DIM))); + + while (VectorIsspace(*pt)) { + pt++; + } + + /* Check for empty string like float4in */ + if (*pt == '\0') + ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type vector: \"%s\"", lit))); + + errno = 0; + + /* Use strtof like float4in to avoid a double-rounding problem */ + /* Postgres sets LC_NUMERIC to C on startup */ + val = strtof(pt, &stringEnd); + + if (stringEnd == pt) { + ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type vector: \"%s\"", lit))); + } + + /* Check for range error like float4in */ + if (errno == ERANGE && isinf(val)) + ereport(ERROR, (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("\"%s\" is out of range for type vector", pnstrdup(pt, stringEnd - pt)))); + + CheckElement(val); + x[dim++] = val; + + pt = stringEnd; + + while (VectorIsspace(*pt)) { + pt++; + } + + if (*pt == ',') { + pt++; + } else if (*pt == ']') { + pt++; + break; + } else { + ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type vector: \"%s\"", lit))); + } + } + + /* Only whitespace is allowed after the closing brace */ + while (VectorIsspace(*pt)) { + pt++; + } + + if (*pt != '\0') + ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type vector: \"%s\"", lit), + errdetail("Junk after closing right brace."))); + + CheckDim(dim); + CheckExpectedDim(typmod, dim); + + result = InitVector(dim); + for (int i = 0; i < 
dim; i++) { + result->x[i] = x[i]; + } + + PG_RETURN_POINTER(result); +} + +#define AppendChar(ptr, c) (*(ptr)++ = (c)) +#define AppendFloat(ptr, f) ((ptr) += FloatToShortestDecimalBufn((f), (ptr))) + +/* + * Convert internal representation to textual representation + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_out); +Datum vector_out(PG_FUNCTION_ARGS) +{ + Vector *vector = PG_GETARG_VECTOR_P(0); + int dim = vector->dim; + char *buf; + char *ptr; + + /* + * Need: + * + * dim * (FLOAT_SHORTEST_DECIMAL_LEN - 1) bytes for + * FloatToShortestDecimalBufn + * + * dim - 1 bytes for separator + * + * 3 bytes for [, ], and \0 + */ + buf = (char *)palloc(FLOAT_SHORTEST_DECIMAL_LEN * dim + 2); + ptr = buf; + + AppendChar(ptr, '['); + + for (int i = 0; i < dim; i++) { + if (i > 0) { + AppendChar(ptr, ','); + } + + AppendFloat(ptr, vector->x[i]); + } + + AppendChar(ptr, ']'); + *ptr = '\0'; + + PG_FREE_IF_COPY(vector, 0); + PG_RETURN_CSTRING(buf); +} + +/* + * Print vector - useful for debugging + */ +void PrintVector(char *msg, Vector *vector) +{ + char *out = DatumGetPointer(DirectFunctionCall1(vector_out, PointerGetDatum(vector))); + + elog(INFO, "%s = %s", msg, out); + pfree(out); +} + +/* + * Convert type modifier + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_typmod_in); +Datum vector_typmod_in(PG_FUNCTION_ARGS) +{ + ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0); + int32 *tl; + int n; + + tl = ArrayGetIntegerTypmods(ta, &n); + + if (n != 1) { + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid type modifier"))); + } + + if (*tl < 1) { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("dimensions for type vector must be at least 1"))); + } + + if (*tl > VECTOR_MAX_DIM) { + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("dimensions for type vector cannot exceed %d", VECTOR_MAX_DIM))); + } + + PG_RETURN_INT32(*tl); +} + +/* + * Convert external binary representation to internal representation + */ +PGDLLEXPORT 
PG_FUNCTION_INFO_V1(vector_recv); +Datum vector_recv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo)PG_GETARG_POINTER(0); + int32 typmod = PG_GETARG_INT32(2); + Vector *result; + int16 dim; + int16 unused; + + dim = pq_getmsgint(buf, sizeof(int16)); + unused = pq_getmsgint(buf, sizeof(int16)); + + CheckDim(dim); + CheckExpectedDim(typmod, dim); + + if (unused != 0) { + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("expected unused to be 0, not %d", unused))); + } + + result = InitVector(dim); + for (int i = 0; i < dim; i++) { + result->x[i] = pq_getmsgfloat4(buf); + CheckElement(result->x[i]); + } + + PG_RETURN_POINTER(result); +} + +/* + * Convert internal representation to the external binary representation + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_send); +Datum vector_send(PG_FUNCTION_ARGS) +{ + Vector *vec = PG_GETARG_VECTOR_P(0); + StringInfoData buf; + + pq_begintypsend(&buf); + pq_sendint(&buf, vec->dim, sizeof(int16)); + pq_sendint(&buf, vec->unused, sizeof(int16)); + for (int i = 0; i < vec->dim; i++) + pq_sendfloat4(&buf, vec->x[i]); + + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + +/* + * Convert vector to vector + * This is needed to check the type modifier + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector); +Datum vector(PG_FUNCTION_ARGS) +{ + Vector *vec = PG_GETARG_VECTOR_P(0); + int32 typmod = PG_GETARG_INT32(1); + + CheckExpectedDim(typmod, vec->dim); + + PG_RETURN_POINTER(vec); +} + +/* + * Convert array to vector + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(array_to_vector); +Datum array_to_vector(PG_FUNCTION_ARGS) +{ + ArrayType *array = PG_GETARG_ARRAYTYPE_P(0); + int32 typmod = PG_GETARG_INT32(1); + Vector *result; + int16 typlen; + bool typbyval; + char typalign; + Datum *elemsp; + int nelemsp; + + if (ARR_NDIM(array) > 1) { + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("array must be 1-D"))); + } + + if (ARR_HASNULL(array) && array_contains_nulls(array)) { + ereport(ERROR, (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), 
errmsg("array must not contain nulls"))); + } + + get_typlenbyvalalign(ARR_ELEMTYPE(array), &typlen, &typbyval, &typalign); + deconstruct_array(array, ARR_ELEMTYPE(array), typlen, typbyval, typalign, &elemsp, NULL, &nelemsp); + + CheckDim(nelemsp); + CheckExpectedDim(typmod, nelemsp); + + result = InitVector(nelemsp); + + if (ARR_ELEMTYPE(array) == INT4OID) { + for (int i = 0; i < nelemsp; i++) + result->x[i] = DatumGetInt32(elemsp[i]); + } else if (ARR_ELEMTYPE(array) == FLOAT8OID) { + for (int i = 0; i < nelemsp; i++) + result->x[i] = DatumGetFloat8(elemsp[i]); + } else if (ARR_ELEMTYPE(array) == FLOAT4OID) { + for (int i = 0; i < nelemsp; i++) + result->x[i] = DatumGetFloat4(elemsp[i]); + } else if (ARR_ELEMTYPE(array) == NUMERICOID) { + for (int i = 0; i < nelemsp; i++) + result->x[i] = DatumGetFloat4(DirectFunctionCall1(numeric_float4, elemsp[i])); + } else { + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("unsupported array type"))); + } + + /* + * Free allocation from deconstruct_array. Do not free individual elements + * when pass-by-reference since they point to original array. 
+ */ + pfree(elemsp); + + /* Check elements */ + for (int i = 0; i < result->dim; i++) { + CheckElement(result->x[i]); + } + + PG_RETURN_POINTER(result); +} + +/* + * Convert vector to float4[] + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_to_float4); +Datum vector_to_float4(PG_FUNCTION_ARGS) +{ + Vector *vec = PG_GETARG_VECTOR_P(0); + Datum *datums; + ArrayType *result; + + datums = (Datum *)palloc(sizeof(Datum) * vec->dim); + + for (int i = 0; i < vec->dim; i++) { + datums[i] = Float4GetDatum(vec->x[i]); + } + + /* Use TYPALIGN_INT for float4 */ + result = construct_array(datums, vec->dim, FLOAT4OID, sizeof(float4), true, TYPALIGN_INT); + + pfree(datums); + + PG_RETURN_POINTER(result); +} + +/* + * Convert vector to int4[] + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_to_int4); +Datum vector_to_int4(PG_FUNCTION_ARGS) +{ + Vector *vec = PG_GETARG_VECTOR_P(0); + Datum *datums; + ArrayType *result; + + datums = (Datum *)palloc(sizeof(Datum) * vec->dim); + + for (int i = 0; i < vec->dim; i++) { + datums[i] = DirectFunctionCall1(ftoi4, Float4GetDatum(vec->x[i])); + } + + /* Use TYPALIGN_INT for int4 */ + result = construct_array(datums, vec->dim, INT4OID, sizeof(int4), true, TYPALIGN_INT); + + pfree(datums); + + PG_RETURN_POINTER(result); +} + +/* + * Convert vector to float8[] + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_to_float8); +Datum vector_to_float8(PG_FUNCTION_ARGS) +{ + Vector *vec = PG_GETARG_VECTOR_P(0); + Datum *datums; + ArrayType *result; + + datums = (Datum *)palloc(sizeof(Datum) * vec->dim); + + for (int i = 0; i < vec->dim; i++) { + datums[i] = Float8GetDatum(vec->x[i]); + } + + /* Use TYPALIGN_DOUBLE for float8 */ + result = construct_array(datums, vec->dim, FLOAT8OID, sizeof(float8), FLOAT8PASSBYVAL, TYPALIGN_DOUBLE); + + pfree(datums); + + PG_RETURN_POINTER(result); +} + +/* + * Convert vector to numeric[] + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_to_numeric); +Datum vector_to_numeric(PG_FUNCTION_ARGS) +{ + Vector *vec = PG_GETARG_VECTOR_P(0); + 
Datum *datums; + ArrayType *result; + + datums = (Datum *)palloc(sizeof(Datum) * vec->dim); + + for (int i = 0; i < vec->dim; i++) { + Datum numericVal; + Numeric typmod_numericVal; + numericVal = DirectFunctionCall1(float4_numeric, Float4GetDatum(vec->x[i])); + datums[i] = NumericGetDatum(numericVal); + } + + /* Use TYPALIGN_INT for numeric */ + result = construct_array(datums, vec->dim, NUMERICOID, -1, false, TYPALIGN_INT); + + pfree(datums); + + PG_RETURN_POINTER(result); +} + +/* + * Convert vector to text[] + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_to_text); +Datum vector_to_text(PG_FUNCTION_ARGS) +{ + Vector *vec = PG_GETARG_VECTOR_P(0); + Datum *datums; + ArrayType *result; + char* tmp = nullptr; + + datums = (Datum *)palloc(sizeof(Datum) * vec->dim); + + for (int i = 0; i < vec->dim; i++) { + tmp = DatumGetCString(DirectFunctionCall1(float4out, Float4GetDatum(vec->x[i]))); + datums[i] = DirectFunctionCall1(textin, CStringGetDatum(tmp)); + pfree_ext(tmp); + } + + /* Use TYPALIGN_INT for text */ + result = construct_array(datums, vec->dim, TEXTOID, -1, false, TYPALIGN_INT); + + pfree(datums); + + PG_RETURN_POINTER(result); +} + +/* + * Convert vector to varchar[] + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_to_varchar); +Datum vector_to_varchar(PG_FUNCTION_ARGS) +{ + Vector *vec = PG_GETARG_VECTOR_P(0); + Datum *datums; + ArrayType *result; + char* tmp = nullptr; + + datums = (Datum *)palloc(sizeof(Datum) * vec->dim); + + for (int i = 0; i < vec->dim; i++) { + tmp = DatumGetCString(DirectFunctionCall1(float4out, Float4GetDatum(vec->x[i]))); + datums[i] = DirectFunctionCall3(varcharin, CStringGetDatum(tmp), ObjectIdGetDatum(0), Int32GetDatum(-1)); + pfree_ext(tmp); + } + + /* Use TYPALIGN_INT for varchar */ + result = construct_array(datums, vec->dim, VARCHAROID, -1, false, TYPALIGN_INT); + + pfree(datums); + + PG_RETURN_POINTER(result); +} + +/* + * Convert half vector to vector + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_to_vector); +Datum 
halfvec_to_vector(PG_FUNCTION_ARGS) +{ + HalfVector *vec = PG_GETARG_HALFVEC_P(0); + int32 typmod = PG_GETARG_INT32(1); + Vector *result; + + CheckDim(vec->dim); + CheckExpectedDim(typmod, vec->dim); + + result = InitVector(vec->dim); + + for (int i = 0; i < vec->dim; i++) { + result->x[i] = HalfToFloat4(vec->x[i]); + } + + PG_RETURN_POINTER(result); +} + +inline void prefetch_L1(const void *address) +{ +#if defined(__SSE2__) + _mm_prefetch((const char*)address, _MM_HINT_T0); +#elif defined(__aarch64__) + asm volatile("prfm PLDL1KEEP, [%0]" : : "r"(address)); +#else + __builtin_prefetch(address, 0, 3); // read prefetch, locality 3 (keep in all cache levels) +#endif +} + +#ifdef __aarch64__ +static float L2SquaredDistanceRef(int dim, float *ax, float *bx) +{ + float distance = 0.0f; + + for (int i = 0; i < dim; i++) { + float diff = ax[i] - bx[i]; + distance += diff * diff; + } + + return distance; +} + +VECTOR_TARGET_CLONES float +VectorL2SquaredDistance(int dim, float *ax, float *bx) +{ + // 128 bit register = float 32*4 + float32x4_t r1 = vdupq_n_f32(0.0); + float32x4_t r2 = vdupq_n_f32(0.0); + float32x4_t r3 = vdupq_n_f32(0.0); + float32x4_t r4 = vdupq_n_f32(0.0); + int i = 0; + float* pta = ax; + float* ptb = bx; + int batch1 = 16; + int batch2 = 4; + int rest = batch2 - 1; + for (; i + batch1 <= dim; i += batch1, pta += batch1, ptb += batch1) { + float32x4x4_t packdata_a = vld1q_f32_x4(pta); + float32x4x4_t packdata_b = vld1q_f32_x4(ptb); + + float32x4_t diff0 = vsubq_f32(packdata_a.val[0], packdata_b.val[0]); + float32x4_t diff1 = vsubq_f32(packdata_a.val[1], packdata_b.val[1]); + float32x4_t diff2 = vsubq_f32(packdata_a.val[2], packdata_b.val[2]); + float32x4_t diff3 = vsubq_f32(packdata_a.val[3], packdata_b.val[3]); + + r1 = vfmaq_f32(r1, diff0, diff0); + r2 = vfmaq_f32(r2, diff1, diff1); + r3 = vfmaq_f32(r3, diff2, diff2); + r4 = vfmaq_f32(r4, diff3, diff3); + } + + for (; i + batch2 <= dim; i += batch2, pta += batch2, ptb += batch2) { + float32x4_t data_a = vld1q_f32(pta); + float32x4_t data_b = 
vld1q_f32(ptb); + float32x4_t diff = vsubq_f32(data_a, data_b); + r1 = vfmaq_f32(r1, diff, diff); + } + + r1 = vpaddq_f32(r1, r2); + r2 = vpaddq_f32(r3, r4); + r1 = vpaddq_f32(r1, r2); + + float distance = vaddvq_f32(r1); + if (dim & rest) { + distance += L2SquaredDistanceRef(dim - i, ax + i, bx + i); + } + return distance; +} +#elif defined(__x86_64__) +static inline __m128 masked_read(int d, const float *x) +{ + __attribute__((__aligned__(16))) float buf[4]; + + Assert(0 <= d && d < 4); /* reads 0 <= d < 4 floats as __m128 */ + memset((void*)buf, 0, sizeof(buf)); + switch (d) { + case 3: + buf[2] = x[2]; + case 2: + buf[1] = x[1]; + case 1: + buf[0] = x[0]; + default: + break; + } + return _mm_load_ps(buf); +} +VECTOR_TARGET_CLONES float +VectorL2SquaredDistance(int dim, float *ax, float *bx) +{ + float* x = (float*)ax; + float* y = (float*)bx; + size_t d = (size_t)dim; + int batch_num1 = 8; + int batch_num2 = 4; + __m256 msum1 = _mm256_setzero_ps(); + + while (d >= batch_num1) { + __m256 mx = _mm256_loadu_ps(x); + x += batch_num1; + __m256 my = _mm256_loadu_ps(y); + y += batch_num1; + const __m256 a_m_b1 = mx - my; + msum1 += a_m_b1 * a_m_b1; + d -= batch_num1; + } + + __m128 msum2 = _mm256_extractf128_ps(msum1, 1); + msum2 += _mm256_extractf128_ps(msum1, 0); + + if (d >= batch_num2) { + __m128 mx = _mm_loadu_ps(x); + x += batch_num2; + __m128 my = _mm_loadu_ps(y); + y += batch_num2; + const __m128 a_m_b1 = mx - my; + msum2 += a_m_b1 * a_m_b1; + d -= batch_num2; + } + + if (d > 0) { + __m128 mx = masked_read(d, x); + __m128 my = masked_read(d, y); + __m128 a_m_b1 = mx - my; + msum2 += a_m_b1 * a_m_b1; + } + + msum2 = _mm_hadd_ps(msum2, msum2); + msum2 = _mm_hadd_ps(msum2, msum2); + + return _mm_cvtss_f32(msum2); +} +#else + +VECTOR_TARGET_CLONES float VectorL2SquaredDistance(int dim, float *ax, float *bx) +{ + float distance = 0.0; + + /* Auto-vectorized */ + for (int i = 0; i < dim; i++) { + float diff = ax[i] - bx[i]; + + distance += diff * diff; + } + + 
return distance; +} + +#endif + +/* + * Get the L2 distance between vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(l2_distance); +Datum l2_distance(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + Vector *b = PG_GETARG_VECTOR_P(1); + + CheckDims(a, b); + + PG_RETURN_FLOAT8(sqrt((double)VectorL2SquaredDistance(a->dim, a->x, b->x))); +} + +/* + * Get the L2 squared distance between vectors + * This saves a sqrt calculation + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_l2_squared_distance); +Datum vector_l2_squared_distance(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + Vector *b = PG_GETARG_VECTOR_P(1); + + CheckDims(a, b); + + PG_RETURN_FLOAT8((double)VectorL2SquaredDistance(a->dim, a->x, b->x)); +} + +#ifdef __aarch64__ +VECTOR_TARGET_CLONES float +VectorInnerProduct(int dim, float *ax, float *bx) +{ + float dis = 0.0f; + float32x4_t sum = vdupq_n_f32(0.0f); + float *pta = ax; + float *ptb = bx; + + int i = 0; + int prefetch_len = 8; + int batch_num = 4; + for (; i + batch_num <= dim; i += batch_num) { + prefetch_L1(pta + prefetch_len); + prefetch_L1(ptb + prefetch_len); + float32x4_t sub_a = vld1q_f32(pta); + float32x4_t sub_b = vld1q_f32(ptb); + sum = vmlaq_f32(sum, sub_a, sub_b); + pta += batch_num; + ptb += batch_num; + } + + dis = vaddvq_f32(sum); + for (; i < dim; ++i) { + dis += ax[i] * bx[i]; + } + return dis; +} +#else + +VECTOR_TARGET_CLONES float VectorInnerProduct(int dim, float *ax, float *bx) +{ + float distance = 0.0; + + /* Auto-vectorized */ + for (int i = 0; i < dim; i++) { + distance += ax[i] * bx[i]; + } + + return distance; +} + +#endif + +/* + * Get the inner product of two vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(inner_product); +Datum inner_product(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + Vector *b = PG_GETARG_VECTOR_P(1); + + CheckDims(a, b); + + PG_RETURN_FLOAT8((double)VectorInnerProduct(a->dim, a->x, b->x)); +} + +/* + * Get the negative inner product of two vectors + */ +PGDLLEXPORT 
PG_FUNCTION_INFO_V1(vector_negative_inner_product); +Datum vector_negative_inner_product(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + Vector *b = PG_GETARG_VECTOR_P(1); + + CheckDims(a, b); + + PG_RETURN_FLOAT8((double)-VectorInnerProduct(a->dim, a->x, b->x)); +} + +VECTOR_TARGET_CLONES static double VectorCosineSimilarity(int dim, float *ax, float *bx) +{ + float similarity = 0.0; + float norma = 0.0; + float normb = 0.0; + + /* Auto-vectorized */ + for (int i = 0; i < dim; i++) { + similarity += ax[i] * bx[i]; + norma += ax[i] * ax[i]; + normb += bx[i] * bx[i]; + } + + /* Use sqrt(a * b) over sqrt(a) * sqrt(b); widen to double before multiplying */ + return static_cast<double>(similarity) / sqrt(static_cast<double>(norma) * static_cast<double>(normb)); +} + +/* + * Get the cosine distance between two vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(cosine_distance); +Datum cosine_distance(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + Vector *b = PG_GETARG_VECTOR_P(1); + double similarity; + + CheckDims(a, b); + + similarity = VectorCosineSimilarity(a->dim, a->x, b->x); +#ifdef _MSC_VER + /* /fp:fast may not propagate NaN */ + if (isnan(similarity)) { + PG_RETURN_FLOAT8(NAN); + } +#endif + + /* Keep in range */ + if (similarity > 1) { + similarity = 1.0; + } else if (similarity < -1) { + similarity = -1.0; + } + + PG_RETURN_FLOAT8(1.0 - similarity); +} + +/* + * Get the distance for spherical k-means + * Currently uses angular distance since needs to satisfy triangle inequality + * Assumes inputs are unit vectors (skips norm) + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_spherical_distance); +Datum vector_spherical_distance(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + Vector *b = PG_GETARG_VECTOR_P(1); + double distance; + + CheckDims(a, b); + + distance = (double)VectorInnerProduct(a->dim, a->x, b->x); + /* Prevent NaN with acos with loss of precision */ + if (distance > 1) { + distance = 1; + } else if (distance < -1) { + distance = -1; + } + + PG_RETURN_FLOAT8(acos(distance) / 
M_PI); +} + +/* Does not require FMA, but keep logic simple */ +VECTOR_TARGET_CLONES static float VectorL1Distance(int dim, float *ax, float *bx) +{ + float distance = 0.0; + + /* Auto-vectorized */ + for (int i = 0; i < dim; i++) { + distance += fabsf(ax[i] - bx[i]); + } + + return distance; +} + +/* + * Get the L1 distance between two vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(l1_distance); +Datum l1_distance(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + Vector *b = PG_GETARG_VECTOR_P(1); + + CheckDims(a, b); + + PG_RETURN_FLOAT8((double)VectorL1Distance(a->dim, a->x, b->x)); +} + +/* + * Get the dimensions of a vector + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_dims); +Datum vector_dims(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + + PG_RETURN_INT32(a->dim); +} + +/* + * Get the L2 norm of a vector + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_norm); +Datum vector_norm(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + float *ax = a->x; + double norm = 0.0; + + /* Auto-vectorized */ + for (int i = 0; i < a->dim; i++) { + norm += (double)ax[i] * (double)ax[i]; + } + + PG_RETURN_FLOAT8(sqrt(norm)); +} + +/* + * Normalize a vector with the L2 norm + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(l2_normalize); +Datum l2_normalize(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + float *ax = a->x; + double norm = 0; + Vector *result; + float *rx; + + result = InitVector(a->dim); + rx = result->x; + + /* Auto-vectorized */ + for (int i = 0; i < a->dim; i++) { + norm += (double)ax[i] * (double)ax[i]; + } + + norm = sqrt(norm); + /* Return zero vector for zero norm */ + if (norm > 0) { + for (int i = 0; i < a->dim; i++) { + rx[i] = ax[i] / norm; + } + + /* Check for overflow */ + for (int i = 0; i < a->dim; i++) { + if (isinf(rx[i])) { + float_overflow_error(); + } + } + } + + PG_RETURN_POINTER(result); +} + +/* + * Add vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_add); +Datum vector_add(PG_FUNCTION_ARGS) +{ + Vector *a = 
PG_GETARG_VECTOR_P(0); + Vector *b = PG_GETARG_VECTOR_P(1); + float *ax = a->x; + float *bx = b->x; + Vector *result; + float *rx; + + CheckDims(a, b); + + result = InitVector(a->dim); + rx = result->x; + + /* Auto-vectorized */ + for (int i = 0, imax = a->dim; i < imax; i++) { + rx[i] = ax[i] + bx[i]; + } + + /* Check for overflow */ + for (int i = 0, imax = a->dim; i < imax; i++) { + if (isinf(rx[i])) { + float_overflow_error(); + } + } + + PG_RETURN_POINTER(result); +} + +/* + * Subtract vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_sub); +Datum vector_sub(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + Vector *b = PG_GETARG_VECTOR_P(1); + float *ax = a->x; + float *bx = b->x; + Vector *result; + float *rx; + + CheckDims(a, b); + + result = InitVector(a->dim); + rx = result->x; + + /* Auto-vectorized */ + for (int i = 0, imax = a->dim; i < imax; i++) { + rx[i] = ax[i] - bx[i]; + } + + /* Check for overflow */ + for (int i = 0, imax = a->dim; i < imax; i++) { + if (isinf(rx[i])) { + float_overflow_error(); + } + } + + PG_RETURN_POINTER(result); +} + +/* + * Multiply vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_mul); +Datum vector_mul(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + Vector *b = PG_GETARG_VECTOR_P(1); + float *ax = a->x; + float *bx = b->x; + Vector *result; + float *rx; + + CheckDims(a, b); + + result = InitVector(a->dim); + rx = result->x; + + /* Auto-vectorized */ + for (int i = 0, imax = a->dim; i < imax; i++) { + rx[i] = ax[i] * bx[i]; + } + + /* Check for overflow and underflow */ + for (int i = 0, imax = a->dim; i < imax; i++) { + if (isinf(rx[i])) { + float_overflow_error(); + } + + if (rx[i] == 0 && !(ax[i] == 0 || bx[i] == 0)) { + float_underflow_error(); + } + } + + PG_RETURN_POINTER(result); +} + +/* + * Concatenate vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_concat); +Datum vector_concat(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + Vector *b = PG_GETARG_VECTOR_P(1); + Vector 
*result; + int dim = a->dim + b->dim; + + CheckDim(dim); + result = InitVector(dim); + + for (int i = 0; i < a->dim; i++) { + result->x[i] = a->x[i]; + } + + for (int i = 0; i < b->dim; i++) { + result->x[i + a->dim] = b->x[i]; + } + + PG_RETURN_POINTER(result); +} + +/* + * Quantize a vector + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(binary_quantize); +Datum binary_quantize(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + float *ax = a->x; + VarBit *result = InitBitVector(a->dim); + unsigned char *rx = VARBITS(result); + + for (int i = 0; i < a->dim; i++) { + rx[i / 8] |= (ax[i] > 0) << (7 - (i % 8)); + } + + PG_RETURN_VARBIT_P(result); +} + +/* + * Get a subvector + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(subvector); +Datum subvector(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + int32 start = PG_GETARG_INT32(1); + int32 count = PG_GETARG_INT32(2); + int32 end; + float *ax = a->x; + Vector *result; + int dim; + + if (count < 1) { + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("vector must have at least 1 dimension"))); + } + + /* Indexing starts at 1, like substring */ + if (start < 1) { + ereport(WARNING, (errmsg("when the start position is less than 1, it will begin with the first dimension"))); + start = 1; + } else if (start > a->dim) { + ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("vector must have at least 1 dimension"))); + } + + /* + * Check if (start + count > a->dim), avoiding integer overflow. a->dim + * and count are both positive, so a->dim - count won't overflow. 
+ */ + if (start > a->dim - count) { + end = a->dim + 1; + } else { + end = start + count; + } + + dim = end - start; + CheckDim(dim); + result = InitVector(dim); + + for (int i = 0; i < dim; i++) { + result->x[i] = ax[start - 1 + i]; + } + + PG_RETURN_POINTER(result); +} + +/* + * Internal helper to compare vectors + */ +int vector_cmp_internal(Vector *a, Vector *b) +{ + int dim = Min(a->dim, b->dim); + + /* Check values before dimensions to be consistent with Postgres arrays */ + for (int i = 0; i < dim; i++) { + if (a->x[i] < b->x[i]) { + return -1; + } + + if (a->x[i] > b->x[i]) { + return 1; + } + } + + if (a->dim < b->dim) { + return -1; + } + + if (a->dim > b->dim) { + return 1; + } + + return 0; +} + +/* + * Less than + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_lt); +Datum vector_lt(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + Vector *b = PG_GETARG_VECTOR_P(1); + + PG_RETURN_BOOL(vector_cmp_internal(a, b) < 0); +} + +/* + * Less than or equal + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_le); +Datum vector_le(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + Vector *b = PG_GETARG_VECTOR_P(1); + + PG_RETURN_BOOL(vector_cmp_internal(a, b) <= 0); +} + +/* + * Equal + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_eq); +Datum vector_eq(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + Vector *b = PG_GETARG_VECTOR_P(1); + + PG_RETURN_BOOL(vector_cmp_internal(a, b) == 0); +} + +/* + * Not equal + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_ne); +Datum vector_ne(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + Vector *b = PG_GETARG_VECTOR_P(1); + + PG_RETURN_BOOL(vector_cmp_internal(a, b) != 0); +} + +/* + * Greater than or equal + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_ge); +Datum vector_ge(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + Vector *b = PG_GETARG_VECTOR_P(1); + + PG_RETURN_BOOL(vector_cmp_internal(a, b) >= 0); +} + +/* + * Greater than + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_gt); +Datum 
vector_gt(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + Vector *b = PG_GETARG_VECTOR_P(1); + + PG_RETURN_BOOL(vector_cmp_internal(a, b) > 0); +} + +/* + * Compare vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_cmp); +Datum vector_cmp(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + Vector *b = PG_GETARG_VECTOR_P(1); + + PG_RETURN_INT32(vector_cmp_internal(a, b)); +} + +/* + * Accumulate vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_accum); +Datum vector_accum(PG_FUNCTION_ARGS) +{ + ArrayType *statearray = PG_GETARG_ARRAYTYPE_P(0); + Vector *newval = PG_GETARG_VECTOR_P(1); + float8 *statevalues; + int16 dim; + bool newarr; + float8 n; + Datum *statedatums; + float *x = newval->x; + ArrayType *result; + + /* Check array before using */ + statevalues = CheckStateArray(statearray, "vector_accum"); + dim = STATE_DIMS(statearray); + newarr = dim == 0; + + if (newarr) { + dim = newval->dim; + } else { + CheckExpectedDim(dim, newval->dim); + } + + n = statevalues[0] + 1.0; + + statedatums = (Datum *)CreateStateDatums(dim); + statedatums[0] = Float8GetDatum(n); + + if (newarr) { + for (int i = 0; i < dim; i++) { + statedatums[i + 1] = Float8GetDatum((double)x[i]); + } + } else { + for (int i = 0; i < dim; i++) { + double v = statevalues[i + 1] + x[i]; + + /* Check for overflow */ + if (isinf(v)) { + float_overflow_error(); + } + + statedatums[i + 1] = Float8GetDatum(v); + } + } + + /* Use float8 array like float4_accum */ + result = construct_array(statedatums, dim + 1, FLOAT8OID, sizeof(float8), FLOAT8PASSBYVAL, TYPALIGN_DOUBLE); + + pfree(statedatums); + + PG_RETURN_ARRAYTYPE_P(result); +} + +/* + * Combine vectors or half vectors (also used for halfvec_combine) + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_combine); +Datum vector_combine(PG_FUNCTION_ARGS) +{ + /* Must also update parameters of halfvec_combine if modifying */ + ArrayType *statearray1 = PG_GETARG_ARRAYTYPE_P(0); + ArrayType *statearray2 = PG_GETARG_ARRAYTYPE_P(1); + 
float8 *statevalues1; + float8 *statevalues2; + float8 n; + float8 n1; + float8 n2; + int16 dim; + Datum *statedatums; + ArrayType *result; + + /* Check arrays before using */ + statevalues1 = CheckStateArray(statearray1, "vector_combine"); + statevalues2 = CheckStateArray(statearray2, "vector_combine"); + + n1 = statevalues1[0]; + n2 = statevalues2[0]; + + if (n1 == 0.0) { + n = n2; + dim = STATE_DIMS(statearray2); + statedatums = (Datum *)CreateStateDatums(dim); + for (int i = 1; i <= dim; i++) + statedatums[i] = Float8GetDatum(statevalues2[i]); + } else if (n2 == 0.0) { + n = n1; + dim = STATE_DIMS(statearray1); + statedatums = (Datum *)CreateStateDatums(dim); + for (int i = 1; i <= dim; i++) + statedatums[i] = Float8GetDatum(statevalues1[i]); + } else { + n = n1 + n2; + dim = STATE_DIMS(statearray1); + CheckExpectedDim(dim, STATE_DIMS(statearray2)); + statedatums = (Datum *)CreateStateDatums(dim); + for (int i = 1; i <= dim; i++) { + double v = statevalues1[i] + statevalues2[i]; + + /* Check for overflow */ + if (isinf(v)) + float_overflow_error(); + + statedatums[i] = Float8GetDatum(v); + } + } + + statedatums[0] = Float8GetDatum(n); + + result = construct_array(statedatums, dim + 1, FLOAT8OID, sizeof(float8), FLOAT8PASSBYVAL, TYPALIGN_DOUBLE); + + pfree(statedatums); + + PG_RETURN_ARRAYTYPE_P(result); +} + +/* + * Average vectors + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_avg); +Datum vector_avg(PG_FUNCTION_ARGS) +{ + ArrayType *statearray = PG_GETARG_ARRAYTYPE_P(0); + float8 *statevalues; + float8 n; + uint16 dim; + Vector *result; + + /* Check array before using */ + statevalues = CheckStateArray(statearray, "vector_avg"); + n = statevalues[0]; + + /* SQL defines AVG of no values to be NULL */ + if (n == 0.0) { + PG_RETURN_NULL(); + } + + /* Create vector */ + dim = STATE_DIMS(statearray); + CheckDim(dim); + result = InitVector(dim); + for (int i = 0; i < dim; i++) { + result->x[i] = statevalues[i + 1] / n; + CheckElement(result->x[i]); + } + + 
PG_RETURN_POINTER(result); +} + +/* + * Convert sparse vector to dense vector + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_to_vector); +Datum sparsevec_to_vector(PG_FUNCTION_ARGS) +{ + SparseVector *svec = PG_GETARG_SPARSEVEC_P(0); + int32 typmod = PG_GETARG_INT32(1); + Vector *result; + int dim = svec->dim; + float *values = SPARSEVEC_VALUES(svec); + + CheckDim(dim); + CheckExpectedDim(typmod, dim); + + result = InitVector(dim); + for (int i = 0; i < svec->nnz; i++) { + result->x[svec->indices[i]] = values[i]; + } + + PG_RETURN_POINTER(result); +} + +#ifdef __aarch64__ +void VectorMadd(size_t n, const float *ax, float bf, const float *bx, float *cx) +{ + const size_t nSimd = n - (n & 3); + const float32x4_t bfv = vdupq_n_f32(bf); + size_t i; + + for (i = 0; i < nSimd; i += 4) { + const float32x4_t ai = vld1q_f32(ax + i); + const float32x4_t bi = vld1q_f32(bx + i); + const float32x4_t ci = vfmaq_f32(ai, bfv, bi); + vst1q_f32(cx + i, ci); + } + for (; i < n; ++i) { + cx[i] = ax[i] + bf * bx[i]; + } +} +#else +void VectorMadd(size_t n, const float *ax, float bf, const float *bx, float *cx) +{ + for (size_t i = 0; i < n; i++) { + cx[i] = ax[i] + bf * bx[i]; + } +} +#endif + +#ifdef __aarch64__ +struct ElementOpL2 { + static float32x4_t op(float32x4_t x, float32x4_t y) { + float32x4_t tmp = vsubq_f32(x, y); + return vmulq_f32(tmp, tmp); + } +}; + +struct ElementOpIP { + static float32x4_t op(float32x4_t x, float32x4_t y) { + return vmulq_f32(x, y); + } +}; + +template +void VectorOpNYD4(size_t ny, float *x, char *pqTable, Size subSize, int offset, float *dis) +{ + float32x4_t x0 = vld1q_f32(x); + float *y; + __builtin_prefetch(pqTable, 0, 3); + + size_t i; + for (i = 0; i < ny; i++) { + y = DatumGetVector(pqTable + (offset + i) * subSize)->x; + float32x4_t accu = ElementOp::op(x0, vld1q_f32(y)); + dis[i] = vaddvq_f32(accu); + } +} + +template +void VectorOpNYD8(size_t ny, float *x, char *pqTable, Size subSize, int offset, float *dis) +{ + /* neon support 128 bit, 
float is 32 bit, 4 float one batch */ + int batch = 4; + float32x4_t x0 = vld1q_f32(x); + float32x4_t x1 = vld1q_f32(x + batch); + float *y; + __builtin_prefetch(pqTable, 0, 3); + + size_t i; + for (i = 0; i < ny; i++) { + y = DatumGetVector(pqTable + (offset + i) * subSize)->x; + float32x4_t accu = ElementOp::op(x0, vld1q_f32(y)); + y += batch; + accu = vaddq_f32(accu, ElementOp::op(x1, vld1q_f32(y))); + dis[i] = vaddvq_f32(accu); + } +} + +template +void VectorOpNYD16(size_t ny, float *x, char *pqTable, Size subSize, int offset, float *dis) +{ + /* neon support 128 bit, float is 32 bit, 4 float one batch */ + int batch = 4; + float32x4_t x0 = vld1q_f32(x); + float32x4_t x1 = vld1q_f32(x + batch); + float32x4_t x2 = vld1q_f32(x + batch * 2); + float32x4_t x3 = vld1q_f32(x + batch * 3); + float *y; + __builtin_prefetch(pqTable, 0, 3); + + size_t i; + for (i = 0; i < ny; i++) { + y = DatumGetVector(pqTable + (offset + i) * subSize)->x; + float32x4_t accu = ElementOp::op(x0, vld1q_f32(y)); + y += batch; + accu = vaddq_f32(accu, ElementOp::op(x1, vld1q_f32(y))); + y += batch; + accu = vaddq_f32(accu, ElementOp::op(x2, vld1q_f32(y))); + y += batch; + accu = vaddq_f32(accu, ElementOp::op(x3, vld1q_f32(y))); + dis[i] = vaddvq_f32(accu); + } +} +#endif + +void VectorL2SquaredDistanceNYRef(size_t d, size_t ny, float *x, char *pqTable, Size subSize, int offset, float *dis) +{ + float *y; + for (size_t i = 0; i < ny; i++) { + y = DatumGetVector(pqTable + (offset + i) * subSize)->x; + dis[i] = VectorL2SquaredDistance(d, x, y); + } +} + +#ifdef __aarch64__ +void VectorL2SquaredDistanceNY(size_t d, size_t ny, float *x, char *pqTable, Size subSize, int offset, float *dis) +{ +#define DISPATCH(dval) \ + case dval: \ + VectorOpNYD##dval(ny, x, pqTable, subSize, offset, dis); \ + return; + + switch (d) { + DISPATCH(4) + DISPATCH(8) + DISPATCH(16) + default: + VectorL2SquaredDistanceNYRef(d, ny, x, pqTable, subSize, offset, dis); + return; + } +#undef DISPATCH +} +#else +void 
VectorL2SquaredDistanceNY(size_t d, size_t ny, float *x, char *pqTable, Size subSize, int offset, float *dis) +{ + VectorL2SquaredDistanceNYRef(d, ny, x, pqTable, subSize, offset, dis); +} +#endif + +void VectorInnerProductNYRef(size_t d, size_t ny, float *x, char *pqTable, Size subSize, int offset, float *dis) +{ + float *y; + for (size_t i = 0; i < ny; i++) { + y = DatumGetVector(pqTable + (offset + i) * subSize)->x; + dis[i] = VectorInnerProduct(d, x, y); + } +} + +#ifdef __aarch64__ +void VectorInnerProductNY(size_t d, size_t ny, float *x, char *pqTable, Size subSize, int offset, float *dis) +{ +#define DISPATCH(dval) \ + case dval: \ + VectorOpNYD##dval(ny, x, pqTable, subSize, offset, dis); \ + return; + + switch (d) { + DISPATCH(4) + DISPATCH(8) + DISPATCH(16) + default: + VectorInnerProductNYRef(d, ny, x, pqTable, subSize, offset, dis); + return; + } +#undef DISPATCH +} +#else +void VectorInnerProductNY(size_t d, size_t ny, float *x, char *pqTable, Size subSize, int offset, float *dis) +{ + VectorInnerProductNYRef(d, ny, x, pqTable, subSize, offset, dis); +} +#endif + +/* + * WAL-log a range of blocks in a relation. + * + * An image of all pages with block numbers 'startblk' <= X < 'endblk' is + * written to the WAL. If the range is large, this is done in multiple WAL + * records. + * + * If all page follows the standard page layout, with a PageHeader and unused + * space between pd_lower and pd_upper, set 'page_std' to true. That allows + * the unused space to be left out from the WAL records, making them smaller. + * + * NOTE: This function acquires exclusive-locks on the pages. Typically, this + * is used on a newly-built relation, and the caller is holding a + * AccessExclusiveLock on it, so no other backend can be accessing it at the + * same time. If that's not the case, you must ensure that this does not + * cause a deadlock through some other means. 
+ */ +void LogNewpageRange(Relation rel, ForkNumber forknum, BlockNumber startblk, BlockNumber endblk, bool page_std) +{ + int flags; + BlockNumber blkno; + + flags = REGBUF_FORCE_IMAGE; + if (page_std) { + flags |= REGBUF_STANDARD; + } + + /* + * Iterate over all the pages in the range. They are collected into + * batches of XLR_MAX_BLOCK_ID pages, and a single WAL-record is written + * for each batch. + */ + XLogEnsureRecordSpace(XLR_MAX_BLOCK_ID - 1, 0); + + blkno = startblk; + while (blkno < endblk) { + Buffer bufpack[XLR_MAX_BLOCK_ID]; + XLogRecPtr recptr; + int nbufs; + int i; + + CHECK_FOR_INTERRUPTS(); + + /* Collect a batch of blocks. */ + nbufs = 0; + while (nbufs < XLR_MAX_BLOCK_ID && blkno < endblk) { + Buffer buf = ReadBufferExtended(rel, forknum, blkno, RBM_NORMAL, NULL); + + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + + /* + * Completely empty pages are not WAL-logged. Writing a WAL record + * would change the LSN, and we don't want that. We want the page + * to stay empty. + */ + if (!PageIsNew(BufferGetPage(buf))) { + bufpack[nbufs++] = buf; + } else { + UnlockReleaseBuffer(buf); + } + blkno++; + } + + /* Nothing more to do if all remaining blocks were empty. */ + if (nbufs == 0) { + break; + } + + /* Write WAL record for this batch. 
*/ + XLogBeginInsert(); + + START_CRIT_SECTION(); + for (i = 0; i < nbufs; i++) { + MarkBufferDirty(bufpack[i]); + XLogRegisterBuffer(i, bufpack[i], flags); + } + + recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI | XLOG_MERGE_RECORD); + + for (i = 0; i < nbufs; i++) { + PageSetLSN(BufferGetPage(bufpack[i]), recptr); + UnlockReleaseBuffer(bufpack[i]); + } + END_CRIT_SECTION(); + } +} + +int PlanCreateIndexWorkers(Relation heapRelation, IndexInfo *indexInfo) +{ + int parallelWorkers = RelationGetParallelWorkers(heapRelation, 0); + int maxHashbucketIndexWorker = 32; + + if (parallelWorkers != 0) { + parallelWorkers = Min(maxHashbucketIndexWorker, parallelWorkers); + } + + if (indexInfo->ii_Concurrent && indexInfo->ii_ParallelWorkers > 0) { + ereport(NOTICE, (errmsg("switch off parallel mode when concurrently flag is set"))); + parallelWorkers = 0; + } + + if (heapRelation->rd_rel->relpersistence == RELPERSISTENCE_GLOBAL_TEMP && indexInfo->ii_ParallelWorkers > 0) { + ereport(NOTICE, (errmsg("switch off parallel mode for global temp table"))); + parallelWorkers = 0; + } + + /* disable parallel building index for system table */ + if (IsCatalogRelation(heapRelation)) { + parallelWorkers = 0; + } + return parallelWorkers; +} diff --git a/src/common/backend/utils/cache/knl_globaltabdefcache.cpp b/src/common/backend/utils/cache/knl_globaltabdefcache.cpp index 51cc45db95669fed5d229fd42f6458d914e0b198..16996d64750d170511fd37a52501720c1851cdef 100644 --- a/src/common/backend/utils/cache/knl_globaltabdefcache.cpp +++ b/src/common/backend/utils/cache/knl_globaltabdefcache.cpp @@ -433,7 +433,7 @@ Relation CopyRelationData(Relation newrel, Relation rel, MemoryContext rules_cxt * otherwise, do the copy work here * if the variable changed, there is no lock and no rel inval msg, * set it zero and reinit it when copy into local */ - Assert(sizeof(RelationData) == 528); + Assert(sizeof(RelationData) == 544); /* all copied exclude pointer */ *newrel = *rel; Assert(rel->rd_createSubid == 
InvalidSubTransactionId); diff --git a/src/common/backend/utils/cache/relcache.cpp b/src/common/backend/utils/cache/relcache.cpp index 1cb5201fa1913ddb51672c9977378e23ce8b9ee6..130cc24ebc1745a71031d74d1a8aa9b58011e0d4 100755 --- a/src/common/backend/utils/cache/relcache.cpp +++ b/src/common/backend/utils/cache/relcache.cpp @@ -2914,6 +2914,8 @@ void RelationInitIndexAccessInfo(Relation relation, HeapTuple index_tuple) } else { relation->rd_ind_partition_all_usable = true; /* trivial for non-partitioned index */ } + relation->pqTable = NULL; + relation->pqDistanceTable = NULL; } /* diff --git a/src/common/backend/utils/misc/guc/guc_sql.cpp b/src/common/backend/utils/misc/guc/guc_sql.cpp index cf4f2fb7d286c640f8ed0348cc39f355199cab1d..a6e886815055eaeed6366b931d4cd6c0324c6475 100755 --- a/src/common/backend/utils/misc/guc/guc_sql.cpp +++ b/src/common/backend/utils/misc/guc/guc_sql.cpp @@ -148,6 +148,8 @@ #include "utils/xml.h" #include "workload/cpwlm.h" #include "workload/workload.h" +#include "access/datavec/hnsw.h" +#include "access/datavec/ivfflat.h" #include "utils/guc_sql.h" #define DEFAULT_USTATS_TRACKER_NAPTIME 20 @@ -2540,6 +2542,58 @@ static void InitSqlConfigureNamesInt() NULL, NULL, NULL}, + {{"hnsw_ef_search", + PGC_USERSET, + NODE_ALL, + QUERY_TUNING_OTHER, + gettext_noop("Sets the size of the dynamic candidate list for search"), + gettext_noop("Valid range is 1..1000.")}, + &u_sess->datavec_ctx.hnsw_ef_search, + HNSW_DEFAULT_EF_SEARCH, + HNSW_MIN_EF_SEARCH, + HNSW_MAX_EF_SEARCH, + NULL, + NULL, + NULL}, + {{"hnsw_earlystop_threshold", + PGC_USERSET, + NODE_ALL, + QUERY_TUNING_OTHER, + gettext_noop("Set the maximum number of iterations in the HnswSearchLayer loop within the earlystop strategy."), + gettext_noop("Valid range is 160..INT32_MAX-1, INT32_MAX means disable the earlystop strategy.")}, + &u_sess->datavec_ctx.hnsw_earlystop_threshold, + HNSW_DEFAULT_THRESHOLD, + HNSW_MIN_THRESHOLD, + HNSW_MAX_THRESHOLD, + NULL, + NULL, + NULL}, + 
{{"ivfflat_probes", + PGC_USERSET, + NODE_ALL, + QUERY_TUNING_OTHER, + gettext_noop("Sets the number of probes"), + gettext_noop("Valid range is 1..lists."),}, + &u_sess->datavec_ctx.ivfflat_probes, + IVFFLAT_DEFAULT_PROBES, + IVFFLAT_MIN_LISTS, + IVFFLAT_MAX_LISTS, + NULL, + NULL, + NULL}, + {{"ivfpq_kreorder", + PGC_USERSET, + NODE_ALL, + QUERY_TUNING_OTHER, + gettext_noop("Sets the number of samples that need to be reordered after IVFPQ."), + NULL}, + &u_sess->datavec_ctx.ivfpq_kreorder, + 0, + 0, + INT_MAX, + NULL, + NULL, + NULL}, #endif /* End-of-list marker */ {{NULL, diff --git a/src/common/backend/utils/misc/guc/guc_storage.cpp b/src/common/backend/utils/misc/guc/guc_storage.cpp index 7ae47e37ecce044313d681ee3c1241ee9526a8bf..9ff467fb21d1072dc49a1a132190c0e6de6f3950 100755 --- a/src/common/backend/utils/misc/guc/guc_storage.cpp +++ b/src/common/backend/utils/misc/guc/guc_storage.cpp @@ -1404,6 +1404,17 @@ static void InitStorageConfigureNamesBool() NULL, NULL, NULL}, + {{"enable_pq", + PGC_POSTMASTER, + NODE_SINGLENODE, + QUERY_TUNING_OTHER, + gettext_noop("Whether enable pq in datavec"), + NULL}, + &g_instance.attr.attr_storage.enable_pq, + false, + NULL, + NULL, + NULL}, /* End-of-list marker */ {{"handle_toast_in_autovac", PGC_SIGHUP, diff --git a/src/gausskernel/CMakeLists.txt b/src/gausskernel/CMakeLists.txt index 0d1aefda1c2aba51695afeef989e295583c1449f..5f3eb2b6d47c2fd30355310b19c5f0b101ba88df 100755 --- a/src/gausskernel/CMakeLists.txt +++ b/src/gausskernel/CMakeLists.txt @@ -213,6 +213,7 @@ list(APPEND gaussdb_objects $ $ $ + $ $ $ $ diff --git a/src/gausskernel/optimizer/commands/amcmds.cpp b/src/gausskernel/optimizer/commands/amcmds.cpp index fb71252b9dbdd43aa22d87978cb7cf785674c741..c3783157b044d6168ef69ed7b24fa21c22893f9d 100644 --- a/src/gausskernel/optimizer/commands/amcmds.cpp +++ b/src/gausskernel/optimizer/commands/amcmds.cpp @@ -135,11 +135,15 @@ ObjectAddress CreateAccessMethod(CreateAmStmt *stmt) 
FILL_ANUM_PG_AM_REGPROC_VAULE(Anum_pg_am_amcanreturn, amRoutine->amcanreturnfuncname); FILL_ANUM_PG_AM_REGPROC_VAULE(Anum_pg_am_amcostestimate, amRoutine->amcostestimatefuncname); FILL_ANUM_PG_AM_REGPROC_VAULE(Anum_pg_am_amoptions, amRoutine->amoptionsfuncname); + FILL_ANUM_PG_AM_REGPROC_VAULE(Anum_pg_am_amdelete, amRoutine->amdeletefuncname); values[Anum_pg_am_amhandler - 1] = ObjectIdGetDatum(amHandler); tup = heap_form_tuple(RelationGetDescr(rel), values, nulls); + if (strcmp(stmt->amname, "hnsw") == 0) { + HeapTupleSetOid(tup, HNSW_AM_OID); + } amOid = simple_heap_insert(rel, tup); CatalogUpdateIndexes(rel, tup); heap_freetuple(tup); @@ -184,7 +188,7 @@ void RemoveAccessMethodById(Oid amOid) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("must be superuser to drop an access method."))); - if (IsSystemObjOid(amOid)) + if (IsSystemObjOid(amOid) && !u_sess->attr.attr_common.IsInplaceUpgrade) ereport(ERROR, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), errmsg("amOid %u is a builtin access method, it can not be droped", amOid))); @@ -250,6 +254,10 @@ static Oid lookup_regproc_am_handler_func(int16 procIndex, IndexAmRoutine *amRou nargs = PG_AM_ENDSCAN_ARGS_NUM; funcName = amRoutine->amendscanfuncname; break; + case Anum_pg_am_amdelete: + nargs = PG_AM_DELETE_ARGS_NUM; + funcName = amRoutine->amdeletefuncname; + break; case Anum_pg_am_ambuild: nargs = PG_AM_BUILD_ARGS_NUM; funcName = amRoutine->ambuildfuncname; diff --git a/src/gausskernel/optimizer/commands/dropcmds.cpp b/src/gausskernel/optimizer/commands/dropcmds.cpp index f68c705630fa57cf4a64a7322394bd6b8e45b67e..cbe0904634cd0729cfd267c521e9416ed1b610bd 100644 --- a/src/gausskernel/optimizer/commands/dropcmds.cpp +++ b/src/gausskernel/optimizer/commands/dropcmds.cpp @@ -80,6 +80,7 @@ static void DropExtensionInListIsSupported(List* objname) "packages", "ndpplugin", "datavec", + "chparser", #ifndef ENABLE_MULTIPLE_NODES "mysql_fdw", "oracle_fdw", diff --git 
a/src/gausskernel/optimizer/commands/explain.cpp b/src/gausskernel/optimizer/commands/explain.cpp index 8a801e91760539b89f5c6152295778c6d9ebe36b..54963edb289def0e01d679070d84877577e29206 100755 --- a/src/gausskernel/optimizer/commands/explain.cpp +++ b/src/gausskernel/optimizer/commands/explain.cpp @@ -264,7 +264,7 @@ static bool show_scan_distributekey(const Plan* plan) { return ( IsA(plan, CStoreScan) || IsA(plan, CStoreIndexScan) || IsA(plan, CStoreIndexHeapScan) || IsA(plan, SeqScan) || - IsA(plan, IndexScan) || IsA(plan, IndexOnlyScan) || IsA(plan, CteScan) || + IsA(plan, IndexScan) || IsA(plan, IndexOnlyScan) || IsA(plan, CteScan) || IsA(plan, AnnIndexScan) || IsA(plan, ForeignScan) || IsA(plan, VecForeignScan) || IsA(plan, BitmapHeapScan) || IsA(plan, TsStoreScan) ); } @@ -1929,6 +1929,12 @@ static void ExplainNodePartition(const Plan* plan, ExplainState* es) flag = 1; } break; + case T_AnnIndexScan: + if (((AnnIndexScan*)plan->lefttree)->scan.pruningInfo->expr != NULL) { + appendStringInfo(es->str, "Iterations: %s", "PART"); + flag = 1; + } + break; #ifdef ENABLE_MULTIPLE_NODES case T_TsStoreScan: if (((TsStoreScan*)plan->lefttree)->pruningInfo->expr != NULL) { @@ -1956,7 +1962,8 @@ static bool GetSubPartitionIterations(const Plan* plan, const ExplainState* es, RowToVec* rowToVecPlan = (RowToVec*)curPlan->lefttree; Plan* scanPlan = (Plan*)rowToVecPlan->plan.lefttree; if (!(IsA(scanPlan, Scan) || IsA(scanPlan, SeqScan) || IsA(scanPlan, IndexOnlyScan) || - IsA(scanPlan, IndexScan) || IsA(scanPlan, BitmapHeapScan) || IsA(scanPlan, TidScan))) { + IsA(scanPlan, IndexScan) || IsA(scanPlan, BitmapHeapScan) || IsA(scanPlan, TidScan) || + IsA(scanPlan, AnnIndexScan))) { break; } curPlan = &rowToVecPlan->plan; @@ -1971,6 +1978,7 @@ static bool GetSubPartitionIterations(const Plan* plan, const ExplainState* es, #endif case T_IndexScan: case T_IndexOnlyScan: + case T_AnnIndexScan: case T_BitmapIndexScan: case T_BitmapHeapScan: case T_CStoreScan: @@ -2239,6 +2247,15 @@ 
static void ExplainNode( pt_index_name = explain_get_index_name(indexonlyscan->indexid); pt_index_owner = get_namespace_name(get_rel_namespace(indexonlyscan->indexid)); } break; + case T_AnnIndexScan: { + AnnIndexScan* annindexscan = (AnnIndexScan*)plan; + + ExplainIndexScanDetails(annindexscan->indexid, annindexscan->indexorderdir, es); + ExplainScanTarget((Scan*)annindexscan, es); + + pt_index_name = explain_get_index_name(annindexscan->indexid); + pt_index_owner = get_namespace_name(get_rel_namespace(annindexscan->indexid)); + } break; case T_BitmapIndexScan: { BitmapIndexScan* bitmapindexscan = (BitmapIndexScan*)plan; const char* indexname = explain_get_index_name(bitmapindexscan->indexid); @@ -2666,6 +2683,15 @@ static void ExplainNode( if (es->analyze) ExplainPropertyLong("Heap Fetches", ((IndexOnlyScanState*)planstate)->ioss_HeapFetches, es); break; + case T_AnnIndexScan: + show_scan_qual(((AnnIndexScan*)plan)->indexqualorig, "Index Cond", planstate, ancestors, es); + if (((AnnIndexScan*)plan)->indexqualorig) + show_instrumentation_count("Rows Removed by Index Recheck", 2, planstate, es); + show_scan_qual(((AnnIndexScan*)plan)->indexorderbyorig, "Order By", planstate, ancestors, es); + show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, planstate, es); + break; case T_BitmapIndexScan: show_scan_qual(((BitmapIndexScan*)plan)->indexqualorig, "Index Cond", planstate, ancestors, es); break; @@ -2681,7 +2707,7 @@ static void ExplainNode( if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 1, planstate, es); show_llvm_info(planstate, es); - break; + break; #ifdef PGXC case T_ModifyTable: case T_VecModifyTable: { @@ -3183,6 +3209,7 @@ static void ExplainNode( #endif case T_IndexScan: case T_IndexOnlyScan: + case T_AnnIndexScan: case T_BitmapHeapScan: case T_BitmapIndexScan: case T_CStoreIndexScan: @@ -8327,7 +8354,7 @@ static const char* 
explain_get_index_name(Oid indexId) } /* - * Add some additional details about an IndexScan or IndexOnlyScan + * Add some additional details about an IndexScan, IndexOnlyScan or AnnIndexScan */ static void ExplainIndexScanDetails(Oid indexid, ScanDirection indexorderdir, ExplainState* es) { @@ -8446,6 +8473,7 @@ static void ExplainTargetRel(Plan* plan, Index rti, ExplainState* es, bool multi #endif /* ENABLE_MULTIPLE_NODES */ case T_IndexScan: case T_IndexOnlyScan: + case T_AnnIndexScan: case T_BitmapHeapScan: case T_CStoreIndexScan: case T_CStoreIndexCtidScan: diff --git a/src/gausskernel/optimizer/commands/indexcmds.cpp b/src/gausskernel/optimizer/commands/indexcmds.cpp index a7c6bde9675ae3754a7bb820baccc46e7187df22..4211158a4af9e295d18b06f578164e2ec5de9e1d 100644 --- a/src/gausskernel/optimizer/commands/indexcmds.cpp +++ b/src/gausskernel/optimizer/commands/indexcmds.cpp @@ -833,7 +833,7 @@ ObjectAddress DefineIndex(Oid relationId, IndexStmt* stmt, Oid indexRelationId, if (strcmp(stmt->accessMethod, "btree") == 0) { elog(ERROR, "btree index is not supported for ustore, please use ubtree instead"); } - if (strcmp(stmt->accessMethod, "ubtree") != 0) { + if (strcmp(stmt->accessMethod, "ubtree") != 0 && strcmp(stmt->accessMethod, "hnsw") != 0) { elog(ERROR, "%s index is not supported for ustore", (stmt->accessMethod)); } if (has_dedup_opt) { diff --git a/src/gausskernel/optimizer/gplanmgr/gplanmgr.cpp b/src/gausskernel/optimizer/gplanmgr/gplanmgr.cpp index ee848707792a29840d47053c31685d7ca61bffa9..d9775eab9831bcd4d92dd3cbdf2487fbfa7bcdd0 100644 --- a/src/gausskernel/optimizer/gplanmgr/gplanmgr.cpp +++ b/src/gausskernel/optimizer/gplanmgr/gplanmgr.cpp @@ -1591,6 +1591,20 @@ ExtractPlanIndexesUsages(Node *plan, void *context) return false; } + if (IsA(plan, AnnIndexScan)) { + AnnIndexScan *idxScan = (AnnIndexScan *)plan; + + PlanIndexUasge *index = MakePlanIndexUasge(idxScan->scan.scanrelid, + idxScan->indexid, + idxScan->indexqualorig, + idxScan->is_partial, + 
idxScan->selectivity); + + idxCxt->usage_list = lappend(idxCxt->usage_list, index); + + return false; + } + if (IsA(plan, NestLoop)) { NestLoop *nl = (NestLoop *)plan; List *tmpList = (List *)copyObject(nl->nestParams); diff --git a/src/gausskernel/optimizer/path/allpaths.cpp b/src/gausskernel/optimizer/path/allpaths.cpp index 41f8e196a8243858a473966c2e1689690109932e..34f8b091bf0ac35174325cad92af05a1a31c2b44 100755 --- a/src/gausskernel/optimizer/path/allpaths.cpp +++ b/src/gausskernel/optimizer/path/allpaths.cpp @@ -4107,7 +4107,8 @@ static Path* create_partiterator_path(PlannerInfo* root, RelOptInfo* rel, Path* case T_BitmapHeapScan: case T_TidScan: case T_IndexScan: - case T_IndexOnlyScan: { + case T_IndexOnlyScan: + case T_AnnIndexScan: { PartIteratorPath* itrpath = makeNode(PartIteratorPath); itrpath->subPath = path; @@ -4127,7 +4128,10 @@ static Path* create_partiterator_path(PlannerInfo* root, RelOptInfo* rel, Path* /* scan parttition from lower boundary to upper boundary by default */ itrpath->direction = ForwardScanDirection; - if (NIL != path->pathkeys && (T_IndexOnlyScan == path->pathtype || T_IndexScan == path->pathtype)) { + if (NIL != path->pathkeys && + (T_IndexOnlyScan == path->pathtype || + T_IndexScan == path->pathtype || + T_AnnIndexScan == path->pathtype)) { /* try to inherit pathkeys from IndexPath/IndexOnlyScan since only */ make_partiterator_pathkey(root, rel, relation, itrpath, path->pathkeys); } diff --git a/src/gausskernel/optimizer/path/costsize.cpp b/src/gausskernel/optimizer/path/costsize.cpp index e1edaba99fcdc18ae0fbfebebc2c1a5f863f8a60..1a53ecb044bbb4cf54c4ac1b5c8848a27bf79a33 100755 --- a/src/gausskernel/optimizer/path/costsize.cpp +++ b/src/gausskernel/optimizer/path/costsize.cpp @@ -959,6 +959,44 @@ static bool enable_parametrized_path(PlannerInfo* root, RelOptInfo* baserel, Pat #define HEAP_PAGES_FETCHED(isUstore, pages_fetched, allvisfrac) \ (isUstore) ? 
0.0 : ceil((pages_fetched) * (1.0 - (allvisfrac))) +// Recursively extract filter conditions +static void extract_conditions(Node* node, List** conditions) +{ + if (node == NULL) { + return; + } + + if (IsA(node, List)) { + // If it's a List, traverse the linked list + List* list = (List*)node; + ListCell* lc; + foreach(lc, list) { + extract_conditions((Node*)lfirst(lc), conditions); + } + } else if (IsA(node, BoolExpr)) { + // Handle boolean expressions (AND/OR/NOT) + BoolExpr* expr = (BoolExpr*)node; + ListCell* lc; + foreach(lc, expr->args) { + extract_conditions((Node*)lfirst(lc), conditions); + } + } else if (IsA(node, OpExpr) || IsA(node, ScalarArrayOpExpr) || IsA(node, NullTest) || IsA(node, BooleanTest)) { + // Handle basic conditions (e.g., a > 100, a IS NULL, etc.) + *conditions = lappend(*conditions, node); + } +} + +// Extract WHERE clause conditions +List* extract_where_conditions(PlannerInfo* root) +{ + List* conditions = NIL; + Query* parse = root->parse; + + if (parse->jointree != NULL && parse->jointree->quals != NULL) { + extract_conditions(parse->jointree->quals, &conditions); + } + return conditions; +} /* * cost_index * Determines and returns the cost of scanning a relation using an index. 
@@ -998,6 +1036,46 @@ void cost_index(IndexPath* path, PlannerInfo* root, double loop_count) double pages_fetched; bool ispartitionedindex = path->indexinfo->rel->isPartitionedTable; bool disable_path = false; + int dop = SET_DOP(path->path.dop); + bool isAnnIndex = index->isAnnIndex; + // Calculate selectivity + Selectivity total_sel = 1.0; + ListCell* lc; + // Extract LIMIT value + Query* parse = root->parse; + Node* limitNode = parse->limitCount; + int64 limitValue = 0; + Cost annIndexCost = 0; + List* where_conditions = extract_where_conditions(root); + path->annCount = 0; + if (isAnnIndex && index->relam ==HNSW_AM_OID) { + foreach(lc, where_conditions) { + Node* clause = (Node*)lfirst(lc); + Selectivity sel = clause_selectivity(root, clause, 0, JOIN_INNER, NULL); + total_sel *= sel; + } + if (limitNode != NULL) { + // Check if it's a constant + if (IsA(limitNode, Const)) { + Const* constNode = (Const*)limitNode; + limitValue = DatumGetInt64(constNode->constvalue); + } else { + annIndexCost = g_instance.cost_cxt.disable_cost; + } + if (total_sel > 0) { + annIndexCost = (limitValue / total_sel) / ANN_INDEX_COST; + } + } else { + annIndexCost = g_instance.cost_cxt.disable_cost; + limitValue = baserel->tuples; + } + if (total_sel > 0) { + path->annCount = limitValue / total_sel; + } + if (path->annCount > baserel->tuples) { + annIndexCost = g_instance.cost_cxt.disable_cost; + } + } if (enable_parametrized_path(root, baserel, (Path*)path) || (!u_sess->attr.attr_sql.enable_indexscan && !indexonly) || (!u_sess->attr.attr_sql.enable_indexonlyscan && indexonly)) { @@ -1057,7 +1135,9 @@ void cost_index(IndexPath* path, PlannerInfo* root, double loop_count) /* all costs for touching index itself included here */ startup_cost += indexStartupCost; run_cost += indexTotalCost - indexStartupCost; - + if (isAnnIndex) { + run_cost += annIndexCost; + } /* estimate number of main-table tuples fetched */ tuples_fetched = clamp_row_est(indexSelectivity * 
RELOPTINFO_LOCAL_FIELD(root, baserel, tuples)); @@ -5147,6 +5227,7 @@ bool has_indexed_join_quals(NestPath* joinpath) switch (innerpath->pathtype) { case T_IndexScan: case T_IndexOnlyScan: + case T_AnnIndexScan: indexclauses = ((IndexPath*)innerpath)->indexclauses; break; case T_BitmapHeapScan: { diff --git a/src/gausskernel/optimizer/path/indxpath.cpp b/src/gausskernel/optimizer/path/indxpath.cpp index d1646ac7f0866bb55e8a12754b77fb076e6b4df1..ae129e3d0bf02df7fc84e85f2c6e7389ddbd26aa 100755 --- a/src/gausskernel/optimizer/path/indxpath.cpp +++ b/src/gausskernel/optimizer/path/indxpath.cpp @@ -21,6 +21,7 @@ #include "access/skey.h" #include "access/sysattr.h" +#include "access/multi_redo_api.h" #include "catalog/index.h" #include "catalog/pg_am.h" #include "catalog/pg_collation.h" @@ -915,6 +916,17 @@ static List* build_index_paths(PlannerInfo* root, RelOptInfo* rel, IndexOptInfo* bool index_only_scan = false; int indexcol; + bool can_parallel = IS_STREAM_PLAN && (u_sess->opt_cxt.query_dop > 1) && (ST_BITMAPSCAN != scantype) && + (!rel->isPartitionedTable); + + if (index->isAnnIndex && IsExtremeRedo()) { + if (ST_BITMAPSCAN != scantype) { + ereport(NOTICE, (errmsg("Ann Index does not support extreme RTO"), + errhint("This will show as Seq Scan"))); + } + return NIL; + } + /* * Check that index supports the desired scan type(s) */ diff --git a/src/gausskernel/optimizer/plan/createplan.cpp b/src/gausskernel/optimizer/plan/createplan.cpp index 8b6d862ecb45ff7f3e1bd82f7b4330213e626440..61b38f27e82e2252184c06229b939428f2afa241 100755 --- a/src/gausskernel/optimizer/plan/createplan.cpp +++ b/src/gausskernel/optimizer/plan/createplan.cpp @@ -162,6 +162,8 @@ static CStoreIndexHeapScan* make_cstoreindex_heapscan(PlannerInfo* root, Path* b Plan* lefttree, List* bitmapqualorig, Index scanrelid); static CStoreIndexAnd* make_cstoreindex_and(List* ctidplans); static CStoreIndexOr* make_cstoreindex_or(List* ctidplans); +static AnnIndexScan* make_annindexscan(List* qptlist, List* 
qpqual, Index scanrelid, Oid indexid, List* indexqual, + List* indexqualorig, List* indexorderby, List* indexorderbyorig, ScanDirection indexscandir, double indexselectivity, bool is_partial,double annCount); static TidScan* make_tidscan(List* qptlist, List* qpqual, Index scanrelid, List* tidquals); static FunctionScan* make_functionscan(List* qptlist, List* qpqual, Index scanrelid, Node* funcexpr, List* funccolnames, List* funccoltypes, List* funccoltypmods, List* funccolcollations); @@ -359,6 +361,7 @@ static Plan* create_plan_recurse(PlannerInfo* root, Path* best_path) #endif /* ENABLE_MULTIPLE_NODES */ case T_IndexScan: case T_IndexOnlyScan: + case T_AnnIndexScan: case T_SeqScan: case T_BitmapHeapScan: case T_TidScan: @@ -688,6 +691,7 @@ static Plan* create_scan_plan(PlannerInfo* root, Path* best_path) break; #endif /* ENABLE_MULTIPLE_NODES */ case T_IndexScan: + case T_AnnIndexScan: if (SUBQUERY_IS_PARAM(root) && PATH_REQ_UPPER(best_path) != NULL) { scan_clauses = list_concat(scan_clauses, rel->subplanrestrictinfo); } @@ -787,7 +791,7 @@ static bool IsScanPath(NodeTag type) { return ( type == T_CStoreScan || type == T_CStoreIndexScan || type == T_CStoreIndexHeapScan || type == T_SeqScan || - type == T_IndexScan || type == T_IndexOnlyScan || type == T_BitmapHeapScan + type == T_IndexScan || type == T_IndexOnlyScan || type == T_BitmapHeapScan || type == T_AnnIndexScan ); } @@ -1044,6 +1048,7 @@ void disuse_physical_tlist(Plan* plan, Path* path) case T_SeqScan: case T_IndexScan: case T_IndexOnlyScan: + case T_AnnIndexScan: case T_BitmapHeapScan: case T_CStoreIndexHeapScan: case T_TidScan: @@ -1074,6 +1079,7 @@ void disuse_physical_tlist(Plan* plan, Path* path) case T_SeqScan: case T_IndexScan: case T_IndexOnlyScan: + case T_AnnIndexScan: case T_BitmapHeapScan: case T_CStoreIndexHeapScan: case T_TidScan: { @@ -2650,18 +2656,34 @@ static Scan* create_indexscan_plan( indexselectivity, (best_path->indexinfo->indpred != NIL)); } else { - scan_plan = 
(Scan*)make_indexscan(tlist, - qpqual, - baserelid, - indexoid, - fixed_indexquals, - stripped_indexquals, - fixed_indexorderbys, - indexorderbys, - best_path->indexscandir, - indexselectivity, - (best_path->indexinfo->indpred != NIL)); - ((IndexScan*)scan_plan)->is_ustore = best_path->is_ustore; + if (best_path->isAnnIndex) { + scan_plan = (Scan*)make_annindexscan(tlist, + qpqual, + baserelid, + indexoid, + fixed_indexquals, + stripped_indexquals, + fixed_indexorderbys, + indexorderbys, + best_path->indexscandir, + indexselectivity, + (best_path->indexinfo->indpred != NIL), + best_path->annCount); + ((AnnIndexScan*)scan_plan)->is_ustore = best_path->is_ustore; + } else { + scan_plan = (Scan*)make_indexscan(tlist, + qpqual, + baserelid, + indexoid, + fixed_indexquals, + stripped_indexquals, + fixed_indexorderbys, + indexorderbys, + best_path->indexscandir, + indexselectivity, + (best_path->indexinfo->indpred != NIL)); + ((IndexScan*)scan_plan)->is_ustore = best_path->is_ustore; + } } } @@ -3358,6 +3380,7 @@ static void ModifyWorktableWtParam(Node* planNode, int oldWtParam, int newWtPara #endif /* ENABLE_MULTIPLE_NODES */ case T_IndexScan: case T_IndexOnlyScan: + case T_AnnIndexScan: case T_SeqScan: case T_ForeignScan: case T_ExtensiblePlan: @@ -5959,6 +5982,31 @@ static CStoreIndexHeapScan* make_cstoreindex_heapscan(PlannerInfo* root, Path* b return node; } +static AnnIndexScan* make_annindexscan(List* qptlist, List* qpqual, Index scanrelid, Oid indexid, List* indexqual, + List* indexqualorig, List* indexorderby, List* indexorderbyorig, ScanDirection indexscandir, double indexselectivity, bool is_partial, double annCount) +{ + AnnIndexScan* node = makeNode(AnnIndexScan); + Plan* plan = &node->scan.plan; + + /* cost should be inserted by caller */ + plan->targetlist = qptlist; + plan->qual = qpqual; + plan->lefttree = NULL; + plan->righttree = NULL; + node->annCount = annCount; + node->scan.scanrelid = scanrelid; + node->indexid = indexid; + node->indexqual = 
indexqual; + node->indexqualorig = indexqualorig; + node->indexorderby = indexorderby; + node->indexorderbyorig = indexorderbyorig; + node->indexorderdir = indexscandir; + node->selectivity = indexselectivity; + node->is_partial = is_partial; + return node; +} + + static TidScan* make_tidscan(List* qptlist, List* qpqual, Index scanrelid, List* tidquals) { TidScan* node = makeNode(TidScan); @@ -9448,6 +9496,7 @@ bool is_projection_capable_plan(Plan* plan) #endif /* ENABLE_MULTIPLE_NODES */ case T_IndexScan: case T_IndexOnlyScan: + case T_AnnIndexScan: case T_BitmapHeapScan: case T_TidScan: case T_CStoreIndexScan: @@ -9478,7 +9527,7 @@ bool IsPlanForPartitionScan(Plan* plan) return false; } if (IsA(plan, SeqScan) || IsA(plan, IndexScan) || IsA(plan, IndexOnlyScan) || IsA(plan, BitmapHeapScan) || - IsA(plan, BitmapIndexScan) || IsA(plan, TidScan) || IsA(plan, VecToRow)) { + IsA(plan, BitmapIndexScan) || IsA(plan, TidScan) || IsA(plan, VecToRow) || IsA(plan, AnnIndexScan)) { return true; } if (IsA(plan, CStoreScan) || IsA(plan, CStoreIndexScan) || IsA(plan, CStoreIndexCtidScan) || @@ -9531,6 +9580,7 @@ static Plan* setPartitionParam(PlannerInfo* root, Plan* plan, RelOptInfo* rel) #endif /* ENABLE_MULTIPLE_NODES */ case T_IndexScan: case T_IndexOnlyScan: + case T_AnnIndexScan: case T_BitmapHeapScan: case T_BitmapIndexScan: case T_TidScan: @@ -9572,6 +9622,7 @@ static Plan* setBucketInfoParam(PlannerInfo* root, Plan* plan, RelOptInfo* rel) case T_CStoreScan: case T_IndexScan: case T_IndexOnlyScan: + case T_AnnIndexOnlyScan: case T_BitmapHeapScan: case T_BitmapIndexScan: case T_TidScan: diff --git a/src/gausskernel/optimizer/plan/planagg.cpp b/src/gausskernel/optimizer/plan/planagg.cpp index 47fb4bff3eed8a32c8a87e207b29c4f8e9ca46ba..4cfa9741f24e64fe1b54b529a3bab949b2252674 100644 --- a/src/gausskernel/optimizer/plan/planagg.cpp +++ b/src/gausskernel/optimizer/plan/planagg.cpp @@ -461,7 +461,8 @@ static void get_pathkeys_for_partiteratorpath(RelOptInfo *final_rel, Expr* 
mminf } PartIteratorPath *itrpath = (PartIteratorPath *)path; if (itrpath->path.pathkeys == NULL && - (itrpath->subPath->pathtype == T_IndexOnlyScan || itrpath->subPath->pathtype == T_IndexScan)) { + (itrpath->subPath->pathtype == T_IndexOnlyScan || itrpath->subPath->pathtype == T_IndexScan || + itrpath->subPath->pathtype == T_AnnIndexScan)) { IndexPath *indexPath = (IndexPath *)itrpath->subPath; // only supprot btree index. if (!OID_IS_BTREE(indexPath->indexinfo->relam)) { diff --git a/src/gausskernel/optimizer/plan/planner.cpp b/src/gausskernel/optimizer/plan/planner.cpp index eac0ad1ed02192437c9ef2880309f0ff96db4cae..033992f3c2bc725cf1cf58308df55ee927003319 100755 --- a/src/gausskernel/optimizer/plan/planner.cpp +++ b/src/gausskernel/optimizer/plan/planner.cpp @@ -9915,6 +9915,7 @@ static bool vector_engine_walker_internal(Plan* result_plan, bool check_rescan, } case T_IndexScan: case T_IndexOnlyScan: + case T_AnnIndexScan: case T_BitmapHeapScan: case T_TidScan: case T_FunctionScan: { @@ -10363,6 +10364,7 @@ Plan* vectorize_plan(Plan* result_plan, bool ignore_remotequery, bool forceVecto } case T_IndexScan: case T_IndexOnlyScan: + case T_AnnIndexScan: case T_BitmapHeapScan: case T_TidScan: case T_FunctionScan: { diff --git a/src/gausskernel/optimizer/plan/setrefs.cpp b/src/gausskernel/optimizer/plan/setrefs.cpp index ec5db1b956d047e662a47f96b9188e14efb035b6..41450a632abec220f749b639602e8bd835c28b23 100644 --- a/src/gausskernel/optimizer/plan/setrefs.cpp +++ b/src/gausskernel/optimizer/plan/setrefs.cpp @@ -358,6 +358,20 @@ static Plan* set_plan_refs(PlannerInfo* root, Plan* plan, int rtoffset) } return set_indexonlyscan_references(root, splan, rtoffset); } break; + case T_AnnIndexScan: { + AnnIndexScan* splan = (AnnIndexScan*)plan; + + splan->scan.scanrelid += rtoffset; + splan->scan.plan.targetlist = fix_scan_list(root, splan->scan.plan.targetlist, rtoffset); + if (splan->scan.plan.distributed_keys != NIL) { + splan->scan.plan.distributed_keys = 
fix_scan_list(root, splan->scan.plan.distributed_keys, rtoffset); + } + splan->scan.plan.qual = fix_scan_list(root, splan->scan.plan.qual, rtoffset); + splan->indexqual = fix_scan_list(root, splan->indexqual, rtoffset); + splan->indexqualorig = fix_scan_list(root, splan->indexqualorig, rtoffset); + splan->indexorderby = fix_scan_list(root, splan->indexorderby, rtoffset); + splan->indexorderbyorig = fix_scan_list(root, splan->indexorderbyorig, rtoffset); + } break; case T_CStoreIndexScan: { CStoreIndexScan* splan = (CStoreIndexScan*)plan; @@ -537,6 +551,7 @@ static Plan* set_plan_refs(PlannerInfo* root, Plan* plan, int rtoffset) #endif /* ENABLE_MULTIPLE_NODES */ case T_IndexScan: case T_IndexOnlyScan: + case T_AnnIndexScan: case T_BitmapHeapScan: case T_TidScan: case T_CStoreIndexScan: diff --git a/src/gausskernel/optimizer/plan/stream_remove.cpp b/src/gausskernel/optimizer/plan/stream_remove.cpp index 59c7a1d66b2279868e8cdbf1b8ec00e1bfe49dd9..e135ca4b47173d0d3bcac26d083eb6db255c0b76 100644 --- a/src/gausskernel/optimizer/plan/stream_remove.cpp +++ b/src/gausskernel/optimizer/plan/stream_remove.cpp @@ -178,6 +178,9 @@ static List *fetch_qual_from_scan(const Scan *scan) case T_IndexOnlyScan: { return ((IndexOnlyScan *)scan)->indexqual; } + case T_AnnIndexScan: { + return ((AnnIndexScan *)scan)->indexqual; + } default: { return NULL; } @@ -260,7 +263,8 @@ static bool is_stream_limit_plan(const Stream *stream) case T_SeqScan: case T_IndexScan: case T_BitmapIndexScan: - case T_IndexOnlyScan: { + case T_IndexOnlyScan: + case T_AnnIndexScan: { return is_select_const_with_distribute_qual_plan((Scan *)lefttree); } default: @@ -285,7 +289,8 @@ static bool is_stream_plan(const Stream *stream) case T_SeqScan: case T_IndexScan: case T_BitmapIndexScan: - case T_IndexOnlyScan: { + case T_IndexOnlyScan: + case T_AnnIndexScan: { return is_select_const_with_distribute_qual_plan((Scan *)lefttree); } default: diff --git a/src/gausskernel/optimizer/plan/streamplan.cpp 
b/src/gausskernel/optimizer/plan/streamplan.cpp index 354819530e054120fb011123323144eb8bc52233..753769e0ed28ff4f4fc472b70f318b069127b7ff 100644 --- a/src/gausskernel/optimizer/plan/streamplan.cpp +++ b/src/gausskernel/optimizer/plan/streamplan.cpp @@ -48,7 +48,8 @@ static int g_support_hashfilter_types[] = { T_SubqueryScan, T_BitmapHeapScan, T_CStoreIndexHeapScan, - T_CteScan + T_CteScan, + T_AnnIndexScan }; /* diff --git a/src/gausskernel/optimizer/plan/streamplan_utils.cpp b/src/gausskernel/optimizer/plan/streamplan_utils.cpp index 3780cf127fd823fb1831b8f9876ab3f701ddf0fe..15d38eb156b15129490ce7f4c6158f6113833567 100755 --- a/src/gausskernel/optimizer/plan/streamplan_utils.cpp +++ b/src/gausskernel/optimizer/plan/streamplan_utils.cpp @@ -89,6 +89,11 @@ List* check_op_list_template(Plan* result_plan, List* (*check_eval)(Node*)) res_list = list_concat_unique(res_list, check_eval((Node*)splan->indexqual)); } break; + case T_AnnIndexScan: { + AnnIndexScan* splan = (AnnIndexScan*)result_plan; + + res_list = list_concat_unique(res_list, check_eval((Node*)splan->indexqual)); + } break; case T_CStoreIndexScan: { CStoreIndexScan* splan = (CStoreIndexScan*)result_plan; @@ -360,6 +365,7 @@ void stream_path_walker(Path* path, ContainStreamContext* context) case T_IndexScan: case T_IndexOnlyScan: + case T_AnnIndexScan: case T_BitmapHeapScan: { IndexPath* indexPath = (IndexPath*)path; if (g_instance.attr.attr_storage.enable_delta_store && diff --git a/src/gausskernel/optimizer/plan/subselect.cpp b/src/gausskernel/optimizer/plan/subselect.cpp index b5245fbbd12fb6ff2591024f570c31ae786466a6..dd6d3d257597fef920fbce5ffb9c9c50d83dd8bb 100644 --- a/src/gausskernel/optimizer/plan/subselect.cpp +++ b/src/gausskernel/optimizer/plan/subselect.cpp @@ -3083,6 +3083,18 @@ static Bitmapset* finalize_plan(PlannerInfo* root, Plan* plan, Bitmapset* valid_ context.paramids = bms_add_members(context.paramids, scan_params); break; + case T_AnnIndexScan: + 
(void)finalize_primnode((Node*)((AnnIndexScan*)plan)->indexqual, &context); + (void)finalize_primnode((Node*)((AnnIndexScan*)plan)->indexorderby, &context); + + /* + * we need not look at indexqualorig, since it will have the same + * param references as indexqual. Likewise, we can ignore + * indexorderbyorig. + */ + context.paramids = bms_add_members(context.paramids, scan_params); + break; + case T_BitmapIndexScan: (void)finalize_primnode((Node*)((BitmapIndexScan*)plan)->indexqual, &context); @@ -3113,7 +3125,6 @@ static Bitmapset* finalize_plan(PlannerInfo* root, Plan* plan, Bitmapset* valid_ (void)finalize_primnode((Node*)((CStoreIndexHeapScan*)plan)->bitmapqualorig, &context); context.paramids = bms_add_members(context.paramids, scan_params); break; - case T_TidScan: (void)finalize_primnode((Node*)((TidScan*)plan)->tidquals, &context); context.paramids = bms_add_members(context.paramids, scan_params); diff --git a/src/gausskernel/optimizer/sqladvisor/sqladvisor_extract.cpp b/src/gausskernel/optimizer/sqladvisor/sqladvisor_extract.cpp index 9ff94dccf0f5501d105857014a8bb7f208301b9b..a9ffe47121eb8a3fbaf33449e5004375c4aec5cb 100644 --- a/src/gausskernel/optimizer/sqladvisor/sqladvisor_extract.cpp +++ b/src/gausskernel/optimizer/sqladvisor/sqladvisor_extract.cpp @@ -233,6 +233,19 @@ static List* extractNodeIndexOnlyScan(Plan* plan, List* ancestors, List* rtable, return resSubplan; } +static List* extractNodeAnnIndexScan(Plan* plan, List* ancestors, List* rtable, List* subplans) +{ + List* resSubplan = NIL; + AnnIndexScan* annindexScan = (AnnIndexScan*)plan; + + extractQual(annindexScan->indexqualorig, plan, ancestors, rtable, subplans); + extractQual(plan->qual, plan, ancestors, rtable, subplans); + resSubplan = extractSubplan((Expr*)annindexScan->scan.plan.targetlist, resSubplan, subplans); + resSubplan = extractSubplan((Expr*)annindexScan->scan.plan.qual, resSubplan, subplans); + resSubplan = extractSubplan((Expr*)annindexScan->indexqualorig, resSubplan, 
subplans); + return resSubplan; +} + static List* extractNodeCStoreIndexScan(Plan* plan, List* ancestors, List* rtable, List* subplans) { List* resSubplan = NIL; @@ -478,6 +491,9 @@ void extractNode(Plan* plan, List* ancestors, List* rtable, List* subplans) case T_IndexOnlyScan: { resSubplan = extractNodeIndexOnlyScan(plan, ancestors, rtable, subplans); } break; + case T_AnnIndexScan: { + resSubplan = extractNodeAnnIndexScan(plan, ancestors, rtable, subplans); + } break; case T_BitmapIndexScan: { BitmapIndexScan* bitmapIndexScan = (BitmapIndexScan*)plan; extractQual(bitmapIndexScan->indexqualorig, plan, ancestors, rtable, subplans); diff --git a/src/gausskernel/optimizer/util/bucketpruning.cpp b/src/gausskernel/optimizer/util/bucketpruning.cpp index d0afed49cc4608a64d19dd1301ac64a3cefc5ed8..2a611bff21e4f0969c7d547ffe8371cd805a860c 100644 --- a/src/gausskernel/optimizer/util/bucketpruning.cpp +++ b/src/gausskernel/optimizer/util/bucketpruning.cpp @@ -731,6 +731,7 @@ void setPlanBucketId(Plan* plan, ParamListInfo params, MemoryContext cxt) case T_CStoreScan: case T_IndexScan: case T_IndexOnlyScan: + case T_AnnIndexScan: case T_BitmapHeapScan: case T_BitmapIndexScan: case T_TidScan: diff --git a/src/gausskernel/optimizer/util/learn/encoding.cpp b/src/gausskernel/optimizer/util/learn/encoding.cpp index 4697cacb39d5053b8a0ba5adcd31dbef3192a474..99054e619b54fd3050a0d83fd2dd3a1440492e3c 100644 --- a/src/gausskernel/optimizer/util/learn/encoding.cpp +++ b/src/gausskernel/optimizer/util/learn/encoding.cpp @@ -83,6 +83,7 @@ const OperationInfo G_OPERATION_INFO_TABLE[G_MAX_OPERATION_NUMBER] = { #endif {T_IndexScan, TEXT_OPTNAME_SCAN, TEXT_STRATEGY_SCAN_INDEX}, {T_CStoreIndexScan, TEXT_OPTNAME_SCAN, TEXT_STRATEGY_SCAN_INDEX}, + {T_AnnIndexScan, TEXT_OPTNAME_SCAN, TEXT_STRATEGY_SCAN_ANN_INDEX}, {T_IndexOnlyScan, TEXT_OPTNAME_SCAN, TEXT_STRATEGY_SCAN_INDEX_ONLY}, {T_BitmapIndexScan, TEXT_OPTNAME_SCAN, TEXT_STRATEGY_SCAN_BITMAP_INDEX}, {T_CStoreIndexHeapScan, TEXT_OPTNAME_SCAN, 
TEXT_STRATEGY_SCAN_BITMAP_HEAP}, @@ -509,7 +510,7 @@ static inline bool IsScan(Plan* plan) IsA(plan, FunctionScan) || IsA(plan, ValuesScan) || IsA(plan, CteScan) || IsA(plan, WorkTableScan) || IsA(plan, ForeignScan) || IsA(plan, VecScan) || IsA(plan, VecIndexScan) || IsA(plan, VecIndexOnlyScan) || IsA(plan, VecBitmapIndexScan) || - IsA(plan, VecBitmapHeapScan); + IsA(plan, VecBitmapHeapScan) || IsA(plan, AnnIndexScan); } static inline bool IsJoin(Plan* plan) @@ -914,6 +915,9 @@ static void GetSpecialPlanOptCondition(PlanState* planstate, StringInfo conditio case T_IndexOnlyScan: GetPlanOptConditionFromQual(((IndexOnlyScan*)plan)->indexqual, planstate, condition, maxlen, rtable); break; + case T_AnnIndexScan: + GetPlanOptConditionFromQual(((AnnIndexScan*)plan)->indexqualorig, planstate, condition, maxlen, rtable); + break; case T_BitmapIndexScan: GetPlanOptConditionFromQual(((BitmapIndexScan*)plan)->indexqualorig, planstate, condition, maxlen, rtable); break; @@ -981,6 +985,7 @@ static void GetPlanOptCondition(PlanState* planstate, StringInfo condition, int case T_Agg: case T_IndexScan: case T_IndexOnlyScan: + case T_AnnIndexScan: case T_CStoreIndexScan: case T_BitmapHeapScan: case T_CStoreIndexHeapScan: diff --git a/src/gausskernel/optimizer/util/nodegroups.cpp b/src/gausskernel/optimizer/util/nodegroups.cpp index ecf8ef85502796821ec47dcb2b3dee4e30c10e67..b93ae7f0dede60e1c91e16364ca924cbc9bdfa97 100644 --- a/src/gausskernel/optimizer/util/nodegroups.cpp +++ b/src/gausskernel/optimizer/util/nodegroups.cpp @@ -1269,6 +1269,7 @@ unsigned int ng_get_dest_num_data_nodes(Plan* plan) #endif /* ENABLE_MULTIPLE_NODES */ case T_IndexScan: case T_IndexOnlyScan: + case T_AnnIndexScan: case T_CStoreIndexScan: case T_BitmapIndexScan: case T_BitmapHeapScan: diff --git a/src/gausskernel/optimizer/util/optcommon.cpp b/src/gausskernel/optimizer/util/optcommon.cpp index f5d96f95bb2d770e6796d57ed9a0e8777eefa267..63ab7cf2408fb0d61b7aaa459aba538a64972732 100755 --- 
a/src/gausskernel/optimizer/util/optcommon.cpp +++ b/src/gausskernel/optimizer/util/optcommon.cpp @@ -279,6 +279,14 @@ void GetPlanNodePlainText( *pt_operation = "BITMAP"; *pname = *sname = *pt_options = "CStore Index Or"; break; + case T_AnnIndexScan: + *pt_operation = "INDEX"; + if (((AnnIndexScan*)plan)->scan.isPartTbl) { + *pname = *sname = *pt_options = "Partitioned Ann Index Scan"; + } else { + *pname = *sname = *pt_options = "Ann Index Scan"; + } + break; case T_TidScan: *pt_operation = "TABLE ACCESS"; if (((Scan*)plan)->isPartTbl) diff --git a/src/gausskernel/optimizer/util/pathnode.cpp b/src/gausskernel/optimizer/util/pathnode.cpp index 969bdcd74ca3edb2ac823301c412c54da8cb3353..e24adb353e9fd101fe7580eaf1cc04ea4d6078a6 100755 --- a/src/gausskernel/optimizer/util/pathnode.cpp +++ b/src/gausskernel/optimizer/util/pathnode.cpp @@ -1180,7 +1180,8 @@ static void set_scan_hint(Path* new_path, HintState* hstate) scanHint = find_scan_hint(hstate, new_path->parent->relids, HINT_KEYWORD_TABLESCAN); break; } - case T_IndexScan: { + case T_IndexScan: + case T_AnnIndexScan: { scanHint = find_scan_hint(hstate, new_path->parent->relids, HINT_KEYWORD_INDEXSCAN); break; } @@ -1402,7 +1403,8 @@ void set_index_hint_value(Path* new_path, List* indexhintList) break; } case T_IndexScan: - case T_IndexOnlyScan: { + case T_IndexOnlyScan: + case T_AnnIndexScan: { isIndexScan = true; matchIndex = find_index_hint_value(indexhintList, index_path->indexinfo->indexoid, &hintMask); break; @@ -2296,6 +2298,7 @@ bool is_partitionIndex_Subpath(Path* subpath) switch (subpath->pathtype) { case T_IndexScan: case T_IndexOnlyScan: + case T_AnnIndexScan: is_index_path = true; break; default: @@ -2330,6 +2333,7 @@ bool is_pwj_path(Path* pwjpath) return ret; } + /* * create_index_path * Creates a path node for an index scan. 
@@ -2359,13 +2363,15 @@ IndexPath* create_index_path(PlannerInfo* root, IndexOptInfo* index, List* index Relids required_outer, Bitmapset *upper_params, double loop_count) { IndexPath* pathnode = makeNode(IndexPath); + bool isAnnIndex = index->isAnnIndex; RelOptInfo* rel = index->rel; List* indexquals = NIL; List* indexqualcols = NIL; pathnode->is_ustore = rel->is_ustore; + pathnode->isAnnIndex = isAnnIndex; - pathnode->path.pathtype = indexonly ? T_IndexOnlyScan : T_IndexScan; + pathnode->path.pathtype = isAnnIndex ? T_AnnIndexScan : (indexonly ? T_IndexOnlyScan : T_IndexScan); pathnode->path.parent = rel; pathnode->path.pathtarget = rel->reltarget; pathnode->path.param_info = get_baserel_parampathinfo(root, rel, required_outer, upper_params); @@ -4651,7 +4657,8 @@ Path* reparameterize_path(PlannerInfo* root, Path* path, Relids required_outer, case T_SeqScan: return create_seqscan_path(root, rel, required_outer); case T_IndexScan: - case T_IndexOnlyScan: { + case T_IndexOnlyScan: + case T_AnnIndexScan: { IndexPath* ipath = (IndexPath*)path; IndexPath* newpath = makeNode(IndexPath); diff --git a/src/gausskernel/optimizer/util/plananalyzer.cpp b/src/gausskernel/optimizer/util/plananalyzer.cpp index bbb2d38dc86c82e69aa4cd68fa58fc53d9d8ec1a..07e72516d834f24e71b871eceb72c6853894f403 100644 --- a/src/gausskernel/optimizer/util/plananalyzer.cpp +++ b/src/gausskernel/optimizer/util/plananalyzer.cpp @@ -972,6 +972,7 @@ List* PlanAnalyzerOperator(QueryDesc* querydesc, PlanState* planstate) } case T_IndexScan: case T_IndexOnlyScan: + case T_AnnIndexScan: case T_BitmapIndexScan: { /* Check unsuitable index scan */ issueResultsItem = CheckUnsuitableScanMethod(ps, dn_num, total_tuples, totalFiltereds, true, false); diff --git a/src/gausskernel/optimizer/util/plancat.cpp b/src/gausskernel/optimizer/util/plancat.cpp index c889dfdff0442e158992e6c8c05e801b4532dbb0..9e756e2ebd9c6ca765a7c3718a77d7ea6ddc5ff8 100755 --- a/src/gausskernel/optimizer/util/plancat.cpp +++ 
b/src/gausskernel/optimizer/util/plancat.cpp @@ -576,6 +576,7 @@ void get_relation_info(PlannerInfo* root, Oid relationObjectId, bool inhparent, info->amsearchnulls = indexRelation->rd_am->amsearchnulls; info->amhasgettuple = OidIsValid(indexRelation->rd_am->amgettuple); info->amhasgetbitmap = OidIsValid(indexRelation->rd_am->amgetbitmap); + info->isAnnIndex = (info->relam == HNSW_AM_OID || info->relam == IVFFLAT_AM_OID); /* * Fetch the ordering information for the index, if any. diff --git a/src/gausskernel/optimizer/util/planmem_walker.cpp b/src/gausskernel/optimizer/util/planmem_walker.cpp index 245f3382fceb0b30e469bc02224bf522fc23ab0e..62c227ed0680514d1e73a58125c6d037e884fded 100644 --- a/src/gausskernel/optimizer/util/planmem_walker.cpp +++ b/src/gausskernel/optimizer/util/planmem_walker.cpp @@ -316,6 +316,15 @@ bool plan_tree_walker(Node* node, MethodWalker walker, void* context) /* Other fields are lists of basic items, nothing to walk. */ break; + case T_AnnIndexScan: + if (walk_scan_node_fields((Scan*)node, walker, context)) + return true; + if (p2walker((Node*)((AnnIndexScan*)node)->indexqual, context)) { + return true; + } + /* Other fields are lists of basic items, nothing to walk. 
*/ + break; + case T_CStoreIndexScan: if (walk_scan_node_fields((Scan*)node, walker, context)) return true; diff --git a/src/gausskernel/process/postmaster/postmaster.cpp b/src/gausskernel/process/postmaster/postmaster.cpp index ab6deb4f53efa8b6bd269ab1c5378df41b5c99b1..a79181ee96286c40c2eefd1096e7f8ffa01b8476 100644 --- a/src/gausskernel/process/postmaster/postmaster.cpp +++ b/src/gausskernel/process/postmaster/postmaster.cpp @@ -271,6 +271,7 @@ #include "ddes/dms/ss_reform_common.h" #include "ddes/dms/ss_dms_auxiliary.h" #include "storage/gs_uwal/gs_uwal.h" +#include "access/datavec/utils.h" #ifdef ENABLE_UT #define static @@ -3140,6 +3141,15 @@ int PostmasterMain(int argc, char* argv[]) } } + /* init datavec pq */ + if (g_instance.attr.attr_storage.enable_pq) { + int ret = PQInit(); + if (ret != 0) { + ereport(PANIC, (errmsg("datavec PQ init failed, ret: %d", ret))); + } + ereport(LOG, (errmsg("datavec PQ init success."))); + } + /* init sharestorge(dorado) */ ShareStorageInit(); exrto_standby_read_init(); @@ -9642,6 +9652,7 @@ void ExitPostmaster(int status) * MUST -- vadim 05-10-1999 */ DMSUninit(); + PQUinit(); CloseGaussPidDir(); diff --git a/src/gausskernel/process/tcop/utility.cpp b/src/gausskernel/process/tcop/utility.cpp index e7366e4d72dd9162ae5fb805020a50013296c113..5a8420828d9a7ee33cd98e631a3199f00e5ff81f 100755 --- a/src/gausskernel/process/tcop/utility.cpp +++ b/src/gausskernel/process/tcop/utility.cpp @@ -6726,7 +6726,7 @@ ProcessUtilitySlow(Node *parse_tree, break; case OBJECT_TSPARSER: #ifdef PGXC - if (!IsInitdb) { + if (!IsInitdb && !u_sess->attr.attr_common.IsInplaceUpgrade && !u_sess->exec_cxt.extension_is_valid) { ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("user-defined text search parser is not yet supported."))); diff --git a/src/gausskernel/process/threadpool/knl_instance.cpp b/src/gausskernel/process/threadpool/knl_instance.cpp index 4fdc38be1a2bc8671bad501b0c42b87cca9d3b81..7806a3352acf1d17530bfc2aaaa69bb17e1c069c 
100755 --- a/src/gausskernel/process/threadpool/knl_instance.cpp +++ b/src/gausskernel/process/threadpool/knl_instance.cpp @@ -1097,6 +1097,8 @@ void knl_instance_init() knl_g_datadir_init(&g_instance.datadir_cxt); knl_g_listen_sock_init(&g_instance.listen_cxt); + g_instance.pq_inited = false; + #ifdef USE_SPQ knl_g_spq_context_init(&g_instance.spq_cxt); #endif diff --git a/src/gausskernel/process/threadpool/knl_session.cpp b/src/gausskernel/process/threadpool/knl_session.cpp index 829ddaa66f9aac7ca00735cb2efdb1129a47d125..8b73066cc2392e2179c50369eed55bc5d5bf171a 100755 --- a/src/gausskernel/process/threadpool/knl_session.cpp +++ b/src/gausskernel/process/threadpool/knl_session.cpp @@ -59,6 +59,7 @@ #include "workload/workload.h" #include "parser/scanner.h" #include "pgstat.h" +#include "access/datavec/bitvec.h" THR_LOCAL knl_session_context* u_sess; @@ -1472,6 +1473,15 @@ static void knl_u_libsw_init(knl_u_libsw_context* libsw_cxt) libsw_cxt->redirect_manager = New(CurrentMemoryContext) RedirectManager(); } +static void knl_u_datavec_init(knl_u_datavec_context* datavec_cxt) +{ + BitvecInit(); + datavec_cxt->hnsw_ef_search = 0; + datavec_cxt->hnsw_earlystop_threshold = 0; + datavec_cxt->ivfflat_probes = 0; + datavec_cxt->ivfpq_kreorder = 0; +} + void knl_session_init(knl_session_context* sess_cxt) { Assert (0 != strncmp(CurrentMemoryContext->name, "ErrorContext", sizeof("ErrorContext"))); @@ -1573,6 +1583,8 @@ void knl_session_init(knl_session_context* sess_cxt) knl_u_clientConnTime_init(&sess_cxt->clientConnTime_cxt); knl_u_opfusion_reuse_init(&sess_cxt->opfusion_reuse_ctx); + + knl_u_datavec_init(&sess_cxt->datavec_ctx); MemoryContextSeal(sess_cxt->top_mem_cxt); } diff --git a/src/gausskernel/runtime/executor/Makefile b/src/gausskernel/runtime/executor/Makefile index 474227e2bd8f5e2c50ac2bc4c787c133f3211cd3..23d369995ebd7312fe0943d866e670e08b4fc3be 100644 --- a/src/gausskernel/runtime/executor/Makefile +++ b/src/gausskernel/runtime/executor/Makefile @@ -39,7 
+39,7 @@ OBJS = execAmi.o execCurrent.o execGrouping.o execJunk.o execMain.o \ execUtils.o functions.o instrument.o nodeAppend.o nodeAgg.o \ nodeBitmapAnd.o nodeBitmapOr.o \ nodeBitmapHeapscan.o nodeBitmapIndexscan.o nodeHash.o \ - nodeHashjoin.o nodeIndexscan.o nodeIndexonlyscan.o \ + nodeHashjoin.o nodeIndexscan.o nodeIndexonlyscan.o nodeAnnIndexscan.o\ nodeLimit.o nodeLockRows.o \ nodeMaterial.o nodeMergeAppend.o nodeMergejoin.o nodeModifyTable.o \ nodeNestloop.o nodeFunctionscan.o nodeRecursiveunion.o nodeResult.o \ diff --git a/src/gausskernel/runtime/executor/execAmi.cpp b/src/gausskernel/runtime/executor/execAmi.cpp index e860c8c84077c1809bfaf8718495628bbeeee9a8..17826af40b06a0cf7707f1105a4211b41319f6a5 100755 --- a/src/gausskernel/runtime/executor/execAmi.cpp +++ b/src/gausskernel/runtime/executor/execAmi.cpp @@ -18,6 +18,7 @@ #include "executor/exec/execdebug.h" #include "executor/node/nodeAgg.h" +#include "executor/node/nodeAnnIndexscan.h" #include "executor/node/nodeAppend.h" #include "executor/node/nodeBitmapAnd.h" #include "executor/node/nodeBitmapHeapscan.h" @@ -178,6 +179,10 @@ void ExecReScanByType(PlanState* node) ExecReScanIndexOnlyScan((IndexOnlyScanState*)node); break; + case T_AnnIndexScanState: + ExecReScanAnnIndexScan((AnnIndexScanState*)node); + break; + case T_BitmapIndexScanState: ExecReScanBitmapIndexScan((BitmapIndexScanState*)node); break; @@ -413,6 +418,10 @@ void ExecMarkPos(PlanState* node) case T_IndexOnlyScanState: ExecIndexOnlyMarkPos((IndexOnlyScanState*)node); break; + + case T_AnnIndexScanState: + ExecAnnIndexMarkPos((AnnIndexScanState*)node); + break; case T_TidScanState: ExecTidMarkPos((TidScanState*)node); @@ -475,6 +484,10 @@ void ExecRestrPos(PlanState* node) ExecIndexOnlyRestrPos((IndexOnlyScanState*)node); break; + case T_AnnIndexScanState: + ExecAnnIndexRestrPos((AnnIndexScanState*)node); + break; + case T_TidScanState: ExecTidRestrPos((TidScanState*)node); break; @@ -521,6 +534,7 @@ bool ExecSupportsMarkRestore(Path 
*pathnode) case T_SeqScan: case T_IndexScan: case T_IndexOnlyScan: + case T_AnnIndexScan: case T_TidScan: case T_ValuesScan: case T_Material: @@ -609,6 +623,10 @@ bool ExecSupportsBackwardScan(Plan* node) return index_supports_backward_scan(((IndexOnlyScan*)node)->indexid) && target_list_supports_backward_scan(node->targetlist); + case T_AnnIndexScan: + return index_supports_backward_scan(((AnnIndexScan*)node)->indexid) && + target_list_supports_backward_scan(node->targetlist); + case T_SubqueryScan: return ExecSupportsBackwardScan(((SubqueryScan*)node)->subplan) && target_list_supports_backward_scan(node->targetlist); diff --git a/src/gausskernel/runtime/executor/execCurrent.cpp b/src/gausskernel/runtime/executor/execCurrent.cpp index 9e49bdc1435ef353ac3ae01c47536a40dc1dd319..e2a719bd19e2872dc61532f4ca860bc7890b1816 100644 --- a/src/gausskernel/runtime/executor/execCurrent.cpp +++ b/src/gausskernel/runtime/executor/execCurrent.cpp @@ -267,6 +267,7 @@ static ScanState* search_plan_tree(PlanState* node, Oid table_oid) case T_SeqScanState: case T_IndexScanState: case T_IndexOnlyScanState: + case T_AnnIndexScanState: case T_BitmapHeapScanState: case T_TidScanState: { ScanState *sstate = (ScanState *)node; diff --git a/src/gausskernel/runtime/executor/execProcnode.cpp b/src/gausskernel/runtime/executor/execProcnode.cpp index fc5be0b57c547a188e0e91d1cd2d234b2ac392c6..48bb3f0117a26d4837a811aead67283554805be0 100755 --- a/src/gausskernel/runtime/executor/execProcnode.cpp +++ b/src/gausskernel/runtime/executor/execProcnode.cpp @@ -76,6 +76,7 @@ #include "executor/executor.h" #include "executor/node/nodeAgg.h" +#include "executor/node/nodeAnnIndexscan.h" #include "executor/node/nodeAppend.h" #include "executor/node/nodeBitmapAnd.h" #include "executor/node/nodeBitmapHeapscan.h" @@ -205,6 +206,7 @@ bool NeedStubExecution(Plan* plan) case T_SeqScan: case T_IndexScan: case T_IndexOnlyScan: + case T_AnnIndexScan: case T_BitmapIndexScan: case T_BitmapHeapScan: case T_TidScan: @@ 
-328,6 +330,8 @@ PlanState* ExecInitNodeByType(Plan* node, EState* estate, int eflags) return (PlanState*)ExecInitIndexScan((IndexScan*)node, estate, eflags); case T_IndexOnlyScan: return (PlanState*)ExecInitIndexOnlyScan((IndexOnlyScan*)node, estate, eflags); + case T_AnnIndexScan: + return (PlanState*)ExecInitAnnIndexScan((AnnIndexScan*)node, estate, eflags); case T_BitmapIndexScan: return (PlanState*)ExecInitBitmapIndexScan((BitmapIndexScan*)node, estate, eflags); case T_BitmapHeapScan: @@ -1159,6 +1163,10 @@ static void ExecEndNodeByType(PlanState* node) ExecEndIndexOnlyScan((IndexOnlyScanState*)node); break; + case T_AnnIndexScanState: + ExecEndAnnIndexScan((AnnIndexScanState*)node); + break; + case T_BitmapIndexScanState: ExecEndBitmapIndexScan((BitmapIndexScanState*)node); break; diff --git a/src/gausskernel/runtime/executor/instrument.cpp b/src/gausskernel/runtime/executor/instrument.cpp index a30b80bcfdcaef4c712effafe0928637b0eb3efb..7a2f92775bc1600bf3521435bc5497b48d6c93ee 100644 --- a/src/gausskernel/runtime/executor/instrument.cpp +++ b/src/gausskernel/runtime/executor/instrument.cpp @@ -1168,6 +1168,13 @@ Instrumentation* ThreadInstrumentation::allocInstrSlot(int plan_node_id, int par pname = "CStore Index Or"; plan_type = IO_OP; break; + case T_AnnIndexScan: + if (((AnnIndexScan*)plan)->scan.isPartTbl) + pname = "Partitioned Ann Index Scan"; + else + pname = "Ann Index Scan"; + plan_type = IO_OP; + break; case T_TidScan: if (((Scan*)plan)->isPartTbl) pname = "Partitioned Tid Scan"; @@ -1432,6 +1439,7 @@ Instrumentation* ThreadInstrumentation::allocInstrSlot(int plan_node_id, int par #endif /* ENABLE_MULTIPLE_NODES */ case T_IndexScan: case T_IndexOnlyScan: + case T_AnnIndexScan: case T_BitmapHeapScan: case T_CStoreIndexScan: case T_CStoreIndexCtidScan: diff --git a/src/gausskernel/runtime/executor/nodeAnnIndexscan.cpp b/src/gausskernel/runtime/executor/nodeAnnIndexscan.cpp new file mode 100644 index 
0000000000000000000000000000000000000000..ba8a0c886450d0f107a84d0b5a1129e3e60b94f8 --- /dev/null +++ b/src/gausskernel/runtime/executor/nodeAnnIndexscan.cpp @@ -0,0 +1,926 @@ +/* ------------------------------------------------------------------------- + * + * nodeAnnIndexscan.cpp + * Routines to support indexed scans of relations + * + * Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * + * + * IDENTIFICATION + * src/gausskernel/runtime/executor/nodeAnnIndexscan.cpp + * + * ------------------------------------------------------------------------- + * + * INTERFACE ROUTINES + * ExecAnnIndexScan scans a relation using an ann index + * AnnIndexNext retrieve next tuple using ann index + * ExecInitAnnIndexScan creates and initializes state info. + * ExecReScanAnnIndexScan rescans the ann indexed relation. + * ExecEndAnnIndexScan releases all storage. + * ExecAnnIndexMarkPos marks scan position. + * ExecAnnIndexRestrPos restores scan position. + */ +#include "postgres.h" +#include "knl/knl_variable.h" + +#include "access/relscan.h" +#include "access/tableam.h" +#include "catalog/pg_partition_fn.h" +#include "commands/cluster.h" +#include "executor/exec/execdebug.h" +#include "executor/node/nodeIndexscan.h" +#include "optimizer/clauses.h" +#include "storage/tcap.h" +#include "utils/array.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/rel.h" +#include "utils/rel_gs.h" +#include "gstrace/gstrace_infra.h" +#include "gstrace/executer_gstrace.h" +#include "nodes/makefuncs.h" +#include "optimizer/pruning.h" +#include "executor/node/nodeAnnIndexscan.h" + +static TupleTableSlot* ExecAnnIndexScan(PlanState* state); +static TupleTableSlot* AnnIndexNext(AnnIndexScanState* node); +static void ExecInitNextPartitionForAnnIndexScan(AnnIndexScanState* node); + +/* ---------------------------------------------------------------- + * AnnIndexNext + * + * Retrieve a tuple from the AnnIndexScan node's current_relation + * using the index specified 
in the AnnIndexScanState information. + * ---------------------------------------------------------------- + */ +static TupleTableSlot* AnnIndexNext(AnnIndexScanState* node) +{ + EState* estate = NULL; + ExprContext* econtext = NULL; + ScanDirection direction; + IndexScanDesc scandesc; + HeapTuple tuple; + TupleTableSlot* slot = NULL; + bool isUstore = false; + + /* + * extract necessary information from index scan node + */ + estate = node->ss.ps.state; + direction = estate->es_direction; + /* flip direction if this is an overall backward scan */ + if (ScanDirectionIsBackward(((AnnIndexScan*)node->ss.ps.plan)->indexorderdir)) { + if (ScanDirectionIsForward(direction)) + direction = BackwardScanDirection; + else if (ScanDirectionIsBackward(direction)) + direction = ForwardScanDirection; + } + econtext = node->ss.ps.ps_ExprContext; + slot = node->ss.ss_ScanTupleSlot; + scandesc = node->iss_ScanDesc; + scandesc->count = (int64_t)node->annCount; + isUstore = RelationIsUstoreFormat(node->ss.ss_currentRelation); + /* + * ok, now that we have what we need, fetch the next tuple. + */ + // we should change abs_idx_getnext to call IdxScanAm(scan)->idx_getnext and change .idx_getnext in g_HeapIdxAm to + // IndexGetnextSlot + while (true) { + CHECK_FOR_INTERRUPTS(); + + IndexScanDesc indexScan = GetIndexScanDesc(scandesc); + if (isUstore) { + if (!IndexGetnextSlot(scandesc, direction, slot, &node->ss.ps.state->have_current_xact_date)) { + break; + } + } else { + if ((tuple = scan_handler_idx_getnext(scandesc, direction, InvalidOid, InvalidBktId, + &node->ss.ps.state->have_current_xact_date)) == NULL) { + break; + } + /* Update indexScan, because hashbucket may switch current index in scan_handler_idx_getnext */ + indexScan = GetIndexScanDesc(scandesc); + /* + * Store the scanned tuple in the scan tuple slot of the scan state. + * Note: we pass 'false' because tuples returned by amgetnext are + * pointers onto disk pages and must not be pfree_ext()'d. 
+ */ + (void)ExecStoreTuple(tuple, /* tuple to store */ + slot, /* slot to store in */ + indexScan->xs_cbuf, /* buffer containing tuple */ + false); /* don't pfree */ + } + + /* + * If the index was lossy, we have to recheck the index quals using + * the fetched tuple. + */ + if (indexScan->xs_recheck) { + econtext->ecxt_scantuple = slot; + ResetExprContext(econtext); + if (!ExecQual(node->indexqualorig, econtext, false)) { + /* Fails recheck, so drop it and loop back for another */ + InstrCountFiltered2(node, 1); + continue; + } + } + + return slot; + } + + /* + * if we get here the index scan returned no more tuples, so we are at + * the end of the scan. + */ + return ExecClearTuple(slot); +} + +/* + * AnnIndexRecheck -- access method routine to recheck a tuple in EvalPlanQual + */ +static bool AnnIndexRecheck(AnnIndexScanState* node, TupleTableSlot* slot) +{ + ExprContext* econtext = NULL; + + /* + * extract necessary information from index scan node + */ + econtext = node->ss.ps.ps_ExprContext; + + /* Does the tuple meet the indexqual condition? */ + econtext->ecxt_scantuple = slot; + + ResetExprContext(econtext); + + return ExecQual(node->indexqualorig, econtext, false); +} + +/* ---------------------------------------------------------------- + * ExecAnnIndexScan(node) + * ---------------------------------------------------------------- + */ +static TupleTableSlot* ExecAnnIndexScan(PlanState* state) +{ + AnnIndexScanState* node = castNode(AnnIndexScanState, state); + /* + * If we have runtime keys and they've not already been set up, do it now. 
+ */ + if (node->iss_NumRuntimeKeys != 0 && !node->iss_RuntimeKeysReady) { + /* + * set a flag for partitioned table, so we can deal with it specially + * when we rescan the partitioned table + */ + if (node->ss.isPartTbl) { + if (PointerIsValid(node->ss.partitions)) { + node->ss.ss_ReScan = true; + ExecReScan((PlanState*)node); + } + } else { + ExecReScan((PlanState*)node); + } + } else if (DB_IS_CMPT(B_FORMAT) && node->iss_NumRuntimeKeys != 0 && u_sess->parser_cxt.has_set_uservar) { + ExprContext* econtext = node->iss_RuntimeContext; + ResetExprContext(econtext); + ExecIndexEvalRuntimeKeys(econtext, node->iss_RuntimeKeys, node->iss_NumRuntimeKeys); + } + + return ExecScan(&node->ss, (ExecScanAccessMtd)AnnIndexNext, (ExecScanRecheckMtd)AnnIndexRecheck); +} + +/* ---------------------------------------------------------------- + * ExecReScanAnnIndexScan(node) + * + * Recalculates the values of any scan keys whose value depends on + * information known at runtime, then rescans the indexed relation. + * + * Updating the scan key was formerly done separately in + * ExecUpdateIndexScanKeys. Integrating it into ReScan makes + * rescans of indices and relations/general streams more uniform. + * ---------------------------------------------------------------- + */ +void ExecReScanAnnIndexScan(AnnIndexScanState* node) +{ + /* + * For recursive-stream rescan, if the number of RuntimeKeys is not equal to zero, + * just return without rescan. + * + * If we are doing runtime key calculations (ie, any of the index key + * values weren't simple Consts), compute the new key values. But first, + * reset the context so we don't leak memory as each outer tuple is + * scanned. Note this assumes that we will recalculate *all* runtime keys + * on each call. 
+ */ + if (node->iss_NumRuntimeKeys != 0) { + if (node->ss.ps.state->es_recursive_next_iteration) { + node->iss_RuntimeKeysReady = false; + return; + } + + ExprContext* econtext = node->iss_RuntimeContext; + + ResetExprContext(econtext); + ExecIndexEvalRuntimeKeys(econtext, node->iss_RuntimeKeys, node->iss_NumRuntimeKeys); + } + node->iss_RuntimeKeysReady = true; + + /* + * deal with partitioned table + */ + bool partpruning = !RelationIsSubPartitioned(node->ss.ss_currentRelation) && + ENABLE_SQL_BETA_FEATURE(PARTITION_OPFUSION) && list_length(node->ss.partitions) == 1; + /* if only one partition is scanned in indexscan, we don't need to do rescan for partition */ + if (node->ss.isPartTbl && !partpruning) { + /* + * if node->ss.ss_ReScan = true, just do rescanning as non-partitioned + * table; else switch to next partition for scanning. + */ + if (node->ss.ss_ReScan || + (((Scan *)node->ss.ps.plan)->partition_iterator_elimination)) { + /* reset the rescan flag */ + node->ss.ss_ReScan = false; + } else { + if (!PointerIsValid(node->ss.partitions)) { + /* + * give up rescanning the index if there is no partition to scan + */ + return; + } + + /* switch to next partition for scanning */ + Assert(PointerIsValid(node->iss_ScanDesc)); + scan_handler_idx_endscan(node->iss_ScanDesc); + /* initialize Scan for the next partition */ + ExecInitNextPartitionForAnnIndexScan(node); + ExecScanReScan(&node->ss); + return; + } + } + + /* reset index scan */ + scan_handler_idx_rescan( + node->iss_ScanDesc, node->iss_ScanKeys, node->iss_NumScanKeys, node->iss_OrderByKeys, node->iss_NumOrderByKeys); + + ExecScanReScan(&node->ss); +} + +/* ---------------------------------------------------------------- + * ExecEndAnnIndexScan + * ---------------------------------------------------------------- + */ +void ExecEndAnnIndexScan(AnnIndexScanState* node) +{ + Relation index_relation_desc; + IndexScanDesc index_scan_desc; + Relation relation; + + /* + * extract information from the node + */ + 
index_relation_desc = node->iss_RelationDesc; + index_scan_desc = node->iss_ScanDesc; + relation = node->ss.ss_currentRelation; + + /* + * Free the exprcontext(s) ... now dead code, see ExecFreeExprContext + */ +#ifdef NOT_USED + ExecFreeExprContext(&node->ss.ps); + if (node->iss_RuntimeContext) + FreeExprContext(node->iss_RuntimeContext, true); +#endif + + /* + * clear out tuple table slots + */ + (void)ExecClearTuple(node->ss.ps.ps_ResultTupleSlot); + (void)ExecClearTuple(node->ss.ss_ScanTupleSlot); + + /* + * end the index scan (no-op if we never began one) + */ + if (index_scan_desc) + scan_handler_idx_endscan(index_scan_desc); + + /* + * For a partitioned table, first release the dummy relations and the + * index/table partition lists that were built for the scan. + * The index relation itself is closed afterwards in both cases + * (no-op if we didn't open it). + */ + if (node->ss.isPartTbl) { + if (PointerIsValid(node->iss_IndexPartitionList)) { + Assert(PointerIsValid(index_relation_desc)); + Assert(PointerIsValid(node->ss.partitions)); + Assert(node->ss.partitions->length == node->iss_IndexPartitionList->length); + + Assert(PointerIsValid(node->iss_CurrentIndexPartition)); + releaseDummyRelation(&(node->iss_CurrentIndexPartition)); + + Assert(PointerIsValid(node->ss.ss_currentPartition)); + releaseDummyRelation(&(node->ss.ss_currentPartition)); + + if (RelationIsSubPartitioned(relation)) { + releaseSubPartitionList(index_relation_desc, &(node->iss_IndexPartitionList), NoLock); + releaseSubPartitionList(node->ss.ss_currentRelation, &(node->ss.subpartitions), NoLock); + } else { + /* close index partition */ + releasePartitionList(node->iss_RelationDesc, &(node->iss_IndexPartitionList), NoLock); + } + + /* close table partition */ + releasePartitionList(node->ss.ss_currentRelation, &(node->ss.partitions), NoLock); + } + } + + if (index_relation_desc) + index_close(index_relation_desc, NoLock); + + /* + * close the heap relation. 
+ */ + ExecCloseScanRelation(relation); +} + +/* ---------------------------------------------------------------- + * ExecAnnIndexMarkPos + * ---------------------------------------------------------------- + */ +void ExecAnnIndexMarkPos(AnnIndexScanState* node) +{ + scan_handler_idx_markpos(node->iss_ScanDesc); +} + +/* ---------------------------------------------------------------- + * ExecAnnIndexRestrPos + * ---------------------------------------------------------------- + */ +void ExecAnnIndexRestrPos(AnnIndexScanState* node) +{ + scan_handler_idx_restrpos(node->iss_ScanDesc); +} + +/* ---------------------------------------------------------------- + * ExecInitAnnIndexRelation + * + * Chooses the scan snapshot and begins the index scan on the relation, + * or on the first selected partition of a partitioned table (if any). + * + * Note: index scans have 2 sets of state information because + * we have to keep track of the base relation and the + * index relation. + * ---------------------------------------------------------------- + */ +void ExecInitAnnIndexRelation(AnnIndexScanState* node, EState* estate, int eflags) +{ + AnnIndexScanState* index_state = node; + Snapshot scanSnap; + Relation current_relation = index_state->ss.ss_currentRelation; + AnnIndexScan *index_scan = (AnnIndexScan *)node->ss.ps.plan; + + /* + * Choose user-specified snapshot if TimeCapsule clause exists, otherwise + * estate->es_snapshot instead. 
+ */ + scanSnap = TvChooseScanSnap(index_state->iss_RelationDesc, &index_scan->scan, &index_state->ss); + + /* deal with partition info */ + if (index_state->ss.isPartTbl) { + index_state->iss_ScanDesc = NULL; + + if (index_scan->scan.itrs > 0) { + Partition current_partition = NULL; + Partition currentindex = NULL; + + /* Initialize table partition list and index partition list for following scan */ + ExecInitPartitionForAnnIndexScan(index_state, estate); + + if (index_state->ss.partitions != NIL) { + /* construct a dummy relation with the first table partition for following scan */ + if (RelationIsSubPartitioned(current_relation)) { + Partition subOnePart = (Partition)list_nth(index_state->ss.partitions, index_state->ss.currentSlot); + List *currentSubpartList = (List *)list_nth(index_state->ss.subpartitions, 0); + List *currentindexlist = (List *)list_nth(index_state->iss_IndexPartitionList, 0); + current_partition = (Partition)list_nth(currentSubpartList, 0); + currentindex = (Partition)list_nth(currentindexlist, 0); + Relation subOnePartRel = partitionGetRelation(index_state->ss.ss_currentRelation, subOnePart); + index_state->ss.ss_currentPartition = + partitionGetRelation(subOnePartRel, current_partition); + releaseDummyRelation(&subOnePartRel); + } else { + current_partition = (Partition)list_nth(index_state->ss.partitions, 0); + currentindex = (Partition)list_nth(index_state->iss_IndexPartitionList, 0); + index_state->ss.ss_currentPartition = + partitionGetRelation(index_state->ss.ss_currentRelation, current_partition); + } + + index_state->iss_CurrentIndexPartition = + partitionGetRelation(index_state->iss_RelationDesc, currentindex); + + /* + * Verify if a DDL operation that froze all tuples in the relation + * occurred after taking the snapshot. 
+ */ + if (RelationIsUstoreFormat(index_state->ss.ss_currentPartition)) { + TransactionId relfrozenxid64 = InvalidTransactionId; + getPartitionRelxids(index_state->ss.ss_currentPartition, &relfrozenxid64); + if (TransactionIdPrecedes(FirstNormalTransactionId, scanSnap->xmax) && + !TransactionIdIsCurrentTransactionId(relfrozenxid64) && + TransactionIdPrecedes(scanSnap->xmax, relfrozenxid64)) { + ereport(ERROR, (errcode(ERRCODE_SNAPSHOT_INVALID), + (errmsg("Snapshot too old, IndexRelation is PartTbl, the info: snapxmax is %lu, " + "snapxmin is %lu, csn is %lu, relfrozenxid64 is %lu, globalRecycleXid is %lu.", + scanSnap->xmax, scanSnap->xmin, scanSnap->snapshotcsn, relfrozenxid64, + g_instance.undo_cxt.globalRecycleXid)))); + } + } + + /* Initialize scan descriptor for partitioned table */ + index_state->iss_ScanDesc = scan_handler_idx_beginscan(index_state->ss.ss_currentPartition, + index_state->iss_CurrentIndexPartition, + scanSnap, + index_state->iss_NumScanKeys, + index_state->iss_NumOrderByKeys, + (ScanState*)index_state); + } + } + } else { + /* + * Verify if a DDL operation that froze all tuples in the relation + * occurred after taking the snapshot. + */ + if (RelationIsUstoreFormat(current_relation)) { + TransactionId relfrozenxid64 = InvalidTransactionId; + getRelationRelxids(current_relation, &relfrozenxid64); + if (TransactionIdPrecedes(FirstNormalTransactionId, scanSnap->xmax) && + !TransactionIdIsCurrentTransactionId(relfrozenxid64) && + TransactionIdPrecedes(scanSnap->xmax, relfrozenxid64)) { + ereport(ERROR, (errcode(ERRCODE_SNAPSHOT_INVALID), + (errmsg("Snapshot too old, IndexRelation is not PartTbl, the info: snapxmax is %lu, " + "snapxmin is %lu, csn is %lu, relfrozenxid64 is %lu, globalRecycleXid is %lu.", + scanSnap->xmax, scanSnap->xmin, scanSnap->snapshotcsn, relfrozenxid64, + g_instance.undo_cxt.globalRecycleXid)))); + } + } + + /* + * Initialize scan descriptor. 
+ */ + index_state->iss_ScanDesc = scan_handler_idx_beginscan(current_relation, + index_state->iss_RelationDesc, + scanSnap, + index_state->iss_NumScanKeys, + index_state->iss_NumOrderByKeys, + (ScanState*)index_state); + } + + return; +} + +AnnIndexScanState* ExecInitAnnIndexScan(AnnIndexScan* node, EState* estate, int eflags) +{ + AnnIndexScanState* index_state = NULL; + Relation current_relation; + bool relisTarget = false; + + gstrace_entry(GS_TRC_ID_ExecInitIndexScan); + /* + * create state structure + */ + index_state = makeNode(AnnIndexScanState); + index_state->ss.ps.plan = (Plan*)node; + index_state->ss.ps.state = estate; + index_state->ss.isPartTbl = node->scan.isPartTbl; + index_state->ss.currentSlot = 0; + index_state->ss.partScanDirection = node->indexorderdir; + index_state->ss.ps.ExecProcNode = ExecAnnIndexScan; + index_state->annCount = node->annCount; + /* + * Miscellaneous initialization + * + * create expression context for node + */ + ExecAssignExprContext(estate, &index_state->ss.ps); + + index_state->ss.ps.ps_vec_TupFromTlist = false; + + /* + * initialize child expressions + * + * Note: we don't initialize all of the indexqual expression, only the + * sub-parts corresponding to runtime keys (see below). Likewise for + * indexorderby, if any. But the indexqualorig expression is always + * initialized even though it will only be used in some uncommon cases --- + * would be nice to improve that. (Problem is that any SubPlans present + * in the expression must be found now...) 
+ */ + if (estate->es_is_flt_frame) { + index_state->ss.ps.qual = (List*)ExecInitQualByFlatten(node->scan.plan.qual, (PlanState*)index_state); + index_state->indexqualorig = (List*)ExecInitQualByFlatten(node->indexqualorig, (PlanState*)index_state); + } else { + index_state->ss.ps.targetlist = (List*)ExecInitExprByRecursion( + (Expr*)node->scan.plan.targetlist, (PlanState*)index_state); + index_state->ss.ps.qual = (List*)ExecInitExprByRecursion((Expr*)node->scan.plan.qual, (PlanState*)index_state); + index_state->indexqualorig = + (List*)ExecInitExprByRecursion((Expr*)node->indexqualorig, (PlanState*)index_state); + } + + /* + * open the base relation and acquire appropriate lock on it. + */ + current_relation = ExecOpenScanRelation(estate, node->scan.scanrelid); + + index_state->ss.ss_currentRelation = current_relation; + index_state->ss.ss_currentScanDesc = NULL; /* no heap scan here */ + /* + * tuple table initialization + */ + ExecInitResultTupleSlot(estate, &index_state->ss.ps, current_relation->rd_tam_ops); + ExecInitScanTupleSlot(estate, &index_state->ss, current_relation->rd_tam_ops); + + /* + * get the scan type from the relation descriptor. + */ + ExecAssignScanType(&index_state->ss, CreateTupleDescCopy(RelationGetDescr(current_relation))); + index_state->ss.ss_ScanTupleSlot->tts_tupleDescriptor->td_tam_ops = current_relation->rd_tam_ops; + + /* + * Initialize result tuple type and projection info. + */ + ExecAssignResultTypeFromTL(&index_state->ss.ps); + + index_state->ss.ps.ps_ResultTupleSlot->tts_tupleDescriptor->td_tam_ops = + index_state->ss.ss_ScanTupleSlot->tts_tupleDescriptor->td_tam_ops; + + ExecAssignScanProjectionInfo(&index_state->ss); + + Assert(index_state->ss.ps.ps_ResultTupleSlot->tts_tupleDescriptor->td_tam_ops); + + /* + * If we are just doing EXPLAIN (ie, aren't going to run the plan), stop + * here. This allows an index-advisor plugin to EXPLAIN a plan containing + * references to nonexistent indexes. 
+ */ + if (eflags & EXEC_FLAG_EXPLAIN_ONLY) { + gstrace_exit(GS_TRC_ID_ExecInitIndexScan); + return index_state; + } + + /* + * Open the index relation. + * + * If the parent table is one of the target relations of the query, then + * InitPlan already opened and write-locked the index, so we can avoid + * taking another lock here. Otherwise we need a normal reader's lock. + */ + relisTarget = ExecRelationIsTargetRelation(estate, node->scan.scanrelid); + index_state->iss_RelationDesc = index_open(node->indexid, relisTarget ? NoLock : AccessShareLock); + if (!IndexIsUsable(index_state->iss_RelationDesc->rd_index)) { + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("can't initialize index scans using unusable index \"%s\"", + RelationGetRelationName(index_state->iss_RelationDesc)))); + } + /* + * Initialize index-specific scan state + */ + index_state->iss_RuntimeKeysReady = false; + index_state->iss_RuntimeKeys = NULL; + index_state->iss_NumRuntimeKeys = 0; + + /* + * build the index scan keys from the index qualification + */ + ExecIndexBuildScanKeys((PlanState*)index_state, + index_state->iss_RelationDesc, + node->indexqual, + false, + &index_state->iss_ScanKeys, + &index_state->iss_NumScanKeys, + &index_state->iss_RuntimeKeys, + &index_state->iss_NumRuntimeKeys, + NULL, /* no ArrayKeys */ + NULL); + + /* + * any ORDER BY exprs have to be turned into scankeys in the same way + */ + ExecIndexBuildScanKeys((PlanState*)index_state, + index_state->iss_RelationDesc, + node->indexorderby, + true, + &index_state->iss_OrderByKeys, + &index_state->iss_NumOrderByKeys, + &index_state->iss_RuntimeKeys, + &index_state->iss_NumRuntimeKeys, + NULL, /* no ArrayKeys */ + NULL); + + /* + * If we have runtime keys, we need an ExprContext to evaluate them. The + * node's standard context won't do because we want to reset that context + * for every tuple. So, build another context just like the other one... 
+ * -tgl 7/11/00 + */ + if (index_state->iss_NumRuntimeKeys != 0) { + ExprContext* stdecontext = index_state->ss.ps.ps_ExprContext; + + ExecAssignExprContext(estate, &index_state->ss.ps); + index_state->iss_RuntimeContext = index_state->ss.ps.ps_ExprContext; + index_state->ss.ps.ps_ExprContext = stdecontext; + } else { + index_state->iss_RuntimeContext = NULL; + } + + /* begin the index scan (per partition for a partitioned table) */ + ExecInitAnnIndexRelation(index_state, estate, eflags); + + /* + * If no run-time keys to calculate, go ahead and pass the scankeys to the + * index AM. + */ + if (index_state->iss_ScanDesc == NULL) { + index_state->ss.ps.stubType = PST_Scan; + } else if (index_state->iss_NumRuntimeKeys == 0) { + scan_handler_idx_rescan_local(index_state->iss_ScanDesc, + index_state->iss_ScanKeys, + index_state->iss_NumScanKeys, + index_state->iss_OrderByKeys, + index_state->iss_NumOrderByKeys); + } + + /* + * all done. + */ + gstrace_exit(GS_TRC_ID_ExecInitIndexScan); + return index_state; +} + +/* + * @@GaussDB@@ + * Target : data partition + * Brief : construct a dummy relation with the next partition and the partitioned + * : table for the following AnnIndexscan, and switch the scanning relation to + * : the dummy relation + * Description : input an AnnIndexScanState node, to construct a dummy relation + * : with the next partition. 
+ * Input : AnnIndexScanState *node + * Output : void + * Notes : NULL + */ +static void ExecInitNextPartitionForAnnIndexScan(AnnIndexScanState* node) +{ + Partition current_partition = NULL; + Relation current_partition_rel = NULL; + Partition current_index_partition = NULL; + Relation current_index_partition_rel = NULL; + AnnIndexScan* plan = NULL; + int paramNo = -1; + ParamExecData* param = NULL; + int subPartParamno = -1; + ParamExecData* SubPrtParam = NULL; + + AnnIndexScanState* indexState = node; + AnnIndexScan *indexScan = (AnnIndexScan *)node->ss.ps.plan; + Snapshot scanSnap = TvChooseScanSnap(indexState->iss_RelationDesc, &indexScan->scan, &indexState->ss); + + plan = (AnnIndexScan*)(node->ss.ps.plan); + + /* get partition sequence */ + paramNo = plan->scan.plan.paramno; + param = &(node->ss.ps.state->es_param_exec_vals[paramNo]); + node->ss.currentSlot = (int)param->value; + + subPartParamno = plan->scan.plan.subparamno; + SubPrtParam = &(node->ss.ps.state->es_param_exec_vals[subPartParamno]); + + Oid heapOid = node->iss_RelationDesc->rd_index->indrelid; + Relation heapRelation = heap_open(heapOid, AccessShareLock); + + /* no heap scan here */ + node->ss.ss_currentScanDesc = NULL; + + /* construct a dummy relation with the next index partition */ + if (RelationIsSubPartitioned(heapRelation)) { + Partition subOnePart = (Partition)list_nth(node->ss.partitions, node->ss.currentSlot); + List *subPartList = (List *)list_nth(node->ss.subpartitions, node->ss.currentSlot); + List *subIndexList = (List *)list_nth(node->iss_IndexPartitionList, + node->ss.currentSlot); + current_partition = (Partition)list_nth(subPartList, (int)SubPrtParam->value); + + Relation subOnePartRel = partitionGetRelation(node->ss.ss_currentRelation, subOnePart); + + current_partition_rel = partitionGetRelation(subOnePartRel, current_partition); + current_index_partition = (Partition)list_nth(subIndexList, (int)SubPrtParam->value); + releaseDummyRelation(&subOnePartRel); + } else { + 
current_partition = (Partition)list_nth(node->ss.partitions, node->ss.currentSlot); + current_partition_rel = partitionGetRelation(node->ss.ss_currentRelation, current_partition); + current_index_partition = (Partition)list_nth(node->iss_IndexPartitionList, node->ss.currentSlot); + } + + current_index_partition_rel = partitionGetRelation(node->iss_RelationDesc, current_index_partition); + + Assert(PointerIsValid(node->iss_CurrentIndexPartition)); + releaseDummyRelation(&(node->iss_CurrentIndexPartition)); + node->iss_CurrentIndexPartition = current_index_partition_rel; + + /* update scan-related partition */ + releaseDummyRelation(&(node->ss.ss_currentPartition)); + node->ss.ss_currentPartition = current_partition_rel; + + /* Initialize scan descriptor. */ + node->iss_ScanDesc = scan_handler_idx_beginscan(node->ss.ss_currentPartition, + node->iss_CurrentIndexPartition, + scanSnap, + node->iss_NumScanKeys, + node->iss_NumOrderByKeys, + (ScanState*)node); + + if (node->iss_ScanDesc != NULL) { + scan_handler_idx_rescan_local( + node->iss_ScanDesc, node->iss_ScanKeys, node->iss_NumScanKeys, + node->iss_OrderByKeys, node->iss_NumOrderByKeys); + } + + heap_close(heapRelation, AccessShareLock); +} + +/* + * @@GaussDB@@ + * Target : data partition + * Brief : get index partitions list and table partitions list for + * : the following AnnIndexScan + * Description : Init partitions list in AnnIndexScanState. 
+ * Input : AnnIndexScanState* index_state, EState* estate + * Output : void + * Notes : NULL + */ +void ExecInitPartitionForAnnIndexScan(AnnIndexScanState* index_state, EState* estate) +{ + AnnIndexScan* plan = NULL; + Relation current_relation = NULL; + Partition table_partition = NULL; + Partition index_partition = NULL; + + index_state->ss.partitions = NIL; + index_state->ss.ss_currentPartition = NULL; + index_state->iss_IndexPartitionList = NIL; + index_state->iss_CurrentIndexPartition = NULL; + + plan = (AnnIndexScan*)index_state->ss.ps.plan; + current_relation = index_state->ss.ss_currentRelation; + + if (plan->scan.itrs > 0) { + Oid indexid = plan->indexid; + bool relisTarget = false; + Partition indexpartition = NULL; + LOCKMODE lock; + + /* + * get relation's lockmode that hangs on whether + * it's one of the target relations of the query + */ + relisTarget = ExecRelationIsTargetRelation(estate, plan->scan.scanrelid); + lock = (relisTarget ? RowExclusiveLock : AccessShareLock); + index_state->ss.lockMode = lock; + index_state->lockMode = lock; + + PruningResult* resultPlan = NULL; + + if (plan->scan.pruningInfo->expr != NULL) { + resultPlan = GetPartitionInfo(plan->scan.pruningInfo, estate, current_relation); + } else { + resultPlan = plan->scan.pruningInfo; + } + + if (resultPlan->ls_rangeSelectedPartitions != NULL) { + index_state->ss.part_id = resultPlan->ls_rangeSelectedPartitions->length; + } else { + index_state->ss.part_id = 0; + } + + ListCell* cell1 = NULL; + ListCell* cell2 = NULL; + List* part_seqs = resultPlan->ls_rangeSelectedPartitions; + List* partitionnos = resultPlan->ls_selectedPartitionnos; + Assert(list_length(part_seqs) == list_length(partitionnos)); + StringInfo partNameInfo = makeStringInfo(); + StringInfo partOidInfo = makeStringInfo(); + + forboth (cell1, part_seqs, cell2, partitionnos) { + Oid tablepartitionid = InvalidOid; + Oid indexpartitionid = InvalidOid; + List* partitionIndexOidList = NIL; + int partSeq = 
lfirst_int(cell1); + int partitionno = lfirst_int(cell2); + + /* get table partition and add it to a list for following scan */ + tablepartitionid = getPartitionOidFromSequence(current_relation, partSeq, partitionno); + table_partition = + PartitionOpenWithPartitionno(current_relation, tablepartitionid, partitionno, lock, true); + /* Skip concurrent dropped partitions */ + if (table_partition == NULL) { + continue; + } + index_state->ss.partitions = lappend(index_state->ss.partitions, table_partition); + + appendStringInfo(partNameInfo, "%s ", table_partition->pd_part->relname.data); + appendStringInfo(partOidInfo, "%u ", tablepartitionid); + + if (RelationIsSubPartitioned(current_relation)) { + ListCell *lc1 = NULL; + ListCell *lc2 = NULL; + SubPartitionPruningResult* subPartPruningResult = + GetSubPartitionPruningResult(resultPlan->ls_selectedSubPartitions, partSeq, partitionno); + if (subPartPruningResult == NULL) { + continue; + } + List *subpartList = subPartPruningResult->ls_selectedSubPartitions; + List *subpartitionnos = subPartPruningResult->ls_selectedSubPartitionnos; + Assert(list_length(subpartList) == list_length(subpartitionnos)); + List *subIndexList = NULL; + List *subRelationList = NULL; + + forboth (lc1, subpartList, lc2, subpartitionnos) + { + int subpartSeq = lfirst_int(lc1); + int subpartitionno = lfirst_int(lc2); + Relation tablepartrel = partitionGetRelation(current_relation, table_partition); + Oid subpartitionid = getPartitionOidFromSequence(tablepartrel, subpartSeq, subpartitionno); + Partition subpart = PartitionOpenWithPartitionno(tablepartrel, subpartitionid, subpartitionno, + AccessShareLock, true); + /* Skip concurrent dropped partitions */ + if (subpart == NULL) { + continue; + } + + partitionIndexOidList = PartitionGetPartIndexList(subpart); + + Assert(partitionIndexOidList != NULL); + if (!PointerIsValid(partitionIndexOidList)) { + ereport(ERROR, (errmodule(MOD_OPT), errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("no local indexes 
found for partition %s", + PartitionGetPartitionName(subpart)))); + } + + indexpartitionid = searchPartitionIndexOid(indexid, partitionIndexOidList); + list_free_ext(partitionIndexOidList); + indexpartition = partitionOpen(index_state->iss_RelationDesc, indexpartitionid, AccessShareLock); + + releaseDummyRelation(&tablepartrel); + + if (indexpartition->pd_part->indisusable == false) { + ereport( + ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), errmodule(MOD_EXECUTOR), + errmsg( + "can't initialize bitmap index scans using unusable local index \"%s\" for partition", + PartitionGetPartitionName(indexpartition)))); + } + + subIndexList = lappend(subIndexList, indexpartition); + subRelationList = lappend(subRelationList, subpart); + } + + index_state->iss_IndexPartitionList = lappend(index_state->iss_IndexPartitionList, + subIndexList); + index_state->ss.subpartitions = lappend(index_state->ss.subpartitions, subRelationList); + index_state->ss.subPartLengthList = + lappend_int(index_state->ss.subPartLengthList, list_length(subIndexList)); + } else { + /* get index partition and add it to a list for following scan */ + partitionIndexOidList = PartitionGetPartIndexList(table_partition); + Assert(PointerIsValid(partitionIndexOidList)); + if (!PointerIsValid(partitionIndexOidList)) { + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("no local indexes found for partition %s", + PartitionGetPartitionName(table_partition)))); + } + indexpartitionid = searchPartitionIndexOid(indexid, partitionIndexOidList); + list_free_ext(partitionIndexOidList); + + index_partition = partitionOpen(index_state->iss_RelationDesc, indexpartitionid, lock); + if (index_partition->pd_part->indisusable == false) { + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("can't initialize index scans using unusable local index \"%s\"", + PartitionGetPartitionName(index_partition)))); + } + index_state->iss_IndexPartitionList = lappend(index_state->iss_IndexPartitionList, 
index_partition); + } + } + /* + * Set the total number of partitions to scan from the level-1 partition list; the subpartition + * list is drilled down into node->subpartitions for each node_partition entry. + * + * Note: we do not take this value from the partitions selected by the pruning result, because some + * pre-pruned partitions could have been dropped by concurrent DDL; node->partitions + * is the refreshed list of partitions to be scanned. + */ + if (index_state->ss.partitions != NULL) { + index_state->ss.part_id = list_length(index_state->ss.partitions); + } else { + index_state->ss.part_id = 0; + } + } +} \ No newline at end of file diff --git a/src/gausskernel/runtime/executor/nodePartIterator.cpp b/src/gausskernel/runtime/executor/nodePartIterator.cpp index 50e320ddc980dd948b534be31f2c663b5b063171..ab852a318720f7c6c03507ad43761aa2c865a331 100755 --- a/src/gausskernel/runtime/executor/nodePartIterator.cpp +++ b/src/gausskernel/runtime/executor/nodePartIterator.cpp @@ -71,6 +71,7 @@ static int GetScanPartitionNum(PartIteratorState* node, PlanState* noden) case T_SeqScanState: case T_IndexScanState: case T_IndexOnlyScanState: + case T_AnnIndexScanState: case T_BitmapHeapScanState: case T_TidScanState: partitionScan = ((ScanState*)noden)->part_id; @@ -120,7 +121,8 @@ static void InitScanPartition(PartIteratorState* node, int partitionScan, PlanSt if (IsA(noden, VecToRowState)) { subPartLengthList = ((VecToRowState *)noden)->subPartLengthList; } else if (IsA(noden, ScanState) || IsA(noden, SeqScanState) || IsA(noden, IndexOnlyScanState) || - IsA(noden, IndexScanState) || IsA(noden, BitmapHeapScanState) || IsA(noden, TidScanState)) { + IsA(noden, IndexScanState) || IsA(noden, BitmapHeapScanState) || IsA(noden, TidScanState) || + IsA(noden, AnnIndexScanState)) { subPartLengthList = ((ScanState *)noden)->subPartLengthList; } @@ -194,7 +196,8 @@ static TupleTableSlot* ExecPartIterator(PlanState* planState) if (IsA(noden, VecToRowState)) { subPartLengthList = ((VecToRowState *)noden)->subPartLengthList;
} else if (IsA(noden, ScanState) || IsA(noden, SeqScanState) || IsA(noden, IndexOnlyScanState) || - IsA(noden, IndexScanState) || IsA(noden, BitmapHeapScanState) || IsA(noden, TidScanState)) { + IsA(noden, IndexScanState) || IsA(noden, BitmapHeapScanState) || IsA(noden, TidScanState) || + IsA(noden, AnnIndexScanState)) { subPartLengthList = ((ScanState *)noden)->subPartLengthList; } diff --git a/src/gausskernel/runtime/executor/nodeStub.cpp b/src/gausskernel/runtime/executor/nodeStub.cpp index 760185b3ebc71ebd9203c6655b7d5ebc5315b084..a9a90090391641fea8eb40e48952433292bcdd3f 100644 --- a/src/gausskernel/runtime/executor/nodeStub.cpp +++ b/src/gausskernel/runtime/executor/nodeStub.cpp @@ -20,6 +20,7 @@ #include "executor/node/nodeSeqscan.h" #include "executor/node/nodeIndexscan.h" #include "executor/node/nodeIndexonlyscan.h" +#include "executor/node/nodeAnnIndexscan.h" #include "executor/node/nodeBitmapIndexscan.h" #include "executor/node/nodeBitmapHeapscan.h" #include "executor/node/nodeTidscan.h" @@ -178,6 +179,9 @@ void ExecEndNodeStubScan(PlanState* node) #ifdef USE_SPQ case T_SpqBitmapHeapScan: #endif case T_BitmapIndexScan: ExecEndBitmapIndexScan((BitmapIndexScanState*)node); break; + case T_AnnIndexScan: /* added after the T_BitmapIndexScan arm so the T_SpqBitmapHeapScan label above keeps falling through to it */ + ExecEndAnnIndexScan((AnnIndexScanState*)node); + break; diff --git a/src/gausskernel/runtime/executor/nodeSubplan.cpp b/src/gausskernel/runtime/executor/nodeSubplan.cpp index 5f78ba810d63bc3ee5f828de39a2dacd87bdc9f0..8780e393598dddf527e05008bdce86c44545917a 100644 --- a/src/gausskernel/runtime/executor/nodeSubplan.cpp +++ b/src/gausskernel/runtime/executor/nodeSubplan.cpp @@ -993,7 +993,7 @@ void ExecSetParamPlan(SubPlanState* node, ExprContext* econtext) if (u_sess->parser_cxt.has_set_uservar && DB_IS_CMPT(B_FORMAT)) { if (IsA(planstate, SeqScanState)) { scan_handler_tbl_restrpos(castNode(SeqScanState, planstate)->ss_currentScanDesc); - } else if (IsA(planstate, IndexScanState)) { + } else if (IsA(planstate, IndexScanState) || IsA(planstate,
AnnIndexScanState)) { ExecReScan(planstate); } } diff --git a/src/gausskernel/runtime/opfusion/opfusion_scan.cpp b/src/gausskernel/runtime/opfusion/opfusion_scan.cpp index 4991f0d8465307d5804fe9f42238f724c87b7bd7..2b408b07d13d32756272ab246657b5393df62515 100644 --- a/src/gausskernel/runtime/opfusion/opfusion_scan.cpp +++ b/src/gausskernel/runtime/opfusion/opfusion_scan.cpp @@ -58,6 +58,9 @@ ScanFusion* ScanFusion::getScanFusion(Node* node, PlannedStmt* planstmt, ParamLi scan = New(CurrentMemoryContext) IndexOnlyScanFusion((IndexOnlyScan*)node, planstmt, params); break; + case T_AnnIndexScan: /* NOTE(review): the AnnIndexScan node is handed to IndexScanFusion through an (IndexScan*) cast; presumably this relies on AnnIndexScan sharing IndexScan's leading layout -- confirm */ + scan = New(CurrentMemoryContext) IndexScanFusion((IndexScan*)node, planstmt, params); + break; default: ereport(ERROR, (errmodule(MOD_EXECUTOR), diff --git a/src/gausskernel/runtime/vecexecutor/vecnode/vecpartiterator.cpp b/src/gausskernel/runtime/vecexecutor/vecnode/vecpartiterator.cpp index 7c32d462dfb9f73ee5af46231f77e4abc5f2c245..3dbde048c5bbb4142a01f41edd19837f60b810bd 100644 --- a/src/gausskernel/runtime/vecexecutor/vecnode/vecpartiterator.cpp +++ b/src/gausskernel/runtime/vecexecutor/vecnode/vecpartiterator.cpp @@ -86,6 +86,7 @@ static int GetVecscanPartitionNum(const PartIteratorState* node) case T_SeqScanState: case T_IndexScanState: case T_IndexOnlyScanState: + case T_AnnIndexScanState: case T_BitmapHeapScanState: partitionScan = scanState->part_id; break; diff --git a/src/gausskernel/storage/access/CMakeLists.txt b/src/gausskernel/storage/access/CMakeLists.txt index 6e9ab44f956db237ae9d19ab9dd64b5ebc5c6c93..6f3f78929d1fb286d41598d22a6f00d4be05398f 100755 --- a/src/gausskernel/storage/access/CMakeLists.txt +++ b/src/gausskernel/storage/access/CMakeLists.txt @@ -22,7 +22,8 @@ set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/table ${CMAKE_CURRENT_SOURCE_DIR}/transam ${CMAKE_CURRENT_SOURCE_DIR}/ubtree - ${CMAKE_CURRENT_SOURCE_DIR}/ustore + ${CMAKE_CURRENT_SOURCE_DIR}/ustore + ${CMAKE_CURRENT_SOURCE_DIR}/datavec ) if(NOT "${ENABLE_LITE_MODE}" STREQUAL "ON") @@ -50,3
+51,4 @@ add_subdirectory(table) add_subdirectory(transam) add_subdirectory(ubtree) add_subdirectory(ustore) +add_subdirectory(datavec) diff --git a/src/gausskernel/storage/access/Makefile b/src/gausskernel/storage/access/Makefile index 52c39249cb494ef720027ac6b318f9bb3977a1d2..fe5b2c81d43e306b5fc323938e93c91f72413a62 100644 --- a/src/gausskernel/storage/access/Makefile +++ b/src/gausskernel/storage/access/Makefile @@ -2,7 +2,7 @@ subdir = src/gausskernel/storage/access top_builddir = ../../../.. include $(top_builddir)/src/Makefile.global -SUBDIRS = cbtree common heap index nbtree ubtree psort rmgrdesc transam obs hash spgist gist gin hbstore redo table ustore +SUBDIRS = cbtree common heap index nbtree ubtree psort rmgrdesc transam obs hash spgist gist gin hbstore redo table ustore datavec ifeq ($(enable_lite_mode), no) SUBDIRS += archive endif diff --git a/src/gausskernel/storage/access/common/reloptions.cpp b/src/gausskernel/storage/access/common/reloptions.cpp index 20484b00e8b2813328200b4abc2bf854343dc2e7..b2609b0a50f74164ede9042c0244b46e4dd64a2a 100644 --- a/src/gausskernel/storage/access/common/reloptions.cpp +++ b/src/gausskernel/storage/access/common/reloptions.cpp @@ -17,6 +17,8 @@ #include "miscadmin.h" #include "knl/knl_variable.h" +#include "access/datavec/hnsw.h" +#include "access/datavec/ivfflat.h" #include "access/gist_private.h" #include "access/hash.h" #include "access/nbtree.h" @@ -123,6 +125,8 @@ static relopt_bool boolRelOpts[] = { {{"compress_diff_convert", "Whether do diiffer convert in compression", RELOPT_KIND_HEAP | RELOPT_KIND_BTREE}, false}, {{"deduplication", "Enables \"deduplication\" feature for btree index", RELOPT_KIND_BTREE}, false}, + {{"enable_pq", "Whether to enable PQ", RELOPT_KIND_HNSW | RELOPT_KIND_IVFFLAT }, GENERIC_DEFAULT_ENABLE_PQ }, + {{"by_residual", "Whether to use residual during IVFPQ", RELOPT_KIND_IVFFLAT}, IVFPQ_DEFAULT_RESIDUAL}, /* list terminator */ {{NULL}}}; @@ -254,6 +258,23 @@ static relopt_int intRelOpts[] 
= { 7}, {{ "collate", "set relation default collation", RELOPT_KIND_HEAP }, 0, 0, 2000000000 }, {{ "relrewrite", "set relation relrewrite", RELOPT_KIND_HEAP | RELOPT_KIND_TOAST }, 0, 0, 2000000000 }, + {{ "m", "Max number of connections", RELOPT_KIND_HNSW }, HNSW_DEFAULT_M, HNSW_MIN_M, HNSW_MAX_M }, + {{ "ef_construction", "Size of the dynamic candidate list for construction", RELOPT_KIND_HNSW }, + HNSW_DEFAULT_EF_CONSTRUCTION, + HNSW_MIN_EF_CONSTRUCTION, + HNSW_MAX_EF_CONSTRUCTION }, + {{ "pq_m", "Number of PQ subquantizer", RELOPT_KIND_HNSW |RELOPT_KIND_IVFFLAT}, + GENERIC_DEFAULT_PQ_M, + GENERIC_MIN_PQ_M, + GENERIC_MAX_PQ_M }, + {{ "pq_ksub", "Number of centroids for each PQ subquantizer", RELOPT_KIND_HNSW | RELOPT_KIND_IVFFLAT }, + GENERIC_DEFAULT_PQ_KSUB, + GENERIC_MIN_PQ_KSUB, + GENERIC_MAX_PQ_KSUB }, + {{ "lists", "Number of inverted lists", RELOPT_KIND_IVFFLAT }, + IVFFLAT_DEFAULT_LISTS, + IVFFLAT_MIN_LISTS, + IVFFLAT_MAX_LISTS }, /* list terminator */ {{NULL}} }; @@ -469,7 +490,7 @@ static relopt_string stringRelOpts[] = { }, { {"storage_type", "Specifies the Table accessor routines", - RELOPT_KIND_HEAP | RELOPT_KIND_BTREE | RELOPT_KIND_TOAST}, + RELOPT_KIND_HEAP | RELOPT_KIND_BTREE | RELOPT_KIND_TOAST | RELOPT_KIND_HNSW}, strlen(TABLE_ACCESS_METHOD_ASTORE), false, ValidateStrOptTableAccessMethod, diff --git a/src/gausskernel/storage/access/datavec/CMakeLists.txt b/src/gausskernel/storage/access/datavec/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..1b2a4b6c90fdfc49c25e1c8fb60c244d73167123 --- /dev/null +++ b/src/gausskernel/storage/access/datavec/CMakeLists.txt @@ -0,0 +1,16 @@ +#This is the main CMAKE for build all components. 
+AUX_SOURCE_DIRECTORY(${CMAKE_CURRENT_SOURCE_DIR} TGT_datavec_SRC) + +set(TGT_datavec_INC + ${PROJECT_SRC_DIR}/include + ${LZ4_INCLUDE_PATH} + ${LIBCGROUP_INCLUDE_PATH} + ${EVENT_INCLUDE_PATH} + ${ZLIB_INCLUDE_PATH} +) + +set(datavec_DEF_OPTIONS ${MACRO_OPTIONS}) +set(datavec_COMPILE_OPTIONS ${OPTIMIZE_OPTIONS} ${OS_OPTIONS} ${PROTECT_OPTIONS} ${WARNING_OPTIONS} ${BIN_SECURE_OPTIONS} ${CHECK_OPTIONS}) +set(datavec_LINK_OPTIONS ${BIN_LINK_OPTIONS}) +add_static_objtarget(gausskernel_storage_access_datavec TGT_datavec_SRC TGT_datavec_INC "${datavec_DEF_OPTIONS}" "${datavec_COMPILE_OPTIONS}" "${datavec_LINK_OPTIONS}") + diff --git a/src/gausskernel/storage/access/datavec/Makefile b/src/gausskernel/storage/access/datavec/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..c32985f862e7f2b746527bbadfb442f8f54caf0b --- /dev/null +++ b/src/gausskernel/storage/access/datavec/Makefile @@ -0,0 +1,17 @@ +subdir = src/gausskernel/storage/access/datavec +top_builddir = ../../../../.. +include $(top_builddir)/src/Makefile.global + +ifneq "$(MAKECMDGOALS)" "clean" + ifneq "$(MAKECMDGOALS)" "distclean" + ifneq "$(shell which g++ |grep hutaf_llt |wc -l)" "1" + -include $(DEPEND) + endif + endif +endif + +OBJS = bitutils.o hnsw.o hnswbuild.o hnswdelete.o hnswinsert.o hnswscan.o hnswutils.o hnswvacuum.o \ + ivfbuild.o ivfflat.o ivfinsert.o ivfkmeans.o ivfscan.o ivfutils.o ivfvacuum.o vecindex.o \ + utils.o hnswadaptor.o ivfadaptor.o + +include $(top_srcdir)/src/gausskernel/common.mk diff --git a/src/gausskernel/storage/access/datavec/bitutils.cpp b/src/gausskernel/storage/access/datavec/bitutils.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2f8290582e91285bbe24fc57ad596c92f35383ab --- /dev/null +++ b/src/gausskernel/storage/access/datavec/bitutils.cpp @@ -0,0 +1,237 @@ +/* + * Copyright (c) 2024 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. 
+ * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + * ------------------------------------------------------------------------- + * + * bitutils.cpp + * + * IDENTIFICATION + * src/gausskernel/storage/access/datavec/bitutils.cpp + * + * ------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/datavec/bitvec.h" +#include "access/datavec/halfvec.h" /* for USE_DISPATCH and USE_TARGET_CLONES */ +#include "port/pg_bitutils.h" + +#if defined(USE_DISPATCH) +#define BIT_DISPATCH +#endif + +#ifdef BIT_DISPATCH +#include <immintrin.h> /* _mm512_* intrinsics and _xgetbv used by the AVX-512 paths */ + +#if defined(USE__GET_CPUID) +#include <cpuid.h> /* __get_cpuid / __get_cpuid_count */ +#else +#include <intrin.h> /* __cpuid / __cpuidex */ +#endif + +#define TARGET_AVX512_POPCOUNT +#endif + +/* Disable for LLVM due to crash with bitcode generation */ +#if defined(USE_TARGET_CLONES) && !defined(__POPCNT__) && !defined(__llvm__) +#define BIT_TARGET_CLONES __attribute__((target_clones("default", "popcnt"))) +#else +#define BIT_TARGET_CLONES +#endif + +/* Use built-ins when possible for inlining */ +#if defined(HAVE__BUILTIN_POPCOUNT) && defined(HAVE_LONG_INT_64) +#define popcount64(x) __builtin_popcountl(x) +#elif defined(HAVE__BUILTIN_POPCOUNT) && defined(HAVE_LONG_LONG_INT_64) +#define popcount64(x) __builtin_popcountll(x) +#elif !defined(_MSC_VER) +/* Fails to resolve with MSVC */ +#define popcount64(x) pg_popcount64(x) +#endif + +BIT_TARGET_CLONES static uint64 BitHammingDistanceDefault(uint32 bytes, unsigned char *ax, unsigned char *bx, + uint64 distance) +{ +#ifdef popcount64 + errno_t rc = EOK; + for (; bytes >= sizeof(uint64); bytes -= sizeof(uint64)) { + uint64 axs; + uint64 bxs; + +
/* Ensure aligned */ + rc = memcpy_s(&axs, sizeof(uint64), ax, sizeof(uint64)); + securec_check(rc, "\0", "\0"); + rc = memcpy_s(&bxs, sizeof(uint64), bx, sizeof(uint64)); + securec_check(rc, "\0", "\0"); + + distance += popcount64(axs ^ bxs); + + ax += sizeof(uint64); + bx += sizeof(uint64); + } +#endif + + for (uint32 i = 0; i < bytes; i++) + distance += pg_number_of_ones[ax[i] ^ bx[i]]; + + return distance; +} + +#ifdef BIT_DISPATCH +TARGET_AVX512_POPCOUNT static uint64 BitHammingDistanceAvx512Popcount(uint32 bytes, unsigned char *ax, + unsigned char *bx, uint64 distance) +{ + __m512i dist = _mm512_setzero_si512(); + + for (; bytes >= sizeof(__m512i); bytes -= sizeof(__m512i)) { + __m512i axs = _mm512_loadu_si512((const __m512i *)ax); + __m512i bxs = _mm512_loadu_si512((const __m512i *)bx); + + dist = _mm512_add_epi64(dist, _mm512_popcnt_epi64(_mm512_xor_si512(axs, bxs))); + + ax += sizeof(__m512i); + bx += sizeof(__m512i); + } + + distance += _mm512_reduce_add_epi64(dist); + + return BitHammingDistanceDefault(bytes, ax, bx, distance); +} +#endif + +BIT_TARGET_CLONES static double BitJaccardDistanceDefault(uint32 bytes, unsigned char *ax, unsigned char *bx, uint64 ab, + uint64 aa, uint64 bb) +{ +#ifdef popcount64 + errno_t rc = EOK; + for (; bytes >= sizeof(uint64); bytes -= sizeof(uint64)) { + uint64 axs; + uint64 bxs; + + /* Ensure aligned */ + rc = memcpy_s(&axs, sizeof(uint64), ax, sizeof(uint64)); + securec_check(rc, "\0", "\0"); + rc = memcpy_s(&bxs, sizeof(uint64), bx, sizeof(uint64)); + securec_check(rc, "\0", "\0"); + + ab += popcount64(axs & bxs); + aa += popcount64(axs); + bb += popcount64(bxs); + + ax += sizeof(uint64); + bx += sizeof(uint64); + } +#endif + + for (uint32 i = 0; i < bytes; i++) { + ab += pg_number_of_ones[ax[i] & bx[i]]; + aa += pg_number_of_ones[ax[i]]; + bb += pg_number_of_ones[bx[i]]; + } + + if (ab == 0) { + return 1; + } else { + return 1 - (ab / ((double)(aa + bb - ab))); + } +} + +#ifdef BIT_DISPATCH +TARGET_AVX512_POPCOUNT 
static double BitJaccardDistanceAvx512Popcount(uint32 bytes, unsigned char *ax, + unsigned char *bx, uint64 ab, uint64 aa, + uint64 bb) +{ + __m512i abx = _mm512_setzero_si512(); + __m512i aax = _mm512_setzero_si512(); + __m512i bbx = _mm512_setzero_si512(); + + for (; bytes >= sizeof(__m512i); bytes -= sizeof(__m512i)) { + __m512i axs = _mm512_loadu_si512((const __m512i *)ax); + __m512i bxs = _mm512_loadu_si512((const __m512i *)bx); + + abx = _mm512_add_epi64(abx, _mm512_popcnt_epi64(_mm512_and_si512(axs, bxs))); + aax = _mm512_add_epi64(aax, _mm512_popcnt_epi64(axs)); + bbx = _mm512_add_epi64(bbx, _mm512_popcnt_epi64(bxs)); + + ax += sizeof(__m512i); + bx += sizeof(__m512i); + } + + ab += _mm512_reduce_add_epi64(abx); + aa += _mm512_reduce_add_epi64(aax); + bb += _mm512_reduce_add_epi64(bbx); + + return BitJaccardDistanceDefault(bytes, ax, bx, ab, aa, bb); +} +#endif + +#ifdef BIT_DISPATCH +#define CPU_FEATURE_OSXSAVE (1 << 27) /* F1 ECX */ +#define CPU_FEATURE_AVX512F (1 << 16) /* F7,0 EBX */ +#define CPU_FEATURE_AVX512VPOPCNTDQ (1 << 14) /* F7,0 ECX */ + +#ifdef _MSC_VER +#define TARGET_XSAVE +#else +#define TARGET_XSAVE __attribute__((target("xsave"))) +#endif + +TARGET_XSAVE static bool SupportsAvx512Popcount() +{ + unsigned int exx[4] = {0, 0, 0, 0}; + +#if defined(USE__GET_CPUID) + __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]); +#else + __cpuid(exx, 1); +#endif + + /* Check OS supports XSAVE */ + if ((exx[2] & CPU_FEATURE_OSXSAVE) != CPU_FEATURE_OSXSAVE) + return false; + + /* Check XMM, YMM, and ZMM registers are enabled */ + if ((_xgetbv(0) & 0xe6) != 0xe6) + return false; + +#if defined(USE__GET_CPUID) + __get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]); +#else + __cpuidex(exx, 7, 0); +#endif + + /* Check AVX512F */ + if ((exx[1] & CPU_FEATURE_AVX512F) != CPU_FEATURE_AVX512F) + return false; + + /* Check AVX512VPOPCNTDQ */ + return (exx[2] & CPU_FEATURE_AVX512VPOPCNTDQ) == CPU_FEATURE_AVX512VPOPCNTDQ; +} +#endif + +void BitvecInit(void) +{ 
+ /* + * Could skip pointer when single function, but no difference in + * performance + */ + BitHammingDistance = BitHammingDistanceDefault; + BitJaccardDistance = BitJaccardDistanceDefault; + +#ifdef BIT_DISPATCH + if (SupportsAvx512Popcount()) { + BitHammingDistance = BitHammingDistanceAvx512Popcount; + BitJaccardDistance = BitJaccardDistanceAvx512Popcount; + } +#endif +} diff --git a/src/gausskernel/storage/access/datavec/hnsw.cpp b/src/gausskernel/storage/access/datavec/hnsw.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f83bf5afd83f7acb5af6a05f475d70ae7a1f0356 --- /dev/null +++ b/src/gausskernel/storage/access/datavec/hnsw.cpp @@ -0,0 +1,341 @@ +/* + * Copyright (c) 2024 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * ------------------------------------------------------------------------- + * + * hnsw.cpp + * + * IDENTIFICATION + * src/gausskernel/storage/access/datavec/hnsw.cpp + * + * ------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include +#include + +#include "access/amapi.h" +#include "access/reloptions.h" +#include "commands/vacuum.h" +#include "access/datavec/hnsw.h" +#include "miscadmin.h" +#include "utils/guc.h" +#include "utils/selfuncs.h" + +int hnsw_lock_tranche_id; + +/* + * Estimate the cost of an index scan + */ +static void hnswcostestimate_internal(PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, + Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation) +{ + GenericCosts costs; + int m; + int entryLevel; + Relation index; + + /* Never use index without order */ + if (path->indexorderbys == NULL) { + *indexStartupCost = DBL_MAX; + *indexTotalCost = DBL_MAX; + *indexSelectivity = 0; + *indexCorrelation = 0; + return; + } + + MemSet(&costs, 0, sizeof(costs)); + + index = index_open(path->indexinfo->indexoid, NoLock); + HnswGetMetaPageInfo(index, &m, NULL); + index_close(index, NoLock); + + /* Approximate entry level */ + entryLevel = (int)-log(1.0 / path->indexinfo->tuples) * HnswGetMl(m); + + /* TODO Improve estimate of visited tuples (currently underestimates) */ + /* Account for number of tuples (or entry level), m, and ef_search */ + costs.numIndexTuples = (entryLevel + 2) * m; + + genericcostestimate(root, path, loop_count, costs.numIndexTuples, &costs.indexStartupCost, &costs.indexTotalCost, + &costs.indexSelectivity, &costs.indexCorrelation); + + /* Use total cost since most work happens before first tuple is returned */ + *indexStartupCost = costs.indexTotalCost; + *indexTotalCost = costs.indexTotalCost; + *indexSelectivity = costs.indexSelectivity; + *indexCorrelation = costs.indexCorrelation; +} + +/* + * Parse and validate the reloptions + 
*/ +static bytea *hnswoptions_internal(Datum reloptions, bool validate) +{ + static const relopt_parse_elt tab[] = { + {"m", RELOPT_TYPE_INT, offsetof(HnswOptions, m)}, + {"ef_construction", RELOPT_TYPE_INT, offsetof(HnswOptions, efConstruction)}, + {"enable_pq", RELOPT_TYPE_BOOL, offsetof(HnswOptions, enablePQ)}, + {"pq_m", RELOPT_TYPE_INT, offsetof(HnswOptions, pqM)}, + {"pq_ksub", RELOPT_TYPE_INT, offsetof(HnswOptions, pqKsub)}, + {"parallel_workers", RELOPT_TYPE_INT, offsetof(StdRdOptions, parallel_workers)}, /* NOTE(review): this offset comes from StdRdOptions while the struct allocated and filled below is HnswOptions -- confirm the intended target field */ + {"storage_type", RELOPT_TYPE_STRING, offsetof(HnswOptions, storage_type)}}; + + relopt_value *options; + int numoptions; + HnswOptions *rdopts; + + options = parseRelOptions(reloptions, validate, RELOPT_KIND_HNSW, &numoptions); + rdopts = (HnswOptions *)allocateReloptStruct(sizeof(HnswOptions), options, numoptions); + fillRelOptions((void *)rdopts, sizeof(HnswOptions), options, numoptions, validate, tab, lengthof(tab)); + + return (bytea *)rdopts; +} + +/* + * Validate catalog entries for the specified operator class + */ +static bool hnswvalidate_internal(Oid opclassoid) +{ + return true; +} + +/* + * Define index handler + * + * See https://www.postgresql.org/docs/current/index-api.html + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(hnswhandler); +Datum hnswhandler(PG_FUNCTION_ARGS) +{ + IndexAmRoutine *amroutine = makeNode(IndexAmRoutine); + + amroutine->amstrategies = 0; + amroutine->amsupport = HNSW_FUNC_NUM; + amroutine->amcanorder = false; + amroutine->amcanorderbyop = true; + amroutine->amcanbackward = false; /* can change direction mid-scan */ + amroutine->amcanunique = false; + amroutine->amcanmulticol = false; + amroutine->amoptionalkey = true; + amroutine->amsearcharray = false; + amroutine->amsearchnulls = false; + amroutine->amstorage = false; + amroutine->amclusterable = false; + amroutine->ampredlocks = false; + amroutine->amcanparallel = false; + amroutine->amcaninclude = false; + amroutine->amkeytype = InvalidOid; + + /* Interface functions */ +
errno_t rc = 0; + rc = strcpy_s(amroutine->ambuildfuncname, NAMEDATALEN, "hnswbuild"); + securec_check(rc, "\0", "\0"); + rc = strcpy_s(amroutine->ambuildemptyfuncname, NAMEDATALEN, "hnswbuildempty"); + securec_check(rc, "\0", "\0"); + rc = strcpy_s(amroutine->aminsertfuncname, NAMEDATALEN, "hnswinsert"); + securec_check(rc, "\0", "\0"); + rc = strcpy_s(amroutine->ambulkdeletefuncname, NAMEDATALEN, "hnswbulkdelete"); + securec_check(rc, "\0", "\0"); + rc = strcpy_s(amroutine->amvacuumcleanupfuncname, NAMEDATALEN, "hnswvacuumcleanup"); + securec_check(rc, "\0", "\0"); + rc = strcpy_s(amroutine->amcostestimatefuncname, NAMEDATALEN, "hnswcostestimate"); + securec_check(rc, "\0", "\0"); + rc = strcpy_s(amroutine->amoptionsfuncname, NAMEDATALEN, "hnswoptions"); + securec_check(rc, "\0", "\0"); + rc = strcpy_s(amroutine->amvalidatefuncname, NAMEDATALEN, "hnswvalidate"); + securec_check(rc, "\0", "\0"); + rc = strcpy_s(amroutine->ambeginscanfuncname, NAMEDATALEN, "hnswbeginscan"); + securec_check(rc, "\0", "\0"); + rc = strcpy_s(amroutine->amrescanfuncname, NAMEDATALEN, "hnswrescan"); + securec_check(rc, "\0", "\0"); + rc = strcpy_s(amroutine->amgettuplefuncname, NAMEDATALEN, "hnswgettuple"); + securec_check(rc, "\0", "\0"); + rc = strcpy_s(amroutine->amendscanfuncname, NAMEDATALEN, "hnswendscan"); + securec_check(rc, "\0", "\0"); + rc = strcpy_s(amroutine->amdeletefuncname, NAMEDATALEN, "hnswdelete"); + securec_check(rc, "\0", "\0"); + + PG_RETURN_POINTER(amroutine); +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(hnswbuild); +Datum hnswbuild(PG_FUNCTION_ARGS) +{ + if (IsExtremeRedo()) { + elog(ERROR, "hnsw index do not support extreme rto."); + } + Relation heap = (Relation)PG_GETARG_POINTER(0); + Relation index = (Relation)PG_GETARG_POINTER(1); + IndexInfo *indexinfo = (IndexInfo *)PG_GETARG_POINTER(2); + IndexBuildResult *result = hnswbuild_internal(heap, index, indexinfo); + + PG_RETURN_POINTER(result); +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(hnswbuildempty); +Datum 
hnswbuildempty(PG_FUNCTION_ARGS) +{ + if (IsExtremeRedo()) { + elog(ERROR, "hnsw index do not support extreme rto."); + } + Relation index = (Relation)PG_GETARG_POINTER(0); + hnswbuildempty_internal(index); + + PG_RETURN_VOID(); +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(hnswinsert); +Datum hnswinsert(PG_FUNCTION_ARGS) +{ + if (IsExtremeRedo()) { + elog(ERROR, "hnsw index do not support extreme rto."); + } + Relation rel = (Relation)PG_GETARG_POINTER(0); + Datum *values = (Datum *)PG_GETARG_POINTER(1); + bool *isnull = reinterpret_cast(PG_GETARG_POINTER(2)); + ItemPointer ht_ctid = (ItemPointer)PG_GETARG_POINTER(3); + Relation heaprel = (Relation)PG_GETARG_POINTER(4); + IndexUniqueCheck checkunique = (IndexUniqueCheck)PG_GETARG_INT32(5); + bool result = hnswinsert_internal(rel, values, isnull, ht_ctid, heaprel, checkunique); + + PG_RETURN_BOOL(result); +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(hnswbulkdelete); +Datum hnswbulkdelete(PG_FUNCTION_ARGS) +{ + if (IsExtremeRedo()) { + elog(ERROR, "hnsw index do not support extreme rto."); + } + IndexVacuumInfo *info = (IndexVacuumInfo *)PG_GETARG_POINTER(0); + IndexBulkDeleteResult *volatile stats = (IndexBulkDeleteResult *)PG_GETARG_POINTER(1); + IndexBulkDeleteCallback callback = (IndexBulkDeleteCallback)PG_GETARG_POINTER(2); + void *callbackState = static_cast(PG_GETARG_POINTER(3)); + stats = hnswbulkdelete_internal(info, stats, callback, callbackState); + + PG_RETURN_POINTER(stats); +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(hnswvacuumcleanup); +Datum hnswvacuumcleanup(PG_FUNCTION_ARGS) +{ + if (IsExtremeRedo()) { + elog(ERROR, "hnsw index do not support extreme rto."); + } + IndexVacuumInfo *info = (IndexVacuumInfo *)PG_GETARG_POINTER(0); + IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *)PG_GETARG_POINTER(1); + stats = hnswvacuumcleanup_internal(info, stats); + + PG_RETURN_POINTER(stats); +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(hnswcostestimate); +Datum hnswcostestimate(PG_FUNCTION_ARGS) +{ + PlannerInfo *root = 
(PlannerInfo *)PG_GETARG_POINTER(0); + IndexPath *path = (IndexPath *)PG_GETARG_POINTER(1); + double loopcount = static_cast(PG_GETARG_FLOAT8(2)); + Cost *startupcost = (Cost *)PG_GETARG_POINTER(3); + Cost *totalcost = (Cost *)PG_GETARG_POINTER(4); + Selectivity *selectivity = (Selectivity *)PG_GETARG_POINTER(5); + double *correlation = reinterpret_cast(PG_GETARG_POINTER(6)); + hnswcostestimate_internal(root, path, loopcount, startupcost, totalcost, selectivity, correlation); + + PG_RETURN_VOID(); +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(hnswoptions); +Datum hnswoptions(PG_FUNCTION_ARGS) +{ + Datum reloptions = PG_GETARG_DATUM(0); + bool validate = PG_GETARG_BOOL(1); + bytea *result = hnswoptions_internal(reloptions, validate); + + if (NULL != result) + PG_RETURN_BYTEA_P(result); + + PG_RETURN_NULL(); +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(hnswvalidate); +Datum hnswvalidate(PG_FUNCTION_ARGS) +{ + Oid opclassoid = PG_GETARG_OID(0); + bool result = hnswvalidate_internal(opclassoid); + + PG_RETURN_BOOL(result); +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(hnswbeginscan); +Datum hnswbeginscan(PG_FUNCTION_ARGS) +{ + Relation rel = (Relation)PG_GETARG_POINTER(0); + int nkeys = PG_GETARG_INT32(1); + int norderbys = PG_GETARG_INT32(2); + IndexScanDesc scan = hnswbeginscan_internal(rel, nkeys, norderbys); + + PG_RETURN_POINTER(scan); +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(hnswrescan); +Datum hnswrescan(PG_FUNCTION_ARGS) +{ + IndexScanDesc scan = (IndexScanDesc)PG_GETARG_POINTER(0); + ScanKey scankey = (ScanKey)PG_GETARG_POINTER(1); + int nkeys = PG_GETARG_INT32(2); + ScanKey orderbys = (ScanKey)PG_GETARG_POINTER(3); + int norderbys = PG_GETARG_INT32(4); + hnswrescan_internal(scan, scankey, nkeys, orderbys, norderbys); + + PG_RETURN_VOID(); +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(hnswgettuple); +Datum hnswgettuple(PG_FUNCTION_ARGS) +{ + IndexScanDesc scan = (IndexScanDesc)PG_GETARG_POINTER(0); + ScanDirection direction = (ScanDirection)PG_GETARG_INT32(1); + + if (NULL == scan) + 
ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("Invalid arguments for function hnswgettuple"))); + + bool result = hnswgettuple_internal(scan, direction); + + PG_RETURN_BOOL(result); +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(hnswendscan); +Datum hnswendscan(PG_FUNCTION_ARGS) +{ + IndexScanDesc scan = (IndexScanDesc)PG_GETARG_POINTER(0); + hnswendscan_internal(scan); + + PG_RETURN_VOID(); +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(hnswdelete); +Datum hnswdelete(PG_FUNCTION_ARGS) +{ + Relation rel = (Relation)PG_GETARG_POINTER(0); + Datum *values = (Datum *)PG_GETARG_POINTER(1); + const bool *isnull = (const bool *)PG_GETARG_POINTER(2); + ItemPointer heapTCtid = (ItemPointer)PG_GETARG_POINTER(3); + bool isRollbackIndex = (bool)PG_GETARG_POINTER(4); + + bool result = hnswdelete_internal(rel, values, isnull, heapTCtid, isRollbackIndex); + + PG_RETURN_BOOL(result); +} \ No newline at end of file diff --git a/src/gausskernel/storage/access/datavec/hnswadaptor.cpp b/src/gausskernel/storage/access/datavec/hnswadaptor.cpp new file mode 100644 index 0000000000000000000000000000000000000000..21684b415a010c5635d782c8cb0250e6ed1d7529 --- /dev/null +++ b/src/gausskernel/storage/access/datavec/hnswadaptor.cpp @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2024 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * ------------------------------------------------------------------------- + * + * hnswadaptor.cpp + * + * IDENTIFICATION + * src/gausskernel/storage/access/datavec/hnswadaptor.cpp + * + * ------------------------------------------------------------------------- + */ +#include +#include "access/datavec/hnsw.h" +#include "access/datavec/utils.h" + +// return PQ_ERROR if error occurs +#define PQ_RETURN_IFERR(ret) \ + do { \ + int _status_ = (ret); \ + if (SECUREC_UNLIKELY(_status_ != PQ_SUCCESS)) { \ + return _status_; \ + } \ + } while (0) + +int pq_resolve_path(char* absolute_path, const char* raw_path, const char* filename) +{ + char path[MAX_PATH_LEN] = { 0 }; + + if (!realpath(raw_path, path)) { + if (errno != ENOENT && errno != EACCES) { + return PQ_ERROR; + } + } + + int ret = snprintf_s(absolute_path, MAX_PATH_LEN, MAX_PATH_LEN - 1, "%s/%s", path, filename); + if (ret < 0) { + return PQ_ERROR; + } + return PQ_SUCCESS; +} + +int pq_load_symbol(char *symbol, void **sym_lib_handle) +{ +#ifndef WIN32 + const char *dlsym_err = NULL; + + *sym_lib_handle = dlsym(g_pq_func.handle, symbol); + dlsym_err = dlerror(); + if (dlsym_err != NULL) { + ereport(FATAL, (errcode(ERRCODE_INVALID_OPERATION), + errmsg("incompatible library \"%s\", load %s failed, %s", PQ_SO_NAME, symbol, dlsym_err))); + return PQ_ERROR; + } +#endif // !WIN32 + return PQ_SUCCESS; +} + +#define PQ_LOAD_SYMBOL_FUNC(func) pq_load_symbol(#func, (void **)&g_pq_func.func) + +int pq_open_dl(void **lib_handle, char *symbol) +{ +#ifndef WIN32 + *lib_handle = dlopen(symbol, RTLD_LAZY); + if (*lib_handle == NULL) { + ereport(ERROR, (errcode_for_file_access(), errmsg("could not load library %s, %s", PQ_SO_NAME, dlerror()))); + return PQ_ERROR; + } + return PQ_SUCCESS; +#else + return PQ_ERROR; +#endif +} + +void pq_close_dl(void *lib_handle) +{ +#ifndef WIN32 + (void)dlclose(lib_handle); +#endif +} + +int pq_load_symbols(char *lib_dl_path) +{ + PQ_RETURN_IFERR(pq_open_dl(&g_pq_func.handle, lib_dl_path)); + + 
PQ_RETURN_IFERR(PQ_LOAD_SYMBOL_FUNC(ComputePQTable)); + PQ_RETURN_IFERR(PQ_LOAD_SYMBOL_FUNC(ComputeVectorPQCode)); + PQ_RETURN_IFERR(PQ_LOAD_SYMBOL_FUNC(GetPQDistanceTableSdc)); + PQ_RETURN_IFERR(PQ_LOAD_SYMBOL_FUNC(GetPQDistanceTableAdc)); + PQ_RETURN_IFERR(PQ_LOAD_SYMBOL_FUNC(GetPQDistance)); + + return PQ_SUCCESS; +} + +int pq_func_init() +{ + if (g_pq_func.inited) { + return PQ_SUCCESS; + } + + char lib_dl_path[MAX_PATH_LEN] = { 0 }; + char* raw_path = getenv(PQ_ENV_PATH); + if (raw_path == nullptr) { + ereport(ERROR, (errmsg("failed to get DATAVEC_PQ_LIB_PATH"))); + return PQ_ERROR; + } + + int ret = pq_resolve_path(lib_dl_path, raw_path, PQ_SO_NAME); + if (ret != PQ_SUCCESS) { + ereport(ERROR, (errmsg( + "failed to resolve the path of libvecturbo.so, lib_dl_path %s, raw_path %s", + lib_dl_path, raw_path))); + return PQ_ERROR; + } + + ret = pq_load_symbols(lib_dl_path); + if (ret != PQ_SUCCESS) { + return PQ_ERROR; + } + + g_pq_func.inited = true; + return PQ_SUCCESS; +} + +int PQInit() +{ +#ifdef __x86_64__ + ereport(FATAL, (errmsg("PQ only support in arm."))); +#endif + if (pq_func_init() != PQ_SUCCESS) { + ereport(FATAL, (errmsg("failed to init PQ library"))); + return PQ_ERROR; + } + g_instance.pq_inited = true; + return PQ_SUCCESS; +} + +void PQUinit() +{ + if (!g_instance.attr.attr_storage.enable_pq || ! 
g_instance.pq_inited) { + return; + } + g_instance.pq_inited = false; + ereport(LOG, (errmsg("datavec PQ uninit"))); + if (g_pq_func.handle != NULL) { + pq_close_dl(g_pq_func.handle); + g_pq_func.handle = NULL; + g_pq_func.inited = false; + } +} + +int ComputePQTable(VectorArray samples, PQParams *params) +{ + return g_pq_func.ComputePQTable(samples, params); +} + +int ComputeVectorPQCode(float *vector, const PQParams *params, uint8 *pqCode) +{ + return g_pq_func.ComputeVectorPQCode(vector, params, pqCode); +} + +int GetPQDistanceTableSdc(const PQParams *params, float *pqDistanceTable) +{ + return g_pq_func.GetPQDistanceTableSdc(params, pqDistanceTable); +} + +int GetPQDistanceTableAdc(float *vector, const PQParams *params, float *pqDistanceTable) +{ + return g_pq_func.GetPQDistanceTableAdc(vector, params, pqDistanceTable); +} + +int GetPQDistance(const uint8 *basecode, const uint8 *querycode, const PQParams *params, + const float *pqDistanceTable, float *pqDistance) +{ + return g_pq_func.GetPQDistance(basecode, querycode, params, pqDistanceTable, pqDistance); +} \ No newline at end of file diff --git a/src/gausskernel/storage/access/datavec/hnswbuild.cpp b/src/gausskernel/storage/access/datavec/hnswbuild.cpp new file mode 100644 index 0000000000000000000000000000000000000000..23c2d377a6aa8ec145aacde70fd71f87ac86dc35 --- /dev/null +++ b/src/gausskernel/storage/access/datavec/hnswbuild.cpp @@ -0,0 +1,1584 @@ +/* + * Copyright (c) 2024 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * ------------------------------------------------------------------------- + * + * hnswbuild.cpp + * + * IDENTIFICATION + * src/gausskernel/storage/access/datavec/hnswbuild.cpp + * + * ------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include + +#include "access/tableam.h" +#include "access/xact.h" +#include "access/xloginsert.h" +#include "postmaster/bgworker.h" +#include "catalog/index.h" +#include "access/datavec/hnsw.h" +#include "miscadmin.h" +#include "storage/buf/bufmgr.h" +#include "storage/procarray.h" +#include "tcop/tcopprot.h" +#include "utils/datum.h" +#include "utils/memutils.h" +#include "commands/vacuum.h" + +#include "pgstat.h" + +#define CALLBACK_ITEM_POINTER HeapTuple hup + +#define PARALLEL_KEY_HNSW_SHARED UINT64CONST(0xA000000000000001) +#define PARALLEL_KEY_HNSW_AREA UINT64CONST(0xA000000000000002) +#define PARALLEL_KEY_QUERY_TEXT UINT64CONST(0xA000000000000003) +#define PROGRESS_CREATEIDX_TUPLES_DONE 0 + +#define GENERATIONCHUNK_RAWSIZE (SIZEOF_SIZE_T + SIZEOF_VOID_P * 2) + +/* + * Add sample + */ +static void AddSample(Datum *values, HnswBuildState *buildstate) +{ + VectorArray samples = buildstate->samples; + int targsamples = samples->maxlen; + + /* Detoast once for all calls */ + Datum value = PointerGetDatum(PG_DETOAST_DATUM(values[0])); + + if (buildstate->kmeansnormprocinfo != NULL) { + if (!HnswCheckNorm(buildstate->kmeansnormprocinfo, buildstate->collation, value)) { + return; + } + + value = HnswNormValue(buildstate->typeInfo, buildstate->collation, value); + } + + if (samples->length < targsamples) { + VectorArraySet(samples, samples->length, DatumGetPointer(value)); + samples->length++; + } else { + if (buildstate->rowstoskip < 0) { + buildstate->rowstoskip = anl_get_next_S(samples->length, targsamples, &buildstate->rstate); + } + + if (buildstate->rowstoskip <= 0) { + int k = (int) (targsamples * anl_random_fract()); + Assert(k >= 0 && k < targsamples); + 
VectorArraySet(samples, k, DatumGetPointer(value)); + } + + buildstate->rowstoskip -= 1; + } +} + +/* + * Callback for sampling + */ +static void SampleCallback(Relation index, CALLBACK_ITEM_POINTER, Datum *values, + const bool *isnull, bool tupleIsAlive, void *state) +{ + HnswBuildState *buildstate = (HnswBuildState *) state; + MemoryContext oldCtx; + + /* Skip nulls */ + if (isnull[0]) { + return; + } + + /* Use memory context since detoast can allocate */ + oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx); + + /* Add sample */ + AddSample(values, buildstate); + + /* Reset memory context */ + MemoryContextSwitchTo(oldCtx); + MemoryContextReset(buildstate->tmpCtx); +} + +/* + * Sample rows with same logic as ANALYZE + */ +static void SampleRows(HnswBuildState *buildstate) +{ + int targsamples = buildstate->samples->maxlen; + BlockNumber totalblocks = RelationGetNumberOfBlocks(buildstate->heap); + + buildstate->rowstoskip = -1; + + BlockSampler_Init(&buildstate->bs, totalblocks, targsamples); + + buildstate->rstate = anl_init_selection_state(targsamples); + while (BlockSampler_HasMore(&buildstate->bs)) { + BlockNumber targblock = BlockSampler_Next(&buildstate->bs); + + tableam_index_build_scan(buildstate->heap, buildstate->index, buildstate->indexInfo, + false, SampleCallback, (void *) buildstate, NULL, targblock, 1); + } +} + +PQParams *InitPQParamsInMemory(HnswBuildState *buildstate) +{ + PQParams *params = (PQParams*)palloc(sizeof(PQParams)); + params->pqM = buildstate->pqM; + params->pqKsub = buildstate->pqKsub; + params->funcType = getPQfunctionType(buildstate->procinfo, buildstate->normprocinfo); + params->dim = buildstate->dimensions; + params->subItemSize = buildstate->typeInfo->itemSize(buildstate->dimensions / buildstate->pqM); + params->pqTable = buildstate->pqTable; + return params; +} + +static void ComputeHnswPQ(HnswBuildState *buildstate) +{ + MemoryContext pqCtx = AllocSetContextCreate(CurrentMemoryContext, + "Hnsw PQ temporary context", + 
ALLOCSET_DEFAULT_SIZES); + MemoryContext oldCtx = MemoryContextSwitchTo(pqCtx); + + ComputePQTable(buildstate->samples, buildstate->params); + MemoryContextSwitchTo(oldCtx); + MemoryContextDelete(pqCtx); +} + +BlockNumber BlockSamplerGetBlock(BlockSampler bs) +{ + if (BlockSampler_HasMore(bs)) { + return BlockSampler_Next(bs); + } + return InvalidBlockNumber; +} + +static void EstimateRows(Relation onerel, double *totalrows) +{ + int64 targrows = HNSWPQ_DEFAULT_TARGET_ROWS * abs(default_statistics_target); + int64 numrows = 0; /* # rows now in reservoir */ + double samplerows = 0; /* total # rows collected */ + double liverows = 0; /* # live rows seen */ + double deadrows = 0; /* # dead rows seen */ + double rowstoskip = -1; /* -1 means not set yet */ + BlockNumber totalblocks; + TransactionId OldestXmin; + BlockSamplerData bs; + double rstate; + BlockNumber targblock = 0; + BlockNumber sampleblock = 0; + bool estimateTableRownum = false; + bool isAnalyzing = true; + + totalblocks = RelationGetNumberOfBlocks(onerel); + OldestXmin = GetOldestXmin(onerel); + /* Prepare for sampling block numbers */ + BlockSampler_Init(&bs, totalblocks, targrows); + /* Prepare for sampling rows */ + rstate = anl_init_selection_state(targrows); + + while (InvalidBlockNumber != (targblock = BlockSamplerGetBlock(&bs))) { + Buffer targbuffer; + Page targpage; + OffsetNumber targoffset, maxoffset; + + vacuum_delay_point(); + sampleblock++; + + targbuffer = ReadBufferExtended(onerel, MAIN_FORKNUM, targblock, RBM_NORMAL, NULL); + LockBuffer(targbuffer, BUFFER_LOCK_SHARE); + targpage = BufferGetPage(targbuffer); + + if (RelationIsUstoreFormat(onerel)) { + for (int i = 0; i < onerel->rd_att->natts; i++) { + if (onerel->rd_att->attrs[i].attcacheoff >= 0) { + onerel->rd_att->attrs[i].attcacheoff = -1; + } + } + + TupleTableSlot *slot = MakeSingleTupleTableSlot(RelationGetDescr(onerel), false, onerel->rd_tam_ops); + maxoffset = UHeapPageGetMaxOffsetNumber(targpage); + + /* Inner loop over all 
tuples on the selected page */ + for (targoffset = FirstOffsetNumber; targoffset <= maxoffset; targoffset++) { + RowPtr *lp = UPageGetRowPtr(targpage, targoffset); + bool sampleIt = false; + TransactionId xid; + UHeapTuple targTuple; + if (RowPtrIsDeleted(lp)) { + deadrows += 1; + continue; + } + if (!RowPtrIsNormal(lp)) { + if (RowPtrIsDeleted(lp)) { + deadrows += 1; + } + continue; + } + + if (!RowPtrHasStorage(lp)) { + continue; + } + + /* Allocate memory for target tuple. */ + targTuple = UHeapGetTuple(onerel, targbuffer, targoffset); + + switch (UHeapTupleSatisfiesOldestXmin(targTuple, OldestXmin, + targbuffer, true, &targTuple, &xid, NULL, onerel)) { + case UHEAPTUPLE_LIVE: + sampleIt = true; + liverows += 1; + break; + + case UHEAPTUPLE_DEAD: + case UHEAPTUPLE_RECENTLY_DEAD: + /* Count dead and recently-dead rows */ + deadrows += 1; + break; + + case UHEAPTUPLE_INSERT_IN_PROGRESS: + if (TransactionIdIsCurrentTransactionId(xid)) { + sampleIt = true; + liverows += 1; + } + break; + + case UHEAPTUPLE_DELETE_IN_PROGRESS: + if (TransactionIdIsCurrentTransactionId(xid)) { + deadrows += 1; + } else { + liverows += 1; + } + break; + + default: + elog(ERROR, "unexpected UHeapTupleSatisfiesOldestXmin result"); + break; + } + + if (sampleIt) { + ExecStoreTuple(targTuple, slot, InvalidBuffer, false); + + if (numrows >= targrows) { + if (rowstoskip < 0) { + rowstoskip = anl_get_next_S(samplerows, targrows, &rstate); + } + if (rowstoskip <= 0) { + int64 k = (int64)(targrows * anl_random_fract()); + + AssertEreport(k >= 0 && k < targrows, MOD_OPT, + "Index number out of range when replacing tuples."); + } + rowstoskip -= 1; + } + samplerows += 1; + } + + /* Free memory for target tuple. 
*/ + if (targTuple) { + UHeapFreeTuple(targTuple); + } + } + + /* Now release the lock and pin on the page */ + ExecDropSingleTupleTableSlot(slot); + + for (int i = 0; i < onerel->rd_att->natts; i++) { + if (onerel->rd_att->attrs[i].attcacheoff >= 0) { + onerel->rd_att->attrs[i].attcacheoff = -1; + } + } + + goto uheap_end; + } + + maxoffset = PageGetMaxOffsetNumber(targpage); + /* Inner loop over all tuples on the selected page */ + for (targoffset = FirstOffsetNumber; targoffset <= maxoffset; targoffset++) { + ItemId itemid; + HeapTupleData targtuple; + bool sample_it = false; + + /* IO collector and IO scheduler for analyze statement */ + if (ENABLE_WORKLOAD_CONTROL) + IOSchedulerAndUpdate(IO_TYPE_READ, 10, IO_TYPE_ROW); + + targtuple.t_tableOid = InvalidOid; + targtuple.t_bucketId = InvalidBktId; + HeapTupleCopyBaseFromPage(&targtuple, targpage); + itemid = PageGetItemId(targpage, targoffset); + + if (!ItemIdIsNormal(itemid)) { + if (ItemIdIsDead(itemid)) + deadrows += 1; + continue; + } + + ItemPointerSet(&targtuple.t_self, targblock, targoffset); + + targtuple.t_tableOid = RelationGetRelid(onerel); + targtuple.t_bucketId = RelationGetBktid(onerel); + targtuple.t_data = (HeapTupleHeader)PageGetItem(targpage, itemid); + targtuple.t_len = ItemIdGetLength(itemid); + + switch (HeapTupleSatisfiesVacuum(&targtuple, OldestXmin, targbuffer, isAnalyzing)) { + case HEAPTUPLE_LIVE: + sample_it = true; + liverows += 1; + break; + + case HEAPTUPLE_DEAD: + case HEAPTUPLE_RECENTLY_DEAD: + /* Count dead and recently-dead rows */ + deadrows += 1; + break; + + case HEAPTUPLE_INSERT_IN_PROGRESS: + if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(targpage, targtuple.t_data))) { + sample_it = true; + liverows += 1; + } + break; + + case HEAPTUPLE_DELETE_IN_PROGRESS: + if (TransactionIdIsCurrentTransactionId(HeapTupleGetUpdateXid(&targtuple))) + deadrows += 1; + else { + sample_it = true; + liverows += 1; + } + break; + + default: + ereport( + ERROR, 
(errcode(ERRCODE_CASE_NOT_FOUND), errmsg("unexpected HeapTupleSatisfiesVacuum result"))); + break; + } + + if (sample_it) { + if (numrows < targrows) { + if (estimateTableRownum) { + numrows++; + } + } else { + if (rowstoskip < 0) { + rowstoskip = anl_get_next_S(samplerows, targrows, &rstate); + } + + if (rowstoskip <= 0) { + int64 k = (int64)(targrows * anl_random_fract()); + AssertEreport( + k >= 0 && k < targrows, MOD_OPT, "Index number out of range when replacing tuples."); + } + rowstoskip -= 1; + } + samplerows += 1; + } + } + +uheap_end: + UnlockReleaseBuffer(targbuffer); + } + if (bs.m > 0) { + *totalrows = floor((liverows / bs.m) * totalblocks + 0.5); + } else { + *totalrows = 0.0; + } +} + +/* + * Build PQ table + */ +static void BuildPQtable(HnswBuildState *buildstate) +{ + int numSamples; + Relation index = buildstate->index; + + /* Skip samples for unlogged table */ + if (buildstate->heap == NULL) { + numSamples = 1; + } else { + double num; + EstimateRows(buildstate->heap, &num); + numSamples = (int)num; + } + PG_TRY(); + { + /* Sample rows */ + buildstate->samples = VectorArrayInit(numSamples, buildstate->dimensions, + buildstate->typeInfo->itemSize(buildstate->dimensions)); + } + PG_CATCH(); + { + ereport(ERROR, (errmsg("memory alloc failed during PQtable sampling, suggest using hnsw without PQ."))); + PG_RE_THROW(); + } + PG_END_TRY(); + if (buildstate->heap != NULL) { + SampleRows(buildstate); + if (buildstate->samples->length < buildstate->pqKsub) { + ereport(NOTICE, + (errmsg("hnsw PQ table created with little data"), + errdetail("This will cause low recall."), + errhint("Drop the index until the table has more data."))); + } + } + ComputeHnswPQ(buildstate); + VectorArrayFree(buildstate->samples); +} + + +/* + * Create the metapage + */ +static void CreateMetaPage(HnswBuildState *buildstate) +{ + Relation index = buildstate->index; + ForkNumber forkNum = buildstate->forkNum; + Buffer buf; + Page page; + HnswMetaPage metap; + + buf = 
HnswNewBuffer(index, forkNum);
+    page = BufferGetPage(buf);
+    HnswInitPage(buf, page);
+
+    if (buildstate->isUStore) {
+        HnswPageGetOpaque(page)->pageType = HNSW_USTORE_PAGE_TYPE;
+    }
+
+    /* Set metapage data (entry point and insert page start out invalid) */
+    metap = HnswPageGetMeta(page);
+    metap->magicNumber = HNSW_MAGIC_NUMBER;
+    metap->version = HNSW_VERSION;
+    metap->dimensions = buildstate->dimensions;
+    metap->m = buildstate->m;
+    metap->efConstruction = buildstate->efConstruction;
+    metap->entryBlkno = InvalidBlockNumber;
+    metap->entryOffno = InvalidOffsetNumber;
+    metap->entryLevel = -1;
+    metap->insertPage = InvalidBlockNumber;
+
+    /* set PQ info; table sizes are converted to block counts by rounding up */
+    metap->enablePQ = buildstate->enablePQ;
+    metap->pqM = buildstate->pqM;
+    metap->pqKsub = buildstate->pqKsub;
+    metap->pqcodeSize = buildstate->pqcodeSize;
+    metap->pqDisTableSize = 0;
+    metap->pqDisTableNblk = 0;
+    if (buildstate->enablePQ) {
+        metap->pqTableSize = (uint32)buildstate->pqTableSize;
+        metap->pqTableNblk = (uint16)(
+            (metap->pqTableSize + HNSW_PQTABLE_STORAGE_SIZE - 1) / HNSW_PQTABLE_STORAGE_SIZE);
+        if (buildstate->pqMode == HNSW_PQMODE_SDC) { /* distance table is only sized in SDC mode */
+            uint32 disTableLen = buildstate->pqM * buildstate->pqKsub * buildstate->pqKsub;
+            metap->pqDisTableSize = (uint32)disTableLen * sizeof(float);
+            metap->pqDisTableNblk = (uint16)(
+                (metap->pqDisTableSize + HNSW_PQTABLE_STORAGE_SIZE - 1) / HNSW_PQTABLE_STORAGE_SIZE);
+        }
+    } else {
+        metap->pqTableSize = 0;
+        metap->pqTableNblk = 0;
+    }
+
+    ((PageHeader)page)->pd_lower = ((char *)metap + sizeof(HnswMetaPageData)) - (char *)page; /* end of meta struct */
+
+    MarkBufferDirty(buf);
+    UnlockReleaseBuffer(buf);
+}
+
+/*
+ * Create the append metapage
+ *
+ * Allocates a new page in the build fork and fills an HnswAppendMetaPage with
+ * the graph parameters, the PQ sizing (same rounding as CreateMetaPage) and
+ * the slot layout: npages per slot, slotStartBlkno placed after the PQ table
+ * and distance-table blocks, and both insert slots initially invalid.
+ */
+static void CreateAppendMetaPage(HnswBuildState *buildstate)
+{
+    Relation index = buildstate->index;
+    ForkNumber forkNum = buildstate->forkNum;
+    Buffer buf;
+    Page page;
+    HnswAppendMetaPage appMetap;
+    int slotTypeNum = 2; /* presumably one slot type each for elements and neighbors -- TODO confirm */
+
+    buf = HnswNewBuffer(index, forkNum);
+    page = BufferGetPage(buf);
+    HnswInitPage(buf, page);
+
+    /* Set append metapage data */
+    appMetap = HnswPageGetAppendMeta(page);
+    appMetap->magicNumber = HNSW_MAGIC_NUMBER;
+    appMetap->version = HNSW_VERSION;
+    appMetap->dimensions = buildstate->dimensions;
+    appMetap->m = buildstate->m;
+    appMetap->efConstruction = buildstate->efConstruction;
+    appMetap->entryBlkno = InvalidBlockNumber;
+    appMetap->entryOffno = InvalidOffsetNumber;
+    appMetap->entryLevel = -1;
+
+    /* set PQ info */
+    appMetap->enablePQ = buildstate->enablePQ;
+    appMetap->pqM = buildstate->pqM;
+    appMetap->pqKsub = buildstate->pqKsub;
+    appMetap->pqcodeSize = buildstate->pqcodeSize;
+    appMetap->pqDisTableSize = 0;
+    appMetap->pqDisTableNblk = 0;
+    if (buildstate->enablePQ) {
+        appMetap->pqTableSize = (uint32)buildstate->pqTableSize;
+        appMetap->pqTableNblk = (uint16)(
+            (appMetap->pqTableSize + HNSW_PQTABLE_STORAGE_SIZE - 1) / HNSW_PQTABLE_STORAGE_SIZE);
+        if (buildstate->pqMode == HNSW_PQMODE_SDC) {
+            uint32 disTableLen = buildstate->pqM * buildstate->pqKsub * buildstate->pqKsub;
+            appMetap->pqDisTableSize = (uint32)disTableLen * sizeof(float);
+            appMetap->pqDisTableNblk = (uint16)(
+                (appMetap->pqDisTableSize + HNSW_PQTABLE_STORAGE_SIZE - 1) / HNSW_PQTABLE_STORAGE_SIZE);
+        }
+    } else {
+        appMetap->pqTableSize = 0;
+        appMetap->pqTableNblk = 0;
+    }
+
+    /* set slot info: use the default slot size unless it exceeds the NBuffers-derived cap */
+    appMetap->npages =
+        (HNSW_DEFAULT_NPAGES_PER_SLOT * slotTypeNum) < (g_instance.attr.attr_storage.NBuffers / HNSW_BUFFER_THRESHOLD)
+            ? HNSW_DEFAULT_NPAGES_PER_SLOT
+            : (g_instance.attr.attr_storage.NBuffers / (slotTypeNum * HNSW_BUFFER_THRESHOLD));
+    appMetap->slotStartBlkno = HNSW_PQTABLE_START_BLKNO + appMetap->pqTableNblk + appMetap->pqDisTableNblk;
+    appMetap->elementInsertSlot = InvalidBlockNumber;
+    appMetap->neighborInsertSlot = InvalidBlockNumber;
+
+    ((PageHeader)page)->pd_lower = ((char *)appMetap + sizeof(HnswAppendMetaPageData)) - (char *)page;
+
+    MarkBufferDirty(buf);
+    UnlockReleaseBuffer(buf);
+}
+
+/*
+ * Create PQ-related pages
+ *
+ * Reads the PQ table and distance-table block counts back from the metapage
+ * and appends that many freshly initialized pages to the build fork, PQ table
+ * pages first. The pages are left empty here.
+ */
+static void CreatePQPages(HnswBuildState *buildstate)
+{
+    uint16 nblks; /* NOTE(review): never used in this function */
+    Relation index = buildstate->index;
+    ForkNumber forkNum = buildstate->forkNum;
+    Buffer buf;
+    Page page;
+    uint16 pqTableNblk;
+    uint16 pqDisTableNblk;
+
+    HnswGetPQInfoFromMetaPage(index, &pqTableNblk, NULL, &pqDisTableNblk, NULL);
+
+    /* create pq table page */
+    for (uint16 i = 0; i < pqTableNblk; i++) {
+        buf = HnswNewBuffer(index, forkNum);
+        page = BufferGetPage(buf);
+        HnswInitPage(buf, page);
+        MarkBufferDirty(buf);
+        UnlockReleaseBuffer(buf);
+    }
+
+    /* create pq distance table page */
+    for (uint16 i = 0; i < pqDisTableNblk; i++) {
+        buf = HnswNewBuffer(index, forkNum);
+        page = BufferGetPage(buf);
+        HnswInitPage(buf, page);
+        MarkBufferDirty(buf);
+        UnlockReleaseBuffer(buf);
+    }
+}
+
+/*
+ * Add a new page
+ *
+ * Allocates a new buffer, links it from the current page through nextblkno,
+ * releases the current buffer, and switches *buf / *page to the new page,
+ * which is then initialized. The new buffer's lock is dropped briefly so
+ * interrupts can be serviced.
+ */
+static void HnswBuildAppendPage(Relation index, Buffer *buf, Page *page, ForkNumber forkNum)
+{
+    /* Add a new page */
+    Buffer newbuf = HnswNewBuffer(index, forkNum);
+
+    /* Update previous page */
+    HnswPageGetOpaque(*page)->nextblkno = BufferGetBlockNumber(newbuf);
+
+    /* Commit */
+    MarkBufferDirty(*buf);
+    UnlockReleaseBuffer(*buf);
+
+    /* Can take a while, so ensure we can interrupt */
+    /* Needs to be called when no buffer locks are held */
+    LockBuffer(newbuf, BUFFER_LOCK_UNLOCK);
+    CHECK_FOR_INTERRUPTS();
+    LockBuffer(newbuf, BUFFER_LOCK_EXCLUSIVE);
+
+    /* Prepare new page */
+    *buf = newbuf;
+    *page = BufferGetPage(*buf);
+    HnswInitPage(*buf, 
*page);
+}
+
+/*
+ * Create graph pages
+ *
+ * Walks the in-memory element list and lays every element out on index pages:
+ * the element tuple, an optional PQ code and (for ustore) an IndexTransInfo
+ * carved from pd_upper, and a placeholder neighbor tuple that
+ * WriteNeighborTuples() overwrites later. Element and neighbor tuples share a
+ * page when their combined size fits in HNSW_MAX_SIZE; otherwise the neighbor
+ * tuple is the first item of the following page. Finally the metapage is
+ * updated with the entry point and the last page used.
+ *
+ * Raises ERROR when the first element plus its PQ code cannot fit on an empty
+ * page, when an element tuple exceeds HNSW_TUPLE_ALLOC_SIZE, or when
+ * PageAddItem() does not return the precomputed offset.
+ */
+static void CreateGraphPages(HnswBuildState *buildstate)
+{
+    Relation index = buildstate->index;
+    ForkNumber forkNum = buildstate->forkNum;
+    Size maxSize;
+    HnswElementTuple etup;
+    HnswNeighborTuple ntup;
+    BlockNumber insertPage;
+    HnswElement entryPoint;
+    Buffer buf;
+    Page page;
+    HnswElementPtr iter = buildstate->graph->head;
+    char *base = buildstate->hnswarea;
+    IndexTransInfo *idxXid;
+    Size pqcodesSize = buildstate->pqcodeSize;
+
+    /* Calculate sizes */
+    maxSize = HNSW_MAX_SIZE;
+
+    /* Allocate once */
+    etup = (HnswElementTuple)palloc0(HNSW_TUPLE_ALLOC_SIZE);
+    ntup = (HnswNeighborTuple)palloc0(HNSW_TUPLE_ALLOC_SIZE);
+
+    /* Prepare first page */
+    buf = HnswNewBuffer(index, forkNum);
+    page = BufferGetPage(buf);
+    HnswInitPage(buf, page);
+
+    /* Check vector and pqcode can be on the same page */
+    if (!HnswPtrIsNull(base, buildstate->graph->head)) {
+        HnswElement head = (HnswElement)HnswPtrAccess(base, buildstate->graph->head);
+        Size elementSize = HNSW_ELEMENT_TUPLE_SIZE(VARSIZE_ANY((Pointer)HnswPtrAccess(base, head->value)));
+        Size freeSpace = PageGetFreeSpace(page);
+        if (freeSpace < elementSize + MAXALIGN(pqcodesSize)) {
+            /*
+             * Sizes are unsigned: report 0 rather than a wrapped-around value when
+             * the element alone is already larger than the free space.
+             */
+            int maxPQcodeSize = (freeSpace > elementSize) ? (int)(((freeSpace - elementSize) / 8) * 8) : 0;
+            ereport(ERROR, (errmsg("vector and pqcode must be on the same page, max pq_m is %d", maxPQcodeSize)));
+        }
+    }
+
+    if (buildstate->isUStore) {
+        HnswPageGetOpaque(page)->pageType = HNSW_USTORE_PAGE_TYPE;
+    }
+
+    while (!HnswPtrIsNull(base, iter)) {
+        HnswElement element = (HnswElement)HnswPtrAccess(base, iter);
+        Size etupSize;
+        Size ntupSize;
+        Size combinedSize;
+        Pointer valuePtr = (Pointer)HnswPtrAccess(base, element->value);
+
+        /* Update iterator */
+        iter = element->next;
+
+        /* Zero memory for each element */
+        MemSet(etup, 0, HNSW_TUPLE_ALLOC_SIZE);
+
+        /* Calculate sizes */
+        etupSize = HNSW_ELEMENT_TUPLE_SIZE(VARSIZE_ANY(valuePtr));
+        ntupSize = HNSW_NEIGHBOR_TUPLE_SIZE(element->level, buildstate->m);
+        combinedSize = etupSize + MAXALIGN(pqcodesSize) + ntupSize + sizeof(ItemIdData);
+
+        if (buildstate->isUStore) {
+            combinedSize += sizeof(IndexTransInfo);
+        }
+
+        /* Initial size check */
+        if (etupSize > HNSW_TUPLE_ALLOC_SIZE) {
+            elog(ERROR, "index tuple too large");
+        }
+
+        HnswSetElementTuple(base, etup, element);
+
+        /* Keep element and neighbors on the same page if possible */
+        if (PageGetFreeSpace(page) < etupSize + MAXALIGN(pqcodesSize) ||
+            (combinedSize <= maxSize && PageGetFreeSpace(page) < combinedSize)) {
+            HnswBuildAppendPage(index, &buf, &page, forkNum);
+            if (buildstate->isUStore) {
+                HnswPageGetOpaque(page)->pageType = HNSW_USTORE_PAGE_TYPE;
+            }
+        }
+
+        /* Calculate offsets */
+        element->blkno = BufferGetBlockNumber(buf);
+        element->offno = OffsetNumberNext(PageGetMaxOffsetNumber(page));
+        if (combinedSize <= maxSize) {
+            element->neighborPage = element->blkno;
+            element->neighborOffno = OffsetNumberNext(element->offno);
+        } else {
+            element->neighborPage = element->blkno + 1;
+            element->neighborOffno = FirstOffsetNumber;
+        }
+
+        ItemPointerSet(&etup->neighbortid, element->neighborPage, element->neighborOffno);
+
+        /* Reserve space for the PQ code at the top of the free area, before the tuple is added */
+        if (buildstate->enablePQ) {
+            ((PageHeader)page)->pd_upper -= MAXALIGN(pqcodesSize);
+            Pointer codePtr = (Pointer) HnswPtrAccess(base, element->pqcodes);
+            errno_t rc = memcpy_s(
+                ((char*)page) + ((PageHeader)page)->pd_upper, pqcodesSize, codePtr, pqcodesSize);
+            securec_check_c(rc, "\0", "\0");
+        }
+
+        /* Likewise reserve the per-tuple transaction info for ustore pages */
+        if (buildstate->isUStore) {
+            ((PageHeader)page)->pd_upper -= sizeof(IndexTransInfo);
+            idxXid = (IndexTransInfo *)(((char *)page) + ((PageHeader)page)->pd_upper);
+            idxXid->xmin = FrozenTransactionId;
+            idxXid->xmax = InvalidTransactionId;
+        }
+
+        /* Add element */
+        if (PageAddItem(page, (Item)etup, etupSize, InvalidOffsetNumber, false, false) != element->offno) {
+            elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index));
+        }
+
+        /* Add new page if needed */
+        if (PageGetFreeSpace(page) < ntupSize) {
+            HnswBuildAppendPage(index, &buf, &page, forkNum);
+            if (buildstate->isUStore) {
+                HnswPageGetOpaque(page)->pageType = HNSW_USTORE_PAGE_TYPE;
+            }
+        }
+        /* Add placeholder for neighbors */
+        if (PageAddItem(page, (Item)ntup, ntupSize, InvalidOffsetNumber, false, false) != element->neighborOffno) {
+            elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index));
+        }
+    }
+
+    insertPage = BufferGetBlockNumber(buf);
+
+    /* Commit */
+    MarkBufferDirty(buf);
+    UnlockReleaseBuffer(buf);
+
+    entryPoint = (HnswElement)HnswPtrAccess(base, buildstate->graph->entryPoint);
+    HnswUpdateMetaPage(index, HNSW_UPDATE_ENTRY_ALWAYS, entryPoint, insertPage, forkNum, true);
+
+    pfree(etup);
+    pfree(ntup);
+}
+
+/*
+ * Write neighbor tuples
+ */
+static void WriteNeighborTuples(HnswBuildState *buildstate)
+{
+    Relation index = buildstate->index;
+    ForkNumber forkNum = buildstate->forkNum;
+    int m = buildstate->m;
+    HnswElementPtr iter = buildstate->graph->head;
+    char *base = buildstate->hnswarea;
+    HnswNeighborTuple ntup;
+
+    /* Allocate once */
+    ntup = (HnswNeighborTuple)palloc0(HNSW_TUPLE_ALLOC_SIZE);
+
+    while (!HnswPtrIsNull(base, iter)) {
+        HnswElement element = (HnswElement)HnswPtrAccess(base, iter);
+        Buffer buf;
+        Page page;
+        Size ntupSize = HNSW_NEIGHBOR_TUPLE_SIZE(element->level, m);
+
+        /* Update iterator */
+        iter = element->next;
+
+        /* Zero memory for each element */
+        MemSet(ntup, 0, HNSW_TUPLE_ALLOC_SIZE);
+
+        /* Can take a while, so ensure we can interrupt */
+        /* Needs to be called when no buffer locks are held */
+        CHECK_FOR_INTERRUPTS();
+
+        buf = ReadBufferExtended(index, forkNum, element->neighborPage, RBM_NORMAL, NULL);
+        LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+        page = BufferGetPage(buf);
+
+        HnswSetNeighborTuple(base, ntup, element, m);
+
+        if (!page_index_tuple_overwrite(page, element->neighborOffno, (Item)ntup, ntupSize))
+            elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index));
+
+        /* Commit */
+        MarkBufferDirty(buf);
+
UnlockReleaseBuffer(buf);
+    }
+
+    pfree(ntup);
+}
+
+/*
+ * Flush pages
+ *
+ * Writes the in-memory graph to the index in a fixed order: metapage, PQ pages
+ * plus FlushPQInfo() when PQ is enabled, graph pages, then neighbor tuples.
+ * Afterwards the graph is marked flushed and graphCtx is reset.
+ */
+static void FlushPages(HnswBuildState *buildstate)
+{
+#ifdef HNSW_MEMORY
+    elog(INFO, "memory: %zu MB", buildstate->graph->memoryUsed / (1024 * 1024));
+#endif
+
+    CreateMetaPage(buildstate);
+    if (buildstate->enablePQ) {
+        CreatePQPages(buildstate);
+        /* Save PQ table and distance table */
+        FlushPQInfo(buildstate);
+    }
+    CreateGraphPages(buildstate);
+    WriteNeighborTuples(buildstate);
+
+    buildstate->graph->flushed = true;
+    MemoryContextReset(buildstate->graphCtx);
+}
+
+/*
+ * Add a heap TID to an existing element
+ *
+ * Under dup's exclusive lock, appends element's first heap TID to dup.
+ * Returns false without modifying dup when it already holds HNSW_HEAPTIDS
+ * TIDs, true otherwise.
+ */
+static bool AddDuplicateInMemory(HnswElement element, HnswElement dup)
+{
+    LWLockAcquire(&dup->lock, LW_EXCLUSIVE);
+
+    if (dup->heaptidsLength == HNSW_HEAPTIDS) {
+        LWLockRelease(&dup->lock);
+        return false;
+    }
+
+    HnswAddHeapTid(dup, &element->heaptids[0]);
+
+    LWLockRelease(&dup->lock);
+
+    return true;
+}
+
+/*
+ * Find duplicate element
+ *
+ * Scans element's layer-0 neighbors in order and stops at the first one whose
+ * value differs. For each equal-valued neighbor it tries to attach element's
+ * heap TID to it; returns true as soon as one accepts it, false otherwise.
+ */
+static bool FindDuplicateInMemory(char *base, HnswElement element)
+{
+    HnswNeighborArray *neighbors = HnswGetNeighbors(base, element, 0);
+    Datum value = HnswGetValue(base, element);
+
+    for (int i = 0; i < neighbors->length; i++) {
+        HnswCandidate *neighbor = &neighbors->items[i];
+        HnswElement neighborElement = (HnswElement)HnswPtrAccess(base, neighbor->element);
+        Datum neighborValue = HnswGetValue(base, neighborElement);
+        /* Exit early since ordered by distance */
+        if (!datumIsEqual(value, neighborValue, false, -1))
+            return false;
+
+        /* Check for space */
+        if (AddDuplicateInMemory(element, neighborElement))
+            return true;
+    }
+
+    return false;
+}
+
+/*
+ * Add to element list
+ *
+ * Pushes element onto the front of the graph's element list while holding the
+ * graph spinlock.
+ */
+static void AddElementInMemory(char *base, HnswGraph *graph, HnswElement element)
+{
+    SpinLockAcquire(&graph->lock);
+    element->next = graph->head;
+    HnswPtrStore(base, graph->head, element);
+    SpinLockRelease(&graph->lock);
+}
+
+/*
+ * Update neighbors
+ *
+ * For every layer from e->level down to 0, calls HnswUpdateConnection() for
+ * each of e's neighbors while holding that neighbor's exclusive lock.
+ * Neighbors that resolve to a NULL element are skipped.
+ */
+static void UpdateNeighborsInMemory(char *base, FmgrInfo *procinfo, Oid collation, HnswElement e, int m)
+{
+    for (int lc = e->level; lc >= 0; lc--) {
+        int lm = HnswGetLayerM(m, lc);
+        HnswNeighborArray *neighbors = HnswGetNeighbors(base, e, lc);
+
+        for (int i = 0; i < neighbors->length; i++) {
+            HnswCandidate *hc = &neighbors->items[i];
+            HnswElement neighborElement = (HnswElement)HnswPtrAccess(base, hc->element);
+
+            if (neighborElement == NULL) {
+                continue;
+            }
+
+            /* Use element for lock instead of hc since hc can be replaced */
+            LWLockAcquire(&neighborElement->lock, LW_EXCLUSIVE);
+            HnswUpdateConnection(base, e, hc, lm, lc, NULL, NULL, procinfo, collation);
+            LWLockRelease(&neighborElement->lock);
+        }
+    }
+}
+
+/*
+ * Update graph in memory
+ *
+ * If element duplicates an existing neighbor, only its heap TID is recorded
+ * and nothing else changes. Otherwise element is added to the element list,
+ * its neighbors' connections are updated, and it becomes the graph entry point
+ * when there is none yet or its level is higher than the current one.
+ * The efConstruction parameter is not used in this function.
+ */
+static void UpdateGraphInMemory(FmgrInfo *procinfo, Oid collation, HnswElement element, int m, int efConstruction,
+                                HnswElement entryPoint, HnswBuildState *buildstate)
+{
+    HnswGraph *graph = buildstate->graph;
+    char *base = buildstate->hnswarea;
+
+    /* Look for duplicate */
+    if (FindDuplicateInMemory(base, element)) {
+        return;
+    }
+
+    /* Add element */
+    AddElementInMemory(base, graph, element);
+
+    /* Update neighbors */
+    UpdateNeighborsInMemory(base, procinfo, collation, element, m);
+
+    /* Update entry point if needed (already have lock) */
+    if (entryPoint == NULL || element->level > entryPoint->level) {
+        HnswPtrStore(base, graph->entryPoint, element);
+    }
+}
+
+/*
+ * Insert tuple in memory
+ */
+static void InsertTupleInMemory(HnswBuildState *buildstate, HnswElement element)
+{
+    FmgrInfo *procinfo = buildstate->procinfo;
+    Oid collation = buildstate->collation;
+    HnswGraph *graph = buildstate->graph;
+    HnswElement entryPoint;
+    LWLock *entryLock = &graph->entryLock;
+    LWLock *entryWaitLock = &graph->entryWaitLock;
+    int efConstruction = buildstate->efConstruction;
+    int m = buildstate->m;
+    char *base = buildstate->hnswarea;
+
+    /* Wait if another process needs exclusive lock on entry lock */
+    LWLockAcquire(entryWaitLock, LW_EXCLUSIVE);
+    LWLockRelease(entryWaitLock);
+
+ /* Get entry point */ + LWLockAcquire(entryLock, LW_SHARED); + entryPoint = (HnswElement)HnswPtrAccess(base, graph->entryPoint); + /* Prevent concurrent inserts when likely updating entry point */ + if (entryPoint == NULL || element->level > entryPoint->level) { + /* Release shared lock */ + LWLockRelease(entryLock); + + /* Tell other processes to wait and get exclusive lock */ + LWLockAcquire(entryWaitLock, LW_EXCLUSIVE); + LWLockAcquire(entryLock, LW_EXCLUSIVE); + LWLockRelease(entryWaitLock); + + /* Get latest entry point after lock is acquired */ + entryPoint = (HnswElement)HnswPtrAccess(base, graph->entryPoint); + } + + /* Find neighbors for element */ + HnswFindElementNeighbors(base, element, entryPoint, NULL, procinfo, collation, m, efConstruction, + false, buildstate->enablePQ, buildstate->params); + + /* Update graph in memory */ + UpdateGraphInMemory(procinfo, collation, element, m, efConstruction, entryPoint, buildstate); + + /* Release entry lock */ + LWLockRelease(entryLock); +} + +/* + * Insert tuple + */ +static bool InsertTuple(Relation index, Datum *values, const bool *isnull, ItemPointer heaptid, + HnswBuildState *buildstate) +{ + const HnswTypeInfo *typeInfo = buildstate->typeInfo; + HnswGraph *graph = buildstate->graph; + HnswElement element; + HnswAllocator *allocator = &buildstate->allocator; + Size valueSize; + Pointer valuePtr; + Pointer codePtr = NULL; + LWLock *flushLock = &graph->flushLock; + char *base = buildstate->hnswarea; + + /* Detoast once for all calls */ + Datum value = PointerGetDatum(PG_DETOAST_DATUM(values[0])); + + /* Check value */ + if (typeInfo->checkValue != NULL) { + typeInfo->checkValue(DatumGetPointer(value)); + } + + /* Normalize if needed */ + if (buildstate->normprocinfo != NULL) { + if (!HnswCheckNorm(buildstate->normprocinfo, buildstate->collation, value)) { + return false; + } + + value = HnswNormValue(typeInfo, buildstate->collation, value); + } + + /* Get datum size */ + valueSize = 
VARSIZE_ANY(DatumGetPointer(value)); + + /* Ensure graph not flushed when inserting */ + LWLockAcquire(flushLock, LW_SHARED); + + /* Are we in the on-disk phase? */ + if (graph->flushed) { + LWLockRelease(flushLock); + + return HnswInsertTupleOnDisk(index, value, values, isnull, heaptid, true); + } + + /* + * In a parallel build, the HnswElement is allocated from the shared + * memory area, so we need to coordinate with other processes. + */ + LWLockAcquire(&graph->allocatorLock, LW_EXCLUSIVE); + + /* + * Check that we have enough memory available for the new element now that + * we have the allocator lock, and flush pages if needed. + */ + if (graph->memoryUsed >= graph->memoryTotal) { + LWLockRelease(&graph->allocatorLock); + + LWLockRelease(flushLock); + LWLockAcquire(flushLock, LW_EXCLUSIVE); + + if (!graph->flushed) { + ereport(NOTICE, (errmsg("hnsw graph no longer fits into maintenance_work_mem after " INT64_FORMAT " tuples", + (int64)graph->indtuples), + errdetail("Building will take significantly more time."), + errhint("Increase maintenance_work_mem to speed up builds."))); + + FlushPages(buildstate); + } + + LWLockRelease(flushLock); + + return HnswInsertTupleOnDisk(index, value, values, isnull, heaptid, true); + } + + /* Ok, we can proceed to allocate the element */ + element = HnswInitElement(base, heaptid, buildstate->m, buildstate->ml, buildstate->maxLevel, allocator); + valuePtr = (Pointer)HnswAlloc(allocator, valueSize); + if (buildstate->enablePQ) { + Size codesize = buildstate->pqM * sizeof(uint8); + codePtr = (Pointer)HnswAlloc(allocator, codesize); + } + + /* + * We have now allocated the space needed for the element, so we don't + * need the allocator lock anymore. Release it and initialize the rest of + * the element. 
+ */ + LWLockRelease(&graph->allocatorLock); + + /* Copy the datum */ + errno_t rc = memcpy_s(valuePtr, valueSize, DatumGetPointer(value), valueSize); + securec_check(rc, "\0", "\0"); + HnswPtrStore(base, element->value, valuePtr); + HnswPtrStore(base, element->pqcodes, codePtr); + + /* Create a lock for the element */ + LWLockInitialize(&element->lock, hnsw_lock_tranche_id); + + /* Insert tuple */ + InsertTupleInMemory(buildstate, element); + + /* Release flush lock */ + LWLockRelease(flushLock); + + return true; +} + +/* + * Callback for table_index_build_scan + */ +static void BuildCallback(Relation index, CALLBACK_ITEM_POINTER, Datum *values, const bool *isnull, bool tupleIsAlive, + void *state) +{ + HnswBuildState *buildstate = (HnswBuildState *)state; + HnswGraph *graph = buildstate->graph; + MemoryContext oldCtx; + + ItemPointer tid = &hup->t_self; + + /* Skip nulls */ + if (isnull[0]) { + return; + } + + /* Use memory context */ + oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx); + + /* Insert tuple */ + if (InsertTuple(index, values, isnull, tid, buildstate)) { + /* Update progress */ + SpinLockAcquire(&graph->lock); + UpdateProgress(PROGRESS_CREATEIDX_TUPLES_DONE, ++graph->indtuples); + SpinLockRelease(&graph->lock); + } + + /* Reset memory context */ + MemoryContextSwitchTo(oldCtx); + MemoryContextReset(buildstate->tmpCtx); +} + +/* + * Initialize the graph + */ +static void InitGraph(HnswGraph *graph, char *base, long memoryTotal) +{ + HnswPtrStore(base, graph->head, (HnswElement)NULL); + HnswPtrStore(base, graph->entryPoint, (HnswElement)NULL); + graph->memoryUsed = 0; + graph->memoryTotal = memoryTotal; + graph->flushed = false; + graph->indtuples = 0; + SpinLockInit(&graph->lock); + LWLockInitialize(&graph->entryLock, hnsw_lock_tranche_id); + LWLockInitialize(&graph->entryWaitLock, hnsw_lock_tranche_id); + LWLockInitialize(&graph->allocatorLock, hnsw_lock_tranche_id); + LWLockInitialize(&graph->flushLock, hnsw_lock_tranche_id); +} + +/* + * 
Initialize an allocator: store the allocation callback and the opaque state pointer that is handed back to it + */ +static void InitAllocator(HnswAllocator *allocator, void *(*alloc)(Size size, void *state), void *state) +{ + allocator->alloc = alloc; + allocator->state = state; +} + +/* + * Memory context allocator: allocates from graphCtx and adds MAXALIGN(size) to graphData.memoryUsed + */ +static void *HnswMemoryContextAlloc(Size size, void *state) +{ + HnswBuildState *buildstate = (HnswBuildState *)state; + void *chunk = MemoryContextAlloc(buildstate->graphCtx, size); + + buildstate->graphData.memoryUsed += MAXALIGN(size); + + return chunk; +} + +/* + * Shared memory allocator: returns hnswarea + graph->memoryUsed, then advances memoryUsed by MAXALIGN(size); no bounds check is made here + */ +static void *HnswSharedMemoryAlloc(Size size, void *state) +{ + HnswBuildState *buildstate = (HnswBuildState *)state; + void *chunk = buildstate->hnswarea + buildstate->graph->memoryUsed; + + buildstate->graph->memoryUsed += MAXALIGN(size); + return chunk; +} + +/* + * Initialize the build state from the index options; raises ERROR for unsupported types, dimensions or option combinations. With parallel = true no private PQ table is allocated. + */ +static void InitBuildState(HnswBuildState *buildstate, Relation heap, Relation index, IndexInfo *indexInfo, + ForkNumber forkNum, bool parallel) +{ + buildstate->heap = heap; + buildstate->index = index; + buildstate->indexInfo = indexInfo; + buildstate->forkNum = forkNum; + buildstate->typeInfo = HnswGetTypeInfo(index); + + buildstate->m = HnswGetM(index); + buildstate->efConstruction = HnswGetEfConstruction(index); + buildstate->dimensions = TupleDescAttr(index->rd_att, 0)->atttypmod; + + /* Disallow varbit since fixed dimensions are required */ + if (TupleDescAttr(index->rd_att, 0)->atttypid == VARBITOID) { + elog(ERROR, "type not supported for hnsw index"); + } + + /* Require column to have dimensions to be indexed */ + if (buildstate->dimensions < 0) { + elog(ERROR, "column does not have dimensions"); + } + + if (buildstate->dimensions > buildstate->typeInfo->maxDimensions) { + elog(ERROR, "column cannot have more than %d dimensions for hnsw index", buildstate->typeInfo->maxDimensions); + } + + if (buildstate->efConstruction < 2 * buildstate->m) { + elog(ERROR, "ef_construction must be greater than or equal to 2 * m"); + } + +
buildstate->reltuples = 0; + buildstate->indtuples = 0; + + /* Get support functions */ + buildstate->procinfo = index_getprocinfo(index, 1, HNSW_DISTANCE_PROC); + buildstate->normprocinfo = HnswOptionalProcInfo(index, HNSW_NORM_PROC); + buildstate->kmeansnormprocinfo = HnswOptionalProcInfo(index, HNSW_KMEANS_NORMAL_PROC); + buildstate->collation = index->rd_indcollation[0]; + + InitGraph(&buildstate->graphData, NULL, u_sess->attr.attr_memory.maintenance_work_mem * 1024L); /* local graph with a NULL base; budget presumably converts maintenance_work_mem from kB to bytes - confirm */ + buildstate->graph = &buildstate->graphData; + buildstate->ml = HnswGetMl(buildstate->m); + buildstate->maxLevel = HnswGetMaxLevel(buildstate->m); + + buildstate->graphCtx = + AllocSetContextCreate(CurrentMemoryContext, "Hnsw build graph context", ALLOCSET_DEFAULT_SIZES); + buildstate->tmpCtx = + AllocSetContextCreate(CurrentMemoryContext, "Hnsw build temporary context", ALLOCSET_DEFAULT_SIZES); + + InitAllocator(&buildstate->allocator, &HnswMemoryContextAlloc, buildstate); /* default allocator; HnswParallelScanAndInsert rebinds workers to HnswSharedMemoryAlloc */ + + buildstate->hnswleader = NULL; + buildstate->hnswshared = NULL; + buildstate->hnswarea = NULL; + + buildstate->enablePQ = HnswGetEnablePQ(index); + if (buildstate->enablePQ && !buildstate->typeInfo->supportPQ) { + ereport(ERROR, (errmsg("this data type cannot support hnswpq."))); + } + if (buildstate->enablePQ && !g_instance.pq_inited) { + ereport(ERROR, (errmsg("this instance has not currently loaded the pq dynamic library."))); + } + + buildstate->pqM = HnswGetPqM(index); + buildstate->pqKsub = HnswGetPqKsub(index); + if (buildstate->enablePQ) { + if (buildstate->kmeansnormprocinfo != NULL && buildstate->dimensions == 1) { + ereport(ERROR, (errmsg("dimensions must be greater than one for this opclass."))); + } + if (buildstate->dimensions % buildstate->pqM != 0) { + ereport(ERROR, (errmsg("dimensions must be divisible by pq_M, please reset pq_M."))); + } + Size subItemsize = buildstate->typeInfo->itemSize(buildstate->dimensions / buildstate->pqM); + subItemsize = MAXALIGN(subItemsize); + buildstate->pqTableSize =
buildstate->pqM * buildstate->pqKsub * subItemsize; + buildstate->pqTable = parallel ? NULL : (char*)palloc0(buildstate->pqTableSize); + buildstate->pqcodeSize = buildstate->pqM * sizeof(uint8); + buildstate->params = InitPQParamsInMemory(buildstate); + } else { + buildstate->pqTable = NULL; + buildstate->pqTableSize = 0; + buildstate->pqcodeSize = 0; + buildstate->params = NULL; + } + buildstate->pqMode = HNSW_PQMODE_DEFAULT; + buildstate->pqDistanceTable = NULL; + + buildstate->isUStore = buildstate->heap ? RelationIsUstoreFormat(buildstate->heap) : false; +} + +/* + * Free resources: delete both memory contexts and, for non-parallel PQ builds, free the PQ table, the SDC distance table (SDC mode only) and the PQ params + */ +static void FreeBuildState(HnswBuildState *buildstate, bool parallel) +{ + MemoryContextDelete(buildstate->graphCtx); + MemoryContextDelete(buildstate->tmpCtx); + if (buildstate->enablePQ && !parallel) { + pfree(buildstate->pqTable); + if (buildstate->pqMode == HNSW_PQMODE_SDC) { + pfree(buildstate->pqDistanceTable); + } + pfree(buildstate->params); + } +} + +static double ParallelHeapScan(HnswBuildState *buildstate, int *nparticipanttuplesorts) +{ /* Leader side: once BgworkerListWaitFinish returns (presumably when the workers are done - confirm), adopt the shared graph and area and return the shared reltuples */ + HnswShared *hnswshared = buildstate->hnswleader->hnswshared; + double reltuples; + + BgworkerListWaitFinish(&buildstate->hnswleader->nparticipanttuplesorts); + pg_memory_barrier(); + + *nparticipanttuplesorts = buildstate->hnswleader->nparticipanttuplesorts; + buildstate->graph = &hnswshared->graphData; + buildstate->hnswarea = hnswshared->hnswarea; + reltuples = hnswshared->reltuples; + + return reltuples; +} + +/* + * Perform a worker's portion of a parallel insert, using the shared graph, shared area and shared PQ tables + */ +static void HnswParallelScanAndInsert(Relation heapRel, Relation indexRel, HnswShared *hnswshared, char *hnswarea) +{ + HnswBuildState buildstate; + TableScanDesc scan; + double reltuples; + IndexInfo *indexInfo; + + /* Join parallel scan */ + indexInfo = BuildIndexInfo(indexRel); + InitBuildState(&buildstate, heapRel, indexRel, indexInfo, MAIN_FORKNUM, true); + buildstate.graph = &hnswshared->graphData; + buildstate.hnswarea = hnswarea; + buildstate.pqTable =
hnswshared->pqTable; + if (buildstate.enablePQ) { + buildstate.params->pqTable = hnswshared->pqTable; + } + buildstate.pqDistanceTable = hnswshared->pqDistanceTable; + InitAllocator(&buildstate.allocator, &HnswSharedMemoryAlloc, &buildstate); + scan = tableam_scan_begin_parallel(heapRel, &hnswshared->heapdesc); + reltuples = tableam_index_build_scan(heapRel, indexRel, indexInfo, true, BuildCallback, (void *)&buildstate, scan); + + /* Record statistics under the shared mutex */ + SpinLockAcquire(&hnswshared->mutex); + hnswshared->nparticipantsdone++; + hnswshared->reltuples += reltuples; + SpinLockRelease(&hnswshared->mutex); + + FreeBuildState(&buildstate, true); +} + +/* + * Perform work within a launched parallel process: open both relations with NoLock, run the scan-and-insert, then close them + */ +void HnswParallelBuildMain(const BgWorkerContext *bwc) +{ + HnswShared *hnswshared; + char *hnswarea; + Relation heapRel; + Relation indexRel; + + /* Look up shared state */ + hnswshared = (HnswShared *)bwc->bgshared; + + /* Open relations within worker */ + heapRel = heap_open(hnswshared->heaprelid, NoLock); + indexRel = index_open(hnswshared->indexrelid, NoLock); + + hnswarea = hnswshared->hnswarea; + + /* Perform inserts */ + HnswParallelScanAndInsert(heapRel, indexRel, hnswshared, hnswarea); + + /* Close relations within worker */ + index_close(indexRel, NoLock); + heap_close(heapRel, NoLock); +} + +/* + * End parallel build: free the shared PQ table, distance table and graph area, then the leader state, and finally call BgworkerListSyncQuit() + */ +static void HnswEndParallel(HnswLeader *hnswleader) +{ + HnswShared *hnswshared = hnswleader->hnswshared; + if (hnswshared) { + if (hnswshared->pqTable) { + pfree_ext(hnswshared->pqTable); + } + if (hnswshared->pqDistanceTable) { + pfree_ext(hnswshared->pqDistanceTable); + } + if (hnswshared->hnswarea) { + pfree_ext(hnswshared->hnswarea); + } + } + pfree_ext(hnswleader); + BgworkerListSyncQuit(); +} + +static HnswShared *HnswParallelInitshared(HnswBuildState *buildstate) +{ /* Allocate and fill the state shared with the workers: relation ids, copies of the PQ tables, the parallel scan descriptor and the graph area */ + HnswShared *hnswshared; + char *hnswarea; + Size esthnswarea; + Size estother; + char *pqTable; + float *pqDistanceTable; + errno_t rc; + uint32
pqDistanceTableSize = buildstate->pqM * buildstate->pqKsub * buildstate->pqKsub * sizeof(float); + + /* Allocate the zeroed shared build state in the instance-level storage memory context */ + hnswshared = + (HnswShared *)MemoryContextAllocZero(INSTANCE_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_STORAGE), sizeof(HnswShared)); + + /* Initialize immutable state */ + hnswshared->heaprelid = RelationGetRelid(buildstate->heap); + hnswshared->indexrelid = RelationGetRelid(buildstate->index); + hnswshared->pqDistanceTable = NULL; + if (buildstate->enablePQ) { + pqTable = (char *) MemoryContextAllocZero(INSTANCE_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_STORAGE), + buildstate->pqTableSize); + rc = memcpy_s(pqTable, buildstate->pqTableSize, buildstate->pqTable, buildstate->pqTableSize); + securec_check_c(rc, "\0", "\0"); + hnswshared->pqTable = pqTable; + if (buildstate->pqMode == HNSW_PQMODE_SDC) { + pqDistanceTable = (float *) MemoryContextAllocZero(INSTANCE_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_STORAGE), + pqDistanceTableSize); + rc = memcpy_s(pqDistanceTable, pqDistanceTableSize, buildstate->pqDistanceTable, pqDistanceTableSize); + securec_check_c(rc, "\0", "\0"); + hnswshared->pqDistanceTable = pqDistanceTable; + } + } else { + hnswshared->pqTable = NULL; + } + SpinLockInit(&hnswshared->mutex); + /* Initialize mutable state */ + hnswshared->nparticipantsdone = 0; + hnswshared->reltuples = 0; + HeapParallelscanInitialize(&hnswshared->heapdesc, buildstate->heap); + + /* Leave space for other objects in shared memory */ + /* Docker has a default limit of 64 MB for shm_size */ + /* which happens to be the default value of maintenance_work_mem */ + esthnswarea = u_sess->attr.attr_memory.maintenance_work_mem * 1024L; + estother = 3 * 1024 * 1024; + if (esthnswarea > estother) + esthnswarea -= estother; + + hnswarea = (char *)palloc0_huge(INSTANCE_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_STORAGE), esthnswarea); + /* Report 1 MB less than allocated as headroom (original note: so never fails) */ + InitGraph(&hnswshared->graphData, hnswarea, esthnswarea - 1024 * 1024); + +
hnswshared->graphData.memoryUsed += MAXALIGN(1); /* skips the start of the area - presumably so nothing is allocated at relative offset 0; confirm */ + + hnswshared->hnswarea = hnswarea; + return hnswshared; +} + +/* + * Begin parallel build: on success buildstate->hnswleader is set; when no worker starts, the shared state is torn down and hnswleader is left untouched + */ +static void HnswBeginParallel(HnswBuildState *buildstate, int request) +{ + HnswShared *hnswshared; + HnswLeader *hnswleader = (HnswLeader *)palloc0(sizeof(HnswLeader)); + + Assert(request > 0); + + hnswshared = HnswParallelInitshared(buildstate); + /* Launch workers, saving status for leader/caller */ + hnswleader->nparticipanttuplesorts = LaunchBackgroundWorkers(request, hnswshared, HnswParallelBuildMain, NULL); + hnswleader->hnswshared = hnswshared; + + /* If no workers were successfully launched, back out (do serial build) */ + if (hnswleader->nparticipanttuplesorts == 0) { + HnswEndParallel(hnswleader); + return; + } + + /* Log participants */ + ereport(DEBUG1, (errmsg("using %d parallel workers", hnswleader->nparticipanttuplesorts))); + + /* Save leader state now that it's clear build will be parallel */ + buildstate->hnswleader = hnswleader; +} + +/* + * Build graph: scan the heap (serially or through workers), flush the graph to pages if that has not happened yet, then end the parallel build. forkNum is not referenced in the body. + */ +static void BuildGraph(HnswBuildState *buildstate, ForkNumber forkNum) +{ + int parallel_workers = 0; + + /* Calculate parallel workers */ + if (buildstate->heap != NULL) { + parallel_workers = PlanCreateIndexWorkers(buildstate->heap, buildstate->indexInfo); + } + + /* Attempt to launch parallel worker scan when required */ + if (parallel_workers > 0) { + HnswBeginParallel(buildstate, parallel_workers); + } + + /* Add tuples to graph */ + if (buildstate->heap != NULL) { + if (!buildstate->hnswleader) { + serial_build: + buildstate->reltuples = tableam_index_build_scan(buildstate->heap, buildstate->index, buildstate->indexInfo, + true, BuildCallback, (void *)buildstate, NULL); + } else { + int nruns; + buildstate->reltuples = ParallelHeapScan(buildstate, &nruns); + if (nruns == 0) { + /* failed to start any bgworker, fall back to a serial build */ + goto serial_build; + } + } + + buildstate->indtuples = buildstate->graph->indtuples; + } + + /*
Flush pages unless an insert already flushed them */ + if (!buildstate->graph->flushed) { + FlushPages(buildstate); + } + + /* End parallel build */ + if (buildstate->hnswleader) { + HnswEndParallel(buildstate->hnswleader); + } +} + +/* + * Build the index: initialize the state, reject ustore heaps, build the PQ table when enabled, build the graph, call LogNewpageRange when needed, then free the state + */ +static void BuildIndex(Relation heap, Relation index, IndexInfo *indexInfo, HnswBuildState *buildstate, + ForkNumber forkNum) +{ +#ifdef HNSW_MEMORY + SeedRandom(42); +#endif + + InitBuildState(buildstate, heap, index, indexInfo, forkNum, false); + + if (buildstate->isUStore) { + ereport(ERROR, (errmsg("ustore table cannot support hnsw."))); + } + + if (buildstate->enablePQ) { + BuildPQtable(buildstate); + if (buildstate->pqMode == HNSW_PQMODE_SDC) { + int pqM = buildstate->pqM; + int pqKsub = buildstate->pqKsub; + buildstate->pqDistanceTable = (float *)palloc(pqM * pqKsub * pqKsub * sizeof(float)); + GetPQDistanceTableSdc(buildstate->params, buildstate->pqDistanceTable); + } + } + + BuildGraph(buildstate, forkNum); + + if (RelationNeedsWAL(index) || forkNum == INIT_FORKNUM) + LogNewpageRange(index, forkNum, 0, RelationGetNumberOfBlocksInFork(index, forkNum), true); + + FreeBuildState(buildstate, false); +} + +/* + * Build the index for a logged table: builds into the main fork from the heap and returns the heap and index tuple counts + */ +IndexBuildResult *hnswbuild_internal(Relation heap, Relation index, IndexInfo *indexInfo) +{ + IndexBuildResult *result; + HnswBuildState buildstate; + + BuildIndex(heap, index, indexInfo, &buildstate, MAIN_FORKNUM); + + result = (IndexBuildResult *)palloc(sizeof(IndexBuildResult)); + result->heap_tuples = buildstate.reltuples; + result->index_tuples = buildstate.indtuples; + + return result; +} + +/* + * Build the index for an unlogged table: builds the init fork with no heap, so no tuples are scanned + */ +void hnswbuildempty_internal(Relation index) +{ + IndexInfo *indexInfo = BuildIndexInfo(index); + HnswBuildState buildstate; + + BuildIndex(NULL, index, indexInfo, &buildstate, INIT_FORKNUM); +} diff --git a/src/gausskernel/storage/access/datavec/hnswdelete.cpp b/src/gausskernel/storage/access/datavec/hnswdelete.cpp new file mode 100644 index
0000000000000000000000000000000000000000..59a388580db3f41eb3343c22eb1ac4ba4e2213ea
--- /dev/null
+++ b/src/gausskernel/storage/access/datavec/hnswdelete.cpp
@@ -0,0 +1,297 @@
+/*
+ * Copyright (c) 2024 Huawei Technologies Co.,Ltd.
+ *
+ * openGauss is licensed under Mulan PSL v2.
+ * You can use this software according to the terms and conditions of the Mulan PSL v2.
+ * You may obtain a copy of Mulan PSL v2 at:
+ *
+ *          http://license.coscl.org.cn/MulanPSL2
+ *
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PSL v2 for more details.
+ * -------------------------------------------------------------------------
+ *
+ * hnswdelete.cpp
+ *
+ * IDENTIFICATION
+ *        src/gausskernel/storage/access/datavec/hnswdelete.cpp
+ *
+ * -------------------------------------------------------------------------
+ */
+#include "access/ubtree.h"
+#include "access/datavec/hnsw.h"
+#include "access/datavec/vecindex.h"
+
+/*
+ * Compare two heap TID arrays slot by slot.
+ *
+ * Both arrays are read for HNSW_HEAPTIDS entries. Two slots match when both are
+ * invalid, or both are valid and equal; the first mismatch ends the comparison.
+ */
+bool HnswIsTIDEquals(ItemPointer p1, ItemPointer p2)
+{
+    for (int id = 0; id < HNSW_HEAPTIDS; id++) {
+        bool valid1 = ItemPointerIsValid(&p1[id]);
+        bool valid2 = ItemPointerIsValid(&p2[id]);
+
+        /* One slot is in use and the other is empty */
+        if (valid1 != valid2) {
+            return false;
+        }
+
+        if (valid1 && !ItemPointerEquals(&p1[id], &p2[id])) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+/*
+ * Compare the values stored in two element tuples.
+ *
+ * Returns false when either tuple is NULL or the MAXALIGNed value sizes differ;
+ * otherwise the values are compared bytewise over that aligned length.
+ */
+bool HnswIsETUPEqual(HnswElementTuple etup1, HnswElementTuple etup2)
+{
+    if (etup1 == NULL || etup2 == NULL) {
+        return false;
+    }
+    Size len1 = MAXALIGN(VARSIZE_ANY(&etup1->data));
+    Size len2 = MAXALIGN(VARSIZE_ANY(&etup2->data));
+    if (len1 == 0 || len2 == 0 || len1 != len2) {
+        return false;
+    }
+    return memcmp(&etup1->data, &etup2->data, len1) == 0;
+}
+
+/*
+ * Find the offset of the live element tuple on a page that matches etup.
+ *
+ * A candidate must carry the same heap TIDs and the same value as etup, must not
+ * be reported dead by VecItupGetXminXmax and must not have a valid xmax yet.
+ * Returns InvalidOffsetNumber when the page holds no such tuple. Raises ERROR
+ * for a GLOBAL partitioned index.
+ */
+OffsetNumber HnswFindDeleteLocation(Relation index, Buffer buf, HnswElementTuple etup)
+{
+    OffsetNumber off;
+    OffsetNumber maxOff;
+    Page page;
+    TransactionId xmin;
+    TransactionId xmax;
+
+    page = BufferGetPage(buf);
+    maxOff = PageGetMaxOffsetNumber(page);
+
+    if (RelationIsGlobalIndex(index)) {
+        elog(ERROR, "the GLOBAL partitioned index is not supported.\n");
+    }
+
+    /* maxOff is itself a valid offset, so the scan has to include it */
+    for (off = FirstOffsetNumber; off <= maxOff; off = OffsetNumberNext(off)) {
+        ItemId iid;
+        HnswElementTuple tup;
+
+        iid = PageGetItemId(page, off);
+        if (ItemIdIsDead(iid)) {
+            continue;
+        }
+
+        tup = (HnswElementTuple)PageGetItem(page, iid);
+        /* Skip neighbor tuples, which share pages with element tuples */
+        if (!HnswIsElementTuple(tup)) {
+            continue;
+        }
+
+        if (!HnswIsTIDEquals(etup->heaptids, tup->heaptids)) {
+            continue;
+        }
+
+        if (!HnswIsETUPEqual(etup, tup)) {
+            continue;
+        }
+
+        bool xminCommitted = false;
+        bool xmaxCommitted = false;
+        bool isDead = VecItupGetXminXmax(page, off, InvalidTransactionId, &xmin, &xmax, &xminCommitted,
+                                         &xmaxCommitted, RelationGetNamespace(index) == PG_TOAST_NAMESPACE);
+        if (!isDead && !TransactionIdIsValid(xmax)) {
+            return off;
+        }
+    }
+    return InvalidOffsetNumber;
+}
+
+/*
+ * Stamp the element tuple at offset with the current transaction as its xmax.
+ *
+ * The buffer is marked dirty; no WAL record is written in this function.
+ * NOTE(review): confirm the change is logged elsewhere if it must be durable.
+ */
+void HnswDeleteOnPage(Relation index, Buffer buf, OffsetNumber offset)
+{
+    ItemId iid;
+    IndexTransInfo *idxXid;
+    Page page;
+    HnswElementTuple etup;
+
+    page = BufferGetPage(buf);
+    iid = PageGetItemId(page, offset);
+    etup = (HnswElementTuple)PageGetItem(page, iid);
+    idxXid = (IndexTransInfo *)VecIndexTupleGetXid(etup);
+
+    idxXid->xmax = GetCurrentTransactionId();
+
+    MarkBufferDirty(buf);
+}
+
+/*
+ * Report whether (blkno, offno) is the entry point recorded on the meta page.
+ *
+ * The meta page is read under a share lock that is released before returning.
+ */
+bool IsHnswEntryPoint(Relation index, BlockNumber blkno, OffsetNumber offno)
+{
+    Buffer buf;
+    Page page;
+    HnswMetaPage metap;
+    bool res = false;
+
+    buf = ReadBuffer(index, HNSW_METAPAGE_BLKNO);
+    LockBuffer(buf, BUFFER_LOCK_SHARE);
+    page = BufferGetPage(buf);
+    metap = HnswPageGetMeta(page);
+    if (blkno == metap->entryBlkno && offno == metap->entryOffno) {
+        res = true;
+    }
+    UnlockReleaseBuffer(buf);
+    return res;
+}
+
+/*
+ * Locate the on-disk element matching etup and mark it deleted.
+ *
+ * Searches each layer from the entry point with ef = 1 to pick a start block,
+ * then follows nextblkno links from there until a page yields a matching tuple
+ * that is not the entry point. Returns true when a tuple was marked, false when
+ * none was found or the index has no entry point.
+ */
+bool HnswDeleteIndex(Relation index, HnswElementTuple etup)
+{
+    bool found = false;
+    BlockNumber blkno = InvalidBlockNumber;
+    Buffer buf;
+    char *base = NULL;
+    Datum q;
+    List *ep;
+    ListCell *cell;
+    int m;
+    HnswElement entryPoint;
+    FmgrInfo *procinfo;
+    Oid collation;
+    OffsetNumber offset;
+    Page page;
+
+    /* NOTE(review): support function 1 is presumably HNSW_DISTANCE_PROC - confirm */
+    procinfo = index_getprocinfo(index, 1, 1);
+    collation = index->rd_indcollation[0];
+    q = (Datum)(&etup->data);
+    HnswGetMetaPageInfo(index, &m, &entryPoint);
+
+    /* Without an entry point there is nothing to search, so nothing to delete */
+    if (entryPoint == NULL) {
+        return false;
+    }
+
+    ep = list_make1(HnswEntryCandidate(base, entryPoint, q, index, procinfo, collation, false, NULL));
+
+    for (int lc = entryPoint->level; lc >= 0; lc--) {
+        ep = HnswSearchLayer(base, q, ep, 1, lc, index, procinfo, collation, m, false, NULL);
+    }
+
+    /* Start the page scan at the block of the last candidate returned */
+    foreach (cell, ep) {
+        HnswCandidate *hc = (HnswCandidate *)lfirst(cell);
+        HnswElement element = (HnswElement)HnswPtrAccess(base, hc->element);
+        blkno = element->blkno;
+    }
+
+    while (BlockNumberIsValid(blkno)) {
+        buf = ReadBuffer(index, blkno);
+        LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+        offset = HnswFindDeleteLocation(index, buf, etup);
+        if (offset != InvalidOffsetNumber && !IsHnswEntryPoint(index, blkno, offset)) {
+            HnswDeleteOnPage(index, buf, offset);
+            UnlockReleaseBuffer(buf);
+            found = true;
+            break;
+        }
+
+        page = BufferGetPage(buf);
+        blkno = HnswPageGetOpaque(page)->nextblkno;
+        UnlockReleaseBuffer(buf);
+    }
+    return found;
+}
+
+/*
+ * Build a probe element tuple from an index datum and a heap TID.
+ *
+ * Only heaptids and data are filled in: slot 0 receives heapTCtid, the other
+ * slots are invalidated and the detoasted values[0] is copied into data. The
+ * tuple is palloc0'd with HNSW_TUPLE_ALLOC_SIZE bytes and owned by the caller.
+ * tupleDesc and isnull are not consulted, so values[0] must not be NULL.
+ */
+HnswElementTuple IndexFormHnswElementTuple(TupleDesc tupleDesc, Datum *values, const bool *isnull,
+                                           ItemPointer heapTCtid)
+{
+    Datum value;
+    Size valueSize;
+    Size dataCapacity;
+    HnswElementTuple etup;
+    errno_t rc = EOK;
+
+    value = PointerGetDatum(PG_DETOAST_DATUM(values[0]));
+    valueSize = VARSIZE_ANY(DatumGetPointer(value));
+
+    etup = (HnswElementTuple)palloc0(HNSW_TUPLE_ALLOC_SIZE);
+
+    etup->heaptids[0] = *heapTCtid;
+    for (int i = 1; i < HNSW_HEAPTIDS; i++) {
+        ItemPointerSetInvalid(&etup->heaptids[i]);
+    }
+
+    /* Bound the copy by the room left in the allocation, not by the source size */
+    dataCapacity = (Size)(((char *)etup + HNSW_TUPLE_ALLOC_SIZE) - (char *)&etup->data);
+    rc = memcpy_s(&etup->data, dataCapacity, DatumGetPointer(value), valueSize);
+    securec_check(rc, "\0", "\0");
+    return etup;
+}
+
+/*
+ * Delete the index entry for a heap tuple.
+ *
+ * Returns true when a matching element tuple was found and marked deleted.
+ * A NULL key is reported as not found without touching the index; the build
+ * callback skips NULL keys as well. isRollbackIndex is not used here.
+ */
+bool hnswdelete_internal(Relation index, Datum *values, const bool *isnull, ItemPointer heapTCtid, bool isRollbackIndex)
+{
+    bool found;
+    HnswElementTuple etup;
+
+    /* A NULL datum cannot be detoasted */
+    if (isnull[0]) {
+        return false;
+    }
+
+    etup = IndexFormHnswElementTuple(RelationGetDescr(index), values, isnull, heapTCtid);
+    found = HnswDeleteIndex(index, etup);
+    pfree(etup);
+
+    return found;
+}
diff --git a/src/gausskernel/storage/access/datavec/hnswinsert.cpp b/src/gausskernel/storage/access/datavec/hnswinsert.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..10ad3f054e3612c061d0707eee16656f48ad6d25
--- /dev/null
+++ b/src/gausskernel/storage/access/datavec/hnswinsert.cpp
@@ -0,0 +1,713 @@
+/*
+ * Copyright (c) 2024 Huawei Technologies Co.,Ltd.
+ *
+ * openGauss is licensed under Mulan PSL v2.
+ * You can use this software according to the terms and conditions of the Mulan PSL v2.
+ * You may obtain a copy of Mulan PSL v2 at:
+ *
+ *          http://license.coscl.org.cn/MulanPSL2
+ *
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PSL v2 for more details.
+ * ------------------------------------------------------------------------- + * + * hnswinsert.cpp + * + * IDENTIFICATION + * src/gausskernel/storage/access/datavec/hnswinsert.cpp + * + * ------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include + +#include "access/generic_xlog.h" +#include "access/xact.h" +#include "access/datavec/hnsw.h" +#include "storage/buf/bufmgr.h" +#include "storage/lmgr.h" +#include "utils/datum.h" +#include "utils/memutils.h" + +/* + * Get the insert page + */ +static BlockNumber GetInsertPage(Relation index) +{ + Buffer buf; + Page page; + HnswMetaPage metap; + BlockNumber insertPage; + + buf = ReadBuffer(index, HNSW_METAPAGE_BLKNO); + LockBuffer(buf, BUFFER_LOCK_SHARE); + page = BufferGetPage(buf); + metap = HnswPageGetMeta(page); + + insertPage = metap->insertPage; + + UnlockReleaseBuffer(buf); + + return insertPage; +} + +/* + * Check for a free offset + */ +static bool HnswFreeOffset(Relation index, Buffer buf, Page page, HnswElement element, Size ntupSize, Buffer *nbuf, + Page *npage, OffsetNumber *freeOffno, OffsetNumber *freeNeighborOffno, + BlockNumber *newInsertPage) +{ + OffsetNumber offno; + OffsetNumber maxoffno = PageGetMaxOffsetNumber(page); + + for (offno = FirstOffsetNumber; offno <= maxoffno; offno = OffsetNumberNext(offno)) { + HnswElementTuple etup = (HnswElementTuple)PageGetItem(page, PageGetItemId(page, offno)); + /* Skip neighbor tuples */ + if (!HnswIsElementTuple(etup)) + continue; + + if (etup->deleted) { + BlockNumber elementPage = BufferGetBlockNumber(buf); + BlockNumber neighborPage = ItemPointerGetBlockNumber(&etup->neighbortid); + OffsetNumber neighborOffno = ItemPointerGetOffsetNumber(&etup->neighbortid); + ItemId itemid; + + if (!BlockNumberIsValid(*newInsertPage)) + *newInsertPage = elementPage; + + if (neighborPage == elementPage) { + *nbuf = buf; + *npage = page; + } else { + *nbuf = ReadBuffer(index, neighborPage); + LockBuffer(*nbuf, 
BUFFER_LOCK_EXCLUSIVE); + + /* Skip WAL for now */ + *npage = BufferGetPage(*nbuf); + } + + itemid = PageGetItemId(*npage, neighborOffno); + /* Check for space on neighbor tuple page */ + if (PageGetFreeSpace(*npage) + ItemIdGetLength(itemid) - sizeof(ItemIdData) >= ntupSize) { + *freeOffno = offno; + *freeNeighborOffno = neighborOffno; + return true; + } else if (*nbuf != buf) + UnlockReleaseBuffer(*nbuf); + } + } + + return false; +} + +/* + * Add a new page + */ +static void HnswInsertAppendPage(Relation index, Buffer *nbuf, Page *npage, GenericXLogState *state, Page page, + bool building) +{ + /* Add a new page */ + LockRelationForExtension(index, ExclusiveLock); + *nbuf = HnswNewBuffer(index, MAIN_FORKNUM); + UnlockRelationForExtension(index, ExclusiveLock); + + /* Init new page */ + if (building) + *npage = BufferGetPage(*nbuf); + else + *npage = GenericXLogRegisterBuffer(state, *nbuf, GENERIC_XLOG_FULL_IMAGE); + + HnswInitPage(*nbuf, *npage); + + /* Update previous buffer */ + HnswPageGetOpaque(page)->nextblkno = BufferGetBlockNumber(*nbuf); +} + +/* + * Add to element and neighbor pages + */ +static void AddElementOnDisk(Relation index, HnswElement e, int m, BlockNumber insertPage, + BlockNumber *updatedInsertPage, bool building) +{ + Buffer buf; + Page page; + GenericXLogState *state; + Size etupSize; + Size ntupSize; + Size combinedSize; + Size maxSize; + Size minCombinedSize; + HnswElementTuple etup; + BlockNumber currentPage = insertPage; + HnswNeighborTuple ntup; + Buffer nbuf; + Page npage; + OffsetNumber freeOffno = InvalidOffsetNumber; + OffsetNumber freeNeighborOffno = InvalidOffsetNumber; + BlockNumber newInsertPage = InvalidBlockNumber; + char *base = NULL; + bool isUStore; + IndexTransInfo *idxXid; + bool enablePQ; + Size pqcodesSize; + + /* Get enablePQ and pqcodeSize info from metapage */ + Buffer metaBuf = ReadBuffer(index, HNSW_METAPAGE_BLKNO); + LockBuffer(metaBuf, BUFFER_LOCK_SHARE); + HnswMetaPage metap = 
HnswPageGetMeta(BufferGetPage(metaBuf)); + enablePQ = metap->enablePQ; + pqcodesSize = metap->pqcodeSize; + UnlockReleaseBuffer(metaBuf); + + /* Calculate sizes */ + etupSize = HNSW_ELEMENT_TUPLE_SIZE(VARSIZE_ANY(HnswPtrAccess(base, e->value))); + ntupSize = HNSW_NEIGHBOR_TUPLE_SIZE(e->level, m); + combinedSize = etupSize + MAXALIGN(pqcodesSize) + ntupSize + sizeof(ItemIdData); + maxSize = HNSW_MAX_SIZE; + minCombinedSize = etupSize + MAXALIGN(pqcodesSize) + + HNSW_NEIGHBOR_TUPLE_SIZE(0, m) + sizeof(ItemIdData); + + /* Prepare element tuple */ + etup = (HnswElementTuple)palloc0(etupSize); + HnswSetElementTuple(base, etup, e); + + /* Prepare neighbor tuple */ + ntup = (HnswNeighborTuple)palloc0(ntupSize); + HnswSetNeighborTuple(base, ntup, e, m); + + /* Find a page (or two if needed) to insert the tuples */ + for (;;) { + buf = ReadBuffer(index, currentPage); + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + + if (building) { + state = NULL; + page = BufferGetPage(buf); + } else { + state = GenericXLogStart(index); + page = GenericXLogRegisterBuffer(state, buf, 0); + } + + isUStore = HnswPageGetOpaque(page)->pageType == HNSW_USTORE_PAGE_TYPE; + /* Keep track of first page where element at level 0 can fit */ + if (!BlockNumberIsValid(newInsertPage) && PageGetFreeSpace(page) >= minCombinedSize) { + newInsertPage = currentPage; + } + + /* First, try the fastest path */ + /* Space for both tuples on the current page */ + /* This can split existing tuples in rare cases */ + if (PageGetFreeSpace(page) >= combinedSize) { + nbuf = buf; + npage = page; + break; + } + + /* Next, try space from a deleted element */ + if (HnswFreeOffset(index, buf, page, e, ntupSize, &nbuf, &npage, &freeOffno, &freeNeighborOffno, + &newInsertPage)) { + if (nbuf != buf) { + if (building) { + npage = BufferGetPage(nbuf); + } else { + npage = GenericXLogRegisterBuffer(state, nbuf, 0); + } + } + + break; + } + + /* Finally, try space for element only if last page */ + /* Skip if both tuples can fit on 
the same page */ + if (combinedSize > maxSize && PageGetFreeSpace(page) >= etupSize + MAXALIGN(pqcodesSize) && + !BlockNumberIsValid(HnswPageGetOpaque(page)->nextblkno)) { + HnswInsertAppendPage(index, &nbuf, &npage, state, page, building); + if (isUStore) { + HnswPageGetOpaque(npage)->pageType = HNSW_USTORE_PAGE_TYPE; + } + break; + } + + currentPage = HnswPageGetOpaque(page)->nextblkno; + if (BlockNumberIsValid(currentPage)) { + /* Move to next page */ + if (!building) + GenericXLogAbort(state); + UnlockReleaseBuffer(buf); + } else { + Buffer newbuf; + Page newpage; + + HnswInsertAppendPage(index, &newbuf, &newpage, state, page, building); + if (isUStore) { + HnswPageGetOpaque(newpage)->pageType = HNSW_USTORE_PAGE_TYPE; + } + /* Commit */ + if (building) { + MarkBufferDirty(buf); + } else { + GenericXLogFinish(state); + } + + /* Unlock previous buffer */ + UnlockReleaseBuffer(buf); + + /* Prepare new buffer */ + buf = newbuf; + if (building) { + state = NULL; + page = BufferGetPage(buf); + } else { + state = GenericXLogStart(index); + page = GenericXLogRegisterBuffer(state, buf, 0); + } + + /* Create new page for neighbors if needed */ + if (PageGetFreeSpace(page) < combinedSize) { + HnswInsertAppendPage(index, &nbuf, &npage, state, page, building); + if (isUStore) { + HnswPageGetOpaque(npage)->pageType = HNSW_USTORE_PAGE_TYPE; + } + } else { + nbuf = buf; + npage = page; + } + + break; + } + } + + e->blkno = BufferGetBlockNumber(buf); + e->neighborPage = BufferGetBlockNumber(nbuf); + + /* Added tuple to new page if newInsertPage is not set */ + /* So can set to neighbor page instead of element page */ + if (!BlockNumberIsValid(newInsertPage)) { + newInsertPage = e->neighborPage; + } + + if (OffsetNumberIsValid(freeOffno)) { + e->offno = freeOffno; + e->neighborOffno = freeNeighborOffno; + } else { + e->offno = OffsetNumberNext(PageGetMaxOffsetNumber(page)); + if (nbuf == buf) { + e->neighborOffno = OffsetNumberNext(e->offno); + } else { + e->neighborOffno =
FirstOffsetNumber; + } + } + + ItemPointerSet(&etup->neighbortid, e->neighborPage, e->neighborOffno); + + /* Add element and neighbors */ + if (OffsetNumberIsValid(freeOffno)) { + if (enablePQ || isUStore) { + ItemId item_id = PageGetItemId(page, e->offno); + Size aligned_size = MAXALIGN(ItemIdGetLength(item_id)); + unsigned offset = ItemIdGetOffset(item_id); + char *itemtail = (char *)page + offset + aligned_size; + if (enablePQ) { + Pointer codePtr = (Pointer)HnswPtrAccess(base, e->pqcodes); + errno_t rc = memcpy_s(itemtail, pqcodesSize, codePtr, pqcodesSize); + securec_check_c(rc, "\0", "\0"); + } + if (isUStore) { + idxXid = (IndexTransInfo *)(itemtail + MAXALIGN(pqcodesSize)); + idxXid->xmin = GetCurrentTransactionId(); + idxXid->xmax = InvalidTransactionId; + } + } + if (!page_index_tuple_overwrite(page, e->offno, (Item)etup, etupSize)) { + elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index)); + } + + if (!page_index_tuple_overwrite(npage, e->neighborOffno, (Item)ntup, ntupSize)) { + elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index)); + } + } else { + if (enablePQ) { + ((PageHeader)page)->pd_upper -= MAXALIGN(pqcodesSize); + Pointer codePtr = (Pointer)HnswPtrAccess(base, e->pqcodes); + errno_t rc = memcpy_s(((char *)page) + ((PageHeader)page)->pd_upper, + pqcodesSize, codePtr, pqcodesSize); + securec_check_c(rc, "\0", "\0"); + } + if (isUStore) { + ((PageHeader)page)->pd_upper -= sizeof(IndexTransInfo); + idxXid = (IndexTransInfo *)(((char *)page) + ((PageHeader)page)->pd_upper); + idxXid->xmin = GetCurrentTransactionId(); + idxXid->xmax = InvalidTransactionId; + } + if (PageAddItem(page, (Item)etup, etupSize, InvalidOffsetNumber, false, false) != e->offno) { + elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index)); + } + + if (PageAddItem(npage, (Item)ntup, ntupSize, InvalidOffsetNumber, false, false) != e->neighborOffno) { + elog(ERROR, "failed to add index item to 
\"%s\"", RelationGetRelationName(index)); + } + } + + /* Commit */ + if (building) { + MarkBufferDirty(buf); + if (nbuf != buf) + MarkBufferDirty(nbuf); + } else { + GenericXLogFinish(state); + } + UnlockReleaseBuffer(buf); + if (nbuf != buf) + UnlockReleaseBuffer(nbuf); + + /* Update the insert page */ + if (BlockNumberIsValid(newInsertPage) && newInsertPage != insertPage) + *updatedInsertPage = newInsertPage; +} + +/* + * Check if connection already exists + */ +static bool ConnectionExists(HnswElement e, HnswNeighborTuple ntup, int startIdx, int lm) +{ + for (int i = 0; i < lm; i++) { + ItemPointer indextid = &ntup->indextids[startIdx + i]; + + if (!ItemPointerIsValid(indextid)) { + break; + } + + if (ItemPointerGetBlockNumber(indextid) == e->blkno && ItemPointerGetOffsetNumber(indextid) == e->offno) { + return true; + } + } + + return false; +} + +/* + * Update neighbors + */ +void HnswUpdateNeighborsOnDisk(Relation index, FmgrInfo *procinfo, Oid collation, HnswElement e, int m, + bool checkExisting, bool building) +{ + char *base = NULL; + + for (int lc = e->level; lc >= 0; lc--) { + int lm = HnswGetLayerM(m, lc); + HnswNeighborArray *neighbors = HnswGetNeighbors(base, e, lc); + + for (int i = 0; i < neighbors->length; i++) { + HnswCandidate *hc = &neighbors->items[i]; + Buffer buf; + Page page; + GenericXLogState *state; + HnswNeighborTuple ntup; + int idx = -1; + int startIdx; + HnswElement neighborElement = (HnswElement)HnswPtrAccess(base, hc->element); + OffsetNumber offno = neighborElement->neighborOffno; + + /* Get latest neighbors since they may have changed */ + /* Do not lock yet since selecting neighbors can take time */ + HnswLoadNeighbors(neighborElement, index, m); + + /* + * Could improve performance for vacuuming by checking neighbors + * against list of elements being deleted to find index. It's + * important to exclude already deleted elements for this since + * they can be replaced at any time. 
+ */ + + /* Select neighbors */ + HnswUpdateConnection(NULL, e, hc, lm, lc, &idx, index, procinfo, collation); + + /* New element was not selected as a neighbor */ + if (idx == -1) + continue; + + /* Register page */ + buf = ReadBuffer(index, neighborElement->neighborPage); + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + if (building) { + state = NULL; + page = BufferGetPage(buf); + } else { + state = GenericXLogStart(index); + page = GenericXLogRegisterBuffer(state, buf, 0); + } + + /* Get tuple */ + ntup = (HnswNeighborTuple)PageGetItem(page, PageGetItemId(page, offno)); + + /* Calculate index for update */ + startIdx = (neighborElement->level - lc) * m; + + /* Check for existing connection */ + if (checkExisting && ConnectionExists(e, ntup, startIdx, lm)) + idx = -1; + else if (idx == -2) { + /* Find free offset if still exists */ + /* TODO Retry updating connections if not */ + for (int j = 0; j < lm; j++) { + if (!ItemPointerIsValid(&ntup->indextids[startIdx + j])) { + idx = startIdx + j; + break; + } + } + } else + idx += startIdx; + + /* Make robust to issues */ + if (idx >= 0 && idx < ntup->count) { + ItemPointer indextid = &ntup->indextids[idx]; + + /* Update neighbor on the buffer */ + ItemPointerSet(indextid, e->blkno, e->offno); + + /* Commit */ + if (building) + MarkBufferDirty(buf); + else + GenericXLogFinish(state); + } else if (!building) + GenericXLogAbort(state); + + UnlockReleaseBuffer(buf); + } + } +} + +/* + * Add a heap TID to an existing element + */ +static bool AddDuplicateOnDisk(Relation index, HnswElement element, HnswElement dup, bool building) +{ + Buffer buf; + Page page; + GenericXLogState *state; + HnswElementTuple etup; + int i; + + /* Read page */ + buf = ReadBuffer(index, dup->blkno); + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + if (building) { + state = NULL; + page = BufferGetPage(buf); + } else { + state = GenericXLogStart(index); + page = GenericXLogRegisterBuffer(state, buf, 0); + } + + /* Find space */ + etup = 
(HnswElementTuple)PageGetItem(page, PageGetItemId(page, dup->offno)); + for (i = 0; i < HNSW_HEAPTIDS; i++) { + if (!ItemPointerIsValid(&etup->heaptids[i])) + break; + } + + /* Either being deleted or we lost our chance to another backend */ + if (i == 0 || i == HNSW_HEAPTIDS) { + if (!building) + GenericXLogAbort(state); + UnlockReleaseBuffer(buf); + return false; + } + + /* Add heap TID, modifying the tuple on the page directly */ + etup->heaptids[i] = element->heaptids[0]; + + /* Commit */ + if (building) + MarkBufferDirty(buf); + else + GenericXLogFinish(state); + UnlockReleaseBuffer(buf); + + return true; +} + +/* + * Find duplicate element + */ +static bool FindDuplicateOnDisk(Relation index, HnswElement element, bool building) +{ + char *base = NULL; + HnswNeighborArray *neighbors = HnswGetNeighbors(base, element, 0); + Datum value = HnswGetValue(base, element); + + for (int i = 0; i < neighbors->length; i++) { + HnswCandidate *neighbor = &neighbors->items[i]; + HnswElement neighborElement = (HnswElement)HnswPtrAccess(base, neighbor->element); + Datum neighborValue = HnswGetValue(base, neighborElement); + /* Exit early since ordered by distance */ + if (!datumIsEqual(value, neighborValue, false, -1)) + return false; + + if (AddDuplicateOnDisk(index, element, neighborElement, building)) + return true; + } + + return false; +} + +/* + * Update graph on disk + */ +static void UpdateGraphOnDisk(Relation index, FmgrInfo *procinfo, Oid collation, HnswElement element, int m, + int efConstruction, HnswElement entryPoint, bool building) +{ + BlockNumber newInsertPage = InvalidBlockNumber; + + /* Look for duplicate */ + if (FindDuplicateOnDisk(index, element, building)) { + return; + } + + /* Add element */ + AddElementOnDisk(index, element, m, GetInsertPage(index), &newInsertPage, building); + + /* Update insert page if needed */ + if (BlockNumberIsValid(newInsertPage)) { + HnswUpdateMetaPage(index, 0, NULL, newInsertPage, MAIN_FORKNUM, building); + } + + /* Update 
neighbors */ + HnswUpdateNeighborsOnDisk(index, procinfo, collation, element, m, false, building); + + /* Update entry point if needed */ + if (entryPoint == NULL || element->level > entryPoint->level) { + HnswUpdateMetaPage(index, HNSW_UPDATE_ENTRY_GREATER, element, InvalidBlockNumber, MAIN_FORKNUM, building); + } +} + +/* + * Insert a tuple into the index + */ +bool HnswInsertTupleOnDisk(Relation index, Datum value, Datum *values, const bool *isnull, ItemPointer heap_tid, + bool building) +{ + HnswElement entryPoint; + HnswElement element; + int m; + int efConstruction = HnswGetEfConstruction(index); + FmgrInfo *procinfo = index_getprocinfo(index, 1, HNSW_DISTANCE_PROC); + Oid collation = index->rd_indcollation[0]; + LOCKMODE lockmode = ShareLock; + char *base = NULL; + PQParams params; + bool enablePQ; + int dim = TupleDescAttr(index->rd_att, 0)->atttypmod; + + /* + * Get a shared lock. This allows vacuum to ensure no in-flight inserts + * before repairing graph. Use a page lock so it does not interfere with + * buffer lock (or reads when vacuuming). 
+ */ + LockPage(index, HNSW_UPDATE_LOCK, lockmode); + + /* Get m and entry point */ + HnswGetMetaPageInfo(index, &m, &entryPoint); + + /* Create an element */ + element = HnswInitElement(base, heap_tid, m, HnswGetMl(m), HnswGetMaxLevel(m), NULL); + HnswPtrStore(base, element->value, DatumGetPointer(value)); + + /* Prevent concurrent inserts when likely updating entry point */ + if (entryPoint == NULL || element->level > entryPoint->level) { + /* Release shared lock */ + UnlockPage(index, HNSW_UPDATE_LOCK, lockmode); + + /* Get exclusive lock */ + lockmode = ExclusiveLock; + LockPage(index, HNSW_UPDATE_LOCK, lockmode); + + /* Get latest entry point after lock is acquired */ + entryPoint = HnswGetEntryPoint(index); + } + + InitPQParamsOnDisk(&params, index, procinfo, dim, &enablePQ); + + Pointer codePtr = NULL; + if (enablePQ) { + Size codesize = params.pqM * sizeof(uint8); + codePtr = (Pointer)HnswAlloc(NULL, codesize); + } + HnswPtrStore(base, element->pqcodes, codePtr); + + /* Find neighbors for element */ + HnswFindElementNeighbors(base, element, entryPoint, index, procinfo, collation, m, + efConstruction, false, enablePQ, &params); + + /* Update graph on disk */ + UpdateGraphOnDisk(index, procinfo, collation, element, m, efConstruction, entryPoint, building); + + /* Release lock */ + UnlockPage(index, HNSW_UPDATE_LOCK, lockmode); + + return true; +} + +/* + * Insert a tuple into the index + */ +static void HnswInsertTuple(Relation index, Datum *values, bool *isnull, ItemPointer heap_tid) +{ + Datum value; + const HnswTypeInfo *typeInfo = HnswGetTypeInfo(index); + FmgrInfo *normprocinfo; + Oid collation = index->rd_indcollation[0]; + + /* Detoast once for all calls */ + value = PointerGetDatum(PG_DETOAST_DATUM(values[0])); + + /* Check value */ + if (typeInfo->checkValue != NULL) { + typeInfo->checkValue(DatumGetPointer(value)); + } + + /* Normalize if needed */ + normprocinfo = HnswOptionalProcInfo(index, HNSW_NORM_PROC); + if (normprocinfo != NULL) { + if
(!HnswCheckNorm(normprocinfo, collation, value)) { + return; + } + + value = HnswNormValue(typeInfo, collation, value); + } + + HnswInsertTupleOnDisk(index, value, values, isnull, heap_tid, false); +} + +/* + * Insert a tuple into the index + */ +bool hnswinsert_internal(Relation index, Datum *values, bool *isnull, ItemPointer heap_tid, Relation heap, + IndexUniqueCheck checkUnique) +{ + MemoryContext oldCtx; + MemoryContext insertCtx; + + /* Skip nulls */ + if (isnull[0]) { + return false; + } + + /* Create memory context */ + insertCtx = AllocSetContextCreate(CurrentMemoryContext, "Hnsw insert temporary context", ALLOCSET_DEFAULT_SIZES); + oldCtx = MemoryContextSwitchTo(insertCtx); + + /* Insert tuple */ + HnswInsertTuple(index, values, isnull, heap_tid); + + /* Delete memory context */ + MemoryContextSwitchTo(oldCtx); + MemoryContextDelete(insertCtx); + + return false; +} diff --git a/src/gausskernel/storage/access/datavec/hnswscan.cpp b/src/gausskernel/storage/access/datavec/hnswscan.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c71093be34d3c09761641c3c325a0535add00e2f --- /dev/null +++ b/src/gausskernel/storage/access/datavec/hnswscan.cpp @@ -0,0 +1,296 @@ +/* + * Copyright (c) 2024 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * ------------------------------------------------------------------------- + * + * hnswscan.cpp + * + * IDENTIFICATION + * src/gausskernel/storage/access/datavec/hnswscan.cpp + * + * ------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/relscan.h" +#include "access/datavec/hnsw.h" +#include "pgstat.h" +#include "storage/buf/bufmgr.h" +#include "storage/lmgr.h" +#include "utils/memutils.h" + +/* + * Algorithm 5 from paper + */ +static List *GetScanItems(IndexScanDesc scan, Datum q) +{ + HnswScanOpaque so = (HnswScanOpaque)scan->opaque; + Relation index = scan->indexRelation; + FmgrInfo *procinfo = so->procinfo; + Oid collation = so->collation; + List *ep; + List *w; + int m; + HnswElement entryPoint; + char *base = NULL; + PQParams *params = &so->params; + bool enablePQ = so->enablePQ; + int hnswEfSearch = so->length; + int pqMode = so->pqMode; + /* Get m and entry point */ + HnswGetMetaPageInfo(index, &m, &entryPoint); + + if (entryPoint == NULL) + return NIL; + + if (enablePQ) { + uint8* qPQCode; + PQSearchInfo pqinfo; + float *query = DatumGetVector(q)->x; + + pqinfo.params = *params; + if (pqMode == HNSW_PQMODE_SDC) { + qPQCode = (uint8 *)palloc(params->pqM * sizeof(uint8)); + ComputeVectorPQCode(query, params, qPQCode); + pqinfo.qPQCode = qPQCode; + pqinfo.pqDistanceTable = index->pqDistanceTable; + } else { + pqinfo.qPQCode = NULL; + pqinfo.pqDistanceTable = (float*) palloc(params->pqM * params->pqKsub * sizeof(float)); + GetPQDistanceTableAdc(query, params, pqinfo.pqDistanceTable); + } + + pqinfo.pqMode = pqMode; + pqinfo.lc = entryPoint->level; + ep = list_make1(HnswEntryCandidate( + base, entryPoint, q, index, procinfo, collation, false, NULL, enablePQ, &pqinfo)); + for (int lc = entryPoint->level; lc >= 1; lc--) { + pqinfo.lc = lc; + w = HnswSearchLayer(base, q, ep, 1, lc, index, procinfo, collation, m, false, NULL, NULL, enablePQ, &pqinfo); + ep = w; + } + pqinfo.lc = 0; + w = 
HnswSearchLayer(base, q, ep, hnswEfSearch, 0, index, procinfo, collation, m, + false, NULL, NULL, enablePQ, &pqinfo); + } else { + ep = list_make1(HnswEntryCandidate(base, entryPoint, q, index, procinfo, collation, false)); + for (int lc = entryPoint->level; lc >= 1; lc--) { + w = HnswSearchLayer(base, q, ep, 1, lc, index, procinfo, collation, m, false, NULL); + ep = w; + } + w = HnswSearchLayer(base, q, ep, hnswEfSearch, 0, index, procinfo, collation, m, false, NULL); + } + return w; +} + +/* + * Get scan value + */ +static Datum GetScanValue(IndexScanDesc scan) +{ + HnswScanOpaque so = (HnswScanOpaque)scan->opaque; + Datum value; + + if (scan->orderByData->sk_flags & SK_ISNULL) { + value = PointerGetDatum(NULL); + } else { + value = scan->orderByData->sk_argument; + + /* Value should not be compressed or toasted */ + Assert(!VARATT_IS_COMPRESSED(DatumGetPointer(value))); + Assert(!VARATT_IS_EXTENDED(DatumGetPointer(value))); + + /* Normalize if needed */ + if (so->normprocinfo != NULL) { + value = HnswNormValue(so->typeInfo, so->collation, value); + } + } + + return value; +} + +/* + * Prepare for an index scan + */ +IndexScanDesc hnswbeginscan_internal(Relation index, int nkeys, int norderbys) +{ + IndexScanDesc scan; + HnswScanOpaque so; + PQParams params; + int dim; + + scan = RelationGetIndexScan(index, nkeys, norderbys); + + so = (HnswScanOpaque)palloc(sizeof(HnswScanOpaqueData)); + so->typeInfo = HnswGetTypeInfo(index); + so->first = true; + so->tmpCtx = AllocSetContextCreate(CurrentMemoryContext, "Hnsw scan temporary context", ALLOCSET_DEFAULT_SIZES); + + so->vs.buf = InvalidBuffer; + so->vs.lastSelfModifiedItup = NULL; + so->vs.lastSelfModifiedItupBufferSize = 0; + + /* Set support functions */ + so->procinfo = index_getprocinfo(index, 1, HNSW_DISTANCE_PROC); + so->normprocinfo = HnswOptionalProcInfo(index, HNSW_NORM_PROC); + so->collation = index->rd_indcollation[0]; + + dim = TupleDescAttr(index->rd_att, 0)->atttypmod; + so->pqMode = 
HNSW_PQMODE_DEFAULT; + InitPQParamsOnDisk(&params, index, so->procinfo, dim, &so->enablePQ); + so->params = params; + + scan->opaque = so; + + return scan; +} + +/* + * Start or restart an index scan + */ +void hnswrescan_internal(IndexScanDesc scan, ScanKey keys, int nkeys, ScanKey orderbys, int norderbys) +{ + HnswScanOpaque so = (HnswScanOpaque)scan->opaque; + errno_t rc = EOK; + + if (so->vs.lastSelfModifiedItup) { + IndexTupleSetSize(((IndexTuple)(so->vs.lastSelfModifiedItup)), 0); /* clear */ + } + + so->first = true; + MemoryContextReset(so->tmpCtx); + + if (keys && scan->numberOfKeys > 0) { + rc = memmove_s(scan->keyData, scan->numberOfKeys * sizeof(ScanKeyData), keys, scan->numberOfKeys * sizeof(ScanKeyData)); + securec_check(rc, "\0", "\0"); + } + + if (orderbys && scan->numberOfOrderBys > 0) { + rc = memmove_s(scan->orderByData, scan->numberOfOrderBys * sizeof(ScanKeyData), orderbys, scan->numberOfOrderBys * sizeof(ScanKeyData)); + securec_check(rc, "\0", "\0"); + } +} + +void check_length(HnswScanOpaque so, IndexScanDesc scan) +{ + if (list_length(so->w) == 0) { + LockPage(scan->indexRelation, HNSW_SCAN_LOCK, ShareLock); + so->length = so->length * 2; + so->w = GetScanItems(scan, so->value); + + /* Release shared lock */ + UnlockPage(scan->indexRelation, HNSW_SCAN_LOCK, ShareLock); + for (int i = 0; i < so->currentLoc; i++) { + so->w = list_delete_first(so->w); + } + } +} +/* + * Fetch the next tuple in the given scan + */ +bool hnswgettuple_internal(IndexScanDesc scan, ScanDirection dir) +{ + HnswScanOpaque so = (HnswScanOpaque)scan->opaque; + MemoryContext oldCtx = MemoryContextSwitchTo(so->tmpCtx); + + /* + * Index can be used to scan backward, but Postgres doesn't support + * backward scan on operators + */ + Assert(ScanDirectionIsForward(dir)); + + if (so->first) { + Datum value; + so->length = scan->count + u_sess->datavec_ctx.hnsw_ef_search; + so->currentLoc = 0; + /* Count index scan for stats */ + pgstat_count_index_scan(scan->indexRelation); + +
/* Safety check */ + if (scan->orderByData == NULL) + elog(ERROR, "cannot scan hnsw index without order"); + + /* Requires MVCC-compliant snapshot as not able to maintain a pin */ + /* https://www.postgresql.org/docs/current/index-locking.html */ + if (!IsMVCCSnapshot(scan->xs_snapshot)) + elog(ERROR, "non-MVCC snapshots are not supported with hnsw"); + + /* Get scan value */ + value = GetScanValue(scan); + so->value = value; + /* + * Get a shared lock. This allows vacuum to ensure no in-flight scans + * before marking tuples as deleted. + */ + LockPage(scan->indexRelation, HNSW_SCAN_LOCK, ShareLock); + + so->w = GetScanItems(scan, value); + + /* Release shared lock */ + UnlockPage(scan->indexRelation, HNSW_SCAN_LOCK, ShareLock); + + so->first = false; + + } + check_length(so, scan); + while (list_length(so->w) > 0) { + char *base = NULL; + HnswCandidate *hc = (HnswCandidate *)linitial(so->w); + HnswElement element = (HnswElement)HnswPtrAccess(base, hc->element); + ItemPointer heaptid; + + /* Move to next element if no valid heap TIDs */ + if (element->heaptidsLength == 0) { + so->w = list_delete_first(so->w); + if (list_length(so->w) !=0) { + continue; + } + } + check_length(so, scan); + if (list_length(so->w) == 0) { + continue; + } + hc = (HnswCandidate *)linitial(so->w); + element = (HnswElement)HnswPtrAccess(base, hc->element); + if (element->heaptidsLength == 0) { + so->w = list_delete_first(so->w); + continue; + } + heaptid = &element->heaptids[--element->heaptidsLength]; + + MemoryContextSwitchTo(oldCtx); + + scan->xs_ctup.t_self = *heaptid; + scan->xs_recheck = false; + so->currentLoc = so->currentLoc + 1; + return true; + } + + MemoryContextSwitchTo(oldCtx); + return false; +} + +/* + * End a scan and release resources + */ +void hnswendscan_internal(IndexScanDesc scan) +{ + HnswScanOpaque so = (HnswScanOpaque)scan->opaque; + + FREE_POINTER(so->vs.lastSelfModifiedItup); + + MemoryContextDelete(so->tmpCtx); + + pfree(so); + scan->opaque = NULL; +} diff 
--git a/src/gausskernel/storage/access/datavec/hnswutils.cpp b/src/gausskernel/storage/access/datavec/hnswutils.cpp new file mode 100644 index 0000000000000000000000000000000000000000..3bdf95dcc894766155fe6da3736b9293e7345640 --- /dev/null +++ b/src/gausskernel/storage/access/datavec/hnswutils.cpp @@ -0,0 +1,1684 @@ +/* + * Copyright (c) 2024 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + * ------------------------------------------------------------------------- + * + * hnswutils.cpp + * + * IDENTIFICATION + * src/gausskernel/storage/access/datavec/hnswutils.cpp + * + * ------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <math.h> + +#include "access/generic_xlog.h" +#include "catalog/pg_type.h" +#include "fmgr.h" +#include "access/datavec/hnsw.h" +#include "lib/pairingheap.h" +#include "access/datavec/halfvec.h" +#include "access/datavec/sparsevec.h" +#include "access/datavec/utils.h" +#include "storage/buf/bufmgr.h" +#include "utils/datum.h" +#include "utils/rel.h" + +#include "utils/hashutils.h" + +static inline uint64 murmurhash64(uint64 data) +{ + uint64 h = data; + + h ^= h >> 33; + h *= 0xff51afd7ed558ccd; + h ^= h >> 33; + h *= 0xc4ceb9fe1a85ec53; + h ^= h >> 33; + + return h; +} + +/* TID hash table */ +static uint32 hash_tid(ItemPointerData tid) +{ + union { + uint64 i; + ItemPointerData tid; + } x; + + /* Initialize unused bytes */ + x.i = 0; + x.tid = tid; + + return murmurhash64(x.i); +} + +#define VALGRIND_MAKE_MEM_DEFINED(addr,
size) \ + do { \ + } while (0) + +#define SH_PREFIX tidhash +#define SH_ELEMENT_TYPE TidHashEntry +#define SH_KEY_TYPE ItemPointerData +#define SH_KEY tid +#define SH_HASH_KEY(tb, key) hash_tid(key) +#define SH_EQUAL(tb, a, b) ItemPointerEquals(&(a), &(b)) +#define SH_SCOPE extern +#define SH_DEFINE +#include "lib/simplehash.h" + +/* Pointer hash table */ +static uint32 hash_pointer(uintptr_t ptr) +{ +#if SIZEOF_VOID_P == 8 + return murmurhash64((uint64)ptr); +#else + return murmurhash32((uint32)ptr); +#endif +} + +#define SH_PREFIX pointerhash +#define SH_ELEMENT_TYPE PointerHashEntry +#define SH_KEY_TYPE uintptr_t +#define SH_KEY ptr +#define SH_HASH_KEY(tb, key) hash_pointer(key) +#define SH_EQUAL(tb, a, b) ((a) == (b)) +#define SH_SCOPE extern +#define SH_DEFINE +#include "lib/simplehash.h" + +/* Offset hash table */ +static uint32 hash_offset(Size offset) +{ +#if SIZEOF_SIZE_T == 8 + return murmurhash64((uint64)offset); +#else + return murmurhash32((uint32)offset); +#endif +} + +#define SH_PREFIX offsethash +#define SH_ELEMENT_TYPE OffsetHashEntry +#define SH_KEY_TYPE Size +#define SH_KEY offset +#define SH_HASH_KEY(tb, key) hash_offset(key) +#define SH_EQUAL(tb, a, b) ((a) == (b)) +#define SH_SCOPE extern +#define SH_DEFINE +#include "lib/simplehash.h" + +typedef union { + pointerhash_hash *pointers; + offsethash_hash *offsets; + tidhash_hash *tids; +} VisitedHash; + +/* + * Get the max number of connections in an upper layer for each element in the index + */ +int HnswGetM(Relation index) +{ + HnswOptions *opts = (HnswOptions *)index->rd_options; + + if (opts) + return opts->m; + + return HNSW_DEFAULT_M; +} + +/* + * Get the size of the dynamic candidate list in the index + */ +int HnswGetEfConstruction(Relation index) +{ + HnswOptions *opts = (HnswOptions *)index->rd_options; + + if (opts) + return opts->efConstruction; + + return HNSW_DEFAULT_EF_CONSTRUCTION; +} + +/* + * Get whether to enable PQ + */ +bool HnswGetEnablePQ(Relation index) +{ + HnswOptions 
*opts = (HnswOptions *)index->rd_options; + + if (opts) { + return opts->enablePQ; + } + + return GENERIC_DEFAULT_ENABLE_PQ; +} + +/* + * Get the number of subquantizer + */ +int HnswGetPqM(Relation index) +{ + HnswOptions *opts = (HnswOptions *)index->rd_options; + + if (opts) { + return opts->pqM; + } + + return GENERIC_DEFAULT_PQ_M; +} + +/* + * Get the number of centroids for each subquantizer + */ +int HnswGetPqKsub(Relation index) +{ + HnswOptions *opts = (HnswOptions *)index->rd_options; + + if (opts) { + return opts->pqKsub; + } + + return GENERIC_DEFAULT_PQ_KSUB; +} + +/* + * Get proc + */ +FmgrInfo *HnswOptionalProcInfo(Relation index, uint16 procnum) +{ + if (!OidIsValid(index_getprocid(index, 1, procnum))) + return NULL; + + return index_getprocinfo(index, 1, procnum); +} + +/* + * Normalize value + */ +Datum HnswNormValue(const HnswTypeInfo *typeInfo, Oid collation, Datum value) +{ + return DirectFunctionCall1Coll(typeInfo->normalize, collation, value); +} + +/* + * Check if non-zero norm + */ +bool HnswCheckNorm(FmgrInfo *procinfo, Oid collation, Datum value) +{ + return DatumGetFloat8(FunctionCall1Coll(procinfo, collation, value)) > 0; +} + +/* + * New buffer + */ +Buffer HnswNewBuffer(Relation index, ForkNumber forkNum) +{ + Buffer buf = ReadBufferExtended(index, forkNum, P_NEW, RBM_NORMAL, NULL); + + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + return buf; +} + +/* + * Init page + */ +void HnswInitPage(Buffer buf, Page page) +{ + PageInit(page, BufferGetPageSize(buf), sizeof(HnswPageOpaqueData)); + HnswPageGetOpaque(page)->nextblkno = InvalidBlockNumber; + HnswPageGetOpaque(page)->pageType = HNSW_DEFAULT_PAGE_TYPE; + HnswPageGetOpaque(page)->page_id = HNSW_PAGE_ID; +} + +/* + * Allocate a neighbor array + */ +static HnswNeighborArray *HnswInitNeighborArray(int lm, HnswAllocator *allocator) +{ + HnswNeighborArray *a = (HnswNeighborArray *)HnswAlloc(allocator, HNSW_NEIGHBOR_ARRAY_SIZE(lm)); + + a->length = 0; + a->closerSet = false; + return a; +} + +/* 
+ * Allocate neighbors + */ +void HnswInitNeighbors(char *base, HnswElement element, int m, HnswAllocator *allocator) +{ + int level = element->level; + HnswNeighborArrayPtr *neighborList = + (HnswNeighborArrayPtr *)HnswAlloc(allocator, sizeof(HnswNeighborArrayPtr) * (level + 1)); + + HnswPtrStore(base, element->neighbors, neighborList); + + for (int lc = 0; lc <= level; lc++) + HnswPtrStore(base, neighborList[lc], HnswInitNeighborArray(HnswGetLayerM(m, lc), allocator)); +} + +/* + * Allocate memory from the allocator + */ +void *HnswAlloc(HnswAllocator *allocator, Size size) +{ + if (allocator) + return (*(allocator)->alloc)(size, (allocator)->state); + + return palloc(size); +} + +/* + * Allocate an element + */ +HnswElement HnswInitElement(char *base, ItemPointer heaptid, int m, double ml, int maxLevel, HnswAllocator *allocator) +{ + HnswElement element = (HnswElement)HnswAlloc(allocator, sizeof(HnswElementData)); + + int level = static_cast<int>(-log(RandomDouble()) * ml); + /* Cap level */ + if (level > maxLevel) { + level = maxLevel; + } + + element->heaptidsLength = 0; + HnswAddHeapTid(element, heaptid); + + element->level = level; + element->deleted = 0; + + HnswInitNeighbors(base, element, m, allocator); + + HnswPtrStore(base, element->value, (Pointer)NULL); + + return element; +} + +/* + * Add a heap TID to an element + */ +void HnswAddHeapTid(HnswElement element, ItemPointer heaptid) +{ + element->heaptids[element->heaptidsLength++] = *heaptid; +} + +/* + * Allocate an element from block and offset numbers + */ +HnswElement HnswInitElementFromBlock(BlockNumber blkno, OffsetNumber offno) +{ + HnswElement element = (HnswElement)palloc(sizeof(HnswElementData)); + char *base = NULL; + + element->blkno = blkno; + element->offno = offno; + HnswPtrStore(base, element->neighbors, (HnswNeighborArrayPtr *)NULL); + HnswPtrStore(base, element->value, (Pointer)NULL); + return element; +} + +/* + * Get the metapage info + */ +void HnswGetMetaPageInfo(Relation index, int
*m, HnswElement *entryPoint) +{ + Buffer buf; + Page page; + HnswMetaPage metap; + + buf = ReadBuffer(index, HNSW_METAPAGE_BLKNO); + LockBuffer(buf, BUFFER_LOCK_SHARE); + page = BufferGetPage(buf); + metap = HnswPageGetMeta(page); + if (unlikely(metap->magicNumber != HNSW_MAGIC_NUMBER)) + elog(ERROR, "hnsw index is not valid"); + + if (m != NULL) + *m = metap->m; + + if (entryPoint != NULL) { + if (BlockNumberIsValid(metap->entryBlkno)) { + *entryPoint = HnswInitElementFromBlock(metap->entryBlkno, metap->entryOffno); + (*entryPoint)->level = metap->entryLevel; + } else { + *entryPoint = NULL; + } + } + + UnlockReleaseBuffer(buf); +} + +/* + * Get the entry point + */ +HnswElement HnswGetEntryPoint(Relation index) +{ + HnswElement entryPoint; + + HnswGetMetaPageInfo(index, NULL, &entryPoint); + + return entryPoint; +} + +/* + * Update the metapage info + */ +static void HnswUpdateMetaPageInfo(Page page, int updateEntry, HnswElement entryPoint, BlockNumber insertPage) +{ + HnswMetaPage metap = HnswPageGetMeta(page); + + if (updateEntry) { + if (entryPoint == NULL) { + metap->entryBlkno = InvalidBlockNumber; + metap->entryOffno = InvalidOffsetNumber; + metap->entryLevel = -1; + } else if (entryPoint->level > metap->entryLevel || updateEntry == HNSW_UPDATE_ENTRY_ALWAYS) { + metap->entryBlkno = entryPoint->blkno; + metap->entryOffno = entryPoint->offno; + metap->entryLevel = entryPoint->level; + } + } + + if (BlockNumberIsValid(insertPage)) + metap->insertPage = insertPage; +} + +/* + * Update the append metapage info + */ +static void HnswUpdateAppendMetaPageInfo(Page page, int updateEntry, HnswElement entryPoint, + BlockNumber eleInsertSlotStartPage, BlockNumber neiInsertSlotStartPage) +{ + HnswAppendMetaPage metap = HnswPageGetAppendMeta(page); + + if (updateEntry) { + if (entryPoint == NULL) { + metap->entryBlkno = InvalidBlockNumber; + metap->entryOffno = InvalidOffsetNumber; + metap->entryLevel = -1; + } else if (entryPoint->level > metap->entryLevel || 
updateEntry == HNSW_UPDATE_ENTRY_ALWAYS) { + metap->entryBlkno = entryPoint->blkno; + metap->entryOffno = entryPoint->offno; + metap->entryLevel = entryPoint->level; + } + } + + if (BlockNumberIsValid(eleInsertSlotStartPage)) { + metap->elementInsertSlot = eleInsertSlotStartPage; + } + + if (BlockNumberIsValid(neiInsertSlotStartPage)) { + metap->neighborInsertSlot = neiInsertSlotStartPage; + } +} + +/* + * Update the metapage + */ +void HnswUpdateMetaPage(Relation index, int updateEntry, HnswElement entryPoint, BlockNumber insertPage, + ForkNumber forkNum, bool building) +{ + Buffer buf; + Page page; + GenericXLogState *state; + + buf = ReadBufferExtended(index, forkNum, HNSW_METAPAGE_BLKNO, RBM_NORMAL, NULL); + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + if (building) { + state = NULL; + page = BufferGetPage(buf); + } else { + state = GenericXLogStart(index); + page = GenericXLogRegisterBuffer(state, buf, 0); + } + + HnswUpdateMetaPageInfo(page, updateEntry, entryPoint, insertPage); + + if (building) + MarkBufferDirty(buf); + else + GenericXLogFinish(state); + UnlockReleaseBuffer(buf); +} + +/* + * Update the append metapage + */ +void HnswUpdateAppendMetaPage(Relation index, int updateEntry, HnswElement entryPoint, BlockNumber eleInsertPage, + BlockNumber neiInsertPage, ForkNumber forkNum, bool building) +{ + Buffer buf; + Page page; + GenericXLogState *state; + + buf = ReadBufferExtended(index, forkNum, HNSW_METAPAGE_BLKNO, RBM_NORMAL, NULL); + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + if (building) { + state = NULL; + page = BufferGetPage(buf); + } else { + state = GenericXLogStart(index); + page = GenericXLogRegisterBuffer(state, buf, 0); + } + + HnswUpdateAppendMetaPageInfo(page, updateEntry, entryPoint, eleInsertPage, neiInsertPage); + + if (building) { + MarkBufferDirty(buf); + } else { + GenericXLogFinish(state); + } + UnlockReleaseBuffer(buf); +} + +void FlushPQInfoInternal(Relation index, char* table, BlockNumber startBlkno, uint16 nblks, uint32 totalSize) 
+{ + Buffer buf; + Page page; + PageHeader p; + uint32 curFlushSize; + for (uint16 i = 0; i < nblks; i++) { + curFlushSize = (i == nblks - 1) ? + (totalSize - i * HNSW_PQTABLE_STORAGE_SIZE) : HNSW_PQTABLE_STORAGE_SIZE; + buf = ReadBufferExtended(index, MAIN_FORKNUM, startBlkno + i, RBM_NORMAL, NULL); + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + page = BufferGetPage(buf); + errno_t err = memcpy_s(PageGetContents(page), curFlushSize, + table + i * HNSW_PQTABLE_STORAGE_SIZE, curFlushSize); + securec_check(err, "\0", "\0"); + p = (PageHeader)page; + p->pd_lower += curFlushSize; + MarkBufferDirty(buf); + UnlockReleaseBuffer(buf); + } +} + +/* +* Flush PQ table into page during index building +*/ +void FlushPQInfo(HnswBuildState * buildstate) +{ + Relation index = buildstate->index; + char* pqTable = buildstate->pqTable; + float* pqDistanceTable = buildstate->pqDistanceTable; + uint16 pqTableNblk; + uint16 pqDisTableNblk; + uint32 pqTableSize; + uint32 pqDisTableSize; + + HnswGetPQInfoFromMetaPage(index, &pqTableNblk, &pqTableSize, &pqDisTableNblk, &pqDisTableSize); + + /* Flush pq table */ + FlushPQInfoInternal(index, pqTable, HNSW_PQTABLE_START_BLKNO, pqTableNblk, pqTableSize); + if (buildstate->pqMode == HNSW_PQMODE_SDC) { + /* Flush pq distance table */ + FlushPQInfoInternal(index, (char*)pqDistanceTable, + HNSW_PQTABLE_START_BLKNO + pqTableNblk, pqDisTableNblk, pqDisTableSize); + } +} + +char* LoadPQtable(Relation index) +{ + Buffer buf; + Page page; + uint16 nblks; + uint32 curFlushSize; + uint32 pqTableSize; + char* pqTable; + + HnswGetPQInfoFromMetaPage(index, &nblks, &pqTableSize, NULL, NULL); + pqTable = (char*)palloc0(pqTableSize); + + for (uint16 i = 0; i < nblks; i++) { + curFlushSize = (i == nblks - 1) ? 
(pqTableSize - i * HNSW_PQTABLE_STORAGE_SIZE) : HNSW_PQTABLE_STORAGE_SIZE; + buf = ReadBuffer(index, HNSW_PQTABLE_START_BLKNO + i); + LockBuffer(buf, BUFFER_LOCK_SHARE); + page = BufferGetPage(buf); + errno_t err = memcpy_s(pqTable + i * HNSW_PQTABLE_STORAGE_SIZE, curFlushSize, + PageGetContents(page), curFlushSize); + securec_check(err, "\0", "\0"); + UnlockReleaseBuffer(buf); + } + return pqTable; +} + +float* LoadPQDisTable(Relation index) +{ + Buffer buf; + Page page; + uint16 pqTableNblk; + uint16 nblks; + uint32 curFlushSize; + uint32 pqDisTableSize; + float* disTable; + + HnswGetPQInfoFromMetaPage(index, &pqTableNblk, NULL, &nblks, &pqDisTableSize); + disTable = (float*)palloc0(pqDisTableSize); + + BlockNumber startBlkno = HNSW_PQTABLE_START_BLKNO + pqTableNblk; + for (uint16 i = 0; i < nblks; i++) { + curFlushSize = (i == nblks - 1) ? (pqDisTableSize - i * HNSW_PQTABLE_STORAGE_SIZE) : HNSW_PQTABLE_STORAGE_SIZE; + buf = ReadBuffer(index, startBlkno + i); + LockBuffer(buf, BUFFER_LOCK_SHARE); + page = BufferGetPage(buf); + errno_t err = memcpy_s((char*)disTable + i * HNSW_PQTABLE_STORAGE_SIZE, curFlushSize, + PageGetContents(page), curFlushSize); + securec_check(err, "\0", "\0"); + UnlockReleaseBuffer(buf); + } + return disTable; +} + +uint8* +LoadPQcode(HnswElementTuple tuple) +{ + return (uint8*)(((char*)(tuple)) + HNSW_ELEMENT_TUPLE_SIZE(VARSIZE_ANY(&tuple->data))); +} + +/* + * Set element tuple, except for neighbor info + */ +void HnswSetElementTuple(char *base, HnswElementTuple etup, HnswElement element) +{ + Pointer valuePtr = (Pointer)HnswPtrAccess(base, element->value); + errno_t rc = EOK; + + etup->type = HNSW_ELEMENT_TUPLE_TYPE; + etup->level = element->level; + etup->deleted = 0; + for (int i = 0; i < HNSW_HEAPTIDS; i++) { + if (i < element->heaptidsLength) + etup->heaptids[i] = element->heaptids[i]; + else + ItemPointerSetInvalid(&etup->heaptids[i]); + } + rc = memcpy_s(&etup->data, VARSIZE_ANY(valuePtr), valuePtr, VARSIZE_ANY(valuePtr)); + 
securec_check(rc, "\0", "\0"); +} + +/* + * Set neighbor tuple + */ +void HnswSetNeighborTuple(char *base, HnswNeighborTuple ntup, HnswElement e, int m) +{ + int idx = 0; + + ntup->type = HNSW_NEIGHBOR_TUPLE_TYPE; + + for (int lc = e->level; lc >= 0; lc--) { + HnswNeighborArray *neighbors = HnswGetNeighbors(base, e, lc); + int lm = HnswGetLayerM(m, lc); + + for (int i = 0; i < lm; i++) { + ItemPointer indextid = &ntup->indextids[idx++]; + + if (i < neighbors->length) { + HnswCandidate *hc = &neighbors->items[i]; + HnswElement hce = (HnswElement)HnswPtrAccess(base, hc->element); + + ItemPointerSet(indextid, hce->blkno, hce->offno); + } else { + ItemPointerSetInvalid(indextid); + } + } + } + + ntup->count = idx; +} + +/* + * Load neighbors from page + */ +static void LoadNeighborsFromPage(HnswElement element, Relation index, Page page, int m) +{ + char *base = NULL; + + HnswNeighborTuple ntup = (HnswNeighborTuple)PageGetItem(page, PageGetItemId(page, element->neighborOffno)); + int neighborCount = (element->level + 2) * m; + + Assert(HnswIsNeighborTuple(ntup)); + + HnswInitNeighbors(base, element, m, NULL); + + /* Ensure expected neighbors */ + if (ntup->count != neighborCount) { + return; + } + + for (int i = 0; i < neighborCount; i++) { + HnswElement e; + int level; + HnswCandidate *hc; + ItemPointer indextid; + HnswNeighborArray *neighbors; + + indextid = &ntup->indextids[i]; + + if (!ItemPointerIsValid(indextid)) { + continue; + } + + e = HnswInitElementFromBlock(ItemPointerGetBlockNumber(indextid), ItemPointerGetOffsetNumber(indextid)); + + /* Calculate level based on offset */ + level = element->level - i / m; + if (level < 0) { + level = 0; + } + + neighbors = HnswGetNeighbors(base, element, level); + hc = &neighbors->items[neighbors->length++]; + HnswPtrStore(base, hc->element, e); + } +} + +/* + * Load neighbors + */ +void HnswLoadNeighbors(HnswElement element, Relation index, int m) +{ + Buffer buf; + Page page; + + buf = ReadBuffer(index, 
element->neighborPage); + LockBuffer(buf, BUFFER_LOCK_SHARE); + page = BufferGetPage(buf); + + LoadNeighborsFromPage(element, index, page, m); + + UnlockReleaseBuffer(buf); +} + +/* + * Load an element from a tuple + */ +void HnswLoadElementFromTuple(HnswElement element, HnswElementTuple etup, bool loadHeaptids, bool loadVec) +{ + element->level = etup->level; + element->deleted = etup->deleted; + element->neighborPage = ItemPointerGetBlockNumber(&etup->neighbortid); + element->neighborOffno = ItemPointerGetOffsetNumber(&etup->neighbortid); + element->heaptidsLength = 0; + + if (loadHeaptids) { + for (int i = 0; i < HNSW_HEAPTIDS; i++) { + /* Can stop at first invalid */ + if (!ItemPointerIsValid(&etup->heaptids[i])) + break; + + HnswAddHeapTid(element, &etup->heaptids[i]); + } + } + + if (loadVec) { + char *base = NULL; + Datum value = datumCopy(PointerGetDatum(&etup->data), false, -1); + + HnswPtrStore(base, element->value, DatumGetPointer(value)); + } +} + +/* + * Load an element and optionally get its distance from q + */ +bool HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, + bool loadVec, float *maxDistance, IndexScanDesc scan, bool enablePQ, PQSearchInfo *pqinfo) +{ + Buffer buf; + Page page; + HnswElementTuple etup; + bool needRecheck = false; + bool isVisible = true; + uint8 *ePQCode; + PQParams *params; + + /* Read vector */ + buf = ReadBuffer(index, element->blkno); + LockBuffer(buf, BUFFER_LOCK_SHARE); + page = BufferGetPage(buf); + if (scan != NULL && HnswPageGetOpaque(page)->pageType == HNSW_USTORE_PAGE_TYPE) { + HnswScanOpaque so = (HnswScanOpaque)scan->opaque; + so->vs.buf = buf; + isVisible = VecVisibilityCheck(scan, page, element->offno, &needRecheck); + } + + etup = (HnswElementTuple)PageGetItem(page, PageGetItemId(page, element->offno)); + + Assert(HnswIsElementTuple(etup)); + + /* Calculate distance */ + if (distance != NULL) { + if (enablePQ && pqinfo->lc == 0) { + ePQCode = 
LoadPQcode(etup); + params = &pqinfo->params; + if (pqinfo->pqMode == HNSW_PQMODE_SDC && *pqinfo->qPQCode == NULL) { + *distance = 0; + } else if (pqinfo->pqMode == HNSW_PQMODE_ADC && pqinfo->pqDistanceTable == NULL) { + *distance = 0; + } else { + GetPQDistance(ePQCode, pqinfo->qPQCode, params, pqinfo->pqDistanceTable, distance); + } + } else { + if (DatumGetPointer(*q) == NULL) { + *distance = 0; + } else { + *distance = (float)DatumGetFloat8(FunctionCall2Coll( + procinfo, collation, *q, PointerGetDatum(&etup->data))); + } + } + } + + /* Load element */ + if (distance == NULL || maxDistance == NULL || *distance < *maxDistance) { + HnswLoadElementFromTuple(element, etup, true, loadVec); + if (enablePQ) { + params = &pqinfo->params; + Vector *vd1 = &etup->data; + Vector *vd2 = (Vector *)DatumGetPointer(*q); + float exactDis; + if (pqinfo->params.funcType == HNSW_PQ_DIS_IP) { + exactDis = -VectorInnerProduct(params->dim, vd1->x, vd2->x); + } else { + exactDis = VectorL2SquaredDistance(params->dim, vd1->x, vd2->x); + } + *distance = exactDis; + } + } + + UnlockReleaseBuffer(buf); + return isVisible; +} + +/* + * Get the distance for a candidate + */ +static float GetCandidateDistance(char *base, HnswElement element, Datum q, FmgrInfo *procinfo, Oid collation) +{ + Datum value = HnswGetValue(base, element); + + return DatumGetFloat8(FunctionCall2Coll(procinfo, collation, q, value)); +} + +/* + * Create a candidate for the entry point + */ +HnswCandidate *HnswEntryCandidate(char *base, HnswElement entryPoint, Datum q, Relation index, FmgrInfo *procinfo, + Oid collation, bool loadVec, IndexScanDesc scan, bool enablePQ, PQSearchInfo *pqinfo) +{ + HnswCandidate *hc = (HnswCandidate *)palloc(sizeof(HnswCandidate)); + + HnswPtrStore(base, hc->element, entryPoint); + if (index == NULL) { + hc->distance = GetCandidateDistance(base, entryPoint, q, procinfo, collation); + } else { + bool isVisible = HnswLoadElement(entryPoint, &hc->distance, &q, index, procinfo, + collation, 
loadVec, NULL, scan, enablePQ, pqinfo); + if (!isVisible) { + elog(ERROR, "hnsw entryPoint is invisible\n"); + } + } + return hc; +} + +#define HnswGetPairingHeapCandidate(membername, ptr) \ +(pairingheap_container(HnswPairingHeapNode, membername, ptr)->inner) +#define HnswGetPairingHeapCandidateConst(membername, ptr) \ +(pairingheap_const_container(HnswPairingHeapNode, membername, ptr)->inner) + +/* + * Compare candidate distances + */ +static int CompareNearestCandidates(const pairingheap_node *a, const pairingheap_node *b, void *arg) +{ + if (HnswGetPairingHeapCandidateConst(c_node, a)->distance < HnswGetPairingHeapCandidateConst(c_node, b)->distance) { + return 1; + } + if (HnswGetPairingHeapCandidateConst(c_node, a)->distance > HnswGetPairingHeapCandidateConst(c_node, b)->distance) { + return -1; + } + + return 0; +} + +/* + * Compare candidate distances + */ +static int CompareFurthestCandidates(const pairingheap_node *a, const pairingheap_node *b, void *arg) +{ + if (HnswGetPairingHeapCandidateConst(w_node, a)->distance < HnswGetPairingHeapCandidateConst(w_node, b)->distance) { + return -1; + } + if (HnswGetPairingHeapCandidateConst(w_node, a)->distance > HnswGetPairingHeapCandidateConst(w_node, b)->distance) { + return 1; + } + + return 0; +} + +/* + * Create a pairing heap node for a candidate + */ +static HnswPairingHeapNode *CreatePairingHeapNode(HnswCandidate *c) +{ + HnswPairingHeapNode *node = (HnswPairingHeapNode *)palloc(sizeof(HnswPairingHeapNode)); + + node->inner = c; + return node; +} + +/* + * Init visited + */ +static inline void InitVisited(char *base, VisitedHash *v, Relation index, int ef, int m) +{ + if (index != NULL) { + v->tids = tidhash_create(CurrentMemoryContext, ef * m * 2, NULL); + } else if (base != NULL) { + v->offsets = offsethash_create(CurrentMemoryContext, ef * m * 2, NULL); + } else { + v->pointers = pointerhash_create(CurrentMemoryContext, ef * m * 2, NULL); + } +} + +/* + * Add to visited + */ +static inline void 
AddToVisited(char *base, VisitedHash *v, HnswCandidate *hc, Relation index, bool *found) +{ + if (index != NULL) { + HnswElement element = (HnswElement)HnswPtrAccess(base, hc->element); + ItemPointerData indextid; + + ItemPointerSet(&indextid, element->blkno, element->offno); + tidhash_insert(v->tids, indextid, found); + } else if (base != NULL) { + offsethash_insert(v->offsets, HnswPtrOffset(hc->element), found); + } else { + pointerhash_insert(v->pointers, (uintptr_t)HnswPtrPointer(hc->element), found); + } +} + +/* + * Count element towards ef + */ +static inline bool CountElement(char *base, HnswElement skipElement, HnswElement e) +{ + if (skipElement == NULL) { + return true; + } + + /* Ensure does not access heaptidsLength during in-memory build */ + pg_memory_barrier(); + + return e->heaptidsLength != 0; +} + +/* + * Load unvisited neighbors from memory + */ +static void +HnswLoadUnvisitedFromMemory(char *base, HnswElement element, HnswElement *unvisited, int *unvisitedLength, + VisitedHash *v, int lc, HnswNeighborArray *neighborhoodData, Size neighborhoodSize) +{ + /* Get the neighborhood at layer lc */ + HnswNeighborArray *neighborhood = HnswGetNeighbors(base, element, lc); + + /* Copy neighborhood to local memory */ + LWLockAcquire(&element->lock, LW_SHARED); + memcpy(neighborhoodData, neighborhood, neighborhoodSize); + LWLockRelease(&element->lock); + neighborhood = neighborhoodData; + + *unvisitedLength = 0; + + for (int i = 0; i < neighborhood->length; i++) { + HnswCandidate *hc = &neighborhood->items[i]; + bool found; + + AddToVisited(base, v, hc, NULL, &found); + + if (!found) { + unvisited[(*unvisitedLength)++] = (HnswElement)HnswPtrAccess(base, hc->element); + } + } +} + +/* + * Load unvisited neighbors from disk + */ +static void +HnswLoadUnvisitedFromDisk(HnswElement element, HnswElement *unvisited, int *unvisitedLength, + VisitedHash *v, Relation index, int m, int lm, int lc) +{ + Buffer buf; + Page page; + HnswNeighborTuple ntup; + int start; + 
ItemPointerData indextids[HNSW_MAX_M * 2]; + + buf = ReadBuffer(index, element->neighborPage); + LockBuffer(buf, BUFFER_LOCK_SHARE); + page = BufferGetPage(buf); + + ntup = (HnswNeighborTuple)PageGetItem(page, PageGetItemId(page, element->neighborOffno)); + start = (element->level - lc) * m; + + /* Copy to minimize lock time */ + memcpy(&indextids, ntup->indextids + start, lm * sizeof(ItemPointerData)); + + UnlockReleaseBuffer(buf); + + *unvisitedLength = 0; + + for (int i = 0; i < lm; i++) { + ItemPointer indextid = &indextids[i]; + bool found; + + if (!ItemPointerIsValid(indextid)) { + break; + } + + tidhash_insert(v->tids, *indextid, &found); + + if (!found) { + unvisited[(*unvisitedLength)++] = HnswInitElementFromBlock(ItemPointerGetBlockNumber(indextid), + ItemPointerGetOffsetNumber(indextid)); + } + } +} + +/* + * Algorithm 2 from paper + */ +List *HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, FmgrInfo *procinfo, + Oid collation, int m, bool inserting, HnswElement skipElement, + IndexScanDesc scan, bool enablePQ, PQSearchInfo *pqinfo) +{ + List *w = NIL; + pairingheap *C = pairingheap_allocate(CompareNearestCandidates, NULL); + pairingheap *W = pairingheap_allocate(CompareFurthestCandidates, NULL); + int wlen = 0; + VisitedHash v; + ListCell *lc2; + HnswNeighborArray *neighborhoodData = NULL; + Size neighborhoodSize; + bool isVisible = true; + int lm = HnswGetLayerM(m, lc); + HnswElement *unvisited = (HnswElementData**)palloc(lm * sizeof(HnswElement)); + int unvisitedLength; + errno_t rc = EOK; + int vNum = 0; + int threshold = u_sess->datavec_ctx.hnsw_earlystop_threshold; + bool enableEarlyStop = threshold == INT32_MAX ? 
false : true; + + InitVisited(base, &v, index, ef, m); + + /* Create local memory for neighborhood if needed */ + if (index == NULL) { + neighborhoodSize = HNSW_NEIGHBOR_ARRAY_SIZE(lm); + neighborhoodData = (HnswNeighborArray *)palloc(neighborhoodSize); + } + + /* Add entry points to v, C, and W */ + foreach (lc2, ep) { + HnswCandidate *hc = (HnswCandidate *)lfirst(lc2); + bool found; + HnswPairingHeapNode *node; + + AddToVisited(base, &v, hc, index, &found); + + node = CreatePairingHeapNode(hc); + pairingheap_add(C, &node->c_node); + pairingheap_add(W, &node->w_node); + + /* + * Do not count elements being deleted towards ef when vacuuming. It + * would be ideal to do this for inserts as well, but this could + * affect insert performance. + */ + if (CountElement(base, skipElement, (HnswElement)HnswPtrAccess(base, hc->element))) { + wlen++; + } + } + + while (!pairingheap_is_empty(C)) { + HnswCandidate *c = HnswGetPairingHeapCandidate(c_node, pairingheap_remove_first(C)); + HnswCandidate *f = HnswGetPairingHeapCandidate(w_node, pairingheap_first(W)); + HnswElement cElement; + + if (c->distance > f->distance || (enableEarlyStop && vNum == threshold)) + break; + + cElement = (HnswElement)HnswPtrAccess(base, c->element); + + if (index == NULL) { + HnswLoadUnvisitedFromMemory(base, cElement, unvisited, &unvisitedLength, &v, + lc, neighborhoodData, neighborhoodSize); + } else { + HnswLoadUnvisitedFromDisk(cElement, unvisited, &unvisitedLength, &v, index, m, lm, lc); + } + + for (int i = 0; i < unvisitedLength; i++) { + HnswElement eElement = unvisited[i]; + float eDistance; + bool alwaysAdd = wlen < ef; + + f = HnswGetPairingHeapCandidate(w_node, pairingheap_first(W)); + + if (index == NULL) { + if (enablePQ) { + uint8 *ePQCode = (uint8*)HnswPtrAccess(base, eElement->pqcodes); + GetPQDistance(ePQCode, pqinfo->qPQCode, &pqinfo->params, pqinfo->pqDistanceTable, &eDistance); + } else { + eDistance = GetCandidateDistance(base, eElement, q, procinfo, collation); + } + } else 
{ + HnswLoadElement(eElement, &eDistance, &q, index, procinfo, collation, inserting, + alwaysAdd ? NULL : &f->distance, NULL, enablePQ, pqinfo); + } + + if (eDistance < f->distance || alwaysAdd) { + HnswCandidate *e; + HnswPairingHeapNode *node; + vNum = 0; + + Assert(!eElement->deleted); + + /* Make robust to issues */ + if (eElement->level < lc) { + continue; + } + /* Create a new candidate */ + e = (HnswCandidate *)palloc(sizeof(HnswCandidate)); + HnswPtrStore(base, e->element, eElement); + e->distance = eDistance; + + node = CreatePairingHeapNode(e); + pairingheap_add(C, &node->c_node); + pairingheap_add(W, &node->w_node); + + /* + * Do not count elements being deleted towards ef when + * vacuuming. It would be ideal to do this for inserts as + * well, but this could affect insert performance. + */ + if (CountElement(base, skipElement, eElement)) { + wlen++; + + /* No need to decrement wlen */ + if (wlen > ef) { + pairingheap_remove_first(W); + } + } + } else { + vNum++; + } + + if (enableEarlyStop && vNum == threshold) { + break; + } + } + } + + /* Add each element of W to w */ + while (!pairingheap_is_empty(W)) { + HnswCandidate *hc = HnswGetPairingHeapCandidate(w_node, pairingheap_remove_first(W)); + + w = lcons(hc, w); + } + + return w; +} + +/* + * Compare candidate distances with pointer tie-breaker + */ +static int + CompareCandidateDistances(const void *a, const void *b) +{ + HnswCandidate *hca = (HnswCandidate *)lfirst(*(ListCell **)a); + HnswCandidate *hcb = (HnswCandidate *)lfirst(*(ListCell **)b); + + if (hca->distance < hcb->distance) { + return 1; + } + + if (hca->distance > hcb->distance) { + return -1; + } + + if (HnswPtrPointer(hca->element) < HnswPtrPointer(hcb->element)) { + return 1; + } + + if (HnswPtrPointer(hca->element) > HnswPtrPointer(hcb->element)) { + return -1; + } + + return 0; +} + +/* + * Compare candidate distances with offset tie-breaker + */ +static int + CompareCandidateDistancesOffset(const void *a, const void *b) +{ + 
HnswCandidate *hca = (HnswCandidate *)lfirst(*(ListCell **)a); + HnswCandidate *hcb = (HnswCandidate *)lfirst(*(ListCell **)b); + + if (hca->distance < hcb->distance) { + return 1; + } + + if (hca->distance > hcb->distance) { + return -1; + } + + if (HnswPtrOffset(hca->element) < HnswPtrOffset(hcb->element)) { + return 1; + } + + if (HnswPtrOffset(hca->element) > HnswPtrOffset(hcb->element)) { + return -1; + } + + return 0; +} + +/* + * Calculate the distance between elements + */ +static float HnswGetDistance(char *base, HnswElement a, HnswElement b, FmgrInfo *procinfo, Oid collation) +{ + Datum aValue = HnswGetValue(base, a); + Datum bValue = HnswGetValue(base, b); + + return DatumGetFloat8(FunctionCall2Coll(procinfo, collation, aValue, bValue)); +} + +/* + * Check if an element is closer to q than any element from R + */ +static bool CheckElementCloser(char *base, HnswCandidate *e, List *r, FmgrInfo *procinfo, Oid collation) +{ + HnswElement eElement = (HnswElement)HnswPtrAccess(base, e->element); + ListCell *lc2; + + foreach (lc2, r) { + HnswCandidate *ri = (HnswCandidate *)lfirst(lc2); + HnswElement riElement = (HnswElement)HnswPtrAccess(base, ri->element); + float distance = HnswGetDistance(base, eElement, riElement, procinfo, collation); + + if (distance <= e->distance) { + return false; + } + } + + return true; +} + +/* + * Algorithm 4 from paper + */ +static List *SelectNeighbors(char *base, List *c, int lm, int lc, FmgrInfo *procinfo, Oid collation, HnswElement e2, + HnswCandidate *newCandidate, HnswCandidate **pruned, bool sortCandidates) +{ + List *r = NIL; + List *w = list_copy(c); + HnswCandidate **wd; + int wdlen = 0; + int wdoff = 0; + HnswNeighborArray *neighbors = HnswGetNeighbors(base, e2, lc); + bool mustCalculate = !neighbors->closerSet; + List *added = NIL; + bool removedAny = false; + + if (list_length(w) <= lm) { + return w; + } + + wd = (HnswCandidate **)palloc(sizeof(HnswCandidate *) * list_length(w)); + + /* Ensure order of candidates is 
deterministic for closer caching */ + if (sortCandidates) { + if (base == NULL) { + list_sort(w, CompareCandidateDistances); + } else { + list_sort(w, CompareCandidateDistancesOffset); + } + } + + while (list_length(w) > 0 && list_length(r) < lm) { + /* Assumes w is already ordered desc */ + HnswCandidate *e = (HnswCandidate *)linitial(w); + + w = list_delete_first(w); + + /* Use previous state of r and wd to skip work when possible */ + if (mustCalculate) { + e->closer = CheckElementCloser(base, e, r, procinfo, collation); + } else if (list_length(added) > 0) { + /* Keep Valgrind happy for in-memory, parallel builds */ + if (base != NULL) { + VALGRIND_MAKE_MEM_DEFINED(&e->closer, 1); + } + + /* + * If the current candidate was closer, we only need to compare it + * with the other candidates that we have added. + */ + if (e->closer) { + e->closer = CheckElementCloser(base, e, added, procinfo, collation); + + if (!e->closer) { + removedAny = true; + } + } else { + /* + * If we have removed any candidates from closer, a candidate + * that was not closer earlier might now be. 
+ */ + if (removedAny) { + e->closer = CheckElementCloser(base, e, r, procinfo, collation); + if (e->closer) { + added = lappend(added, e); + } + } + } + } else if (e == newCandidate) { + e->closer = CheckElementCloser(base, e, r, procinfo, collation); + if (e->closer) { + added = lappend(added, e); + } + } + + /* Keep Valgrind happy for in-memory, parallel builds */ + if (base != NULL) { + VALGRIND_MAKE_MEM_DEFINED(&e->closer, 1); + } + + if (e->closer) { + r = lappend(r, e); + } else { + wd[wdlen++] = e; + } + } + + /* Cached value can only be used in future if sorted deterministically */ + neighbors->closerSet = sortCandidates; + + /* Keep pruned connections */ + while (wdoff < wdlen && list_length(r) < lm) { + r = lappend(r, wd[wdoff++]); + } + + /* Return pruned for update connections */ + if (pruned != NULL) { + if (wdoff < wdlen) { + *pruned = wd[wdoff]; + } else { + *pruned = (HnswCandidate *)linitial(w); + } + } + + return r; +} + +/* + * Add connections + */ +static void AddConnections(char *base, HnswElement element, List *neighbors, int lc) +{ + ListCell *lc2; + HnswNeighborArray *a = HnswGetNeighbors(base, element, lc); + + foreach (lc2, neighbors) + a->items[a->length++] = *((HnswCandidate *)lfirst(lc2)); +} + +/* + * Update connections + */ +void HnswUpdateConnection(char *base, HnswElement element, HnswCandidate *hc, int lm, int lc, int *updateIdx, + Relation index, FmgrInfo *procinfo, Oid collation) +{ + HnswElement hce = (HnswElement)HnswPtrAccess(base, hc->element); + HnswNeighborArray *currentNeighbors = HnswGetNeighbors(base, hce, lc); + HnswCandidate hc2; + + HnswPtrStore(base, hc2.element, element); + hc2.distance = hc->distance; + + if (currentNeighbors->length < lm) { + currentNeighbors->items[currentNeighbors->length++] = hc2; + + /* Track update */ + if (updateIdx != NULL) { + *updateIdx = -2; + } + } else { + /* Shrink connections */ + HnswCandidate *pruned = NULL; + + /* Load elements on insert */ + if (index != NULL) { + Datum q = 
HnswGetValue(base, hce); + + for (int i = 0; i < currentNeighbors->length; i++) { + HnswCandidate *hc3 = &currentNeighbors->items[i]; + HnswElement hc3Element = (HnswElement)HnswPtrAccess(base, hc3->element); + + if (HnswPtrIsNull(base, hc3Element->value)) + HnswLoadElement(hc3Element, &hc3->distance, &q, index, procinfo, collation, true, NULL); + else + hc3->distance = GetCandidateDistance(base, hc3Element, q, procinfo, collation); + + /* Prune element if being deleted */ + if (hc3Element->heaptidsLength == 0) { + pruned = &currentNeighbors->items[i]; + break; + } + } + } + + if (pruned == NULL) { + List *c = NIL; + + /* Add candidates */ + for (int i = 0; i < currentNeighbors->length; i++) { + c = lappend(c, &currentNeighbors->items[i]); + } + c = lappend(c, &hc2); + + SelectNeighbors(base, c, lm, lc, procinfo, collation, hce, &hc2, &pruned, true); + + /* Should not happen */ + if (pruned == NULL) + return; + } + + /* Find and replace the pruned element */ + for (int i = 0; i < currentNeighbors->length; i++) { + if (HnswPtrEqual(base, currentNeighbors->items[i].element, pruned->element)) { + currentNeighbors->items[i] = hc2; + + /* Track update */ + if (updateIdx != NULL) { + *updateIdx = i; + } + + break; + } + } + } +} + +/* + * Remove elements being deleted or skipped + */ +static List *RemoveElements(char *base, List *w, HnswElement skipElement) +{ + ListCell *lc2; + List *w2 = NIL; + + /* Ensure does not access heaptidsLength during in-memory build */ + pg_memory_barrier(); + + foreach (lc2, w) { + HnswCandidate *hc = (HnswCandidate *)lfirst(lc2); + HnswElement hce = (HnswElement)HnswPtrAccess(base, hc->element); + + /* Skip self for vacuuming update */ + if (skipElement != NULL && hce->blkno == skipElement->blkno && hce->offno == skipElement->offno) { + continue; + } + + if (hce->heaptidsLength != 0) { + w2 = lappend(w2, hc); + } + } + + return w2; +} + +/* + * Algorithm 1 from paper + */ +void HnswFindElementNeighbors(char *base, HnswElement element, HnswElement entryPoint,
Relation index, + FmgrInfo *procinfo, Oid collation, int m, int efConstruction, bool existing, + bool enablePQ, PQParams *params) +{ + List *ep; + List *w; + int level = element->level; + int entryLevel; + Datum q = HnswGetValue(base, element); + HnswElement skipElement = existing ? element : NULL; + + if (enablePQ) { + /* compute pq code */ + Size codesize = params->pqM * sizeof(uint8); + uint8 *pqcode = (uint8*)palloc(codesize); + ComputeVectorPQCode(DatumGetVector(q)->x, params, pqcode); + Pointer codePtr = (Pointer)HnswPtrAccess(base, element->pqcodes); + errno_t err = memcpy_s(codePtr, codesize, pqcode, codesize); + securec_check(err, "\0", "\0"); + } + + /* No neighbors if no entry point */ + if (entryPoint == NULL) + return; + + /* Get entry point and level */ + ep = list_make1(HnswEntryCandidate(base, entryPoint, q, index, procinfo, collation, true)); + entryLevel = entryPoint->level; + + /* 1st phase: greedy search to insert level */ + for (int lc = entryLevel; lc >= level + 1; lc--) { + w = HnswSearchLayer(base, q, ep, 1, lc, index, procinfo, collation, m, true, skipElement); + ep = w; + } + + if (level > entryLevel) { + level = entryLevel; + } + + /* Add one for existing element */ + if (existing) { + efConstruction++; + } + /* 2nd phase */ + for (int lc = level; lc >= 0; lc--) { + int lm = HnswGetLayerM(m, lc); + List *neighbors; + List *lw; + + w = HnswSearchLayer(base, q, ep, efConstruction, lc, index, procinfo, collation, m, true, skipElement); + + /* Elements being deleted or skipped can help with search */ + /* but should be removed before selecting neighbors */ + if (index != NULL) + lw = RemoveElements(base, w, skipElement); + else + lw = w; + + /* + * Candidates are sorted, but not deterministically. Could set + * sortCandidates to true for in-memory builds to enable closer + * caching, but there does not seem to be a difference in performance. 
+ */ + neighbors = SelectNeighbors(base, lw, lm, lc, procinfo, collation, element, NULL, NULL, false); + + AddConnections(base, element, neighbors, lc); + + ep = w; + } +} + +/* +* Get the info related to pqTable in metapage +*/ +void HnswGetPQInfoFromMetaPage(Relation index, uint16 *pqTableNblk, uint32 *pqTableSize, + uint16 *pqDisTableNblk, uint32 *pqDisTableSize) +{ + Buffer buf; + Page page; + + buf = ReadBuffer(index, HNSW_METAPAGE_BLKNO); + LockBuffer(buf, BUFFER_LOCK_SHARE); + page = BufferGetPage(buf); + + HnswMetaPage metap = HnswPageGetMeta(page); + PG_TRY(); + { + if (unlikely(metap->magicNumber != HNSW_MAGIC_NUMBER)) { + elog(ERROR, "hnsw index is not valid"); + } + } + PG_CATCH(); + { + UnlockReleaseBuffer(buf); + PG_RE_THROW(); + } + PG_END_TRY(); + + if (pqTableNblk != NULL) { + *pqTableNblk = metap->pqTableNblk; + } + if (pqTableSize != NULL) { + *pqTableSize = metap->pqTableSize; + } + if (pqDisTableNblk != NULL) { + *pqDisTableNblk = metap->pqDisTableNblk; + } + if (pqDisTableSize != NULL) { + *pqDisTableSize = metap->pqDisTableSize; + } + + UnlockReleaseBuffer(buf); +} + +int getPQfunctionType(FmgrInfo *procinfo, FmgrInfo *normprocinfo) +{ + if (procinfo->fn_oid == 8431) { + return HNSW_PQ_DIS_L2; + } else if (procinfo->fn_oid == 8434) { + if (normprocinfo == NULL) { + return HNSW_PQ_DIS_IP; + } else { + return HNSW_PQ_DIS_COSINE; + } + } else { + ereport(ERROR, (errmsg("current data type or distance type can't support hnswpq."))); + return -1; + } +} + +void InitPQParamsOnDisk(PQParams *params, Relation index, FmgrInfo *procinfo, int dim, bool *enablePQ) +{ + const HnswTypeInfo *typeInfo = HnswGetTypeInfo(index); + Buffer buf = ReadBuffer(index, HNSW_METAPAGE_BLKNO); + LockBuffer(buf, BUFFER_LOCK_SHARE); + Page page = BufferGetPage(buf); + HnswMetaPage metap = HnswPageGetMeta(page); + *enablePQ = metap->enablePQ; + params->pqM = metap->pqM; + params->pqKsub = metap->pqKsub; + UnlockReleaseBuffer(buf); + int pqMode = HNSW_PQMODE_DEFAULT; + + if 
(*enablePQ && !g_instance.pq_inited) { + ereport(ERROR, (errmsg("the SQL involves operations related to HNSWPQ, " + "but this instance has not currently loaded the PQ dynamic library."))); + } + + if (*enablePQ) { + params->funcType = getPQfunctionType(procinfo, HnswOptionalProcInfo(index, HNSW_NORM_PROC)); + params->dim = dim; + params->subItemSize = typeInfo->itemSize(dim / params->pqM); + /* Now save pqTable and pqDistanceTable in the relcache entry. */ + if (index->pqTable == NULL) { + MemoryContext oldcxt = MemoryContextSwitchTo(index->rd_indexcxt); + index->pqTable = LoadPQtable(index); + (void)MemoryContextSwitchTo(oldcxt); + } + if (index->pqDistanceTable == NULL && pqMode == HNSW_PQMODE_SDC) { + MemoryContext oldcxt = MemoryContextSwitchTo(index->rd_indexcxt); + index->pqDistanceTable = LoadPQDisTable(index); + (void)MemoryContextSwitchTo(oldcxt); + } + params->pqTable = index->pqTable; + } else { + params->pqTable = NULL; + } + +} + +static void SparsevecCheckValue(Pointer v) +{ + SparseVector *vec = (SparseVector *)v; + + if (vec->nnz > HNSW_MAX_NNZ) { + elog(ERROR, "sparsevec cannot have more than %d non-zero elements for hnsw index", HNSW_MAX_NNZ); + } +} + +/* + * Get type info + */ +const HnswTypeInfo *HnswGetTypeInfo(Relation index) +{ + FmgrInfo *procinfo = HnswOptionalProcInfo(index, HNSW_TYPE_INFO_PROC); + + if (procinfo == NULL) { + static const HnswTypeInfo typeInfo = { + .maxDimensions = HNSW_MAX_DIM, .supportPQ = true, + .itemSize = VectorItemSize, .normalize = l2_normalize, .checkValue = NULL}; + return (&typeInfo); + } else { + return (const HnswTypeInfo *)DatumGetPointer(OidFunctionCall0Coll(procinfo->fn_oid, InvalidOid)); + } +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(hnsw_halfvec_support); +Datum hnsw_halfvec_support(PG_FUNCTION_ARGS) +{ + static const HnswTypeInfo typeInfo = { + .maxDimensions = HNSW_MAX_DIM * 2, .supportPQ = false, + .itemSize = HalfvecItemSize, .normalize = halfvec_l2_normalize, .checkValue = NULL}; + + 
PG_RETURN_POINTER(&typeInfo); +}; + +PGDLLEXPORT PG_FUNCTION_INFO_V1(hnsw_bit_support); +Datum hnsw_bit_support(PG_FUNCTION_ARGS) +{ + static const HnswTypeInfo typeInfo = {.maxDimensions = HNSW_MAX_DIM * 32, .supportPQ = false, + .itemSize = BitItemSize, .normalize = NULL, .checkValue = NULL}; + + PG_RETURN_POINTER(&typeInfo); +}; + +PGDLLEXPORT PG_FUNCTION_INFO_V1(hnsw_sparsevec_support); +Datum hnsw_sparsevec_support(PG_FUNCTION_ARGS) +{ + static const HnswTypeInfo typeInfo = { + .maxDimensions = SPARSEVEC_MAX_DIM, .supportPQ = false, + .itemSize = NULL, .normalize = sparsevec_l2_normalize, .checkValue = SparsevecCheckValue}; + + PG_RETURN_POINTER(&typeInfo); +}; diff --git a/src/gausskernel/storage/access/datavec/hnswvacuum.cpp b/src/gausskernel/storage/access/datavec/hnswvacuum.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ca5fe81fdd0701e34c62e9f55fefef35a5b28732 --- /dev/null +++ b/src/gausskernel/storage/access/datavec/hnswvacuum.cpp @@ -0,0 +1,633 @@ +/* + * Copyright (c) 2024 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * ------------------------------------------------------------------------- + * + * hnswvacuum.cpp + * + * IDENTIFICATION + * src/gausskernel/storage/access/datavec/hnswvacuum.cpp + * + * ------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include + +#include "access/generic_xlog.h" +#include "commands/vacuum.h" +#include "access/datavec/hnsw.h" +#include "storage/buf/bufmgr.h" +#include "storage/lmgr.h" +#include "utils/memutils.h" + +/* + * Check if deleted list contains an index TID + */ +static bool DeletedContains(tidhash_hash *deleted, ItemPointer indextid) +{ + return tidhash_lookup(deleted, *indextid) != NULL; +} + +/* + * Remove deleted heap TIDs + * + * OK to remove for entry point, since always considered for searches and inserts + */ +static void RemoveHeapTids(HnswVacuumState *vacuumstate) +{ + BlockNumber blkno = vacuumstate->hnswHeadBlkno; + HnswElement highestPoint = &vacuumstate->highestPoint; + Relation index = vacuumstate->index; + BufferAccessStrategy bas = vacuumstate->bas; + HnswElement entryPoint = HnswGetEntryPoint(vacuumstate->index); + IndexBulkDeleteResult *stats = vacuumstate->stats; + + /* Store separately since highestPoint.level is uint8 */ + int highestLevel = -1; + + /* Initialize highest point */ + highestPoint->blkno = InvalidBlockNumber; + highestPoint->offno = InvalidOffsetNumber; + + while (BlockNumberIsValid(blkno)) { + Buffer buf; + Page page; + GenericXLogState *state; + OffsetNumber offno; + OffsetNumber maxoffno; + bool updated = false; + + vacuum_delay_point(); + + buf = ReadBufferExtended(index, MAIN_FORKNUM, blkno, RBM_NORMAL, bas); + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + state = GenericXLogStart(index); + page = GenericXLogRegisterBuffer(state, buf, 0); + maxoffno = PageGetMaxOffsetNumber(page); + + /* Iterate over nodes */ + for (offno = FirstOffsetNumber; offno <= maxoffno; offno = OffsetNumberNext(offno)) { + HnswElementTuple etup = 
(HnswElementTuple)PageGetItem(page, PageGetItemId(page, offno)); + int idx = 0; + bool itemUpdated = false; + + /* Skip neighbor tuples */ + if (!HnswIsElementTuple(etup)) + continue; + + if (ItemPointerIsValid(&etup->heaptids[0])) { + for (int i = 0; i < HNSW_HEAPTIDS; i++) { + /* Stop at first unused */ + if (!ItemPointerIsValid(&etup->heaptids[i])) + break; + + if (vacuumstate->callback(&etup->heaptids[i], vacuumstate->callbackState, InvalidOid, + InvalidBktId)) { + itemUpdated = true; + stats->tuples_removed++; + } else { + /* Move to front of list */ + etup->heaptids[idx++] = etup->heaptids[i]; + stats->num_index_tuples++; + } + } + + if (itemUpdated) { + /* Mark rest as invalid */ + for (int i = idx; i < HNSW_HEAPTIDS; i++) + ItemPointerSetInvalid(&etup->heaptids[i]); + + updated = true; + } + } + + if (!ItemPointerIsValid(&etup->heaptids[0])) { + ItemPointerData ip; + bool found; + + /* Add to deleted list */ + ItemPointerSet(&ip, blkno, offno); + + tidhash_insert(vacuumstate->deleted, ip, &found); + Assert(!found); + } else if (etup->level > highestLevel && + !(entryPoint != NULL && blkno == entryPoint->blkno && offno == entryPoint->offno)) { + /* Keep track of highest non-entry point */ + highestPoint->blkno = blkno; + highestPoint->offno = offno; + highestPoint->level = etup->level; + highestLevel = etup->level; + } + } + + blkno = HnswPageGetOpaque(page)->nextblkno; + + if (updated) + GenericXLogFinish(state); + else + GenericXLogAbort(state); + + UnlockReleaseBuffer(buf); + } +} + +/* + * Check for deleted neighbors + */ +static bool NeedsUpdated(HnswVacuumState *vacuumstate, HnswElement element) +{ + Relation index = vacuumstate->index; + BufferAccessStrategy bas = vacuumstate->bas; + Buffer buf; + Page page; + HnswNeighborTuple ntup; + bool needsUpdated = false; + + buf = ReadBufferExtended(index, MAIN_FORKNUM, element->neighborPage, RBM_NORMAL, bas); + LockBuffer(buf, BUFFER_LOCK_SHARE); + page = BufferGetPage(buf); + ntup = 
(HnswNeighborTuple)PageGetItem(page, PageGetItemId(page, element->neighborOffno)); + + Assert(HnswIsNeighborTuple(ntup)); + + /* Check neighbors */ + for (int i = 0; i < ntup->count; i++) { + ItemPointer indextid = &ntup->indextids[i]; + + if (!ItemPointerIsValid(indextid)) + continue; + + /* Check if in deleted list */ + if (DeletedContains(vacuumstate->deleted, indextid)) { + needsUpdated = true; + break; + } + } + + /* Also update if layer 0 is not full */ + /* This could indicate too many candidates being deleted during insert */ + if (!needsUpdated) + needsUpdated = !ItemPointerIsValid(&ntup->indextids[ntup->count - 1]); + + UnlockReleaseBuffer(buf); + + return needsUpdated; +} + +/* + * Repair graph for a single element + */ +static void RepairGraphElement(HnswVacuumState *vacuumstate, HnswElement element, HnswElement entryPoint) +{ + Relation index = vacuumstate->index; + Buffer buf; + Page page; + GenericXLogState *state; + int m = vacuumstate->m; + int efConstruction = vacuumstate->efConstruction; + FmgrInfo *procinfo = vacuumstate->procinfo; + Oid collation = vacuumstate->collation; + BufferAccessStrategy bas = vacuumstate->bas; + HnswNeighborTuple ntup = vacuumstate->ntup; + Size ntupSize = HNSW_NEIGHBOR_TUPLE_SIZE(element->level, m); + char *base = NULL; + + /* Skip if element is entry point */ + if (entryPoint != NULL && element->blkno == entryPoint->blkno && element->offno == entryPoint->offno) { + return; + } + + /* Init fields */ + HnswInitNeighbors(base, element, m, NULL); + element->heaptidsLength = 0; + + /* Find neighbors for element, skipping itself */ + HnswFindElementNeighbors(base, element, entryPoint, index, procinfo, collation, + m, efConstruction, true, false, NULL); + + /* Zero memory for each element */ + MemSet(ntup, 0, HNSW_TUPLE_ALLOC_SIZE); + + /* Update neighbor tuple */ + /* Do this before getting page to minimize locking */ + HnswSetNeighborTuple(base, ntup, element, m); + + /* Get neighbor page */ + buf = 
ReadBufferExtended(index, MAIN_FORKNUM, element->neighborPage, RBM_NORMAL, bas); + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + state = GenericXLogStart(index); + page = GenericXLogRegisterBuffer(state, buf, 0); + /* Overwrite tuple */ + if (!page_index_tuple_overwrite(page, element->neighborOffno, (Item)ntup, ntupSize)) + elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index)); + + /* Commit */ + GenericXLogFinish(state); + UnlockReleaseBuffer(buf); + + /* Update neighbors */ + HnswUpdateNeighborsOnDisk(index, procinfo, collation, element, m, true, false); +} + +/* + * Repair graph entry point + */ +static void RepairGraphEntryPoint(HnswVacuumState *vacuumstate) +{ + Relation index = vacuumstate->index; + HnswElement highestPoint = &vacuumstate->highestPoint; + HnswElement entryPoint; + MemoryContext oldCtx = MemoryContextSwitchTo(vacuumstate->tmpCtx); + + if (!BlockNumberIsValid(highestPoint->blkno)) + highestPoint = NULL; + + /* + * Repair graph for highest non-entry point. Highest point may be outdated + * due to inserts that happen during and after RemoveHeapTids. 
+ */ + if (highestPoint != NULL) { + /* Get a shared lock */ + LockPage(index, HNSW_UPDATE_LOCK, ShareLock); + + /* Load element */ + HnswLoadElement(highestPoint, NULL, NULL, index, vacuumstate->procinfo, vacuumstate->collation, true, NULL); + + /* Repair if needed */ + if (NeedsUpdated(vacuumstate, highestPoint)) + RepairGraphElement(vacuumstate, highestPoint, HnswGetEntryPoint(index)); + + /* Release lock */ + UnlockPage(index, HNSW_UPDATE_LOCK, ShareLock); + } + + /* Prevent concurrent inserts when possibly updating entry point */ + LockPage(index, HNSW_UPDATE_LOCK, ExclusiveLock); + + /* Get latest entry point */ + entryPoint = HnswGetEntryPoint(index); + if (entryPoint != NULL) { + ItemPointerData epData; + + ItemPointerSet(&epData, entryPoint->blkno, entryPoint->offno); + + if (DeletedContains(vacuumstate->deleted, &epData)) { + /* + * Replace the entry point with the highest point. If highest + * point is outdated and empty, the entry point will be empty + * until an element is repaired. + */ + HnswUpdateMetaPage(index, HNSW_UPDATE_ENTRY_ALWAYS, highestPoint, InvalidBlockNumber, MAIN_FORKNUM, false); + } else { + /* + * Repair the entry point with the highest point. If highest point + * is outdated, this can remove connections at higher levels in + * the graph until they are repaired, but this should be fine. 
+ */ + HnswLoadElement(entryPoint, NULL, NULL, index, vacuumstate->procinfo, vacuumstate->collation, true, NULL); + + if (NeedsUpdated(vacuumstate, entryPoint)) { + /* Reset neighbors from previous update */ + if (highestPoint != NULL) + HnswPtrStore((char *)NULL, highestPoint->neighbors, (HnswNeighborArrayPtr *)NULL); + + RepairGraphElement(vacuumstate, entryPoint, highestPoint); + } + } + } + + /* Release lock */ + UnlockPage(index, HNSW_UPDATE_LOCK, ExclusiveLock); + + /* Reset memory context */ + MemoryContextSwitchTo(oldCtx); + MemoryContextReset(vacuumstate->tmpCtx); +} + +/* + * Repair graph for all elements + */ +static void RepairGraph(HnswVacuumState *vacuumstate) +{ + Relation index = vacuumstate->index; + BufferAccessStrategy bas = vacuumstate->bas; + BlockNumber blkno = vacuumstate->hnswHeadBlkno; + + /* + * Wait for inserts to complete. Inserts before this point may have + * neighbors about to be deleted. Inserts after this point will not. + */ + LockPage(index, HNSW_UPDATE_LOCK, ExclusiveLock); + UnlockPage(index, HNSW_UPDATE_LOCK, ExclusiveLock); + + /* Repair entry point first */ + RepairGraphEntryPoint(vacuumstate); + + while (BlockNumberIsValid(blkno)) { + Buffer buf; + Page page; + OffsetNumber offno; + OffsetNumber maxoffno; + List *elements = NIL; + ListCell *lc2; + MemoryContext oldCtx; + + vacuum_delay_point(); + + oldCtx = MemoryContextSwitchTo(vacuumstate->tmpCtx); + + buf = ReadBufferExtended(index, MAIN_FORKNUM, blkno, RBM_NORMAL, bas); + LockBuffer(buf, BUFFER_LOCK_SHARE); + page = BufferGetPage(buf); + maxoffno = PageGetMaxOffsetNumber(page); + + /* Load items into memory to minimize locking */ + for (offno = FirstOffsetNumber; offno <= maxoffno; offno = OffsetNumberNext(offno)) { + HnswElementTuple etup = (HnswElementTuple)PageGetItem(page, PageGetItemId(page, offno)); + HnswElement element; + + /* Skip neighbor tuples */ + if (!HnswIsElementTuple(etup)) + continue; + + /* Skip updating neighbors if being deleted */ + if 
(!ItemPointerIsValid(&etup->heaptids[0])) + continue; + + /* Create an element */ + element = HnswInitElementFromBlock(blkno, offno); + HnswLoadElementFromTuple(element, etup, false, true); + + elements = lappend(elements, element); + } + + blkno = HnswPageGetOpaque(page)->nextblkno; + + UnlockReleaseBuffer(buf); + + /* Update neighbor pages */ + foreach (lc2, elements) { + HnswElement element = (HnswElement)lfirst(lc2); + HnswElement entryPoint; + LOCKMODE lockmode = ShareLock; + + /* Check if any neighbors point to deleted values */ + if (!NeedsUpdated(vacuumstate, element)) + continue; + + /* Get a shared lock */ + LockPage(index, HNSW_UPDATE_LOCK, lockmode); + + /* Refresh entry point for each element */ + entryPoint = HnswGetEntryPoint(index); + /* Prevent concurrent inserts when likely updating entry point */ + if (entryPoint == NULL || element->level > entryPoint->level) { + /* Release shared lock */ + UnlockPage(index, HNSW_UPDATE_LOCK, lockmode); + + /* Get exclusive lock */ + lockmode = ExclusiveLock; + LockPage(index, HNSW_UPDATE_LOCK, lockmode); + + /* Get latest entry point after lock is acquired */ + entryPoint = HnswGetEntryPoint(index); + } + + /* Repair connections */ + RepairGraphElement(vacuumstate, element, entryPoint); + + /* + * Update metapage if needed. Should only happen if entry point + * was replaced and highest point was outdated. 
+ */ + if (entryPoint == NULL || element->level > entryPoint->level) + HnswUpdateMetaPage(index, HNSW_UPDATE_ENTRY_GREATER, element, InvalidBlockNumber, MAIN_FORKNUM, false); + + /* Release lock */ + UnlockPage(index, HNSW_UPDATE_LOCK, lockmode); + } + + /* Reset memory context */ + MemoryContextSwitchTo(oldCtx); + MemoryContextReset(vacuumstate->tmpCtx); + } +} + +/* + * Mark items as deleted + */ +static void MarkDeleted(HnswVacuumState *vacuumstate) +{ + BlockNumber blkno = vacuumstate->hnswHeadBlkno; + BlockNumber insertPage = InvalidBlockNumber; + Relation index = vacuumstate->index; + BufferAccessStrategy bas = vacuumstate->bas; + + /* + * Wait for index scans to complete. Scans before this point may contain + * tuples about to be deleted. Scans after this point will not, since the + * graph has been repaired. + */ + LockPage(index, HNSW_SCAN_LOCK, ExclusiveLock); + UnlockPage(index, HNSW_SCAN_LOCK, ExclusiveLock); + + while (BlockNumberIsValid(blkno)) { + Buffer buf; + Page page; + GenericXLogState *state; + OffsetNumber offno; + OffsetNumber maxoffno; + + vacuum_delay_point(); + + buf = ReadBufferExtended(index, MAIN_FORKNUM, blkno, RBM_NORMAL, bas); + + /* + * ambulkdelete cannot delete entries from pages that are pinned by + * other backends + * + * https://www.postgresql.org/docs/current/index-locking.html + */ + LockBufferForCleanup(buf); + + state = GenericXLogStart(index); + page = GenericXLogRegisterBuffer(state, buf, 0); + maxoffno = PageGetMaxOffsetNumber(page); + + /* Update element and neighbors together */ + for (offno = FirstOffsetNumber; offno <= maxoffno; offno = OffsetNumberNext(offno)) { + HnswElementTuple etup = (HnswElementTuple)PageGetItem(page, PageGetItemId(page, offno)); + HnswNeighborTuple ntup; + Buffer nbuf; + Page npage; + BlockNumber neighborPage; + OffsetNumber neighborOffno; + + /* Skip neighbor tuples */ + if (!HnswIsElementTuple(etup)) + continue; + + /* Skip deleted tuples */ + if (etup->deleted) { + /* Set to first free 
page */ + if (!BlockNumberIsValid(insertPage)) + insertPage = blkno; + + continue; + } + + /* Skip live tuples */ + if (ItemPointerIsValid(&etup->heaptids[0])) + continue; + + /* Get neighbor page */ + neighborPage = ItemPointerGetBlockNumber(&etup->neighbortid); + neighborOffno = ItemPointerGetOffsetNumber(&etup->neighbortid); + + if (neighborPage == blkno) { + nbuf = buf; + npage = page; + } else { + nbuf = ReadBufferExtended(index, MAIN_FORKNUM, neighborPage, RBM_NORMAL, bas); + LockBuffer(nbuf, BUFFER_LOCK_EXCLUSIVE); + npage = GenericXLogRegisterBuffer(state, nbuf, 0); + } + + ntup = (HnswNeighborTuple)PageGetItem(npage, PageGetItemId(npage, neighborOffno)); + + /* Overwrite element */ + etup->deleted = 1; + MemSet(&etup->data, 0, VARSIZE_ANY(&etup->data)); + + /* Overwrite neighbors */ + for (int i = 0; i < ntup->count; i++) + ItemPointerSetInvalid(&ntup->indextids[i]); + + /* + * We modified the tuples in place, no need to call + * page_index_tuple_overwrite + */ + + /* Commit */ + GenericXLogFinish(state); + if (nbuf != buf) + UnlockReleaseBuffer(nbuf); + + /* Set to first free page */ + if (!BlockNumberIsValid(insertPage)) + insertPage = blkno; + + /* Prepare new xlog */ + state = GenericXLogStart(index); + page = GenericXLogRegisterBuffer(state, buf, 0); + } + + blkno = HnswPageGetOpaque(page)->nextblkno; + + GenericXLogAbort(state); + UnlockReleaseBuffer(buf); + } + + /* Update insert page last, after everything has been marked as deleted */ + HnswUpdateMetaPage(index, 0, NULL, insertPage, MAIN_FORKNUM, false); +} + +/* + * Initialize the vacuum state + */ +static void InitVacuumState(HnswVacuumState *vacuumstate, IndexVacuumInfo *info, IndexBulkDeleteResult *stats, + IndexBulkDeleteCallback callback, void *callbackState) +{ + Relation index = info->index; + uint16 pqTableNblk; + uint16 pqDisTableNblk; + + if (stats == NULL) + stats = (IndexBulkDeleteResult *)palloc0(sizeof(IndexBulkDeleteResult)); + + vacuumstate->index = index; + vacuumstate->stats = 
stats; + vacuumstate->callback = callback; + vacuumstate->callbackState = callbackState; + vacuumstate->efConstruction = HnswGetEfConstruction(index); + vacuumstate->bas = GetAccessStrategy(BAS_BULKREAD); + vacuumstate->procinfo = index_getprocinfo(index, 1, HNSW_DISTANCE_PROC); + vacuumstate->collation = index->rd_indcollation[0]; + vacuumstate->ntup = (HnswNeighborTuple)palloc0(HNSW_TUPLE_ALLOC_SIZE); + vacuumstate->tmpCtx = + AllocSetContextCreate(CurrentMemoryContext, "Hnsw vacuum temporary context", ALLOCSET_DEFAULT_SIZES); + + /* Get m from metapage */ + HnswGetMetaPageInfo(index, &vacuumstate->m, NULL); + HnswGetPQInfoFromMetaPage(index, &pqTableNblk, NULL, &pqDisTableNblk, NULL); + vacuumstate->hnswHeadBlkno = HNSW_PQTABLE_START_BLKNO + pqTableNblk + pqDisTableNblk; + + /* Create hash table */ + vacuumstate->deleted = tidhash_create(CurrentMemoryContext, 256, NULL); +} + +/* + * Free resources + */ +static void FreeVacuumState(HnswVacuumState *vacuumstate) +{ + tidhash_destroy(vacuumstate->deleted); + FreeAccessStrategy(vacuumstate->bas); + pfree(vacuumstate->ntup); + MemoryContextDelete(vacuumstate->tmpCtx); +} + +/* + * Bulk delete tuples from the index + */ +IndexBulkDeleteResult *hnswbulkdelete_internal(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, + IndexBulkDeleteCallback callback, void *callbackState) +{ + HnswVacuumState vacuumstate; + + InitVacuumState(&vacuumstate, info, stats, callback, callbackState); + + /* Pass 1: Remove heap TIDs */ + RemoveHeapTids(&vacuumstate); + + /* Pass 2: Repair graph */ + RepairGraph(&vacuumstate); + + /* Pass 3: Mark as deleted */ + MarkDeleted(&vacuumstate); + + FreeVacuumState(&vacuumstate); + + return vacuumstate.stats; +} + +/* + * Clean up after a VACUUM operation + */ +IndexBulkDeleteResult *hnswvacuumcleanup_internal(IndexVacuumInfo *info, IndexBulkDeleteResult *stats) +{ + Relation rel = info->index; + + if (info->analyze_only) + return stats; + + /* stats is NULL if ambulkdelete not called */ + /* OK 
to return NULL if index not changed */ + if (stats == NULL) + return NULL; + + stats->num_pages = RelationGetNumberOfBlocks(rel); + + return stats; +} diff --git a/src/gausskernel/storage/access/datavec/ivfadaptor.cpp b/src/gausskernel/storage/access/datavec/ivfadaptor.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0ddede4de4fcd258cc19a9057cd077cace94e29b --- /dev/null +++ b/src/gausskernel/storage/access/datavec/ivfadaptor.cpp @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2024 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * ------------------------------------------------------------------------- + * + * ivfadaptor.cpp + * + * IDENTIFICATION + * src/gausskernel/storage/access/datavec/ivfadaptor.cpp + * + * ------------------------------------------------------------------------- + */ +#include +#include "access/datavec/ivfflat.h" +#include "access/datavec/utils.h" + +int IvfComputePQTable(VectorArray samples, PQParams *params) +{ + return g_pq_func.ComputePQTable(samples, params); +} + +int IvfComputeVectorPQCode(float *vector, const PQParams *params, uint8 *pqCode) +{ + return g_pq_func.ComputeVectorPQCode(vector, params, pqCode); +} + +int IvfGetPQDistanceTableAdc(float *vector, const PQParams *params, float *pqDistanceTable) +{ + return g_pq_func.GetPQDistanceTableAdc(vector, params, pqDistanceTable); +} + +int IvfGetPQDistance(const uint8 *basecode, const uint8 *querycode, const PQParams *params, + const float *pqDistanceTable, float *pqDistance) +{ + return g_pq_func.GetPQDistance(basecode, querycode, params, pqDistanceTable, pqDistance); +} \ No newline at end of file diff --git a/src/gausskernel/storage/access/datavec/ivfbuild.cpp b/src/gausskernel/storage/access/datavec/ivfbuild.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9f8fe6de9b53eb1e6b781dcba29699553466f5eb --- /dev/null +++ b/src/gausskernel/storage/access/datavec/ivfbuild.cpp @@ -0,0 +1,1198 @@ +/* + * Copyright (c) 2024 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * ------------------------------------------------------------------------- + * + * ivfbuild.cpp + * + * IDENTIFICATION + * src/gausskernel/storage/access/datavec/ivfbuild.cpp + * + * ------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include + +#include "access/tableam.h" +#include "access/xact.h" +#include "access/datavec/bitvec.h" +#include "catalog/index.h" +#include "access/datavec/halfvec.h" +#include "access/datavec/ivfflat.h" +#include "miscadmin.h" +#include "storage/buf/bufmgr.h" +#include "tcop/tcopprot.h" +#include "utils/memutils.h" +#include "access/datavec/vector.h" +#include "postmaster/bgworker.h" +#include "commands/vacuum.h" + +#include "pgstat.h" + +#define CALLBACK_ITEM_POINTER HeapTuple hup + +#define PARALLEL_KEY_IVFFLAT_SHARED UINT64CONST(0xA000000000000001) +#define PARALLEL_KEY_TUPLESORT UINT64CONST(0xA000000000000002) +#define PARALLEL_KEY_IVFFLAT_CENTERS UINT64CONST(0xA000000000000003) +#define PARALLEL_KEY_QUERY_TEXT UINT64CONST(0xA000000000000004) + +/* + * Create PQ-related pages + */ +static void CreatePQPages(IvfflatBuildState *buildstate, ForkNumber fNum) +{ + uint16 nblks; + Relation index = buildstate->index; + ForkNumber forkNum = fNum; + Buffer buf; + Page page; + uint16 pqTableNblk; + uint32 pqPreComputeTableNblk; + GenericXLogState *state; + + IvfGetPQInfoFromMetaPage(index, &pqTableNblk, NULL, &pqPreComputeTableNblk, NULL); + + /* create pq table page */ + for (uint16 i = 0; i < pqTableNblk; i++) { + buf = IvfflatNewBuffer(index, forkNum); + IvfflatInitRegisterPage(index, &buf, &page, &state); + MarkBufferDirty(buf); + IvfflatCommitBuffer(buf, state); + } + + /* create pq distance table page */ + for (uint32 i = 0; i < pqPreComputeTableNblk; i++) { + buf = IvfflatNewBuffer(index, forkNum); + IvfflatInitRegisterPage(index, &buf, &page, &state); + MarkBufferDirty(buf); + IvfflatCommitBuffer(buf, state); + } +} + +/* + * Caculate Residual + */ +static void 
ComputeResidual(IvfflatBuildState *buildstate, Vector* sample, int list) +{ + Vector *vec = (Vector *)lfirst(buildstate->rlist->tail); + Vector *center = (Vector *)VectorArrayGet(buildstate->centers, list); + + if (buildstate->byResidual) { + for (int i = 0; i < buildstate->dimensions; i++) { + vec->x[i] = sample->x[i] -center->x[i]; + } + } else { + for (int i = 0; i < buildstate->dimensions; i++) { + vec->x[i] = sample->x[i]; + } + } +} + +/* + * Caculate square of L2 normalform + */ +static float ComputeNormL2sqr(float *x, int dsub) +{ + float res = 0.0f; + for (int i = 0; i < dsub; i++) { + res += x[i] * x[i]; + } + return res; +} + +static void ComputeInnerProdAndSum(IvfflatBuildState *buildstate, float * l2Norm, float *center, float * tab, int dsub) +{ + Size itemSize = MAXALIGN(buildstate->typeInfo->itemSize(dsub)); + const float MULTIPLIER = 2.0; + + for (int i = 0; i < buildstate->pqM; i++) { + for (int j = 0; j < buildstate->pqKsub; j++) { + float *x = DatumGetVector(buildstate->pqTable + ((i * buildstate->pqKsub + j) * itemSize))->x; + tab[i * buildstate->pqKsub + j] = VectorInnerProduct(dsub, x, center + i * dsub); + float *pretable = &tab[i * buildstate->pqKsub + j]; + VectorMadd(1, l2Norm + (i * buildstate->pqKsub + j), MULTIPLIER, pretable, pretable); + } + } +} + +/* + * Compute precalculated table + */ +static void ComputePreTable(IvfflatBuildState *buildstate) +{ + Size size = buildstate->pqKsub * buildstate->pqM * sizeof(float); + float *l2Norm = (float *)palloc0(size); + + int dsub = buildstate->dimensions / buildstate->pqM; + Size itemSize = MAXALIGN(buildstate->typeInfo->itemSize(dsub)); + + for (int m = 0; m < buildstate->pqM; m++) { + for (int j = 0; j < buildstate->pqKsub; j++) { + float *x = DatumGetVector(buildstate->pqTable + (m * buildstate->pqKsub + j) * itemSize)->x; + l2Norm[m * buildstate->pqKsub + j] = ComputeNormL2sqr(x, dsub); + } + } + + for (int n = 0; n < buildstate->lists; n++) { + float *tab = buildstate->preComputeTable + n 
* buildstate->pqM * buildstate->pqKsub; + Vector *center = (Vector *)VectorArrayGet(buildstate->centers, n); + ComputeInnerProdAndSum(buildstate, l2Norm, center->x, tab, dsub); + } + + pfree(l2Norm); +} + +/* + * Compute PQTable + */ +static void ComputeIvfPQ(IvfflatBuildState *buildstate) +{ + MemoryContext pqCtx = AllocSetContextCreate(CurrentMemoryContext, + "Ivfflat PQ temporary context", + ALLOCSET_DEFAULT_SIZES); + MemoryContext oldCtx = MemoryContextSwitchTo(pqCtx); + + IvfComputePQTable(buildstate->residuals, buildstate->params); + MemoryContextSwitchTo(oldCtx); + MemoryContextDelete(pqCtx); +} + +/* + * Get all sample vector or residual vector to vector array + */ +static void CopyResidaulFromList(IvfflatBuildState *buildstate) +{ + if (buildstate->rlist == NIL) { + ereport(ERROR, (errmsg("when enable_pq = on, at least one vector needs to be include"))); + } + + ListCell *lc; + buildstate->residuals = VectorArrayInit( + buildstate->rlist->length, + buildstate->dimensions, + buildstate->typeInfo->itemSize(buildstate->dimensions) + ); + + foreach (lc, buildstate->rlist) { + Vector *vec = (Vector *)lfirst(lc); + Datum value = PointerGetDatum(vec); + value = PointerGetDatum(PG_DETOAST_DATUM(value)); + VectorArraySet(buildstate->residuals, buildstate->residuals->length, DatumGetPointer(value)); + buildstate->residuals->length++; + } + list_free_deep(buildstate->rlist); +} + +/* + * Init PQParam + */ +PQParams *InitIVFPQParamsInMemory(IvfflatBuildState *buildstate) +{ + PQParams *params = (PQParams*)palloc(sizeof(PQParams)); + params->pqM = buildstate->pqM; + params->pqKsub = buildstate->pqKsub; + params->funcType = getIVFPQfunctionType(buildstate->procinfo, buildstate->normprocinfo); + params->dim = buildstate->dimensions; + params->subItemSize = buildstate->typeInfo->itemSize(buildstate->dimensions / buildstate->pqM); + params->pqTable = buildstate->pqTable; + return params; +} + +/* + * Add sample + */ +static void AddSample(Datum *values, IvfflatBuildState 
*buildstate) +{ + VectorArray samples = buildstate->samples; + int targsamples = samples->maxlen; + + /* Detoast once for all calls */ + Datum value = PointerGetDatum(PG_DETOAST_DATUM(values[0])); + + /* + * Normalize with KMEANS_NORM_PROC since spherical distance function + * expects unit vectors + */ + if (buildstate->kmeansnormprocinfo != NULL) { + if (!IvfflatCheckNorm(buildstate->kmeansnormprocinfo, buildstate->collation, value)) { + return; + } + + value = IvfflatNormValue(buildstate->typeInfo, buildstate->collation, value); + } + + /* Fill the array first; once it is full, replace random entries */ + if (samples->length < targsamples) { + VectorArraySet(samples, samples->length, DatumGetPointer(value)); + samples->length++; + } else { + if (buildstate->rowstoskip < 0) { + buildstate->rowstoskip = anl_get_next_S(samples->length, targsamples, &buildstate->rstate); + } + + if (buildstate->rowstoskip <= 0) { + /* k is an int slot index, so truncate the random fraction explicitly */ + int k = static_cast<int>(targsamples * anl_random_fract()); + Assert(k >= 0 && k < targsamples); + VectorArraySet(samples, k, DatumGetPointer(value)); + } + + buildstate->rowstoskip -= 1; + } +} + +/* + * Callback for sampling + * + * Skips NULL values and runs AddSample inside tmpCtx, which is reset after + * every tuple. + */ +static void SampleCallback(Relation index, CALLBACK_ITEM_POINTER, Datum *values, const bool *isnull, bool tupleIsAlive, + void *state) +{ + IvfflatBuildState *buildstate = (IvfflatBuildState *)state; + MemoryContext oldCtx; + + /* Skip nulls */ + if (isnull[0]) { + return; + } + + /* Use memory context since detoast can allocate */ + oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx); + + /* Add sample */ + AddSample(values, buildstate); + + /* Reset memory context */ + MemoryContextSwitchTo(oldCtx); + MemoryContextReset(buildstate->tmpCtx); +} + +/* + * Sample rows with same logic as ANALYZE + */ +static void SampleRows(IvfflatBuildState *buildstate) +{ + int targsamples = buildstate->samples->maxlen; + BlockNumber totalblocks = RelationGetNumberOfBlocks(buildstate->heap); + + buildstate->rowstoskip = -1; + + BlockSampler_Init(&buildstate->bs, totalblocks, targsamples); + + buildstate->rstate = 
anl_init_selection_state(targsamples); + while (BlockSampler_HasMore(&buildstate->bs)) { + BlockNumber targblock = BlockSampler_Next(&buildstate->bs); + + /* Scan only the sampled block: start at targblock, one block long */ tableam_index_build_scan(buildstate->heap, buildstate->index, buildstate->indexInfo, false, SampleCallback, + (void *)buildstate, NULL, targblock, 1); + } +} + +/* + * Add tuple to sort + * + * Detoasts (and, when a norm support function exists, normalizes) the value, + * picks the center with the smallest distance and stores + * (list, tid, vector, residual) in the sort. When PQ is enabled the entry at + * the tail of rlist is filled through ComputeResidual. + */ +static void AddTupleToSort(Relation index, ItemPointer tid, Datum *values, IvfflatBuildState *buildstate) +{ + double distance; + double minDistance = DBL_MAX; + int closestCenter = 0; + VectorArray centers = buildstate->centers; + TupleTableSlot *slot = buildstate->slot; + + /* Detoast once for all calls */ + Datum value = PointerGetDatum(PG_DETOAST_DATUM(values[0])); + + /* Normalize if needed */ + if (buildstate->normprocinfo != NULL) { + if (!IvfflatCheckNorm(buildstate->normprocinfo, buildstate->collation, value)) { + /* NOTE(review): the rlist entry appended by BuildCallback stays in the list untouched on this path; confirm that is intended */ return; + } + + value = IvfflatNormValue(buildstate->typeInfo, buildstate->collation, value); + } + + /* Find the list that minimizes the distance */ + for (int i = 0; i < centers->length; i++) { + distance = DatumGetFloat8(FunctionCall2Coll(buildstate->procinfo, buildstate->collation, value, + PointerGetDatum(VectorArrayGet(centers, i)))); + if (distance < minDistance) { + minDistance = distance; + closestCenter = i; + } + } + + /* residual stays NULL unless PQ is enabled with by_residual */ Vector* residual = NULL; + if (buildstate->enablePQ) { + ComputeResidual(buildstate, DatumGetVector(value), closestCenter); + if (buildstate->byResidual) { + residual = (Vector *)lfirst(buildstate->rlist->tail); + } + } + +#ifdef IVFFLAT_KMEANS_DEBUG + buildstate->inertia += minDistance; + buildstate->listSums[closestCenter] += minDistance; + buildstate->listCounts[closestCenter]++; +#endif + + /* Create a virtual tuple */ + ExecClearTuple(slot); + slot->tts_values[IVF_LISTID - 1] = Int32GetDatum(closestCenter); + slot->tts_isnull[IVF_LISTID - 1] = false; + slot->tts_values[IVF_TID - 1] = PointerGetDatum(tid); + slot->tts_isnull[IVF_TID - 1] = false; + slot->tts_values[IVF_VECTOR 
- 1] = value; + slot->tts_isnull[IVF_VECTOR - 1] = false; + slot->tts_values[IVF_RESIDUAL - 1] = residual == NULL ? NULL : PointerGetDatum(residual); + slot->tts_isnull[IVF_RESIDUAL - 1] = residual == NULL ? true : false; + ExecStoreVirtualTuple(slot); + + /* + * Add tuple to sort + * + * tuplesort_puttupleslot comment: Input data is always copied; the caller + * need not save it. + */ + tuplesort_puttupleslot(buildstate->sortstate, slot); + + buildstate->indtuples++; +} + +/* + * Callback for table_index_build_scan + * + * Skips NULL values, reserves the PQ training slot when PQ is enabled and + * runs AddTupleToSort inside tmpCtx, which is reset after every tuple. + */ +static void BuildCallback(Relation index, CALLBACK_ITEM_POINTER, Datum *values, const bool *isnull, bool tupleIsAlive, + void *state) +{ + IvfflatBuildState *buildstate = (IvfflatBuildState *)state; + MemoryContext oldCtx; + + ItemPointer tid = &hup->t_self; + + /* Skip nulls */ + if (isnull[0]) { + return; + } + + /* + * Reserve the per-tuple slot that ComputeResidual fills at the tail of + * rlist. Nothing reads the list contents unless PQ is enabled, so do not + * allocate a vector per heap tuple for plain ivfflat builds. Allocate in + * the caller's context because tmpCtx is reset below. + */ + if (buildstate->enablePQ) { + Vector *vec = InitVector(buildstate->dimensions); + buildstate->rlist = lappend(buildstate->rlist, vec); + } + + /* Use memory context since detoast can allocate */ + oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx); + + /* Add tuple to sort */ + AddTupleToSort(index, tid, values, buildstate); + + /* Reset memory context */ + MemoryContextSwitchTo(oldCtx); + MemoryContextReset(buildstate->tmpCtx); +} + +/* + * Get index tuple from sort state + * + * On success sets *list to the tuple's list id and *itup to a newly formed + * index tuple carrying the heap tid; sets *list to -1 when the sort is + * exhausted (*itup is left untouched in that case). + */ +static inline void GetNextTuple(Tuplesortstate *sortstate, TupleDesc tupdesc, TupleTableSlot *slot, IndexTuple *itup, + int *list) +{ + Datum value; + bool isnull; + + if (tuplesort_gettupleslot(sortstate, true, slot, NULL)) { + /* Use the same column constants the slot was filled with */ + *list = DatumGetInt32(heap_slot_getattr(slot, IVF_LISTID, &isnull)); + value = heap_slot_getattr(slot, IVF_VECTOR, &isnull); + + /* Form the index tuple */ + *itup = index_form_tuple(tupdesc, &value, &isnull); + (*itup)->t_tid = *((ItemPointer)DatumGetPointer(heap_slot_getattr(slot, IVF_TID, &isnull))); + } else { + *list = -1; + } +} + +/* + * Create initial entry pages + */ +static void InsertTuples(Relation index, IvfflatBuildState *buildstate, ForkNumber forkNum) +{ + int list; + IndexTuple 
itup = NULL; /* silence compiler warning */ + int64 inserted = 0; + + TupleTableSlot *slot = MakeSingleTupleTableSlot(buildstate->tupdesc); + TupleDesc tupdesc = RelationGetDescr(index); + Size pqcodesSize = buildstate->pqcodeSize; + + /* Prime the loop: list becomes -1 when the sort holds no tuples */ GetNextTuple(buildstate->sortstate, tupdesc, slot, &itup, &list); + + /* + * Check vector and pqcode can be on the same page. + * NOTE(review): emptyFreeSize - itemsize is unsigned arithmetic; if itemsize + * could exceed emptyFreeSize the reported maximum would wrap. Confirm that + * cannot happen. + */ + if (list != -1) { + Size itemsize = MAXALIGN(IndexTupleSize(itup)); + Size emptyFreeSize = BLCKSZ - sizeof(IvfflatPageOpaqueData) - SizeOfPageHeaderData - sizeof(ItemIdData); + if (emptyFreeSize < itemsize + MAXALIGN(pqcodesSize)) { + int maxPQcodeSize = ((emptyFreeSize - itemsize) / 8) * 8; + ereport(ERROR, (errmsg("vector and pqcode must be on the same page, max pq_m is %d", maxPQcodeSize))); + } + } + + /* One pass per list; tuples arrive from the sort ordered by list id */ for (int i = 0; i < buildstate->centers->length; i++) { + Buffer buf; + Page page; + GenericXLogState *state; + BlockNumber startPage; + BlockNumber insertPage; + + /* Can take a while, so ensure we can interrupt */ + /* Needs to be called when no buffer locks are held */ + CHECK_FOR_INTERRUPTS(); + + buf = IvfflatNewBuffer(index, forkNum); + IvfflatInitRegisterPage(index, &buf, &page, &state); + + startPage = BufferGetBlockNumber(buf); + + /* Get all tuples for list */ + while (list == i) { + /* Check for free space (index tuple plus its aligned PQ code) */ + Size itemsz = MAXALIGN(IndexTupleSize(itup)); + if (PageGetFreeSpace(page) < itemsz + MAXALIGN(pqcodesSize)) + IvfflatAppendPage(index, &buf, &page, &state, forkNum); + + if (buildstate->enablePQ) { + bool isnull; + Size codesize = buildstate->params->pqM * sizeof(uint8); + uint8 *pqcode = (uint8 *)palloc(codesize); + /* Encode the residual column when by_residual is set, else the indexed vector */ Datum datum = buildstate->byResidual ? 
heap_slot_getattr(slot, 4, &isnull) : index_getattr(itup, 1, tupdesc, &isnull); + + IvfComputeVectorPQCode(DatumGetVector(datum)->x, buildstate->params, pqcode); + ((PageHeader)page)->pd_upper -= MAXALIGN(pqcodesSize); + errno_t rc = memcpy_s( + ((char *)page) + ((PageHeader)page)->pd_upper, pqcodesSize, (char *)pqcode, pqcodesSize); + securec_check_c(rc, "\0", "\0"); + } + + /* Add the item */ + if (PageAddItem(page, (Item)itup, itemsz, InvalidOffsetNumber, false, false) == InvalidOffsetNumber) + elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index)); + + pfree(itup); + + UpdateProgress(PROGRESS_CREATEIDX_TUPLES_DONE, ++inserted); + + GetNextTuple(buildstate->sortstate, tupdesc, slot, &itup, &list); + } + + insertPage = BufferGetBlockNumber(buf); + + IvfflatCommitBuffer(buf, state); + + /* Set the start and insert pages */ + IvfflatUpdateList(index, buildstate->listInfo[i], insertPage, InvalidBlockNumber, startPage, forkNum); + } +} + +/* + * Initialize the build state + */ +static void InitBuildState(IvfflatBuildState *buildstate, Relation heap, Relation index, IndexInfo *indexInfo) +{ + buildstate->heap = heap; + buildstate->index = index; + buildstate->indexInfo = indexInfo; + buildstate->typeInfo = IvfflatGetTypeInfo(index); + + buildstate->lists = IvfflatGetLists(index); + buildstate->dimensions = TupleDescAttr(index->rd_att, 0)->atttypmod; + + /* Disallow varbit since require fixed dimensions */ + if (TupleDescAttr(index->rd_att, 0)->atttypid == VARBITOID) + elog(ERROR, "type not supported for ivfflat index"); + + /* Require column to have dimensions to be indexed */ + if (buildstate->dimensions < 0) + elog(ERROR, "column does not have dimensions"); + + if (buildstate->dimensions > buildstate->typeInfo->maxDimensions) + elog(ERROR, "column cannot have more than %d dimensions for ivfflat index", + buildstate->typeInfo->maxDimensions); + + buildstate->reltuples = 0; + buildstate->indtuples = 0; + + /* Get support functions */ + 
buildstate->procinfo = index_getprocinfo(index, 1, IVFFLAT_DISTANCE_PROC); + buildstate->normprocinfo = IvfflatOptionalProcInfo(index, IVFFLAT_NORM_PROC); + buildstate->kmeansnormprocinfo = IvfflatOptionalProcInfo(index, IVFFLAT_KMEANS_NORM_PROC); + buildstate->collation = index->rd_indcollation[0]; + + /* Require more than one dimension for spherical k-means */ + if (buildstate->kmeansnormprocinfo != NULL && buildstate->dimensions == 1) + elog(ERROR, "dimensions must be greater than one for this opclass"); + + /* Create tuple description for sorting: list id, heap tid, indexed vector, PQ residual */ + buildstate->tupdesc = CreateTemplateTupleDesc(IVF_NUM_COLUMNS, false); + TupleDescInitEntry(buildstate->tupdesc, (AttrNumber)IVF_LISTID, "list", INT4OID, -1, 0); + TupleDescInitEntry(buildstate->tupdesc, (AttrNumber)IVF_TID, "tid", TIDOID, -1, 0); + TupleDescInitEntry(buildstate->tupdesc, (AttrNumber)IVF_VECTOR, "vector", RelationGetDescr(index)->attrs[0].atttypid, -1, 0); + TupleDescInitEntry(buildstate->tupdesc, (AttrNumber)IVF_RESIDUAL, "residual", VECTOROID, -1, 0); + + buildstate->slot = MakeSingleTupleTableSlot(buildstate->tupdesc); + + /* One center and one ListInfo per list */ buildstate->centers = VectorArrayInit(buildstate->lists, buildstate->dimensions, + buildstate->typeInfo->itemSize(buildstate->dimensions)); + buildstate->listInfo = (ListInfo *)palloc(sizeof(ListInfo) * buildstate->lists); + + /* Scratch context reset after every tuple by the scan callbacks */ buildstate->tmpCtx = + AllocSetContextCreate(CurrentMemoryContext, "Ivfflat build temporary context", ALLOCSET_DEFAULT_SIZES); + +#ifdef IVFFLAT_KMEANS_DEBUG + buildstate->inertia = 0; + buildstate->listSums = palloc0(sizeof(double) * buildstate->lists); + buildstate->listCounts = palloc0(sizeof(int) * buildstate->lists); +#endif + buildstate->ivfleader = NULL; + /* PQ settings: reject PQ for types without PQ support or when the PQ library is not loaded */ + buildstate->enablePQ = IvfGetEnablePQ(index); + if (buildstate->enablePQ && !buildstate->typeInfo->supportPQ) { + ereport(ERROR, (errmsg("this data type cannot support ivfpq."))); + } + if (buildstate->enablePQ && !g_instance.pq_inited) { + ereport(ERROR, (errmsg("this instance has not 
currently loaded the pq dynamic library."))); + } + + buildstate->pqM = IvfGetPqM(index); + buildstate->pqKsub = IvfGetPqKsub(index); + buildstate->byResidual = IvfGetByResidual(index); + buildstate->rlist = NIL; + buildstate->residuals = NULL; + + if (buildstate->enablePQ) { + if (buildstate->dimensions % buildstate->pqM != 0) { + ereport(ERROR, (errmsg("dimensions must be divisible by pq_m, please reset pq_m."))); + } + Size subItemsize = buildstate->typeInfo->itemSize(buildstate->dimensions / buildstate->pqM); + subItemsize = MAXALIGN(subItemsize); + buildstate->pqTableSize = buildstate->pqM * buildstate->pqKsub * subItemsize; + buildstate->pqTable = (char*)palloc0(buildstate->pqTableSize); + buildstate->pqcodeSize = buildstate->pqM * sizeof(uint8); + buildstate->params = InitIVFPQParamsInMemory(buildstate); + + if (buildstate->byResidual && + (buildstate->params->funcType == IVF_PQ_DIS_L2 || buildstate->params->funcType == IVF_PQ_DIS_COSINE)) { + buildstate->preComputeTableSize = buildstate->lists * buildstate->pqM * buildstate->pqKsub; + buildstate->preComputeTable = (float*)palloc0(buildstate->preComputeTableSize * sizeof(float)); + } else { + buildstate->preComputeTableSize = 0; + buildstate->preComputeTable = NULL; + } + } else { + buildstate->pqTable = NULL; + buildstate->pqTableSize = 0; + buildstate->pqcodeSize = 0; + buildstate->params = NULL; + buildstate->preComputeTableSize = 0; + buildstate->preComputeTable = NULL; + } + buildstate->pqDistanceTable = NULL; +} + +/* + * Free resources + */ +static void FreeBuildState(IvfflatBuildState *buildstate) +{ + VectorArrayFree(buildstate->centers); + if (buildstate->residuals) { + VectorArrayFree(buildstate->residuals); + } + pfree(buildstate->listInfo); + +#ifdef IVFFLAT_KMEANS_DEBUG + pfree(buildstate->listSums); + pfree(buildstate->listCounts); +#endif + + MemoryContextDelete(buildstate->tmpCtx); +} + +/* + * Compute centers + */ +static void ComputeCenters(IvfflatBuildState *buildstate) +{ + int 
numSamples; + + /* Target 50 samples per list, with at least 10000 samples */ + /* The number of samples has a large effect on index build time */ + numSamples = buildstate->lists * 50; + if (numSamples < 10000) { + numSamples = 10000; + } + + /* No heap to sample (heap is NULL when called for the unlogged-table init fork): keep the array minimal */ + if (buildstate->heap == NULL) { + numSamples = 1; + } + + /* Sample rows */ + /* TODO Ensure within maintenance_work_mem */ + buildstate->samples = VectorArrayInit(numSamples, buildstate->dimensions, buildstate->centers->itemsize); + if (buildstate->heap != NULL) { + SampleRows(buildstate); + /* Fewer samples than lists: the build continues but the user is warned */ if (buildstate->samples->length < buildstate->lists) { + ereport(NOTICE, (errmsg("ivfflat index created with little data"), errdetail("This will cause low recall."), + errhint("Drop the index until the table has more data."))); + } + } + + /* Calculate centers */ + IvfflatBench("k-means", + IvfflatKmeans(buildstate->index, buildstate->samples, buildstate->centers, buildstate->typeInfo)); + + /* Free samples before we allocate more memory */ + VectorArrayFree(buildstate->samples); +} + +/* + * Create the metapage + * + * Writes the magic number, version, dimensions, list count and the PQ + * settings and table sizes into a new page of the given fork. + */ +static void CreateMetaPage(Relation index, IvfflatBuildState *buildstate, ForkNumber forkNum) +{ + Buffer buf; + Page page; + GenericXLogState *state; + IvfflatMetaPage metap; + + buf = IvfflatNewBuffer(index, forkNum); + IvfflatInitRegisterPage(index, &buf, &page, &state); + + /* Set metapage data */ + metap = IvfflatPageGetMeta(page); + metap->magicNumber = IVFFLAT_MAGIC_NUMBER; + metap->version = IVFFLAT_VERSION; + metap->dimensions = buildstate->dimensions; + metap->lists = buildstate->lists; + + /* set PQ info */ + metap->enablePQ = buildstate->enablePQ; + metap->pqM = buildstate->pqM; + metap->byResidual = buildstate->byResidual; + metap->pqKsub = buildstate->pqKsub; + metap->pqcodeSize = buildstate->pqcodeSize; + metap->pqPreComputeTableSize = 0; + metap->pqPreComputeTableNblk = 0; + + if (buildstate->enablePQ) { + metap->pqTableSize = (uint32)buildstate->pqTableSize; + 
metap->pqTableNblk = (uint16)( + (metap->pqTableSize + IVF_PQTABLE_STORAGE_SIZE - 1) / IVF_PQTABLE_STORAGE_SIZE); + if (buildstate->byResidual && + (buildstate->params->funcType == IVF_PQ_DIS_L2 || buildstate->params->funcType == IVF_PQ_DIS_COSINE)) { + uint64 TableLen = buildstate->lists * buildstate->pqM * buildstate->pqKsub; + metap->pqPreComputeTableSize = (uint64)TableLen * sizeof(float); + metap->pqPreComputeTableNblk = (uint32)( + (metap->pqPreComputeTableSize + IVF_PQTABLE_STORAGE_SIZE - 1) / IVF_PQTABLE_STORAGE_SIZE); + } + } else { + metap->pqTableSize = 0; + metap->pqTableNblk = 0; + } + + ((PageHeader)page)->pd_lower = ((char *)metap + sizeof(IvfflatMetaPageData)) - (char *)page; + + IvfflatCommitBuffer(buf, state); +} + +/* + * Create list pages + */ +static void CreateListPages(Relation index, VectorArray centers, int dimensions, int lists, ForkNumber forkNum, + ListInfo **listInfo) +{ + Buffer buf; + Page page; + GenericXLogState *state; + Size listSize; + IvfflatList list; + errno_t rc = EOK; + + listSize = MAXALIGN(IVFFLAT_LIST_SIZE(centers->itemsize)); + list = (IvfflatList)palloc0(listSize); + + buf = IvfflatNewBuffer(index, forkNum); + IvfflatInitRegisterPage(index, &buf, &page, &state); + + for (int i = 0; i < lists; i++) { + OffsetNumber offno; + + /* Zero memory for each list */ + MemSet(list, 0, listSize); + + /* Load list */ + list->startPage = InvalidBlockNumber; + list->insertPage = InvalidBlockNumber; + rc = memcpy_s(&list->center, VARSIZE_ANY(VectorArrayGet(centers, i)), VectorArrayGet(centers, i), VARSIZE_ANY(VectorArrayGet(centers, i))); + securec_check(rc, "\0", "\0"); + + /* Ensure free space */ + if (PageGetFreeSpace(page) < listSize) + IvfflatAppendPage(index, &buf, &page, &state, forkNum); + + /* Add the item */ + offno = PageAddItem(page, (Item)list, listSize, InvalidOffsetNumber, false, false); + if (offno == InvalidOffsetNumber) + elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index)); + + /* Save 
location info */ + (*listInfo)[i].blkno = BufferGetBlockNumber(buf); + (*listInfo)[i].offno = offno; + } + + IvfflatCommitBuffer(buf, state); + + pfree(list); +} + +#ifdef IVFFLAT_KMEANS_DEBUG +/* + * Print k-means metrics + * + * Logs the inertia and, when there is more than one list, a Davies-Bouldin + * value. Note that listSums is overwritten with per-list averages. + */ +static void PrintKmeansMetrics(IvfflatBuildState *buildstate) +{ + elog(INFO, "inertia: %.3e", buildstate->inertia); + + /* Calculate Davies-Bouldin index */ + if (buildstate->lists > 1) { + double db = 0.0; + + /* Calculate average distance */ + for (int i = 0; i < buildstate->lists; i++) { + if (buildstate->listCounts[i] > 0) + buildstate->listSums[i] /= buildstate->listCounts[i]; + } + + for (int i = 0; i < buildstate->lists; i++) { + double max = 0.0; + double distance; + + for (int j = 0; j < buildstate->lists; j++) { + if (j == i) + continue; + + distance = DatumGetFloat8(FunctionCall2Coll(buildstate->procinfo, buildstate->collation, + PointerGetDatum(VectorArrayGet(buildstate->centers, i)), + PointerGetDatum(VectorArrayGet(buildstate->centers, j)))); + /* NOTE(review): divides by the center-to-center distance; confirm it cannot be zero */ distance = (buildstate->listSums[i] + buildstate->listSums[j]) / distance; + + if (distance > max) + max = distance; + } + db += max; + } + db /= buildstate->lists; + elog(INFO, "davies-bouldin: %.3f", db); + } +} +#endif + +/* + * Within leader, wait for end of heap scan + * + * Waits for the workers, then copies the tuple counters and the shared + * rlist into the leader's build state. Returns the heap tuple count + * accumulated by the workers. + */ +static double ParallelHeapScan(IvfflatBuildState *buildstate) +{ + IvfflatShared *ivfshared = buildstate->ivfleader->ivfshared; + double reltuples; + + BgworkerListWaitFinish(&buildstate->ivfleader->nparticipanttuplesorts); + pg_memory_barrier(); + + /* all done, update to the actual number of participants */ + if (ivfshared->sharedsort != NULL) { + ivfshared->sharedsort->actualParticipants = buildstate->ivfleader->nparticipanttuplesorts; + } + + buildstate->indtuples = ivfshared->indtuples; + reltuples = ivfshared->reltuples; + /* Take over the shared list: copy it, then free the shared copy with list_free */ buildstate->rlist =list_copy(ivfshared->rlist); + list_free(ivfshared->rlist); +#ifdef IVFFLAT_KMEANS_DEBUG + buildstate->inertia = ivfshared->inertia; +#endif + + return reltuples; +} + 
+/* + * Perform a worker's portion of a parallel sort + * + * Builds a private build state seeded with the leader's centers, scans this + * worker's share of the heap into a worker tuplesort, then publishes its + * counters and residual vectors in ivfshared. + */ +static void IvfflatParallelScanAndSort(IvfflatSpool *ivfspool, IvfflatShared *ivfshared, Vector *ivfcenters) +{ + SortCoordinate coordinate; + IvfflatBuildState buildstate; + TableScanDesc scan; + double reltuples; + IndexInfo *indexInfo; + errno_t rc = EOK; + + /* Sort options, which must match AssignTuples */ + AttrNumber attNums[] = {1}; + Oid sortOperators[] = {INT4LTOID}; + Oid sortCollations[] = {InvalidOid}; + bool nullsFirstFlags[] = {false}; + + /* Initialize local tuplesort coordination state */ + coordinate = (SortCoordinate)palloc0(sizeof(SortCoordinateData)); + coordinate->isWorker = true; + coordinate->nParticipants = -1; + coordinate->sharedsort = ivfshared->sharedsort; + + /* Split the shared memory budget evenly across the requested sort states */ int sortmem = ivfshared->workmem / ivfshared->scantuplesortstates; + + /* Join parallel scan */ + indexInfo = BuildIndexInfo(ivfspool->index); + indexInfo->ii_Concurrent = false; + InitBuildState(&buildstate, ivfspool->heap, ivfspool->index, indexInfo); + /* Copy the leader's centers into this worker's build state */ Size centersSize = buildstate.centers->itemsize * buildstate.centers->maxlen; + rc = memcpy_s(buildstate.centers->items, centersSize, ivfcenters, centersSize); + securec_check(rc, "\0", "\0"); + buildstate.centers->length = buildstate.centers->maxlen; + ivfspool->sortstate = tuplesort_begin_heap(buildstate.tupdesc, 1, attNums, sortOperators, sortCollations, + nullsFirstFlags, sortmem, false, 0, 0, 1, coordinate); + buildstate.sortstate = ivfspool->sortstate; + + scan = tableam_scan_begin_parallel(ivfspool->heap, &ivfshared->heapdesc); + reltuples = tableam_index_build_scan(ivfspool->heap, ivfspool->index, indexInfo, true, BuildCallback, + (void *)&buildstate, scan); + + /* Execute this worker's part of the sort */ + tuplesort_performsort(ivfspool->sortstate); + + /* + * Record statistics and hand this worker's rlist vectors to the leader by + * copying them into ivfshared->tmpCtx. + * NOTE(review): the copies are allocated while the spinlock is held; + * confirm that is acceptable here. + */ + SpinLockAcquire(&ivfshared->mutex); + + MemoryContext oldCtx = MemoryContextSwitchTo(ivfshared->tmpCtx); + ListCell *lc; + foreach (lc, buildstate.rlist) { + Vector *vec = 
InitVector(buildstate.dimensions); + int size = VECTOR_SIZE(buildstate.dimensions); + /* Reuse the function-level rc instead of shadowing it with a new variable */ + rc = memcpy_s(vec, size, lfirst(lc), size); + securec_check_c(rc, "\0", "\0"); + ivfshared->rlist = lappend(ivfshared->rlist, vec); + } + MemoryContextSwitchTo(oldCtx); + /* The shared list holds copies, so the worker's own vectors can go */ + list_free_deep(buildstate.rlist); + + ivfshared->nparticipantsdone++; + ivfshared->reltuples += reltuples; + ivfshared->indtuples += buildstate.indtuples; +#ifdef IVFFLAT_KMEANS_DEBUG + ivfshared->inertia += buildstate.inertia; +#endif + SpinLockRelease(&ivfshared->mutex); + + /* We can end tuplesorts immediately */ + tuplesort_end(ivfspool->sortstate); + + FreeBuildState(&buildstate); +} + +/* + * Perform work within a launched parallel process + * + * Opens the heap and index named in the shared state, runs this worker's + * scan and sort, and closes the relations again. + */ +void IvfflatParallelBuildMain(const BgWorkerContext *bwc) +{ + IvfflatSpool *ivfspool; + IvfflatShared *ivfshared; + Relation heapRel; + Relation indexRel; + + ivfshared = (IvfflatShared *)bwc->bgshared; + + /* Open relations within worker */ + heapRel = heap_open(ivfshared->heaprelid, NoLock); + indexRel = index_open(ivfshared->indexrelid, NoLock); + + /* Initialize worker's own spool */ + ivfspool = (IvfflatSpool *)palloc0(sizeof(IvfflatSpool)); + ivfspool->heap = heapRel; + ivfspool->index = indexRel; + + IvfflatParallelScanAndSort(ivfspool, ivfshared, ivfshared->ivfcenters); + + /* Close relations within worker */ + index_close(indexRel, NoLock); + heap_close(heapRel, NoLock); +} + +/* + * End parallel build + * + * Releases the shared sort state and its file set, the shared centers and + * the shared temporary context. + */ +static void IvfflatParallelCleanup(const BgWorkerContext *bwc) +{ + IvfflatShared *ivfshared = (IvfflatShared *)bwc->bgshared; + + /* delete shared fileset */ + Assert(ivfshared->sharedsort); + SharedFileSetDeleteAll(&ivfshared->sharedsort->fileset); + pfree_ext(ivfshared->sharedsort); + + pfree_ext(ivfshared->ivfcenters); + MemoryContextDelete(ivfshared->tmpCtx); +} + +static IvfflatShared *IvfflatParallelInitshared(IvfflatBuildState *buildstate, int workmem, int scantuplesortstates) +{ + IvfflatShared *ivfshared; + Sharedsort 
*sharedsort; + Size estsort; + Size estcenters; + char *ivfcenters; + + /* Store shared build state, for which we reserved space */ + ivfshared = (IvfflatShared *)MemoryContextAllocZero(INSTANCE_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_STORAGE), + sizeof(IvfflatShared)); + + /* Initialize immutable state */ + ivfshared->heaprelid = RelationGetRelid(buildstate->heap); + ivfshared->indexrelid = RelationGetRelid(buildstate->index); + ivfshared->scantuplesortstates = scantuplesortstates; + SpinLockInit(&ivfshared->mutex); + + /* Initialize mutable state */ + ivfshared->nparticipantsdone = 0; + ivfshared->reltuples = 0; + ivfshared->indtuples = 0; + ivfshared->workmem = workmem; +#ifdef IVFFLAT_KMEANS_DEBUG + ivfshared->inertia = 0; +#endif + HeapParallelscanInitialize(&ivfshared->heapdesc, buildstate->heap); + + /* Store shared tuplesort-private state, for which we reserved space */ + estsort = tuplesort_estimate_shared(scantuplesortstates); + sharedsort = (Sharedsort *)MemoryContextAllocZero(INSTANCE_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_STORAGE), estsort); + tuplesort_initialize_shared(sharedsort, scantuplesortstates); + ivfshared->sharedsort = sharedsort; + /* Copy the computed centers into the shared state; workers receive them as ivfshared->ivfcenters */ + estcenters = buildstate->centers->itemsize * buildstate->lists; + ivfcenters = (char *)MemoryContextAllocZero(INSTANCE_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_STORAGE), estcenters); + errno_t rc = memcpy_s(ivfcenters, estcenters, buildstate->centers->items, estcenters); + securec_check(rc, "\0", "\0"); + ivfshared->ivfcenters = (Vector *)ivfcenters; + + /* Context the workers switch into when appending to the shared rlist */ ivfshared->tmpCtx = + AllocSetContextCreate(CurrentMemoryContext, "Ivfflat build temporary context", ALLOCSET_DEFAULT_SIZES); + return ivfshared; +} + +/* + * Shut down workers, destroy parallel context, and end parallel mode. 
+ */ +void IvfflatEndParallel(IvfflatLeader *ivfleader) +{ + BgworkerListSyncQuit(); + pfree_ext(ivfleader); +} + +/* + * Begin parallel build + * + * Sets up the shared state and tries to launch `request` workers. On success + * the leader state is stored in buildstate->ivfleader; when no worker could + * be launched it stays NULL and the caller builds serially. + */ +static void IvfflatBeginParallel(IvfflatBuildState *buildstate, int request, int workmem) +{ + IvfflatShared *ivfshared; + IvfflatLeader *ivfleader = (IvfflatLeader *)palloc0(sizeof(IvfflatLeader)); + + Assert(request > 0); + ivfshared = IvfflatParallelInitshared(buildstate, workmem, request); + + /* Launch workers, saving status for leader/caller */ + ivfleader->nparticipanttuplesorts = + LaunchBackgroundWorkers(request, ivfshared, IvfflatParallelBuildMain, IvfflatParallelCleanup); + + /* If no workers were successfully launched, back out (do serial build) */ + if (ivfleader->nparticipanttuplesorts == 0) { + IvfflatEndParallel(ivfleader); + return; + } + + /* Log participants */ + ereport(DEBUG1, (errmsg("using %d parallel workers", ivfleader->nparticipanttuplesorts))); + + ivfleader->ivfshared = ivfshared; + /* Save leader state now that it's clear build will be parallel */ + buildstate->ivfleader = ivfleader; +} +/* + * Run the heap scan that feeds the sort: serially when there is no leader + * state, otherwise by waiting for the workers. Falls back to the serial scan + * when the shared sort reports zero actual participants. Returns the number + * of heap tuples reported by the scan. + */ +static double AssignTupleUtility(IvfflatBuildState *buildstate) +{ + Relation heap = buildstate->heap; + Relation index = buildstate->index; + IndexInfo *indexInfo = buildstate->indexInfo; + double reltuples = 0; + + /* Fill spool using either serial or parallel heap scan */ + if (!buildstate->ivfleader) { + serial_build: + reltuples = tableam_index_build_scan(heap, index, indexInfo, true, BuildCallback, (void *)buildstate, NULL); + } else { + reltuples = ParallelHeapScan(buildstate); + IvfflatShared *ivfshared = buildstate->ivfleader->ivfshared; + int nruns = ivfshared->sharedsort->actualParticipants; + if (nruns == 0) { + /* failed to startup any bgworker, retry to do serial build */ + goto serial_build; + } + } + return reltuples; +} + +/* + * Scan table for tuples to index + */ +static void AssignTuples(IvfflatBuildState *buildstate) +{ + SortCoordinate coordinate = NULL; + int parallel_workers = 0; + 
IndexInfo *indexInfo = buildstate->indexInfo; + UtilityDesc *desc = &indexInfo->ii_desc; + int workmem; + + /* Sort options, which must match IvfflatParallelScanAndSort */ + AttrNumber attNums[] = {1}; + Oid sortOperators[] = {INT4LTOID}; + Oid sortCollations[] = {InvalidOid}; + bool nullsFirstFlags[] = {false}; + /* Memory budget passed to the parallel workers: query_mem minus SIMPLE_THRESHOLD when set, else maintenance_work_mem */ + workmem = (desc->query_mem[0] > 0) ? (desc->query_mem[0] - SIMPLE_THRESHOLD) + : u_sess->attr.attr_memory.maintenance_work_mem; + + /* Calculate parallel workers */ + if (buildstate->heap != NULL) + parallel_workers = PlanCreateIndexWorkers(buildstate->heap, indexInfo); + + /* Attempt to launch parallel worker scan when required */ + if (parallel_workers > 0) { + Assert(!indexInfo->ii_Concurrent); + IvfflatBeginParallel(buildstate, parallel_workers, workmem); + } + + /* Set up coordination state if at least one worker launched */ + if (buildstate->ivfleader) { + coordinate = (SortCoordinate)palloc0(sizeof(SortCoordinateData)); + coordinate->isWorker = false; + coordinate->nParticipants = buildstate->ivfleader->nparticipanttuplesorts; + coordinate->sharedsort = buildstate->ivfleader->ivfshared->sharedsort; + } + + /* + * Begin serial/leader tuplesort. + * NOTE(review): this sort is sized with maintenance_work_mem rather than + * the workmem value computed above; confirm that is intended. + */ + buildstate->sortstate = + tuplesort_begin_heap(buildstate->tupdesc, 1, attNums, sortOperators, sortCollations, nullsFirstFlags, + u_sess->attr.attr_memory.maintenance_work_mem, false, 0, 0, 1, coordinate); + + /* Add tuples to sort */ + if (buildstate->heap != NULL) { + buildstate->reltuples = AssignTupleUtility(buildstate); + +#ifdef IVFFLAT_KMEANS_DEBUG + PrintKmeansMetrics(buildstate); +#endif + } +} + +/* + * Create entry pages + * + * Assigns tuples to lists, sorts them, trains the PQ tables when PQ is + * enabled, writes the entry pages and ends the sort and any parallel build. + */ +static void CreateEntryPages(IvfflatBuildState *buildstate, ForkNumber forkNum) +{ + /* Assign */ + IvfflatBench("assign tuples", AssignTuples(buildstate)); + + /* Sort */ + IvfflatBench("sort tuples", tuplesort_performsort(buildstate->sortstate)); + /* Build the PQ table from the vectors collected in rlist (samples or residuals) */ + if (buildstate->enablePQ) { + CopyResidaulFromList(buildstate); + 
ComputeIvfPQ(buildstate); + /* The precomputed table is only built for residual PQ with the L2 or cosine function type */ if (buildstate->byResidual && + (buildstate->params->funcType == IVF_PQ_DIS_L2 || buildstate->params->funcType == IVF_PQ_DIS_COSINE)) + ComputePreTable(buildstate); + } + + /* Load */ + IvfflatBench("load tuples", InsertTuples(buildstate->index, buildstate, forkNum)); + + /* End sort */ + tuplesort_end(buildstate->sortstate); + + /* End parallel build */ + if (buildstate->ivfleader) { + IvfflatEndParallel(buildstate->ivfleader); + } +} + +/* + * Build the index + * + * Order of work: initialize the build state, compute the centers, write the + * metapage, the PQ pages (PQ only), the list pages and the entry pages, + * flush the PQ info (PQ only), then free the build state. + */ +static void BuildIndex(Relation heap, Relation index, IndexInfo *indexInfo, IvfflatBuildState *buildstate, + ForkNumber forkNum) +{ + InitBuildState(buildstate, heap, index, indexInfo); + + ComputeCenters(buildstate); + + /* Create pages */ + CreateMetaPage(index, buildstate, forkNum); + + if (buildstate->enablePQ) { + CreatePQPages(buildstate, forkNum); + } + + CreateListPages(index, buildstate->centers, buildstate->dimensions, buildstate->lists, forkNum, + &buildstate->listInfo); + CreateEntryPages(buildstate, forkNum); + + if (buildstate->enablePQ) { + IvfFlushPQInfo(buildstate); + } + + /* Write WAL for initialization fork since GenericXLog functions do not */ + if (forkNum == INIT_FORKNUM) + LogNewpageRange(index, forkNum, 0, RelationGetNumberOfBlocksInFork(index, forkNum), true); + + FreeBuildState(buildstate); +} + +/* + * Build the index for a logged table + * + * Builds into the main fork and returns a palloc'd result carrying the heap + * and index tuple counts. + */ +IndexBuildResult *ivfflatbuild_internal(Relation heap, Relation index, IndexInfo *indexInfo) +{ + IndexBuildResult *result; + IvfflatBuildState buildstate; + + BuildIndex(heap, index, indexInfo, &buildstate, MAIN_FORKNUM); + + result = (IndexBuildResult *)palloc(sizeof(IndexBuildResult)); + result->heap_tuples = buildstate.reltuples; + result->index_tuples = buildstate.indtuples; + + return result; +} + +/* + * Build the index for an unlogged table + * + * Builds the init fork; no heap is passed, so no rows are sampled or indexed. + */ +void ivfflatbuildempty_internal(Relation index) +{ + IndexInfo *indexInfo = BuildIndexInfo(index); + IvfflatBuildState buildstate; + + BuildIndex(NULL, 
index, indexInfo, &buildstate, INIT_FORKNUM); +} diff --git a/src/gausskernel/storage/access/datavec/ivfflat.cpp b/src/gausskernel/storage/access/datavec/ivfflat.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b8076446815620525ccf15d4f00892c36d61b3b1 --- /dev/null +++ b/src/gausskernel/storage/access/datavec/ivfflat.cpp @@ -0,0 +1,358 @@ +/* + * Copyright (c) 2024 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + * ------------------------------------------------------------------------- + * + * ivfflat.cpp + * + * IDENTIFICATION + * src/gausskernel/storage/access/datavec/ivfflat.cpp + * + * ------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <cfloat> + +#include "access/amapi.h" +#include "access/reloptions.h" +#include "commands/vacuum.h" +#include "access/datavec/ivfflat.h" +#include "utils/guc.h" +#include "utils/selfuncs.h" +#include "utils/spccache.h" + +/* + * Estimate the cost of an index scan + * + * Without an ORDER BY on the index the path is priced at DBL_MAX so it is + * never chosen; otherwise the generic estimate is scaled by the fraction of + * lists that will be probed. + */ +static void ivfflatcostestimate_internal(PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, + Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation) +{ + GenericCosts costs; + int lists; + double ratio; + double spcSeqPageCost; + Relation index; + double half = 0.5; + + /* Never use index without order */ + if (path->indexorderbys == NULL) { + *indexStartupCost = DBL_MAX; + *indexTotalCost = DBL_MAX; + *indexSelectivity = 0; + *indexCorrelation = 0; + return; + } + + 
MemSet(&costs, 0, sizeof(costs)); + + index = index_open(path->indexinfo->indexoid, NoLock); + IvfflatGetMetaPageInfo(index, &lists, NULL); + index_close(index, NoLock); + + /* Get the ratio of lists that we need to visit */ + ratio = (static_cast(u_sess->datavec_ctx.ivfflat_probes)) / lists; + if (ratio > 1.0) { + ratio = 1.0; + } + + /* + * This gives us the subset of tuples to visit. This value is passed into + * the generic cost estimator to determine the number of pages to visit + * during the index scan. + */ + costs.numIndexTuples = path->indexinfo->tuples * ratio; + + genericcostestimate(root, path, loop_count, costs.numIndexTuples, &costs.indexStartupCost, &costs.indexTotalCost, + &costs.indexSelectivity, &costs.indexCorrelation); + + get_tablespace_page_costs(path->indexinfo->reltablespace, NULL, &spcSeqPageCost); + + /* Adjust cost if needed since TOAST not included in seq scan cost */ + if (costs.numIndexPages > path->indexinfo->rel->pages && ratio < half) { + /* Change all page cost from random to sequential */ + costs.indexTotalCost -= costs.numIndexPages * (costs.spc_random_page_cost - spcSeqPageCost); + + /* Remove cost of extra pages */ + costs.indexTotalCost -= (costs.numIndexPages - path->indexinfo->rel->pages) * spcSeqPageCost; + } else { + /* Change some page cost from random to sequential */ + costs.indexTotalCost -= half * costs.numIndexPages * (costs.spc_random_page_cost - spcSeqPageCost); + } + + /* + * If the list selectivity is lower than what is returned from the generic + * cost estimator, use that. 
+ */ + if (ratio < costs.indexSelectivity) { + costs.indexSelectivity = ratio; + } + + /* Use total cost since most work happens before first tuple is returned */ + *indexStartupCost = costs.indexTotalCost; + *indexTotalCost = costs.indexTotalCost; + *indexSelectivity = costs.indexSelectivity; + *indexCorrelation = costs.indexCorrelation; +} + +/* + * Parse and validate the reloptions + */ +static bytea *ivfflatoptions_internal(Datum reloptions, bool validate) +{ + static const relopt_parse_elt tab[] = { + {"lists", RELOPT_TYPE_INT, offsetof(IvfflatOptions, lists)}, + {"enable_pq", RELOPT_TYPE_BOOL, offsetof(IvfflatOptions, enablePQ)}, + {"pq_m", RELOPT_TYPE_INT, offsetof(IvfflatOptions, pqM)}, + {"pq_ksub", RELOPT_TYPE_INT, offsetof(IvfflatOptions, pqKsub)}, + {"by_residual", RELOPT_TYPE_BOOL, offsetof(IvfflatOptions, byResidual)}, + {"parallel_workers", RELOPT_TYPE_INT, offsetof(StdRdOptions, parallel_workers)}}; + + relopt_value *options; + int numoptions; + IvfflatOptions *rdopts; + + options = parseRelOptions(reloptions, validate, RELOPT_KIND_IVFFLAT, &numoptions); + rdopts = (IvfflatOptions *)allocateReloptStruct(sizeof(IvfflatOptions), options, numoptions); + fillRelOptions((void *)rdopts, sizeof(IvfflatOptions), options, numoptions, validate, tab, lengthof(tab)); + + return (bytea *)rdopts; +} + +/* + * Validate catalog entries for the specified operator class + */ +static bool ivfflatvalidate_internal(Oid opclassoid) +{ + return true; +} + +/* + * Define index handler + * + * See https://www.postgresql.org/docs/current/index-api.html + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(ivfflathandler); +Datum ivfflathandler(PG_FUNCTION_ARGS) +{ + IndexAmRoutine *amroutine = makeNode(IndexAmRoutine); + + amroutine->amstrategies = 0; + amroutine->amsupport = 5; +#if PG_VERSION_NUM >= 130000 + amroutine->amoptsprocnum = 0; +#endif + amroutine->amcanorder = false; + amroutine->amcanorderbyop = true; + amroutine->amcanbackward = false; /* can change direction mid-scan */ + 
amroutine->amcanunique = false; + amroutine->amcanmulticol = false; + amroutine->amoptionalkey = true; + amroutine->amsearcharray = false; + amroutine->amsearchnulls = false; + amroutine->amstorage = false; + amroutine->amclusterable = false; + amroutine->ampredlocks = false; + amroutine->amcanparallel = false; + amroutine->amcaninclude = false; +#if PG_VERSION_NUM >= 130000 + amroutine->amusemaintenanceworkmem = false; /* not used during VACUUM */ + amroutine->amparallelvacuumoptions = VACUUM_OPTION_PARALLEL_BULKDEL; +#endif + amroutine->amkeytype = InvalidOid; + + /* Interface functions */ + errno_t rc; + rc = strcpy_s(amroutine->ambuildfuncname, NAMEDATALEN, "ivfflatbuild"); + securec_check(rc, "\0", "\0"); + rc = strcpy_s(amroutine->ambuildemptyfuncname, NAMEDATALEN, "ivfflatbuildempty"); + securec_check(rc, "\0", "\0"); + rc = strcpy_s(amroutine->aminsertfuncname, NAMEDATALEN, "ivfflatinsert"); + securec_check(rc, "\0", "\0"); + rc = strcpy_s(amroutine->ambulkdeletefuncname, NAMEDATALEN, "ivfflatbulkdelete"); + securec_check(rc, "\0", "\0"); + rc = strcpy_s(amroutine->amvacuumcleanupfuncname, NAMEDATALEN, "ivfflatvacuumcleanup"); + securec_check(rc, "\0", "\0"); + rc = strcpy_s(amroutine->amcostestimatefuncname, NAMEDATALEN, "ivfflatcostestimate"); + securec_check(rc, "\0", "\0"); + rc = strcpy_s(amroutine->amoptionsfuncname, NAMEDATALEN, "ivfflatoptions"); + securec_check(rc, "\0", "\0"); + rc = strcpy_s(amroutine->amvalidatefuncname, NAMEDATALEN, "ivfflatvalidate"); + securec_check(rc, "\0", "\0"); + rc = strcpy_s(amroutine->ambeginscanfuncname, NAMEDATALEN, "ivfflatbeginscan"); + securec_check(rc, "\0", "\0"); + rc = strcpy_s(amroutine->amrescanfuncname, NAMEDATALEN, "ivfflatrescan"); + securec_check(rc, "\0", "\0"); + rc = strcpy_s(amroutine->amgettuplefuncname, NAMEDATALEN, "ivfflatgettuple"); + securec_check(rc, "\0", "\0"); + rc = strcpy_s(amroutine->amendscanfuncname, NAMEDATALEN, "ivfflatendscan"); + securec_check(rc, "\0", "\0"); + + 
PG_RETURN_POINTER(amroutine); +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(ivfflatbuild); +Datum ivfflatbuild(PG_FUNCTION_ARGS) +{ + if (IsExtremeRedo()) { + elog(ERROR, "ivfflat index do not support extreme rto."); + } + Relation heap = (Relation)PG_GETARG_POINTER(0); + Relation index = (Relation)PG_GETARG_POINTER(1); + IndexInfo *indexinfo = (IndexInfo *)PG_GETARG_POINTER(2); + IndexBuildResult *result = ivfflatbuild_internal(heap, index, indexinfo); + + PG_RETURN_POINTER(result); +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(ivfflatbuildempty); +Datum ivfflatbuildempty(PG_FUNCTION_ARGS) +{ + if (IsExtremeRedo()) { + elog(ERROR, "ivfflat index do not support extreme rto."); + } + Relation index = (Relation)PG_GETARG_POINTER(0); + ivfflatbuildempty_internal(index); + + PG_RETURN_VOID(); +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(ivfflatinsert); +Datum ivfflatinsert(PG_FUNCTION_ARGS) +{ + if (IsExtremeRedo()) { + elog(ERROR, "ivfflat index do not support extreme rto."); + } + Relation rel = (Relation)PG_GETARG_POINTER(0); + Datum *values = (Datum *)PG_GETARG_POINTER(1); + bool *isnull = reinterpret_cast(PG_GETARG_POINTER(2)); + ItemPointer ht_ctid = (ItemPointer)PG_GETARG_POINTER(3); + Relation heaprel = (Relation)PG_GETARG_POINTER(4); + IndexUniqueCheck checkunique = (IndexUniqueCheck)PG_GETARG_INT32(5); + bool result = ivfflatinsert_internal(rel, values, isnull, ht_ctid, heaprel, checkunique); + + PG_RETURN_BOOL(result); +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(ivfflatbulkdelete); +Datum ivfflatbulkdelete(PG_FUNCTION_ARGS) +{ + if (IsExtremeRedo()) { + elog(ERROR, "ivfflat index do not support extreme rto."); + } + IndexVacuumInfo *info = (IndexVacuumInfo *)PG_GETARG_POINTER(0); + IndexBulkDeleteResult *volatile stats = (IndexBulkDeleteResult *)PG_GETARG_POINTER(1); + IndexBulkDeleteCallback callback = (IndexBulkDeleteCallback)PG_GETARG_POINTER(2); + void *callbackState = static_cast(PG_GETARG_POINTER(3)); + stats = ivfflatbulkdelete_internal(info, stats, callback, callbackState); + + 
PG_RETURN_POINTER(stats); +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(ivfflatvacuumcleanup); +Datum ivfflatvacuumcleanup(PG_FUNCTION_ARGS) +{ + if (IsExtremeRedo()) { + elog(ERROR, "ivfflat index do not support extreme rto."); + } + IndexVacuumInfo *info = (IndexVacuumInfo *)PG_GETARG_POINTER(0); + IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *)PG_GETARG_POINTER(1); + stats = ivfflatvacuumcleanup_internal(info, stats); + + PG_RETURN_POINTER(stats); +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(ivfflatcostestimate); +Datum ivfflatcostestimate(PG_FUNCTION_ARGS) +{ + PlannerInfo *root = (PlannerInfo *)PG_GETARG_POINTER(0); + IndexPath *path = (IndexPath *)PG_GETARG_POINTER(1); + double loopcount = static_cast(PG_GETARG_FLOAT8(2)); + Cost *startupcost = (Cost *)PG_GETARG_POINTER(3); + Cost *totalcost = (Cost *)PG_GETARG_POINTER(4); + Selectivity *selectivity = (Selectivity *)PG_GETARG_POINTER(5); + double *correlation = reinterpret_cast(PG_GETARG_POINTER(6)); + ivfflatcostestimate_internal(root, path, loopcount, startupcost, totalcost, selectivity, correlation); + + PG_RETURN_VOID(); +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(ivfflatoptions); +Datum ivfflatoptions(PG_FUNCTION_ARGS) +{ + Datum reloptions = PG_GETARG_DATUM(0); + bool validate = PG_GETARG_BOOL(1); + bytea *result = ivfflatoptions_internal(reloptions, validate); + + if (NULL != result) + PG_RETURN_BYTEA_P(result); + + PG_RETURN_NULL(); +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(ivfflatvalidate); +Datum ivfflatvalidate(PG_FUNCTION_ARGS) +{ + Oid opclassoid = PG_GETARG_OID(0); + bool result = ivfflatvalidate_internal(opclassoid); + + PG_RETURN_BOOL(result); +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(ivfflatbeginscan); +Datum ivfflatbeginscan(PG_FUNCTION_ARGS) +{ + Relation rel = (Relation)PG_GETARG_POINTER(0); + int nkeys = PG_GETARG_INT32(1); + int norderbys = PG_GETARG_INT32(2); + IndexScanDesc scan = ivfflatbeginscan_internal(rel, nkeys, norderbys); + + PG_RETURN_POINTER(scan); +} + +PGDLLEXPORT 
PG_FUNCTION_INFO_V1(ivfflatrescan); +Datum ivfflatrescan(PG_FUNCTION_ARGS) +{ + IndexScanDesc scan = (IndexScanDesc)PG_GETARG_POINTER(0); + ScanKey scankey = (ScanKey)PG_GETARG_POINTER(1); + int nkeys = PG_GETARG_INT32(2); + ScanKey orderbys = (ScanKey)PG_GETARG_POINTER(3); + int norderbys = PG_GETARG_INT32(4); + ivfflatrescan_internal(scan, scankey, nkeys, orderbys, norderbys); + + PG_RETURN_VOID(); +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(ivfflatgettuple); +Datum ivfflatgettuple(PG_FUNCTION_ARGS) +{ + IndexScanDesc scan = (IndexScanDesc)PG_GETARG_POINTER(0); + ScanDirection direction = (ScanDirection)PG_GETARG_INT32(1); + + if (NULL == scan) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("Invalid arguments for function ivfflatgettuple"))); + + bool result = ivfflatgettuple_internal(scan, direction); + + PG_RETURN_BOOL(result); +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(ivfflatendscan); +Datum ivfflatendscan(PG_FUNCTION_ARGS) +{ + IndexScanDesc scan = (IndexScanDesc)PG_GETARG_POINTER(0); + ivfflatendscan_internal(scan); + + PG_RETURN_VOID(); +} diff --git a/src/gausskernel/storage/access/datavec/ivfinsert.cpp b/src/gausskernel/storage/access/datavec/ivfinsert.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9e8816603389e0921ec9072f84769fbcae4eeb88 --- /dev/null +++ b/src/gausskernel/storage/access/datavec/ivfinsert.cpp @@ -0,0 +1,296 @@ +/* + * Copyright (c) 2024 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * ------------------------------------------------------------------------- + * + * ivfinsert.cpp + * + * IDENTIFICATION + * src/gausskernel/storage/access/datavec/ivfinsert.cpp + * + * ------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include + +#include "access/generic_xlog.h" +#include "access/datavec/ivfflat.h" +#include "storage/buf/bufmgr.h" +#include "storage/lmgr.h" +#include "utils/memutils.h" + +/* + * Find the list that minimizes the distance function + */ +static void FindInsertPage(Relation index, Datum *values, BlockNumber *insertPage, ListInfo *listInfo) +{ + double minDistance = DBL_MAX; + uint16 pqTableNblk; + uint32 pqDisTableNblk; + IvfGetPQInfoFromMetaPage(index, &pqTableNblk, NULL, &pqDisTableNblk, NULL); + BlockNumber nextblkno = IVFPQTABLE_START_BLKNO + pqTableNblk + pqDisTableNblk; + FmgrInfo *procinfo; + Oid collation; + + /* Avoid compiler warning */ + listInfo->blkno = nextblkno; + listInfo->offno = FirstOffsetNumber; + + procinfo = index_getprocinfo(index, 1, IVFFLAT_DISTANCE_PROC); + collation = index->rd_indcollation[0]; + + /* Search all list pages */ + while (BlockNumberIsValid(nextblkno)) { + Buffer cbuf; + Page cpage; + OffsetNumber maxoffno; + + cbuf = ReadBuffer(index, nextblkno); + LockBuffer(cbuf, BUFFER_LOCK_SHARE); + cpage = BufferGetPage(cbuf); + maxoffno = PageGetMaxOffsetNumber(cpage); + + for (OffsetNumber offno = FirstOffsetNumber; offno <= maxoffno; offno = OffsetNumberNext(offno)) { + IvfflatList list; + double distance; + + list = (IvfflatList)PageGetItem(cpage, PageGetItemId(cpage, offno)); + distance = + DatumGetFloat8(FunctionCall2Coll(procinfo, collation, values[0], PointerGetDatum(&list->center))); + if (distance < minDistance || !BlockNumberIsValid(*insertPage)) { + *insertPage = list->insertPage; + listInfo->blkno = nextblkno; + listInfo->offno = offno; + minDistance = distance; + } + } + + nextblkno = IvfflatPageGetOpaque(cpage)->nextblkno; + + 
UnlockReleaseBuffer(cbuf); + } +} + +static void InitPQParamsOnDisk(Relation index, PQParams *params, int dim, bool *enablePQ, + bool *byResidual, uint16 *pqcodeSize) +{ + Buffer buf; + Page page; + IvfflatMetaPage metap; + const IvfflatTypeInfo *typeInfo = IvfflatGetTypeInfo(index); + + buf = ReadBuffer(index, IVFFLAT_METAPAGE_BLKNO); + LockBuffer(buf, BUFFER_LOCK_SHARE); + page = BufferGetPage(buf); + metap = IvfflatPageGetMeta(page); + if (unlikely(metap->magicNumber != IVFFLAT_MAGIC_NUMBER)) { + UnlockReleaseBuffer(buf); + elog(ERROR, "ivfflat index is not valid"); + } + + *enablePQ = metap->enablePQ; + params->pqM = metap->pqM; + params->pqKsub = metap->pqKsub; + *byResidual = metap->byResidual; + *pqcodeSize = metap->pqcodeSize; + UnlockReleaseBuffer(buf); + + if (*enablePQ) { + FmgrInfo *procinfo = index_getprocinfo(index, 1, IVFFLAT_DISTANCE_PROC); + FmgrInfo *normprocinfo = IvfflatOptionalProcInfo(index, IVFFLAT_NORM_PROC); + params->funcType = getIVFPQfunctionType(procinfo, normprocinfo); + params->dim = dim; + params->subItemSize = typeInfo->itemSize(dim / params->pqM); + + /* Now save pqTable in the relcache entry. 
*/ + if (index->pqTable == NULL) { + MemoryContext oldcxt = MemoryContextSwitchTo(index->rd_indexcxt); + index->pqTable = IVFPQLoadPQtable(index); + (void)MemoryContextSwitchTo(oldcxt); + } + params->pqTable = index->pqTable; + } else { + params->pqTable = NULL; + } +} + +/* + * Insert a tuple into the index + */ +static void InsertTuple(Relation index, Datum *values, const bool *isnull, ItemPointer heap_tid, Relation heapRel) +{ + const IvfflatTypeInfo *typeInfo = IvfflatGetTypeInfo(index); + IndexTuple itup; + Datum value; + FmgrInfo *normprocinfo; + Buffer buf; + Page page; + GenericXLogState *state; + Size itemsz; + BlockNumber insertPage = InvalidBlockNumber; + ListInfo listInfo; + BlockNumber originalInsertPage; + PQParams params; + bool enablePQ; + bool byResidual; + uint16 pqcodeSize; + int dim = TupleDescAttr(index->rd_att, 0)->atttypmod; + + /* Detoast once for all calls */ + value = PointerGetDatum(PG_DETOAST_DATUM(values[0])); + + /* Normalize if needed */ + normprocinfo = IvfflatOptionalProcInfo(index, IVFFLAT_NORM_PROC); + if (normprocinfo != NULL) { + Oid collation = index->rd_indcollation[0]; + + if (!IvfflatCheckNorm(normprocinfo, collation, value)) { + return; + } + + value = IvfflatNormValue(typeInfo, collation, value); + } + + /* Ensure index is valid */ + IvfflatGetMetaPageInfo(index, NULL, NULL); + + InitPQParamsOnDisk(index, ¶ms, dim, &enablePQ, &byResidual, &pqcodeSize); + + /* Find the insert page - sets the page and list info */ + FindInsertPage(index, values, &insertPage, &listInfo); + Assert(BlockNumberIsValid(insertPage)); + originalInsertPage = insertPage; + + /* Form tuple */ + itup = index_form_tuple(RelationGetDescr(index), &value, isnull); + itup->t_tid = *heap_tid; + + /* Get tuple size */ + itemsz = MAXALIGN(IndexTupleSize(itup)); + Assert(itemsz <= + BLCKSZ - MAXALIGN(SizeOfPageHeaderData) - MAXALIGN(sizeof(IvfflatPageOpaqueData)) - sizeof(ItemIdData)); + + /* Find a page to insert the item */ + for (;;) { + buf = 
ReadBuffer(index, insertPage); + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + + state = GenericXLogStart(index); + page = GenericXLogRegisterBuffer(state, buf, 0); + if (PageGetFreeSpace(page) >= itemsz + MAXALIGN(pqcodeSize)) { + break; + } + + insertPage = IvfflatPageGetOpaque(page)->nextblkno; + if (BlockNumberIsValid(insertPage)) { + /* Move to next page */ + GenericXLogAbort(state); + UnlockReleaseBuffer(buf); + } else { + Buffer newbuf; + Page newpage; + + /* Add a new page */ + LockRelationForExtension(index, ExclusiveLock); + newbuf = IvfflatNewBuffer(index, MAIN_FORKNUM); + UnlockRelationForExtension(index, ExclusiveLock); + + /* Init new page */ + newpage = GenericXLogRegisterBuffer(state, newbuf, GENERIC_XLOG_FULL_IMAGE); + IvfflatInitPage(newbuf, newpage); + + /* Update insert page */ + insertPage = BufferGetBlockNumber(newbuf); + + /* Update previous buffer */ + IvfflatPageGetOpaque(page)->nextblkno = insertPage; + + /* Commit */ + GenericXLogFinish(state); + + /* Unlock previous buffer */ + UnlockReleaseBuffer(buf); + + /* Prepare new buffer */ + state = GenericXLogStart(index); + buf = newbuf; + page = GenericXLogRegisterBuffer(state, buf, 0); + break; + } + } + + if (enablePQ) { + uint8 *pqcode = (uint8 *)palloc(pqcodeSize); + float *vec = ((Vector *)value)->x; + if (byResidual) { + float *resVec = (float *)palloc(dim * sizeof(float)); + Buffer cbuf = ReadBuffer(index, listInfo.blkno); + LockBuffer(cbuf, BUFFER_LOCK_SHARE); + Page cpage = BufferGetPage(cbuf); + IvfflatList list = (IvfflatList)PageGetItem(cpage, PageGetItemId(cpage, listInfo.offno)); + + for (int i = 0; i < dim; i++) { + resVec[i] = vec[i] - list->center.x[i]; + } + vec = resVec; + UnlockReleaseBuffer(cbuf); + } + IvfComputeVectorPQCode(vec, ¶ms, pqcode); + ((PageHeader)page)->pd_upper -= MAXALIGN(pqcodeSize); + errno_t rc = memcpy_s( + ((char *)page) + ((PageHeader)page)->pd_upper, pqcodeSize, (char *)pqcode, pqcodeSize); + securec_check_c(rc, "\0", "\0"); + } + + /* Add to next offset 
*/ + if (PageAddItem(page, (Item)itup, itemsz, InvalidOffsetNumber, false, false) == InvalidOffsetNumber) + elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index)); + + IvfflatCommitBuffer(buf, state); + + /* Update the insert page */ + if (insertPage != originalInsertPage) + IvfflatUpdateList(index, listInfo, insertPage, originalInsertPage, InvalidBlockNumber, MAIN_FORKNUM); +} + +/* + * Insert a tuple into the index + */ +bool ivfflatinsert_internal(Relation index, Datum *values, const bool *isnull, ItemPointer heap_tid, Relation heap, + IndexUniqueCheck checkUnique) +{ + MemoryContext oldCtx; + MemoryContext insertCtx; + + /* Skip nulls */ + if (isnull[0]) { + return false; + } + + /* + * Use memory context since detoast, IvfflatNormValue, and + * index_form_tuple can allocate + */ + insertCtx = AllocSetContextCreate(CurrentMemoryContext, "Ivfflat insert temporary context", ALLOCSET_DEFAULT_SIZES); + oldCtx = MemoryContextSwitchTo(insertCtx); + + /* Insert tuple */ + InsertTuple(index, values, isnull, heap_tid, heap); + + /* Delete memory context */ + MemoryContextSwitchTo(oldCtx); + MemoryContextDelete(insertCtx); + + return false; +} diff --git a/src/gausskernel/storage/access/datavec/ivfkmeans.cpp b/src/gausskernel/storage/access/datavec/ivfkmeans.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c07f7c5122bd5f1dd10a173830f0caea39533ebb --- /dev/null +++ b/src/gausskernel/storage/access/datavec/ivfkmeans.cpp @@ -0,0 +1,576 @@ +/* + * Copyright (c) 2024 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. 
+ * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + * ------------------------------------------------------------------------- + * + * ivfkmeans.cpp + * + * IDENTIFICATION + * src/gausskernel/storage/access/datavec/ivfkmeans.cpp + * + * ------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include +#include + +#include "access/datavec/bitvec.h" +#include "access/datavec/halfutils.h" +#include "access/datavec/halfvec.h" +#include "access/datavec/ivfflat.h" +#include "miscadmin.h" +#include "utils/builtins.h" +#include "utils/datum.h" +#include "utils/memutils.h" +#include "access/datavec/vector.h" + +/* + * Initialize with kmeans++ + * + * https://theory.stanford.edu/~sergei/papers/kMeansPP-soda.pdf + */ +static void InitCenters(Relation index, VectorArray samples, VectorArray centers, float *lowerBound) +{ + FmgrInfo *procinfo; + Oid collation; + int64 j; + float *weight = (float *)palloc(samples->length * sizeof(float)); + int numCenters = centers->maxlen; + int numSamples = samples->length; + + procinfo = index_getprocinfo(index, 1, IVFFLAT_KMEANS_DISTANCE_PROC); + collation = index->rd_indcollation[0]; + + /* Choose an initial center uniformly at random */ + VectorArraySet(centers, 0, VectorArrayGet(samples, RandomInt() % samples->length)); + centers->length++; + + for (j = 0; j < numSamples; j++) + weight[j] = FLT_MAX; + + for (int i = 0; i < numCenters; i++) { + double sum; + double choice; + + CHECK_FOR_INTERRUPTS(); + + sum = 0.0; + + for (j = 0; j < numSamples; j++) { + Datum vec = PointerGetDatum(VectorArrayGet(samples, j)); + double distance; + + /* Only need to compute distance for new center */ + /* TODO 
Use triangle inequality to reduce distance calculations */ + distance = DatumGetFloat8( + FunctionCall2Coll(procinfo, collation, vec, PointerGetDatum(VectorArrayGet(centers, i)))); + + /* Set lower bound */ + lowerBound[j * numCenters + i] = distance; + + /* Use distance squared for weighted probability distribution */ + distance *= distance; + + if (distance < weight[j]) + weight[j] = distance; + + sum += weight[j]; + } + + /* Only compute lower bound on last iteration */ + if (i + 1 == numCenters) { + break; + } + + /* Choose new center using weighted probability distribution. */ + choice = sum * RandomDouble(); + for (j = 0; j < numSamples - 1; j++) { + choice -= weight[j]; + if (choice <= 0) + break; + } + + VectorArraySet(centers, i + 1, VectorArrayGet(samples, j)); + centers->length++; + } + + pfree(weight); +} + +/* + * Norm centers + */ +static void NormCenters(const IvfflatTypeInfo *typeInfo, Oid collation, VectorArray centers) +{ + MemoryContext normCtx = + AllocSetContextCreate(CurrentMemoryContext, "Ivfflat norm temporary context", ALLOCSET_DEFAULT_SIZES); + MemoryContext oldCtx = MemoryContextSwitchTo(normCtx); + errno_t rc = EOK; + + for (int j = 0; j < centers->length; j++) { + Datum center = PointerGetDatum(VectorArrayGet(centers, j)); + Datum newCenter = IvfflatNormValue(typeInfo, collation, center); + Size size = VARSIZE_ANY(DatumGetPointer(newCenter)); + if (size > centers->itemsize) + elog(ERROR, "safety check failed"); + + rc = memcpy_s(DatumGetPointer(center), size, DatumGetPointer(newCenter), size); + securec_check(rc, "\0", "\0"); + MemoryContextReset(normCtx); + } + + MemoryContextSwitchTo(oldCtx); + MemoryContextDelete(normCtx); +} + +/* + * Quick approach if we have no data + */ +static void RandomCenters(Relation index, VectorArray centers, const IvfflatTypeInfo *typeInfo) +{ + int dimensions = centers->dim; + FmgrInfo *normprocinfo = IvfflatOptionalProcInfo(index, IVFFLAT_KMEANS_NORM_PROC); + Oid collation = index->rd_indcollation[0]; + 
float *x = static_cast(palloc(sizeof(float) * dimensions)); + + /* Fill with random data */ + while (centers->length < centers->maxlen) { + Pointer center = VectorArrayGet(centers, centers->length); + + for (int i = 0; i < dimensions; i++) { + x[i] = static_cast(RandomDouble()); + } + + typeInfo->updateCenter(center, dimensions, x); + + centers->length++; + } + + if (normprocinfo != NULL) + NormCenters(typeInfo, collation, centers); +} + +#ifdef IVFFLAT_MEMORY +/* + * Show memory usage + */ +static void ShowMemoryUsage(MemoryContext context, Size estimatedSize) +{ + MemoryContextStats(context); + elog(INFO, "estimated memory: %zu MB", estimatedSize / (1024 * 1024)); +} +#endif + +/* + * Sum centers + */ +static void SumCenters(VectorArray samples, float *agg, int *closestCenters, const IvfflatTypeInfo *typeInfo) +{ + for (int j = 0; j < samples->length; j++) { + float *x = agg + ((int64)closestCenters[j] * samples->dim); + + typeInfo->sumCenter(VectorArrayGet(samples, j), x); + } +} + +/* + * Update centers + */ +static void UpdateCenters(float *agg, VectorArray centers, const IvfflatTypeInfo *typeInfo) +{ + for (int j = 0; j < centers->length; j++) { + float *x = agg + ((int64)j * centers->dim); + + typeInfo->updateCenter(VectorArrayGet(centers, j), centers->dim, x); + } +} + +/* + * Compute new centers + */ +static void ComputeNewCenters(VectorArray samples, float *agg, VectorArray newCenters, int *centerCounts, + int *closestCenters, FmgrInfo *normprocinfo, Oid collation, + const IvfflatTypeInfo *typeInfo) +{ + int dimensions = newCenters->dim; + int numCenters = newCenters->length; + int numSamples = samples->length; + + /* Reset sum and count */ + for (int j = 0; j < numCenters; j++) { + float *x = agg + ((int64)j * dimensions); + + for (int k = 0; k < dimensions; k++) { + x[k] = 0.0; + } + + centerCounts[j] = 0; + } + + /* Increment sum of closest center */ + SumCenters(samples, agg, closestCenters, typeInfo); + + /* Increment count of closest center */ + for 
(int j = 0; j < numSamples; j++) { + centerCounts[closestCenters[j]] += 1; + } + + /* Divide sum by count */ + for (int j = 0; j < numCenters; j++) { + float *x = agg + ((int64)j * dimensions); + + if (centerCounts[j] > 0) { + /* Double avoids overflow, but requires more memory */ + /* TODO Update bounds */ + for (int k = 0; k < dimensions; k++) { + if (isinf(x[k])) { + x[k] = x[k] > 0 ? FLT_MAX : -FLT_MAX; + } + } + + for (int k = 0; k < dimensions; k++) { + x[k] /= centerCounts[j]; + } + } else { + /* TODO Handle empty centers properly */ + for (int k = 0; k < dimensions; k++) { + x[k] = RandomDouble(); + } + } + } + + /* Set new centers */ + UpdateCenters(agg, newCenters, typeInfo); + + /* Normalize if needed */ + if (normprocinfo != NULL) + NormCenters(typeInfo, collation, newCenters); +} + +/* + * Use Elkan for performance. This requires distance function to satisfy triangle inequality. + * + * We use L2 distance for L2 (not L2 squared like index scan) + * and angular distance for inner product and cosine distance + * + * https://www.aaai.org/Papers/ICML/2003/ICML03-022.pdf + */ +static void ElkanKmeans(Relation index, VectorArray samples, VectorArray centers, const IvfflatTypeInfo *typeInfo) +{ + FmgrInfo *procinfo; + FmgrInfo *normprocinfo; + Oid collation; + int dimensions = centers->dim; + int numCenters = centers->maxlen; + int numSamples = samples->length; + VectorArray newCenters; + float *agg; + int *centerCounts; + int *closestCenters; + float *lowerBound; + float *upperBound; + float *s; + float *halfcdist; + float *newcdist; + + /* Calculate allocation sizes */ + Size samplesSize = VECTOR_ARRAY_SIZE(samples->maxlen, samples->itemsize); + Size centersSize = VECTOR_ARRAY_SIZE(centers->maxlen, centers->itemsize); + Size newCentersSize = VECTOR_ARRAY_SIZE(numCenters, centers->itemsize); + Size aggSize = sizeof(float) * (int64)numCenters * dimensions; + Size centerCountsSize = sizeof(int) * numCenters; + Size closestCentersSize = sizeof(int) * 
numSamples; + Size lowerBoundSize = sizeof(float) * numSamples * numCenters; + Size upperBoundSize = sizeof(float) * numSamples; + Size sSize = sizeof(float) * numCenters; + Size halfcdistSize = sizeof(float) * numCenters * numCenters; + Size newcdistSize = sizeof(float) * numCenters; + + /* Calculate total size */ + Size totalSize = samplesSize + centersSize + newCentersSize + aggSize + centerCountsSize + closestCentersSize + + lowerBoundSize + upperBoundSize + sSize + halfcdistSize + newcdistSize; + + /* Check memory requirements */ + /* Add one to error message to ceil */ + if (totalSize > (Size)u_sess->attr.attr_memory.maintenance_work_mem * 1024L) + ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("memory required is %zu MB, maintenance_work_mem is %d MB", + totalSize / (1024 * 1024) + 1, u_sess->attr.attr_memory.maintenance_work_mem / 1024))); + + /* Ensure indexing does not overflow */ + if (numCenters * numCenters > INT_MAX) + elog(ERROR, "Indexing overflow detected. 
Please report a bug."); + + /* Set support functions */ + procinfo = index_getprocinfo(index, 1, IVFFLAT_KMEANS_DISTANCE_PROC); + normprocinfo = IvfflatOptionalProcInfo(index, IVFFLAT_KMEANS_NORM_PROC); + collation = index->rd_indcollation[0]; + + /* Allocate space */ + /* Use float instead of double to save memory */ + agg = (float *)palloc(aggSize); + centerCounts = (int *)palloc(centerCountsSize); + closestCenters = (int *)palloc(closestCentersSize); + lowerBound = (float *)MemoryContextAllocExtended(CurrentMemoryContext, lowerBoundSize, MCXT_ALLOC_HUGE); + upperBound = (float *)palloc(upperBoundSize); + s = (float *)palloc(sSize); + halfcdist = (float *)palloc_extended(halfcdistSize, MCXT_ALLOC_HUGE); + newcdist = (float *)palloc(newcdistSize); + + /* Initialize new centers */ + newCenters = VectorArrayInit(numCenters, dimensions, centers->itemsize); + newCenters->length = numCenters; + +#ifdef IVFFLAT_MEMORY + ShowMemoryUsage(MemoryContextGetParent(CurrentMemoryContext)); +#endif + + /* Pick initial centers */ + InitCenters(index, samples, centers, lowerBound); + + /* Assign each x to its closest initial center c(x) = argmin d(x,c) */ + for (int64 j = 0; j < numSamples; j++) { + float minDistance = FLT_MAX; + int closestCenter = 0; + + /* Find closest center */ + for (int64 k = 0; k < numCenters; k++) { + /* TODO Use Lemma 1 in k-means++ initialization */ + float distance = lowerBound[j * numCenters + k]; + + if (distance < minDistance) { + minDistance = distance; + closestCenter = k; + } + } + + upperBound[j] = minDistance; + closestCenters[j] = closestCenter; + } + + /* Give 500 iterations to converge */ + for (int iteration = 0; iteration < 500; iteration++) { + int changes = 0; + bool rjreset; + + /* Can take a while, so ensure we can interrupt */ + CHECK_FOR_INTERRUPTS(); + + /* Step 1: For all centers, compute distance */ + for (int64 j = 0; j < numCenters; j++) { + Datum vec = PointerGetDatum(VectorArrayGet(centers, j)); + + for (int64 k = j + 1; k < 
numCenters; k++) { + float distance = 0.5 * DatumGetFloat8(FunctionCall2Coll(procinfo, collation, vec, + PointerGetDatum(VectorArrayGet(centers, k)))); + + halfcdist[j * numCenters + k] = distance; + halfcdist[k * numCenters + j] = distance; + } + } + + /* For all centers c, compute s(c) */ + for (int64 j = 0; j < numCenters; j++) { + float minDistance = FLT_MAX; + + for (int64 k = 0; k < numCenters; k++) { + float distance; + + if (j == k) + continue; + + distance = halfcdist[j * numCenters + k]; + if (distance < minDistance) + minDistance = distance; + } + + s[j] = minDistance; + } + + rjreset = iteration != 0; + + for (int64 j = 0; j < numSamples; j++) { + bool rj; + + /* Step 2: Identify all points x such that u(x) <= s(c(x)) */ + if (upperBound[j] <= s[closestCenters[j]]) + continue; + + rj = rjreset; + + for (int64 k = 0; k < numCenters; k++) { + Datum vec; + float dxcx; + + /* Step 3: For all remaining points x and centers c */ + if (k == closestCenters[j]) + continue; + + if (upperBound[j] <= lowerBound[j * numCenters + k]) + continue; + + if (upperBound[j] <= halfcdist[closestCenters[j] * numCenters + k]) + continue; + + vec = PointerGetDatum(VectorArrayGet(samples, j)); + + /* Step 3a */ + if (rj) { + dxcx = DatumGetFloat8(FunctionCall2Coll( + procinfo, collation, vec, PointerGetDatum(VectorArrayGet(centers, closestCenters[j])))); + + /* d(x,c(x)) computed, which is a form of d(x,c) */ + lowerBound[j * numCenters + closestCenters[j]] = dxcx; + upperBound[j] = dxcx; + + rj = false; + } else + dxcx = upperBound[j]; + + /* Step 3b */ + if (dxcx > lowerBound[j * numCenters + k] || dxcx > halfcdist[closestCenters[j] * numCenters + k]) { + float dxc = DatumGetFloat8( + FunctionCall2Coll(procinfo, collation, vec, PointerGetDatum(VectorArrayGet(centers, k)))); + + /* d(x,c) calculated */ + lowerBound[j * numCenters + k] = dxc; + + if (dxc < dxcx) { + closestCenters[j] = k; + + /* c(x) changed */ + upperBound[j] = dxc; + + changes++; + } + } + } + } + + /* Step 4: 
For each center c, let m(c) be mean of all points assigned */ + ComputeNewCenters(samples, agg, newCenters, centerCounts, closestCenters, normprocinfo, collation, typeInfo); + + /* Step 5 */ + for (int j = 0; j < numCenters; j++) + newcdist[j] = + DatumGetFloat8(FunctionCall2Coll(procinfo, collation, PointerGetDatum(VectorArrayGet(centers, j)), + PointerGetDatum(VectorArrayGet(newCenters, j)))); + + for (int64 j = 0; j < numSamples; j++) { + for (int64 k = 0; k < numCenters; k++) { + float distance = lowerBound[j * numCenters + k] - newcdist[k]; + + if (distance < 0) { + distance = 0; + } + + lowerBound[j * numCenters + k] = distance; + } + } + + /* Step 6 */ + /* We reset r(x) before Step 3 in the next iteration */ + for (int j = 0; j < numSamples; j++) { + upperBound[j] += newcdist[closestCenters[j]]; + } + + /* Step 7 */ + for (int j = 0; j < numCenters; j++) { + VectorArraySet(centers, j, VectorArrayGet(newCenters, j)); + } + + if (changes == 0 && iteration != 0) { + break; + } + } +} + +/* + * Ensure no NaN or infinite values + */ +static void CheckElements(VectorArray centers, const IvfflatTypeInfo *typeInfo) +{ + float *scratch = (float *)palloc(sizeof(float) * centers->dim); + + for (int i = 0; i < centers->length; i++) { + for (int j = 0; j < centers->dim; j++) + scratch[j] = 0; + + /* /fp:fast may not propagate NaN with MSVC, but that's alright */ + typeInfo->sumCenter(VectorArrayGet(centers, i), scratch); + + for (int j = 0; j < centers->dim; j++) { + if (isnan(scratch[j])) + elog(ERROR, "NaN detected. Please report a bug."); + + if (isinf(scratch[j])) + elog(ERROR, "Infinite value detected. 
Please report a bug."); + } + } +} + +/* + * Ensure no zero vectors for cosine distance + */ +static void CheckNorms(VectorArray centers, Relation index) +{ + /* Check NORM_PROC instead of KMEANS_NORM_PROC */ + FmgrInfo *normprocinfo = IvfflatOptionalProcInfo(index, IVFFLAT_NORM_PROC); + Oid collation = index->rd_indcollation[0]; + + if (normprocinfo == NULL) { + return; + } + + for (int i = 0; i < centers->length; i++) { + double norm = + DatumGetFloat8(FunctionCall1Coll(normprocinfo, collation, PointerGetDatum(VectorArrayGet(centers, i)))); + if (norm == 0) { + elog(ERROR, "Zero norm detected. Please report a bug."); + } + } +} + +/* + * Detect issues with centers + */ +static void CheckCenters(Relation index, VectorArray centers, const IvfflatTypeInfo *typeInfo) +{ + if (centers->length != centers->maxlen) + elog(ERROR, "Not enough centers. Please report a bug."); + + CheckElements(centers, typeInfo); + CheckNorms(centers, index); +} + +/* + * Perform naive k-means centering + * We use spherical k-means for inner product and cosine + */ +void IvfflatKmeans(Relation index, VectorArray samples, VectorArray centers, const IvfflatTypeInfo *typeInfo) +{ + MemoryContext kmeansCtx = + AllocSetContextCreate(CurrentMemoryContext, "Ivfflat kmeans temporary context", ALLOCSET_DEFAULT_SIZES); + MemoryContext oldCtx = MemoryContextSwitchTo(kmeansCtx); + + if (samples->length == 0) + RandomCenters(index, centers, typeInfo); + else + ElkanKmeans(index, samples, centers, typeInfo); + + CheckCenters(index, centers, typeInfo); + + MemoryContextSwitchTo(oldCtx); + MemoryContextDelete(kmeansCtx); +} diff --git a/src/gausskernel/storage/access/datavec/ivfscan.cpp b/src/gausskernel/storage/access/datavec/ivfscan.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e0d38891a4d4da86b24aa985fd4d57fbc07a483f --- /dev/null +++ b/src/gausskernel/storage/access/datavec/ivfscan.cpp @@ -0,0 +1,633 @@ +/* + * Copyright (c) 2024 Huawei Technologies Co.,Ltd. 
+ * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + * ------------------------------------------------------------------------- + * + * ivfscan.cpp + * + * IDENTIFICATION + * src/gausskernel/storage/access/datavec/ivfscan.cpp + * + * ------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include + +#include "access/relscan.h" +#include "lib/pairingheap.h" +#include "access/datavec/ivfflat.h" +#include "miscadmin.h" +#include "pgstat.h" +#include "storage/buf/bufmgr.h" + +/* + * Compare list distances + */ +static int CompareLists(const pairingheap_node *a, const pairingheap_node *b, void *arg) +{ + if (((const IvfflatScanList *)a)->distance < ((const IvfflatScanList *)b)->distance) { + return 1; + } + + if (((const IvfflatScanList *)a)->distance > ((const IvfflatScanList *)b)->distance) { + return -1; + } + + return 0; +} + +/* + * Get lists and sort by distance + */ +static void GetScanLists(IndexScanDesc scan, Datum value) +{ + IvfflatScanOpaque so = (IvfflatScanOpaque)scan->opaque; + uint16 pqTableNblk; + uint32 pqDisTableNblk; + IvfGetPQInfoFromMetaPage(scan->indexRelation, &pqTableNblk, NULL, &pqDisTableNblk, NULL); + BlockNumber nextblkno = IVFPQTABLE_START_BLKNO + pqTableNblk + pqDisTableNblk; + int listId = 0; + + /* Search all list pages */ + while (BlockNumberIsValid(nextblkno)) { + Buffer cbuf; + Page cpage; + OffsetNumber maxoffno; + + cbuf = ReadBuffer(scan->indexRelation, nextblkno); + LockBuffer(cbuf, BUFFER_LOCK_SHARE); + cpage = BufferGetPage(cbuf); + + maxoffno = 
PageGetMaxOffsetNumber(cpage); + + for (OffsetNumber offno = FirstOffsetNumber; offno <= maxoffno; offno = OffsetNumberNext(offno)) { + IvfflatList list = (IvfflatList)PageGetItem(cpage, PageGetItemId(cpage, offno)); + double distance; + + /* Use procinfo from the index instead of scan key for performance */ + distance = DatumGetFloat8(so->distfunc(so->procinfo, so->collation, PointerGetDatum(&list->center), value)); + + if (listId < so->listCount) { + IvfflatScanList *scanlist; + + scanlist = &so->lists[listId]; + scanlist->startPage = list->startPage; + scanlist->distance = distance; + scanlist->key = listId; + listId++; + if (so->funcType == IVFPQ_DIS_COSINE && so->byResidual) { + Vector *vd = (Vector *)DatumGetPointer(value); + scanlist->pqDistance = VectorL2SquaredDistance(so->dimensions, list->center.x, vd->x); + } else { + scanlist->pqDistance = distance; + } + /* Add to heap */ + pairingheap_add(so->listQueue, &scanlist->ph_node); + } + } + + nextblkno = IvfflatPageGetOpaque(cpage)->nextblkno; + + UnlockReleaseBuffer(cbuf); + } +} + +/* + * Get items + */ +static void GetScanItems(IndexScanDesc scan, Datum value) +{ + IvfflatScanOpaque so = (IvfflatScanOpaque)scan->opaque; + TupleDesc tupdesc = RelationGetDescr(scan->indexRelation); + double tuples = 0; + TupleTableSlot *slot = MakeSingleTupleTableSlot(so->tupdesc); + + /* + * Reuse same set of shared buffers for scan + * + * See postgres/src/backend/storage/buffer/README for description + */ + BufferAccessStrategy bas = GetAccessStrategy(BAS_BULKREAD); + + /* Search closest probes lists */ + int listCount = 0; + while (!pairingheap_is_empty(so->listQueue)) { + BlockNumber searchPage = ((IvfflatScanList *)pairingheap_remove_first(so->listQueue))->startPage; + /* Search all entry pages for list */ + bool isEmptyList = false; + bool isFirstPage = true; + while (BlockNumberIsValid(searchPage)) { + Buffer buf; + Page page; + OffsetNumber maxoffno; + + buf = ReadBufferExtended(scan->indexRelation, MAIN_FORKNUM, 
searchPage, RBM_NORMAL, bas); + LockBuffer(buf, BUFFER_LOCK_SHARE); + page = BufferGetPage(buf); + maxoffno = PageGetMaxOffsetNumber(page); + + isEmptyList = (isFirstPage && maxoffno <= 0 && !BlockNumberIsValid(IvfflatPageGetOpaque(page)->nextblkno)); + isFirstPage = false; + if (isEmptyList) { + UnlockReleaseBuffer(buf); + break; + } + + for (OffsetNumber offno = FirstOffsetNumber; offno <= maxoffno; offno = OffsetNumberNext(offno)) { + IndexTuple itup; + Datum datum; + bool isnull; + ItemId itemid = PageGetItemId(page, offno); + + itup = (IndexTuple)PageGetItem(page, itemid); + datum = index_getattr(itup, 1, tupdesc, &isnull); + + /* + * Add virtual tuple + * + * Use procinfo from the index instead of scan key for + * performance + */ + ExecClearTuple(slot); + slot->tts_values[0] = so->distfunc(so->procinfo, so->collation, datum, value); + slot->tts_isnull[0] = false; + slot->tts_values[1] = PointerGetDatum(&itup->t_tid); + slot->tts_isnull[1] = false; + ExecStoreVirtualTuple(slot); + + tuplesort_puttupleslot(so->sortstate, slot); + + tuples++; + } + + searchPage = IvfflatPageGetOpaque(page)->nextblkno; + + UnlockReleaseBuffer(buf); + } + + if (!isEmptyList) { + ++listCount; + if (listCount >= so->probes) { + break; + } + } + } + + FreeAccessStrategy(bas); + + if (tuples < 100) + ereport(DEBUG1, + (errmsg("index scan found few tuples"), errdetail("Index may have been created with little data."), + errhint("Recreate the index and possibly decrease lists."))); + + tuplesort_performsort(so->sortstate); +} + +/* + * Compare candidate distances + */ +static inline int CompareFurthestCandidates(const pairingheap_node *a, const pairingheap_node *b, void *arg) +{ + if (((const IvfpqPairingHeapNode *)a)->distance < ((const IvfpqPairingHeapNode *)b)->distance) { + return -1; + } + if (((const IvfpqPairingHeapNode *)a)->distance > ((const IvfpqPairingHeapNode *)b)->distance) { + return 1; + } + + return 0; +} + +/* + * Compare candidate blocknumber + */ +static inline int 
CompareBlknoCandidates(const pairingheap_node *a, const pairingheap_node *b, void *arg) +{ + if (((const IvfpqPairingHeapNode *)a)->indexBlk < ((const IvfpqPairingHeapNode *)b)->indexBlk) { + return -1; + } + if (((const IvfpqPairingHeapNode *)a)->indexBlk > ((const IvfpqPairingHeapNode *)b)->indexBlk) { + return 1; + } + + return 0; +} + +/* + * Get items PQ + */ +static void GetScanItemsPQ(IndexScanDesc scan, Datum value, float *simTable) +{ + IvfflatScanOpaque so = (IvfflatScanOpaque)scan->opaque; + TupleDesc tupdesc = RelationGetDescr(scan->indexRelation); + double tuples = 0; + TupleTableSlot *slot = MakeSingleTupleTableSlot(so->tupdesc); + Relation index = scan->indexRelation; + int pqM = so->pqM; + int pqKsub = so->pqKsub; + int kreorder = so->kreorder; + bool l2CosResidual = so->funcType != IVFPQ_DIS_IP && so->byResidual; + pairingheap *reOrderCandidate = pairingheap_allocate(CompareFurthestCandidates, NULL); + int canLen = 0; + + /* + * Reuse same set of shared buffers for scan + * + * See postgres/src/backend/storage/buffer/README for description + */ + BufferAccessStrategy bas = GetAccessStrategy(BAS_BULKREAD); + + /* Search closest probes lists */ + int listCount = 0; + while (!pairingheap_is_empty(so->listQueue)) { + IvfflatScanList *scanlist = (IvfflatScanList *)pairingheap_remove_first(so->listQueue); + double dis0 = so->byResidual ? 
scanlist->pqDistance : 0; + BlockNumber searchPage = scanlist->startPage; + int key = scanlist->key; + float *simTable2; + /* Search all entry pages for list */ + bool isEmptyList = false; + bool isFirstPage = true; + + if (l2CosResidual) { + /* L2 or Cosine */ + float *preComputeDisTable = (float *)index->pqDistanceTable + key * pqM * pqKsub; + simTable2 = (float *)palloc(pqM * pqKsub * sizeof(float)); + VectorMadd(pqM * pqKsub, preComputeDisTable, -2.0, simTable, simTable2); + } + + while (BlockNumberIsValid(searchPage)) { + Buffer buf; + Page page; + OffsetNumber maxoffno; + + buf = ReadBufferExtended(scan->indexRelation, MAIN_FORKNUM, searchPage, RBM_NORMAL, bas); + LockBuffer(buf, BUFFER_LOCK_SHARE); + page = BufferGetPage(buf); + maxoffno = PageGetMaxOffsetNumber(page); + + isEmptyList = (isFirstPage && maxoffno <= 0 && !BlockNumberIsValid(IvfflatPageGetOpaque(page)->nextblkno)); + isFirstPage = false; + if (isEmptyList) { + UnlockReleaseBuffer(buf); + break; + } + + for (OffsetNumber offno = FirstOffsetNumber; offno <= maxoffno; offno = OffsetNumberNext(offno)) { + IndexTuple itup; + Datum datum; + bool isnull; + uint8 *code; + double distance; + double maxDistance = DBL_MAX; + + ItemId itemid = PageGetItemId(page, offno); + + itup = (IndexTuple)PageGetItem(page, itemid); + datum = index_getattr(itup, 1, tupdesc, &isnull); + code = LoadPQCode(itup); + if (l2CosResidual) { + distance = GetPQDistance(simTable2, code, dis0, pqM, pqKsub, false); + } else { + distance = GetPQDistance(simTable, code, dis0, pqM, pqKsub, so->funcType == IVFPQ_DIS_IP); + } + + if (kreorder == 0) { + /* + * Add virtual tuple + * + * Use procinfo from the index instead of scan key for + * performance + */ + ExecClearTuple(slot); + slot->tts_values[0] = Float8GetDatum(distance); + slot->tts_isnull[0] = false; + slot->tts_values[1] = PointerGetDatum(&itup->t_tid); + slot->tts_isnull[1] = false; + ExecStoreVirtualTuple(slot); + + tuplesort_puttupleslot(so->sortstate, slot); + } else { + 
/* need reorder, add to pairingheap */ + if (canLen < kreorder) { + IvfpqPairingHeapNode *e = IvfpqCreatePairingHeapNode(distance, &itup->t_tid, searchPage, offno); + pairingheap_add(reOrderCandidate, &e->ph_node); + canLen++; + if (canLen == kreorder) { + maxDistance = ((IvfpqPairingHeapNode *)pairingheap_first(reOrderCandidate))->distance; + } + } else if (distance < maxDistance) { + IvfpqPairingHeapNode *e = (IvfpqPairingHeapNode *)pairingheap_remove_first(reOrderCandidate); + e->distance = distance; + e->heapTid = &itup->t_tid; + e->indexBlk = searchPage; + e->indexOff = offno; + pairingheap_add(reOrderCandidate, &e->ph_node); + maxDistance = ((IvfpqPairingHeapNode *)pairingheap_first(reOrderCandidate))->distance; + } + } + tuples++; + } + + searchPage = IvfflatPageGetOpaque(page)->nextblkno; + + UnlockReleaseBuffer(buf); + } + + if (!isEmptyList) { + ++listCount; + if (listCount >= so->probes) { + break; + } + } + } + + FreeAccessStrategy(bas); + + if (tuples < 100) + ereport(DEBUG1, + (errmsg("index scan found few tuples"), errdetail("Index may have been created with little data."), + errhint("Recreate the index and possibly decrease lists."))); + + if (kreorder != 0) { + pairingheap *blkOrderCandidate = pairingheap_allocate(CompareBlknoCandidates, NULL); + BlockNumber blkno = InvalidBlockNumber; + Buffer buf; + Page page; + + while (!pairingheap_is_empty(reOrderCandidate)) { + pairingheap_add(blkOrderCandidate, pairingheap_remove_first(reOrderCandidate)); + } + + while (!pairingheap_is_empty(blkOrderCandidate)) { + bool isnull; + IvfpqPairingHeapNode *node = (IvfpqPairingHeapNode *)pairingheap_remove_first(blkOrderCandidate); + + if (blkno != node->indexBlk) { + if (BlockNumberIsValid(blkno)) { + UnlockReleaseBuffer(buf); + } + blkno = node->indexBlk; + buf = ReadBufferExtended(scan->indexRelation, MAIN_FORKNUM, node->indexBlk, RBM_NORMAL, bas); + LockBuffer(buf, BUFFER_LOCK_SHARE); + page = BufferGetPage(buf); + } + + ItemId itemid = PageGetItemId(page, 
node->indexOff); + IndexTuple itup = (IndexTuple)PageGetItem(page, itemid); + Datum datum = index_getattr(itup, 1, tupdesc, &isnull); + + /* Add virtual tuple */ + ExecClearTuple(slot); + slot->tts_values[0] = so->distfunc(so->procinfo, so->collation, datum, value); + slot->tts_isnull[0] = false; + slot->tts_values[1] = PointerGetDatum(node->heapTid); + slot->tts_isnull[1] = false; + ExecStoreVirtualTuple(slot); + + tuplesort_puttupleslot(so->sortstate, slot); + } + + if (BlockNumberIsValid(blkno)) { + UnlockReleaseBuffer(buf); + } + } + + tuplesort_performsort(so->sortstate); +} + +/* + * Zero distance + */ +static Datum ZeroDistance(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2) +{ + return Float8GetDatum(0.0); +} + +/* + * Get scan value + */ +static Datum GetScanValue(IndexScanDesc scan) +{ + IvfflatScanOpaque so = (IvfflatScanOpaque)scan->opaque; + Datum value; + + if (scan->orderByData->sk_flags & SK_ISNULL) { + value = PointerGetDatum(NULL); + so->distfunc = ZeroDistance; + } else { + value = scan->orderByData->sk_argument; + so->distfunc = FunctionCall2Coll; + + /* Value should not be compressed or toasted */ + Assert(!VARATT_IS_COMPRESSED(DatumGetPointer(value))); + Assert(!VARATT_IS_EXTENDED(DatumGetPointer(value))); + + /* Normalize if needed */ + if (so->normprocinfo != NULL) + value = IvfflatNormValue(so->typeInfo, so->collation, value); + } + + return value; +} + +/* + * Prepare for an index scan + */ +IndexScanDesc ivfflatbeginscan_internal(Relation index, int nkeys, int norderbys) +{ + IndexScanDesc scan; + IvfflatScanOpaque so; + int lists; + int dimensions; + AttrNumber attNums[] = {1}; + Oid sortOperators[] = {FLOAT8LTOID}; + Oid sortCollations[] = {InvalidOid}; + bool nullsFirstFlags[] = {false}; + int probes = u_sess->datavec_ctx.ivfflat_probes; + int natts = 2; + int attDistance = 1; + int attHeaptid = 2; + + scan = RelationGetIndexScan(index, nkeys, norderbys); + + /* Get lists and dimensions from metapage */ + 
IvfflatGetMetaPageInfo(index, &lists, &dimensions); + + if (probes > lists) { + probes = lists; + } + + so = (IvfflatScanOpaque)palloc(offsetof(IvfflatScanOpaqueData, lists) + lists * sizeof(IvfflatScanList)); + so->typeInfo = IvfflatGetTypeInfo(index); + so->first = true; + so->listCount = lists; + so->probes = probes; + so->dimensions = dimensions; + so->kreorder = u_sess->datavec_ctx.ivfpq_kreorder; + + /* Set support functions */ + so->procinfo = index_getprocinfo(index, 1, IVFFLAT_DISTANCE_PROC); + so->normprocinfo = IvfflatOptionalProcInfo(index, IVFFLAT_NORM_PROC); + so->collation = index->rd_indcollation[0]; + + /* Create tuple description for sorting */ + so->tupdesc = CreateTemplateTupleDesc(natts, false); + TupleDescInitEntry(so->tupdesc, (AttrNumber)attDistance, "distance", FLOAT8OID, -1, 0); + TupleDescInitEntry(so->tupdesc, (AttrNumber)attHeaptid, "heaptid", TIDOID, -1, 0); + + /* Prep sort */ + so->sortstate = tuplesort_begin_heap(so->tupdesc, 1, attNums, sortOperators, sortCollations, nullsFirstFlags, + u_sess->attr.attr_memory.work_mem, NULL, false); + + so->slot = MakeSingleTupleTableSlot(so->tupdesc); + + so->listQueue = pairingheap_allocate(CompareLists, scan); + + GetPQInfoOnDisk(so, index); + so->pqCtx = AllocSetContextCreate(CurrentMemoryContext, "IVFPQ scan temporary context", ALLOCSET_DEFAULT_SIZES); + + scan->opaque = so; + + return scan; +} + +/* + * Start or restart an index scan + */ +void ivfflatrescan_internal(IndexScanDesc scan, ScanKey keys, int nkeys, ScanKey orderbys, int norderbys) +{ + IvfflatScanOpaque so = (IvfflatScanOpaque)scan->opaque; + errno_t rc = EOK; + + so->first = true; + pairingheap_reset(so->listQueue); + + if (keys && scan->numberOfKeys > 0) { + rc = memmove_s(scan->keyData, scan->numberOfKeys * sizeof(ScanKeyData), keys, scan->numberOfKeys * sizeof(ScanKeyData)); + securec_check(rc, "\0", "\0"); + } + + if (orderbys && scan->numberOfOrderBys > 0) { + rc = memmove_s(scan->orderByData, scan->numberOfOrderBys * 
sizeof(ScanKeyData), orderbys, scan->numberOfOrderBys * sizeof(ScanKeyData)); + securec_check(rc, "\0", "\0"); + } +} + +/* + * Fetch the next tuple in the given scan + */ +bool ivfflatgettuple_internal(IndexScanDesc scan, ScanDirection dir) +{ + IvfflatScanOpaque so = (IvfflatScanOpaque)scan->opaque; + + /* + * Index can be used to scan backward, but Postgres doesn't support + * backward scan on operators + */ + Assert(ScanDirectionIsForward(dir)); + + if (so->first) { + Datum value; + + /* Count index scan for stats */ + pgstat_count_index_scan(scan->indexRelation); + + /* Safety check */ + if (scan->orderByData == NULL) + elog(ERROR, "cannot scan ivfflat index without order"); + + /* Requires MVCC-compliant snapshot as not able to pin during sorting */ + if (!IsMVCCSnapshot(scan->xs_snapshot)) + elog(ERROR, "non-MVCC snapshots are not supported with ivfflat"); + + value = GetScanValue(scan); + + IvfflatBench("GetScanLists", GetScanLists(scan, value)); + + if (so->enablePQ) { + MemoryContext oldCxt = MemoryContextSwitchTo(so->pqCtx); + + float *simTable = (float *)palloc0(so->pqM * so->pqKsub * sizeof(float)); + IvfpqComputeQueryRelTables(so, scan->indexRelation, value, simTable); + IvfflatBench("GetScanItemsPQ", GetScanItemsPQ(scan, value, simTable)); + + MemoryContextSwitchTo(oldCxt); + } else { + IvfflatBench("GetScanItems", GetScanItems(scan, value)); + } + so->first = false; + + /* Clean up if we allocated a new value */ + if (value != scan->orderByData->sk_argument) + pfree(DatumGetPointer(value)); + } + + bool isDone = tuplesort_gettupleslot(so->sortstate, true, so->slot, NULL); + if (!isDone && !pairingheap_is_empty(so->listQueue)) { + /* End prev tuplesort of ivfflat lists group */ + tuplesort_end(so->sortstate); + + /* Reinitialize a new tuplesort of ivfflat lists group */ + AttrNumber attNums[] = {1}; + Oid sortOperators[] = {FLOAT8LTOID}; + Oid sortCollations[] = {InvalidOid}; + bool nullsFirstFlags[] = {false}; + so->sortstate = 
tuplesort_begin_heap(so->tupdesc, 1, attNums, sortOperators, sortCollations, nullsFirstFlags, + u_sess->attr.attr_memory.work_mem, NULL, false); + Datum value = GetScanValue(scan); + if (so->enablePQ) { + MemoryContext oldCxt = MemoryContextSwitchTo(so->pqCtx); + + float *simTable = (float *)palloc0(so->pqM * so->pqKsub * sizeof(float)); + IvfpqComputeQueryRelTables(so, scan->indexRelation, value, simTable); + IvfflatBench("GetScanItemsPQ", GetScanItemsPQ(scan, value, simTable)); + + MemoryContextSwitchTo(oldCxt); + } else { + IvfflatBench("GetScanItems", GetScanItems(scan, value)); + } + isDone = tuplesort_gettupleslot(so->sortstate, true, so->slot, NULL); + + /* Clean up if we allocated a new value */ + if (value != scan->orderByData->sk_argument) { + pfree(DatumGetPointer(value)); + } + } + + if (isDone) { + ItemPointer heaptid = (ItemPointer)DatumGetPointer(heap_slot_getattr(so->slot, 2, &so->isnull)); + + scan->xs_ctup.t_self = *heaptid; + scan->xs_recheck = false; + return true; + } + + return false; +} + +/* + * End a scan and release resources + */ +void ivfflatendscan_internal(IndexScanDesc scan) +{ + IvfflatScanOpaque so = (IvfflatScanOpaque)scan->opaque; + + MemoryContextDelete(so->pqCtx); + pairingheap_free(so->listQueue); + tuplesort_end(so->sortstate); + + pfree(so); + scan->opaque = NULL; +} diff --git a/src/gausskernel/storage/access/datavec/ivfutils.cpp b/src/gausskernel/storage/access/datavec/ivfutils.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d09e5b14c9557e8baaef761298d45ebf3658efa0 --- /dev/null +++ b/src/gausskernel/storage/access/datavec/ivfutils.cpp @@ -0,0 +1,578 @@ +/* + * Copyright (c) 2024 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. 
+ * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + * ------------------------------------------------------------------------- + * + * ivfutils.cpp + * + * IDENTIFICATION + * src/gausskernel/storage/access/datavec/ivfutils.cpp + * + * ------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/generic_xlog.h" +#include "access/datavec/bitvec.h" +#include "catalog/pg_type.h" +#include "fmgr.h" +#include "access/datavec/halfutils.h" +#include "access/datavec/halfvec.h" +#include "access/datavec/ivfflat.h" +#include "access/datavec/utils.h" +#include "storage/buf/bufmgr.h" + +/* + * Get the number of lists in the index + */ +int IvfflatGetLists(Relation index) +{ + IvfflatOptions *opts = (IvfflatOptions *)index->rd_options; + + if (opts) + return opts->lists; + + return IVFFLAT_DEFAULT_LISTS; +} + +/* + * Get proc + */ +FmgrInfo *IvfflatOptionalProcInfo(Relation index, uint16 procnum) +{ + if (!OidIsValid(index_getprocid(index, 1, procnum))) + return NULL; + + return index_getprocinfo(index, 1, procnum); +} + +/* + * Normalize value + */ +Datum IvfflatNormValue(const IvfflatTypeInfo *typeInfo, Oid collation, Datum value) +{ + return DirectFunctionCall1Coll(typeInfo->normalize, collation, value); +} + +/* + * Check if non-zero norm + */ +bool IvfflatCheckNorm(FmgrInfo *procinfo, Oid collation, Datum value) +{ + return DatumGetFloat8(FunctionCall1Coll(procinfo, collation, value)) > 0; +} + +/* + * New buffer + */ +Buffer IvfflatNewBuffer(Relation index, ForkNumber forkNum) +{ + Buffer buf = ReadBufferExtended(index, forkNum, P_NEW, RBM_NORMAL, NULL); + + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + return buf; +} 
+ +/* + * Init page + */ +void IvfflatInitPage(Buffer buf, Page page) +{ + PageInit(page, BufferGetPageSize(buf), sizeof(IvfflatPageOpaqueData)); + IvfflatPageGetOpaque(page)->nextblkno = InvalidBlockNumber; + IvfflatPageGetOpaque(page)->page_id = IVFFLAT_PAGE_ID; +} + +/* + * Init and register page + */ +void IvfflatInitRegisterPage(Relation index, Buffer *buf, Page *page, GenericXLogState **state) +{ + *state = GenericXLogStart(index); + *page = GenericXLogRegisterBuffer(*state, *buf, GENERIC_XLOG_FULL_IMAGE); + IvfflatInitPage(*buf, *page); +} + +/* + * Commit buffer + */ +void IvfflatCommitBuffer(Buffer buf, GenericXLogState *state) +{ + GenericXLogFinish(state); + UnlockReleaseBuffer(buf); +} + +/* + * Add a new page + * + * The order is very important!! + */ +void IvfflatAppendPage(Relation index, Buffer *buf, Page *page, GenericXLogState **state, ForkNumber forkNum) +{ + /* Get new buffer */ + Buffer newbuf = IvfflatNewBuffer(index, forkNum); + Page newpage = GenericXLogRegisterBuffer(*state, newbuf, GENERIC_XLOG_FULL_IMAGE); + + /* Update the previous buffer */ + IvfflatPageGetOpaque(*page)->nextblkno = BufferGetBlockNumber(newbuf); + + /* Init new page */ + IvfflatInitPage(newbuf, newpage); + + /* Commit */ + GenericXLogFinish(*state); + + /* Unlock */ + UnlockReleaseBuffer(*buf); + + *state = GenericXLogStart(index); + *page = GenericXLogRegisterBuffer(*state, newbuf, GENERIC_XLOG_FULL_IMAGE); + *buf = newbuf; +} + +/* + * Get the metapage info + */ +void IvfflatGetMetaPageInfo(Relation index, int *lists, int *dimensions) +{ + Buffer buf; + Page page; + IvfflatMetaPage metap; + + buf = ReadBuffer(index, IVFFLAT_METAPAGE_BLKNO); + LockBuffer(buf, BUFFER_LOCK_SHARE); + page = BufferGetPage(buf); + metap = IvfflatPageGetMeta(page); + if (unlikely(metap->magicNumber != IVFFLAT_MAGIC_NUMBER)) + elog(ERROR, "ivfflat index is not valid"); + + if (lists != NULL) + *lists = metap->lists; + + if (dimensions != NULL) + *dimensions = metap->dimensions; + + 
UnlockReleaseBuffer(buf); +} + +/* + * Update the start or insert page of a list + */ +void IvfflatUpdateList(Relation index, ListInfo listInfo, BlockNumber insertPage, BlockNumber originalInsertPage, + BlockNumber startPage, ForkNumber forkNum) +{ + Buffer buf; + Page page; + GenericXLogState *state; + IvfflatList list; + bool changed = false; + + buf = ReadBufferExtended(index, forkNum, listInfo.blkno, RBM_NORMAL, NULL); + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + state = GenericXLogStart(index); + page = GenericXLogRegisterBuffer(state, buf, 0); + list = (IvfflatList)PageGetItem(page, PageGetItemId(page, listInfo.offno)); + if (BlockNumberIsValid(insertPage) && insertPage != list->insertPage) { + /* Skip update if insert page is lower than original insert page */ + /* This is needed to prevent insert from overwriting vacuum */ + if (!BlockNumberIsValid(originalInsertPage) || insertPage >= originalInsertPage) { + list->insertPage = insertPage; + changed = true; + } + } + + if (BlockNumberIsValid(startPage) && startPage != list->startPage) { + list->startPage = startPage; + changed = true; + } + + /* Only commit if changed */ + if (changed) { + IvfflatCommitBuffer(buf, state); + } else { + GenericXLogAbort(state); + UnlockReleaseBuffer(buf); + } +} + +char* IVFPQLoadPQtable(Relation index) +{ + Buffer buf; + Page page; + uint16 nblks; + uint32 curFlushSize; + uint32 pqTableSize; + char* pqTable; + + IvfGetPQInfoFromMetaPage(index, &nblks, &pqTableSize, NULL, NULL); + pqTable = (char*)palloc0(pqTableSize); + + for (uint16 i = 0; i < nblks; i++) { + curFlushSize = (i == nblks - 1) ? 
(pqTableSize - i * IVFPQTABLE_STORAGE_SIZE) : IVFPQTABLE_STORAGE_SIZE; + buf = ReadBuffer(index, IVFPQTABLE_START_BLKNO + i); + LockBuffer(buf, BUFFER_LOCK_SHARE); + page = BufferGetPage(buf); + errno_t err = memcpy_s(pqTable + i * IVFPQTABLE_STORAGE_SIZE, curFlushSize, + PageGetContents(page), curFlushSize); + securec_check(err, "\0", "\0"); + UnlockReleaseBuffer(buf); + } + return pqTable; +} + +float* IVFPQLoadPQDisTable(Relation index) +{ + Buffer buf; + Page page; + uint16 pqTableNblk; + uint32 nblks; + uint32 curFlushSize; + uint64 pqDisTableSize; + float* disTable; + + IvfGetPQInfoFromMetaPage(index, &pqTableNblk, NULL, &nblks, &pqDisTableSize); + disTable = (float*)palloc0(pqDisTableSize); + + BlockNumber startBlkno = IVFPQTABLE_START_BLKNO + pqTableNblk; + for (uint32 i = 0; i < nblks; i++) { + curFlushSize = (i == nblks - 1) ? (pqDisTableSize - i * IVFPQTABLE_STORAGE_SIZE) : IVFPQTABLE_STORAGE_SIZE; + buf = ReadBuffer(index, startBlkno + i); + LockBuffer(buf, BUFFER_LOCK_SHARE); + page = BufferGetPage(buf); + errno_t err = memcpy_s((char*)disTable + i * IVFPQTABLE_STORAGE_SIZE, curFlushSize, + PageGetContents(page), curFlushSize); + securec_check(err, "\0", "\0"); + UnlockReleaseBuffer(buf); + } + return disTable; +} + +/* + * Get Ivfflat PQ info + */ +void GetPQInfoOnDisk(IvfflatScanOpaque so, Relation index) +{ + Buffer buf; + Page page; + IvfflatMetaPage metap; + + buf = ReadBuffer(index, IVFFLAT_METAPAGE_BLKNO); + LockBuffer(buf, BUFFER_LOCK_SHARE); + page = BufferGetPage(buf); + metap = IvfflatPageGetMeta(page); + if (unlikely(metap->magicNumber != IVFFLAT_MAGIC_NUMBER)) { + UnlockReleaseBuffer(buf); + elog(ERROR, "ivfflat index is not valid"); + } + + so->enablePQ = metap->enablePQ; + so->pqM = metap->pqM; + so->pqKsub = metap->pqKsub; + so->byResidual = metap->byResidual; + UnlockReleaseBuffer(buf); + + if (so->enablePQ) { + so->funcType = getIVFPQfunctionType(so->procinfo, so->normprocinfo); + /* Now save pqTable and pqDistanceTable in the 
relcache entry. */ + if (index->pqTable == NULL) { + MemoryContext oldcxt = MemoryContextSwitchTo(index->rd_indexcxt); + index->pqTable = IVFPQLoadPQtable(index); + (void)MemoryContextSwitchTo(oldcxt); + } + if (index->pqDistanceTable == NULL && so->byResidual && so->funcType != IVFPQ_DIS_IP) { + MemoryContext oldcxt = MemoryContextSwitchTo(index->rd_indexcxt); + index->pqDistanceTable = IVFPQLoadPQDisTable(index); + (void)MemoryContextSwitchTo(oldcxt); + } + } +} + +void IvfpqComputeQueryRelTablesInternal(IvfflatScanOpaque so, float *q, char *pqTable, bool innerPro, float *simTable) +{ + int pqM = so->pqM; + int pqKsub = so->pqKsub; + int dim = so->dimensions; + int dsub = dim / pqM; + Size subSize = MAXALIGN(so->typeInfo->itemSize(dsub)); + + for (int m = 0; m < pqM; m++) { + int offset = m * pqKsub; + float *qsubVector = q + m * dsub; + float *dis = simTable + offset; + /* one-to-many computation */ + if (innerPro) { + /* negate when GetPQDistance */ + VectorInnerProductNY(dsub, pqKsub, qsubVector, pqTable, subSize, offset, dis); + } else { + VectorL2SquaredDistanceNY(dsub, pqKsub, qsubVector, pqTable, subSize, offset, dis); + } + } +} + +/* + * Precompute some tables specific to query q, r is cluster center of PQ. 
+ */ +void IvfpqComputeQueryRelTables(IvfflatScanOpaque so, Relation index, Datum q, float *simTable) +{ + if (so->funcType == IVFPQ_DIS_IP) { + /* compute q*r */ + IvfpqComputeQueryRelTablesInternal(so, DatumGetVector(q)->x, index->pqTable, true, simTable); + } else { + /* funcType is cosine or l2 */ + if (so->byResidual) { + /* compute q*r */ + IvfpqComputeQueryRelTablesInternal(so, DatumGetVector(q)->x, index->pqTable, true, simTable); + } else { + /* compute (q-r)^2 */ + IvfpqComputeQueryRelTablesInternal(so, DatumGetVector(q)->x, index->pqTable, false, simTable); + } + } +} + +uint8 *LoadPQCode(IndexTuple itup) +{ + return (uint8 *)((char *)itup + MAXALIGN(IndexTupleSize(itup))); +} + +float GetPQDistance(float *pqDistanceTable, uint8 *code, double dis0, int pqM, int pqKsub, bool innerPro) +{ + float resDistance = 0.0; + for (int i = 0; i < pqM; i++) { + int offset = i * pqKsub + code[i]; + resDistance += pqDistanceTable[offset]; + } + return innerPro ? (dis0 - resDistance) : (dis0 + resDistance); +} + +IvfpqPairingHeapNode * IvfpqCreatePairingHeapNode(float distance, ItemPointer heapTid, + BlockNumber indexBlk, OffsetNumber indexOff) +{ + IvfpqPairingHeapNode *n = (IvfpqPairingHeapNode *)palloc(sizeof(IvfpqPairingHeapNode)); + n->distance = distance; + n->heapTid = heapTid; + n->indexBlk = indexBlk; + n->indexOff = indexOff; + return n; +} + +/* + * Get type info + */ +const IvfflatTypeInfo *IvfflatGetTypeInfo(Relation index) +{ + FmgrInfo *procinfo = IvfflatOptionalProcInfo(index, IVFFLAT_TYPE_INFO_PROC); + + if (procinfo == NULL) { + static const IvfflatTypeInfo typeInfo = {.maxDimensions = IVFFLAT_MAX_DIM, + .supportPQ = true, + .normalize = l2_normalize, + .itemSize = VectorItemSize, + .updateCenter = VectorUpdateCenter, + .sumCenter = VectorSumCenter}; + + return (&typeInfo); + } else { + return (const IvfflatTypeInfo *)DatumGetPointer(OidFunctionCall0Coll(procinfo->fn_oid, InvalidOid)); + } +} + +PGDLLEXPORT PG_FUNCTION_INFO_V1(ivfflat_halfvec_support); 
+/* Type-info support function for halfvec: twice the dimension limit, no PQ support. */
+Datum ivfflat_halfvec_support(PG_FUNCTION_ARGS)
+{
+    static const IvfflatTypeInfo typeInfo = {.maxDimensions = IVFFLAT_MAX_DIM * 2,
+                                             .supportPQ = false,
+                                             .normalize = halfvec_l2_normalize,
+                                             .itemSize = HalfvecItemSize,
+                                             .updateCenter = HalfvecUpdateCenter,
+                                             .sumCenter = HalfvecSumCenter};
+
+    PG_RETURN_POINTER(&typeInfo);
+}
+
+PGDLLEXPORT PG_FUNCTION_INFO_V1(ivfflat_bit_support);
+/* Type-info support function for bit vectors: 32x the dimension limit, no normalization, no PQ support. */
+Datum ivfflat_bit_support(PG_FUNCTION_ARGS)
+{
+    static const IvfflatTypeInfo typeInfo = {.maxDimensions = IVFFLAT_MAX_DIM * 32,
+                                             .supportPQ = false,
+                                             .normalize = NULL,
+                                             .itemSize = BitItemSize,
+                                             .updateCenter = BitUpdateCenter,
+                                             .sumCenter = BitSumCenter};
+
+    PG_RETURN_POINTER(&typeInfo);
+}
+
+/*
+ * Map the distance support function (and the presence of a norm function) to
+ * an IVF_PQ_DIS_* metric code. Raises an ERROR for any other function.
+ *
+ * NOTE(review): 8431 and 8434 are hard-coded function OIDs; presumably the L2
+ * and inner-product distance functions for vector - verify against the catalog.
+ */
+int getIVFPQfunctionType(FmgrInfo *procinfo, FmgrInfo *normprocinfo)
+{
+    if (procinfo->fn_oid == 8431) {
+        return IVF_PQ_DIS_L2;
+    } else if (procinfo->fn_oid == 8434) {
+        /* same distance function; a norm function distinguishes cosine from inner product */
+        if (normprocinfo == NULL) {
+            return IVF_PQ_DIS_IP;
+        } else {
+            return IVF_PQ_DIS_COSINE;
+        }
+    } else {
+        ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                        errmsg("current data type or distance type can't support IVFPQ.")));
+        return -1; /* not reached; keeps the compiler quiet */
+    }
+}
+
+/*
+ * Get the info related to pqTable in metapage
+ *
+ * Each output pointer may be NULL, in which case that value is not returned.
+ * Raises an ERROR if the metapage magic number does not match.
+ */
+void IvfGetPQInfoFromMetaPage(Relation index, uint16 *pqTableNblk, uint32 *pqTableSize,
+                              uint32 *pqPreComputeTableNblk, uint64 *pqPreComputeTableSize)
+{
+    Buffer buf;
+    Page page;
+    IvfflatMetaPage metap;
+
+    buf = ReadBuffer(index, IVFFLAT_METAPAGE_BLKNO);
+    LockBuffer(buf, BUFFER_LOCK_SHARE);
+    page = BufferGetPage(buf);
+    metap = IvfflatPageGetMeta(page);
+
+    if (unlikely(metap->magicNumber != IVFFLAT_MAGIC_NUMBER)) {
+        /*
+         * The comparison itself cannot throw, so no PG_TRY block is needed:
+         * release the buffer first, then raise the error.
+         */
+        UnlockReleaseBuffer(buf);
+        elog(ERROR, "ivfflat index is not valid");
+    }
+
+    if (pqTableNblk != NULL) {
+        *pqTableNblk = metap->pqTableNblk;
+    }
+    if (pqTableSize != NULL) {
+        *pqTableSize = metap->pqTableSize;
+    }
+    if (pqPreComputeTableNblk != NULL) {
+        *pqPreComputeTableNblk = metap->pqPreComputeTableNblk;
+    }
+    if (pqPreComputeTableSize != NULL) {
+        *pqPreComputeTableSize = metap->pqPreComputeTableSize;
+    }
+
+    UnlockReleaseBuffer(buf);
+}
+
+/*
+ * Get whether to enable PQ
+ */
+bool IvfGetEnablePQ(Relation index)
+{
+    IvfflatOptions *opts = (IvfflatOptions *)index->rd_options;
+
+    if (opts) {
+        return opts->enablePQ;
+    }
+
+    return GENERIC_DEFAULT_ENABLE_PQ;
+}
+
+/*
+ * Get the number of subquantizer
+ */
+int IvfGetPqM(Relation index)
+{
+    IvfflatOptions *opts = (IvfflatOptions *)index->rd_options;
+
+    if (opts) {
+        return opts->pqM;
+    }
+
+    return GENERIC_DEFAULT_PQ_M;
+}
+
+/*
+ * Get the number of centroids for each subquantizer
+ */
+int IvfGetPqKsub(Relation index)
+{
+    IvfflatOptions *opts = (IvfflatOptions *)index->rd_options;
+
+    if (opts) {
+        return opts->pqKsub;
+    }
+
+    return GENERIC_DEFAULT_PQ_KSUB;
+}
+
+/*
+ * Get whether to use residual
+ */
+int IvfGetByResidual(Relation index)
+{
+    IvfflatOptions *opts = (IvfflatOptions *)index->rd_options;
+
+    if (opts) {
+        return opts->byResidual;
+    }
+
+    return IVFPQ_DEFAULT_RESIDUAL;
+}
+
+/*
+ * Copy totalSize bytes of table into nblks consecutive index pages starting at
+ * startBlkno, IVF_PQTABLE_STORAGE_SIZE bytes per page (the last page takes the
+ * remainder), WAL-logging each page through generic xlog.
+ */
+void IvfFlushPQInfoInternal(Relation index, char* table, BlockNumber startBlkno, uint32 nblks, uint64 totalSize)
+{
+    Buffer buf;
+    Page page;
+    PageHeader p;
+    uint32 curFlushSize;
+    GenericXLogState *state;
+
+    for (uint32 i = 0; i < nblks; i++) {
+        /* 64-bit offset so that i * IVF_PQTABLE_STORAGE_SIZE cannot wrap for very large tables */
+        uint64 offset = (uint64)i * IVF_PQTABLE_STORAGE_SIZE;
+
+        curFlushSize = (i == nblks - 1) ? (totalSize - offset) : IVF_PQTABLE_STORAGE_SIZE;
+        buf = ReadBufferExtended(index, MAIN_FORKNUM, startBlkno + i, RBM_NORMAL, NULL);
+        LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+        state = GenericXLogStart(index);
+        page = GenericXLogRegisterBuffer(state, buf, 0);
+        /*
+         * Bound the copy by the per-page capacity rather than by the copy
+         * length itself, so an inconsistent nblks/totalSize pair is reported by
+         * securec_check instead of overrunning the page.
+         */
+        errno_t err = memcpy_s(PageGetContents(page), IVF_PQTABLE_STORAGE_SIZE, table + offset, curFlushSize);
+        securec_check(err, "\0", "\0");
+        p = (PageHeader)page;
+        p->pd_lower += curFlushSize;
+        MarkBufferDirty(buf);
+        IvfflatCommitBuffer(buf, state);
+    }
+}
+
+/*
+ * Flush PQ table into page during index building
+ *
+ * Block counts and sizes are read back from the metapage. The precomputed
+ * distance table is written right after the PQ table, and only for residual
+ * indexes whose metric is not inner product.
+ */
+void IvfFlushPQInfo(IvfflatBuildState *buildstate)
+{
+    Relation index = buildstate->index;
+    char* pqTable = buildstate->pqTable;
+    float* preComputeTable = buildstate->preComputeTable;
+    uint16 pqTableNblk;
+    uint32 pqTableSize;
+    uint32 pqPrecomputeTableNblk;
+    uint64 pqPrecomputeTableSize;
+
+    IvfGetPQInfoFromMetaPage(index, &pqTableNblk, &pqTableSize, &pqPrecomputeTableNblk, &pqPrecomputeTableSize);
+
+    /* Flush pq table */
+    IvfFlushPQInfoInternal(index, pqTable, IVF_PQTABLE_START_BLKNO, pqTableNblk, pqTableSize);
+    if (buildstate->byResidual && buildstate->params->funcType != IVF_PQ_DIS_IP) {
+        /* Flush pq distance table */
+        IvfFlushPQInfoInternal(index, (char*)preComputeTable,
+                               IVF_PQTABLE_START_BLKNO + pqTableNblk, pqPrecomputeTableNblk, pqPrecomputeTableSize);
+    }
+}
diff --git a/src/gausskernel/storage/access/datavec/ivfvacuum.cpp b/src/gausskernel/storage/access/datavec/ivfvacuum.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..4e4eff80a06c8e96888f47a0c2a44bccdd781e35
--- /dev/null
+++ b/src/gausskernel/storage/access/datavec/ivfvacuum.cpp
@@ -0,0 +1,170 @@
+/*
+ * Copyright (c) 2024 Huawei Technologies Co.,Ltd.
+ *
+ * openGauss is licensed under Mulan PSL v2.
+ * You can use this software according to the terms and conditions of the Mulan PSL v2.
+ * You may obtain a copy of Mulan PSL v2 at:
+ *
+ *          http://license.coscl.org.cn/MulanPSL2
+ *
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PSL v2 for more details.
+ * -------------------------------------------------------------------------
+ *
+ * ivfvacuum.cpp
+ *
+ * IDENTIFICATION
+ *        src/gausskernel/storage/access/datavec/ivfvacuum.cpp
+ *
+ * -------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/generic_xlog.h"
+#include "commands/vacuum.h"
+#include "access/datavec/ivfflat.h"
+#include "storage/buf/bufmgr.h"
+
+/*
+ * Bulk delete tuples from the index
+ *
+ * Walks every list page, then every entry page of every list, and removes the
+ * index tuples for which callback() returns true. stats is allocated here
+ * when the caller passes NULL, and is returned.
+ */
+IndexBulkDeleteResult *ivfflatbulkdelete_internal(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
+                                                  IndexBulkDeleteCallback callback, void *callbackState)
+{
+    uint16 pqTableNblk;
+    uint32 pqDisTableNblk;
+    Relation index = info->index;
+    IvfGetPQInfoFromMetaPage(index, &pqTableNblk, NULL, &pqDisTableNblk, NULL);
+    /*
+     * The list pages follow the PQ table and the PQ distance table. Use
+     * IVF_PQTABLE_START_BLKNO, the same constant IvfFlushPQInfo() writes those
+     * tables from, so both sides agree on the layout.
+     */
+    BlockNumber blkno = IVF_PQTABLE_START_BLKNO + pqTableNblk + pqDisTableNblk;
+    BufferAccessStrategy bas = GetAccessStrategy(BAS_BULKREAD);
+
+    if (stats == NULL)
+        stats = (IndexBulkDeleteResult *)palloc0(sizeof(IndexBulkDeleteResult));
+
+    /* Iterate over list pages */
+    while (BlockNumberIsValid(blkno)) {
+        Buffer cbuf;
+        Page cpage;
+        OffsetNumber coffno;
+        OffsetNumber cmaxoffno;
+        BlockNumber startPages[MaxOffsetNumber];
+        ListInfo listInfo;
+
+        cbuf = ReadBuffer(index, blkno);
+        LockBuffer(cbuf, BUFFER_LOCK_SHARE);
+        cpage = BufferGetPage(cbuf);
+
+        cmaxoffno = PageGetMaxOffsetNumber(cpage);
+
+        /* Iterate over lists */
+        for (coffno = FirstOffsetNumber; coffno <= cmaxoffno; coffno = OffsetNumberNext(coffno)) {
+            IvfflatList list = (IvfflatList)PageGetItem(cpage, PageGetItemId(cpage, coffno));
+
+            startPages[coffno - FirstOffsetNumber] = list->startPage;
+        }
+
+        /* Remember this list page, then advance; the list page lock is dropped before entry pages are visited */
+        listInfo.blkno = blkno;
+        blkno = IvfflatPageGetOpaque(cpage)->nextblkno;
+
+        UnlockReleaseBuffer(cbuf);
+
+        for (coffno = FirstOffsetNumber; coffno <= cmaxoffno; coffno = OffsetNumberNext(coffno)) {
+            BlockNumber searchPage = startPages[coffno - FirstOffsetNumber];
+            BlockNumber insertPage = InvalidBlockNumber;
+
+            /* Iterate over entry pages */
+            while (BlockNumberIsValid(searchPage)) {
+                Buffer buf;
+                Page page;
+                GenericXLogState *state;
+                OffsetNumber offno;
+                OffsetNumber maxoffno;
+                OffsetNumber deletable[MaxOffsetNumber];
+                int ndeletable;
+
+                vacuum_delay_point();
+
+                buf = ReadBufferExtended(index, MAIN_FORKNUM, searchPage, RBM_NORMAL, bas);
+
+                /*
+                 * ambulkdelete cannot delete entries from pages that are
+                 * pinned by other backends
+                 *
+                 * https://www.postgresql.org/docs/current/index-locking.html
+                 */
+                LockBufferForCleanup(buf);
+
+                state = GenericXLogStart(index);
+                page = GenericXLogRegisterBuffer(state, buf, 0);
+
+                maxoffno = PageGetMaxOffsetNumber(page);
+                ndeletable = 0;
+
+                /* Find deleted tuples */
+                for (offno = FirstOffsetNumber; offno <= maxoffno; offno = OffsetNumberNext(offno)) {
+                    IndexTuple itup = (IndexTuple)PageGetItem(page, PageGetItemId(page, offno));
+                    ItemPointer htup = &(itup->t_tid);
+
+                    if (callback(htup, callbackState, InvalidOid, InvalidBktId)) {
+                        deletable[ndeletable++] = offno;
+                        stats->tuples_removed++;
+                    } else
+                        stats->num_index_tuples++;
+                }
+
+                /* Set to first free page */
+                /* Must be set before searchPage is updated */
+                if (!BlockNumberIsValid(insertPage) && ndeletable > 0)
+                    insertPage = searchPage;
+
+                searchPage = IvfflatPageGetOpaque(page)->nextblkno;
+
+                if (ndeletable > 0) {
+                    /* Delete tuples */
+                    PageIndexMultiDelete(page, deletable, ndeletable);
+                    GenericXLogFinish(state);
+                } else
+                    GenericXLogAbort(state);
+
+                UnlockReleaseBuffer(buf);
+            }
+
+            /*
+             * Update after all tuples deleted.
+             *
+             * We don't add or delete items from lists pages, so offset won't
+             * change.
+             */
+            if (BlockNumberIsValid(insertPage)) {
+                listInfo.offno = coffno;
+                IvfflatUpdateList(index, listInfo, insertPage, InvalidBlockNumber, InvalidBlockNumber, MAIN_FORKNUM);
+            }
+        }
+    }
+
+    FreeAccessStrategy(bas);
+
+    return stats;
+}
+
+/*
+ * Clean up after a VACUUM operation
+ *
+ * Only refreshes stats->num_pages; nothing is done for analyze-only calls or
+ * when bulk delete was never run.
+ */
+IndexBulkDeleteResult *ivfflatvacuumcleanup_internal(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
+{
+    Relation rel = info->index;
+
+    if (info->analyze_only)
+        return stats;
+
+    /* stats is NULL if ambulkdelete not called */
+    /* OK to return NULL if index not changed */
+    if (stats == NULL)
+        return NULL;
+
+    stats->num_pages = RelationGetNumberOfBlocks(rel);
+
+    return stats;
+}
diff --git a/src/gausskernel/storage/access/datavec/utils.cpp b/src/gausskernel/storage/access/datavec/utils.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b877cf571840ac9971ac441cf12d210071e79c64
--- /dev/null
+++ b/src/gausskernel/storage/access/datavec/utils.cpp
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2024 Huawei Technologies Co.,Ltd.
+ *
+ * openGauss is licensed under Mulan PSL v2.
+ * You can use this software according to the terms and conditions of the Mulan PSL v2.
+ * You may obtain a copy of Mulan PSL v2 at:
+ *
+ *          http://license.coscl.org.cn/MulanPSL2
+ *
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PSL v2 for more details.
+ * -------------------------------------------------------------------------
+ *
+ * utils.cpp
+ *
+ * IDENTIFICATION
+ *        src/gausskernel/storage/access/datavec/utils.cpp
+ *
+ * -------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "utils/builtins.h"
+#include "access/datavec/utils.h"
+#include "access/datavec/halfutils.h"
+#include "access/datavec/halfvec.h"
+#include "access/datavec/bitvec.h"
+#include "access/datavec/vector.h"
+
+pq_func_t g_pq_func = {0};
+
+/* Storage size of a vector value with the given number of dimensions. */
+Size VectorItemSize(int dimensions)
+{
+    Size itemBytes = VECTOR_SIZE(dimensions);
+    return itemBytes;
+}
+
+/* Storage size of a halfvec value with the given number of dimensions. */
+Size HalfvecItemSize(int dimensions)
+{
+    Size itemBytes = HALFVEC_SIZE(dimensions);
+    return itemBytes;
+}
+
+/* Storage size of a bit string holding the given number of bits. */
+Size BitItemSize(int dimensions)
+{
+    Size itemBytes = VARBITTOTALLEN(dimensions);
+    return itemBytes;
+}
+
+/* Overwrite the vector at v with the float values in x, setting its varlena size and dimension. */
+void VectorUpdateCenter(Pointer v, int dimensions, const float *x)
+{
+    Vector *center = (Vector *)v;
+    int idx = 0;
+
+    SET_VARSIZE(center, VECTOR_SIZE(dimensions));
+    center->dim = dimensions;
+
+    while (idx < dimensions) {
+        center->x[idx] = x[idx];
+        idx++;
+    }
+}
+
+/* Same as VectorUpdateCenter, converting every value to half precision (unchecked conversion). */
+void HalfvecUpdateCenter(Pointer v, int dimensions, const float *x)
+{
+    HalfVector *center = (HalfVector *)v;
+    int idx = 0;
+
+    SET_VARSIZE(center, HALFVEC_SIZE(dimensions));
+    center->dim = dimensions;
+
+    while (idx < dimensions) {
+        center->x[idx] = Float4ToHalfUnchecked(x[idx]);
+        idx++;
+    }
+}
+
+/* Overwrite the bit string at v: bit k is set when x[k] > 0.5, most significant bit first within each byte. */
+void BitUpdateCenter(Pointer v, int dimensions, const float *x)
+{
+    VarBit *center = (VarBit *)v;
+    unsigned char *bits = VARBITS(center);
+
+    SET_VARSIZE(center, VARBITTOTALLEN(dimensions));
+    VARBITLEN(center) = dimensions;
+
+    /* clear all data bytes before setting individual bits */
+    uint32 nbytes = VARBITBYTES(center);
+    for (uint32 b = 0; b < nbytes; b++) {
+        bits[b] = 0;
+    }
+
+    for (int bit = 0; bit < dimensions; bit++) {
+        if (x[bit] > 0.5) {
+            bits[bit / 8] |= 1 << (7 - (bit % 8));
+        }
+    }
+}
+
+/* Add every component of the vector at v to the accumulator x. */
+void VectorSumCenter(Pointer v, float *x)
+{
+    Vector *item = (Vector *)v;
+    int idx = 0;
+
+    while (idx < item->dim) {
+        x[idx] += item->x[idx];
+        idx++;
+    }
+}
+
+/* Add every component of the halfvec at v, widened to float, to the accumulator x. */
+void HalfvecSumCenter(Pointer v, float *x)
+{
+    HalfVector *item = (HalfVector *)v;
+    int idx = 0;
+
+    while (idx < item->dim) {
+        x[idx] += HalfToFloat4(item->x[idx]);
+        idx++;
+    }
+}
+
+/* Add each bit (0 or 1) of the bit string at v to the accumulator x. */
+void BitSumCenter(Pointer v, float *x)
+{
+    VarBit *item = (VarBit *)v;
+    int bit = 0;
+
+    while (bit < VARBITLEN(item)) {
+        unsigned char byteVal = VARBITS(item)[bit / 8];
+        x[bit] += (float)((byteVal >> (7 - (bit % 8))) & 0x01);
+        bit++;
+    }
+}
+
+/*
+ * Allocate a vector array
+ *
+ * The item size is rounded up with MAXALIGN and the zeroed item storage is
+ * allocated in CurrentMemoryContext.
+ */
+VectorArray VectorArrayInit(int maxlen, int dimensions, Size itemsize)
+{
+    /* Ensure items are aligned to prevent UB */
+    Size alignedSize = MAXALIGN(itemsize);
+    VectorArray arr = (VectorArray)palloc(sizeof(VectorArrayData));
+
+    arr->length = 0;
+    arr->maxlen = maxlen;
+    arr->dim = dimensions;
+    arr->itemsize = alignedSize;
+    arr->items = (char *)palloc0_huge(CurrentMemoryContext, maxlen * alignedSize);
+    return arr;
+}
+
+/*
+ * Free a vector array
+ */
+void VectorArrayFree(VectorArray arr)
+{
+    if (arr->items != NULL) {
+        pfree(arr->items);
+    }
+    pfree(arr);
+}
\ No newline at end of file
diff --git a/src/gausskernel/storage/access/datavec/vecindex.cpp b/src/gausskernel/storage/access/datavec/vecindex.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..7732a36154c60c7c412b1b425183d2d86e4eb73a
--- /dev/null
+++ b/src/gausskernel/storage/access/datavec/vecindex.cpp
@@ -0,0 +1,314 @@
+/*
+ * Copyright (c) 2024 Huawei Technologies Co.,Ltd.
+ *
+ * openGauss is licensed under Mulan PSL v2.
+ * You can use this software according to the terms and conditions of the Mulan PSL v2.
+ * You may obtain a copy of Mulan PSL v2 at:
+ *
+ *          http://license.coscl.org.cn/MulanPSL2
+ *
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PSL v2 for more details.
+ * -------------------------------------------------------------------------
+ *
+ * vecindex.cpp
+ *
+ * IDENTIFICATION
+ *        src/gausskernel/storage/access/datavec/vecindex.cpp
+ *
+ * -------------------------------------------------------------------------
+ */
+#include "access/transam.h"
+#include "access/datavec/hnsw.h"
+#include "storage/procarray.h"
+#include "access/datavec/vecindex.h"
+
+/* Return the VectorScanData embedded in the scan's opaque state, or NULL for an access method other than HNSW. */
+VectorScanData *VecGetScanData(IndexScanDesc scan)
+{
+    switch (scan->indexRelation->rd_rel->relam) {
+        case HNSW_AM_OID:
+            return &((HnswScanOpaque)scan->opaque)->vs;
+        default:
+            break;
+    }
+    return NULL;
+}
+
+/* Return the maximum item size of the scan's access method, or (size_t)-1 when the method is not recognised. */
+size_t VecDefaultMaxItemSize(IndexScanDesc scan)
+{
+    switch (scan->indexRelation->rd_rel->relam) {
+        case HNSW_AM_OID:
+            return HnswDefaultMaxItemSize;
+        default:
+            break;
+    }
+    return (size_t)-1;
+}
+
+/*
+ * Get the status of xid. A status of "in progress" is double-checked: if the
+ * transaction is no longer running it is reported as committed or aborted.
+ */
+TransactionIdStatus HnswCheckXid(TransactionId xid)
+{
+    TransactionIdStatus ts = TransactionIdGetStatus(xid);
+    /* Please refer to HeapTupleSatisfiesVacuum */
+    if (ts == XID_INPROGRESS) {
+        if (TransactionIdIsInProgress(xid)) {
+            /* Inprogress */
+        } else if (TransactionIdDidCommit(xid)) {
+            ts = XID_COMMITTED;
+        } else {
+            ts = XID_ABORTED;
+        }
+    }
+    return ts;
+}
+
+/*
+ * Read xmin/xmax of the element tuple at offnum and report whether each has
+ * committed. As side effects the stored xids of aborted transactions are
+ * cleared, the item may be marked frozen, and a dead item is marked dead.
+ *
+ * Returns true when the tuple is dead; in that case *xmin and *xmax are
+ * InvalidTransactionId.
+ */
+bool VecItupGetXminXmax(Page page, OffsetNumber offnum, TransactionId oldest_xmin, TransactionId *xmin,
+                        TransactionId *xmax, bool *xminCommitted, bool *xmaxCommitted, bool isToast)
+{
+    ItemId iid = PageGetItemId(page, offnum);
+    HnswElementTuple itup = (HnswElementTuple)PageGetItem(page, iid);
+    IndexTransInfo *idxXid = (IndexTransInfo *)VecIndexTupleGetXid(itup);
+    bool isDead = false;
+    bool needCheckXmin = true;
+
+    *xminCommitted = *xmaxCommitted = false;
+
+    if (ItemIdIsDead(iid)) {
+        *xmin = InvalidTransactionId;
+        *xmax = InvalidTransactionId;
+        return true;
+    }
+
+    *xmin = idxXid->xmin;
+    *xmax = idxXid->xmax;
+
+    /* examine xmax */
+    if (TransactionIdIsValid(*xmax)) {
+        /* must be evaluated before xmax is cleared in the aborted case below */
+        bool sameXid = TransactionIdEquals(*xmin, *xmax);
+        TransactionIdStatus ts = HnswCheckXid(*xmax);
+        switch (ts) {
+            case XID_INPROGRESS:
+                if (sameXid) {
+                    needCheckXmin = false;
+                }
+                break;
+            case XID_COMMITTED:
+                *xminCommitted = *xmaxCommitted = true;
+                needCheckXmin = false;
+                break;
+            case XID_ABORTED:
+                idxXid->xmax = InvalidTransactionId;
+                *xmax = InvalidTransactionId;
+                /*
+                 * The original compared xmin with the already-cleared xmax, so
+                 * the "inserted and deleted by the same aborted xid" shortcut
+                 * never fired. An already-invalid xmin still takes the shortcut,
+                 * as it did before.
+                 */
+                if (sameXid || !TransactionIdIsValid(*xmin)) {
+                    /* xmin xmax aborted */
+                    idxXid->xmin = InvalidTransactionId;
+                    *xmin = InvalidTransactionId;
+                    needCheckXmin = false;
+                }
+                break;
+        }
+    }
+
+    /* examine xmin */
+    if (needCheckXmin) {
+        if (IndexItemIdIsFrozen(iid)) {
+            *xminCommitted = true;
+        } else if (TransactionIdIsValid(*xmin)) {
+            TransactionIdStatus ts = HnswCheckXid(*xmin);
+            switch (ts) {
+                case XID_INPROGRESS:
+                    break;
+                case XID_COMMITTED:
+                    *xminCommitted = true;
+                    break;
+                case XID_ABORTED:
+                    idxXid->xmin = InvalidTransactionId;
+                    *xmin = InvalidTransactionId;
+                    break;
+            }
+        }
+    }
+
+    /* if there is no passed oldest_xmin, we will use the current oldest_xmin */
+    if (!TransactionIdIsValid(oldest_xmin)) {
+        if (isToast) {
+            GetOldestXminForUndo(&oldest_xmin);
+        } else {
+            oldest_xmin = u_sess->utils_cxt.RecentGlobalDataXmin;
+        }
+    }
+    /* we can't do bypass in hotstandby read mode, or there will be different between index scan and seq scan */
+    if (RecoveryInProgress()) {
+        oldest_xmin = InvalidTransactionId;
+    }
+
+    if (!TransactionIdIsValid(*xmin)) {
+        isDead = true;
+    }
+    /* before we mark the tuple as DEAD because of xmax, must confirm that xmax has committed */
+    if (*xmaxCommitted && TransactionIdPrecedes(*xmax, oldest_xmin)) {
+        isDead = true;
+    }
+
+    /* before we mark the tuple as FROZEN, must confirm that xmin has committed */
+    if (IndexItemIdIsFrozen(iid)) {
+        *xmin = FrozenTransactionId;
+    } else if (*xminCommitted && TransactionIdPrecedes(*xmin, oldest_xmin)) {
+        IndexItemIdSetFrozen(iid);
+        *xmin = FrozenTransactionId;
+    }
+
+    if (isDead) {
+        ItemIdMarkDead(iid);
+        *xmin = InvalidTransactionId;
+        *xmax = InvalidTransactionId;
+        *xminCommitted = *xmaxCommitted = false;
+    }
+
+    return isDead;
+}
+
+/* Byte-wise equality of two index tuples; false if either is NULL or has size 0. */
+static bool VecItupEquals(IndexTuple itup1, IndexTuple itup2)
+{
+    if (itup1 == NULL || itup2 == NULL) {
+        return false;
+    }
+    if (IndexTupleSize(itup1) == 0 || IndexTupleSize(itup2) == 0) {
+        return false;
+    }
+    /*
+     * compare the binary directly. If these index tuples are formed from the
+     * same uheap tuple, they should be exactly the same.
+     */
+    return memcmp(itup1, itup2, IndexTupleSize(itup1)) == 0;
+}
+
+/*
+ * Visibility decision for a tuple written by the current transaction: the
+ * tuple is reported visible with *needRecheck = true, unless it is identical
+ * to the previously returned self-modified tuple, in which case it is skipped.
+ * needRecheck must not be NULL.
+ */
+static bool VecVisibilityCheckCid(IndexScanDesc scan, IndexTuple itup, bool *needRecheck)
+{
+    VectorScanData *vs = VecGetScanData(scan);
+    Assert(vs != NULL);
+
+    if (VecItupEquals((IndexTuple)vs->lastSelfModifiedItup, itup)) {
+        *needRecheck = false;
+        return false; /* tuples with same key and TID will only returned once */
+    }
+
+    /* save this index tuple as lastSelfModifiedItup */
+    /* Step1: Check that the buffer space is large enough. */
+    size_t maxItemSize = VecDefaultMaxItemSize(scan);
+    uint newSize = 0;
+    int multiSize = 2;
+    if (vs->lastSelfModifiedItup == NULL) {
+        newSize = IndexTupleSize(itup);
+    } else if (vs->lastSelfModifiedItupBufferSize < IndexTupleSize(itup)) {
+        newSize = MAX(vs->lastSelfModifiedItupBufferSize * multiSize, IndexTupleSize(itup));
+        newSize = MIN(newSize, maxItemSize);
+        pfree(vs->lastSelfModifiedItup);
+        /* do not leave a dangling pointer behind if the palloc below throws */
+        vs->lastSelfModifiedItup = NULL;
+        vs->lastSelfModifiedItupBufferSize = 0;
+    }
+    /* Step2: Extend when necessary. */
+    if (newSize != 0) {
+        vs->lastSelfModifiedItup = (char *)palloc(newSize);
+        vs->lastSelfModifiedItupBufferSize = newSize;
+    }
+    /* Step3: Save the current IndexTuple, bounded by the real size of the destination buffer. */
+    errno_t rc = 0;
+    rc = memcpy_s(vs->lastSelfModifiedItup, vs->lastSelfModifiedItupBufferSize, itup, IndexTupleSize(itup));
+    securec_check(rc, "\0", "\0");
+
+    *needRecheck = true;
+    return true; /* treat as visible, but need recheck */
+}
+
+/*
+ * Is xid visible to the snapshot? An invalid xid is never visible and a
+ * frozen xid always is. The committed argument is currently unused.
+ */
+static bool VecXidSatisfiesMVCC(TransactionId xid, bool committed, Snapshot snapshot, Buffer buffer)
+{
+    TransactionIdStatus ignore;
+
+    if (!TransactionIdIsValid(xid)) {
+        return false; /* invisible */
+    }
+    if (xid == FrozenTransactionId) {
+        return true; /* frozen */
+    }
+
+    /*
+     * We can use snapshot's xmin/xmax as fast bypass after they become valid again.
+     * Currently, snapshot's csn and xmin/xmax may be inconsistent. The reason is
+     * that there is a problem with the cooperation of committing and subtransaction.
+     */
+
+    /* we can't tell visibility by snapshot's xmin/xmax alone, check snapshot */
+    return XidVisibleInSnapshot(xid, snapshot, &ignore, (RecoveryInProgress() ? buffer : InvalidBuffer), NULL);
+}
+
+/*
+ * Visibility decision from xmin/xmax for dirty (upsert only), MVCC and NOW
+ * snapshots; any other snapshot type raises an ERROR.
+ */
+static bool VecVisibilityCheckXid(TransactionId xmin, TransactionId xmax, bool xminCommitted, bool xmaxCommitted,
+                                  Snapshot snapshot, Buffer buffer, bool isUpsert)
+{
+    if (snapshot->satisfies == SNAPSHOT_DIRTY && isUpsert) {
+        bool xmaxVisible = xmaxCommitted || TransactionIdIsCurrentTransactionId(xmax);
+        if (xmaxVisible) {
+            return false;
+        }
+        return true;
+    }
+
+    /* only support MVCC and NOW, ereport used to locate bug */
+    if (snapshot->satisfies != SNAPSHOT_VERSION_MVCC && snapshot->satisfies != SNAPSHOT_MVCC &&
+        snapshot->satisfies != SNAPSHOT_NOW) {
+        /* the message used to say "UBTree index", which is wrong for a vector index scan */
+        ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                        errmsg("unsupported snapshot type %u for vector index.", snapshot->satisfies),
+                        errhint("This kind of operation may not supported.")));
+    }
+
+    /* handle snapshot MVCC */
+    if (snapshot->satisfies == SNAPSHOT_VERSION_MVCC || snapshot->satisfies == SNAPSHOT_MVCC) {
+        if (VecXidSatisfiesMVCC(xmax, xmaxCommitted, snapshot, buffer)) {
+            return false; /* already deleted */
+        }
+        if (!VecXidSatisfiesMVCC(xmin, xminCommitted, snapshot, buffer)) {
+            return false; /* have not inserted yet */
+        }
+    }
+
+    /* handle snapshot NOW */
+    if (snapshot->satisfies == SNAPSHOT_NOW) {
+        return xminCommitted && !xmaxCommitted;
+    }
+
+    return true;
+}
+
+/*
+ * Decide whether the element tuple at offnum is visible to the scan's
+ * snapshot. *needRecheck (when supplied) is set to false, and to true only
+ * when the heap must recheck visibility by command id. needRecheck may be
+ * NULL.
+ */
+bool VecVisibilityCheck(IndexScanDesc scan, Page page, OffsetNumber offnum, bool *needRecheck)
+{
+    bool needVisibilityCheck =
+        scan->xs_snapshot->satisfies != SNAPSHOT_ANY && scan->xs_snapshot->satisfies != SNAPSHOT_TOAST;
+    TransactionId xmin, xmax;
+    bool xminCommitted = false;
+    bool xmaxCommitted = false;
+    bool localRecheck = false;
+    bool isDead = VecItupGetXminXmax(page, offnum, InvalidTransactionId, &xmin, &xmax, &xminCommitted, &xmaxCommitted,
+                                     RelationGetNamespace(scan->indexRelation) == PG_TOAST_NAMESPACE);
+
+    /*
+     * The original test was inverted: it wrote through needRecheck only when
+     * it was NULL and never initialised it otherwise. Fall back to a local
+     * flag for NULL so the code below can always write through the pointer.
+     */
+    if (needRecheck == NULL) {
+        needRecheck = &localRecheck;
+    }
+    *needRecheck = false;
+
+    bool isVisible = !isDead;
+    if (needVisibilityCheck && !isDead) {
+        /*
+         * If this IndexTuple is not visible to the current Snapshot, try to get the next one.
+         * We're not going to tell heap to skip visibility check, because it doesn't cost a lot and we need heap
+         * to check the visibility with CID when snapshot's xid equals to xmin or xmax.
+         */
+        if (scan->xs_snapshot->satisfies == SNAPSHOT_MVCC &&
+            (TransactionIdIsCurrentTransactionId(xmin) || TransactionIdIsCurrentTransactionId(xmax))) {
+            ItemId iid = PageGetItemId(page, offnum);
+            IndexTuple tuple = (IndexTuple)PageGetItem(page, iid);
+            isVisible = VecVisibilityCheckCid(scan, tuple, needRecheck); /* need check cid */
+        } else {
+            VectorScanData *vs = VecGetScanData(scan);
+            Assert(vs != NULL);
+            isVisible = VecVisibilityCheckXid(xmin, xmax, xminCommitted, xmaxCommitted, scan->xs_snapshot, vs->buf,
+                                              scan->isUpsert);
+        }
+    }
+
+    return isVisible;
+}
diff --git a/src/gausskernel/storage/access/index/indexam.cpp b/src/gausskernel/storage/access/index/indexam.cpp
index f450adb94131aff8c7f81caa67cb5a2fbf3e50a6..b28baeb35645a6c53aff61cccd01e033be0df5d4 100644
--- a/src/gausskernel/storage/access/index/indexam.cpp
+++ b/src/gausskernel/storage/access/index/indexam.cpp
@@ -208,8 +208,52 @@ bool UBTreeDelete(Relation indexRelation, Datum* values, const bool* isnull, Ite
 void index_delete(Relation index_relation, Datum* values, const bool* isnull, ItemPointer heap_t_ctid,
     bool isRollbackIndex)
 {
-    /* Assert(Ustore) Assert(B tree) */
-    UBTreeDelete(index_relation, values, isnull, heap_t_ctid, isRollbackIndex);
+    if (RelationIsUstoreIndex(index_relation)) {
+        /* Assert(Ustore) Assert(B tree) */
+        UBTreeDelete(index_relation, values, isnull, heap_t_ctid, isRollbackIndex);
+    } else {
+        /* Other access methods: find the amdelete procedure in pg_am by AM name and call it through fmgr. */
+        HeapTuple tuple;
+        char* accessMethodName;
+        Form_pg_am accessMethodForm;
+        FmgrInfo flinfo;
+        FunctionCallInfoData fcinfo;
+
+        switch (index_relation->rd_rel->relam) {
+            case HNSW_AM_OID:
+                accessMethodName = DEFAULT_HNSW_INDEX_TYPE;
+                break;
+            default:
+                ereport(ERROR, (errmsg("the current index method is not supported, please check\n")));
+                Assert(false);
+                break;
+        }
+
+        tuple = SearchSysCache1(AMNAME, PointerGetDatum(accessMethodName));
+        if (!HeapTupleIsValid(tuple)) {
+            ereport(ERROR, (errcode(ERRCODE_CACHE_LOOKUP_FAILED),
+                errmsg("cache lookup failed for access method \"%s\"", accessMethodName)));
+        }
+        accessMethodForm = (Form_pg_am)GETSTRUCT(tuple);
+
+        fmgr_info(accessMethodForm->amdelete, &flinfo);
+        InitFunctionCallInfoData(fcinfo, &flinfo, 5, InvalidOid, NULL, NULL);
+        fcinfo.arg[0] = PointerGetDatum(index_relation);
+        fcinfo.arg[1] = PointerGetDatum(values);
+        fcinfo.arg[2] = PointerGetDatum(isnull);
+        fcinfo.arg[3] = PointerGetDatum(heap_t_ctid);
+        fcinfo.arg[4] = BoolGetDatum(isRollbackIndex);
+        fcinfo.argnull[0] = false;
+        fcinfo.argnull[1] = false;
+        fcinfo.argnull[2] = false;
+        fcinfo.argnull[3] = false;
+        fcinfo.argnull[4] = false;
+
+        /* the return value of amdelete is not used here */
+        (void)FunctionCallInvoke(&fcinfo);
+
+        ReleaseSysCache(tuple);
+    }
 }
 
 
diff --git a/src/gausskernel/storage/access/redo/redo_xlog.cpp b/src/gausskernel/storage/access/redo/redo_xlog.cpp
index 429f64995261eb6c9a734e3ca5adefcaf61182b9..9cd70ca0abbc5d9424f72a6e71c0a901a1920323 100644
--- a/src/gausskernel/storage/access/redo/redo_xlog.cpp
+++ b/src/gausskernel/storage/access/redo/redo_xlog.cpp
@@ -51,6 +51,7 @@ typedef enum {
 XLogRecParseState *xlog_fpi_parse_to_block(XLogReaderState *record, uint32 *blocknum)
 {
     XLogRecParseState *recordstatehead = NULL;
+    XLogRecParseState *recordblockstat = NULL; /* parse state of each additional block of a merged record */
 
     (*blocknum)++;
     XLogParseBufferAllocListFunc(record, &recordstatehead, NULL);
@@ -59,6 +60,19 @@ XLogRecParseState *xlog_fpi_parse_to_block(XLogReaderState *record, uint32 *bloc
     }
 
     XLogRecSetBlockDataState(record, XLOG_FULL_PAGE_ORIG_BLOCK_NUM, recordstatehead);
+
+    if ((XLogRecGetInfo(record) & XLR_INFO_MASK) == XLOG_MERGE_RECORD) { /* NOTE(review): confirm mask vs ~mask */
+        for (int blockid = 1; blockid <= record->max_block_id; blockid++) {
+            (*blocknum)++;
+            XLogParseBufferAllocListFunc(record, &recordblockstat, recordstatehead);
+            if (recordblockstat == NULL) {
+                return NULL;
+            }
+
+            XLogRecSetBlockDataState(record, blockid, recordblockstat);
+        }
+    }
+
     return recordstatehead;
 }
 
diff --git a/src/gausskernel/storage/access/redo/redo_xlogutils.cpp b/src/gausskernel/storage/access/redo/redo_xlogutils.cpp
index 3ec52d1c532d340a10769967a719f222cfcf6e91..8b45e23a0e46eba49f8c5ce940a24289df404a7a 100644
--- a/src/gausskernel/storage/access/redo/redo_xlogutils.cpp
+++
b/src/gausskernel/storage/access/redo/redo_xlogutils.cpp @@ -30,6 +30,7 @@ #include "access/xlog_internal.h" #include "access/transam.h" #include "access/xlogproc.h" +#include "access/generic_xlog.h" #include "catalog/storage_xlog.h" #include "access/visibilitymap.h" #include "access/multi_redo_api.h" @@ -342,8 +343,18 @@ void DoRecordCheck(XLogRecParseState *recordstate, XLogRecPtr pageLsn, bool repl RedoParseManager *manager = recordstate->manager; if (manager->refOperate != NULL) { - manager->refOperate->checkFunc(recordstate->refrecord, pageLsn, - recordstate->blockparse.extra_rec.blockdatarec.blockhead.cur_block_id, replayed); + XLogReaderState *record = (XLogReaderState *)recordstate->refrecord; + + if ((XLogRecGetRmid(record) == RM_XLOG_ID) && ((XLogRecGetInfo(record) & XLR_INFO_MASK) == XLOG_MERGE_RECORD)) { + for (int blockid = 0; blockid <= record->max_block_id; blockid++) { + manager->refOperate->checkFunc(recordstate->refrecord, pageLsn, + blockid, replayed); + } + } else { + manager->refOperate->checkFunc(recordstate->refrecord, pageLsn, + recordstate->blockparse.extra_rec.blockdatarec.blockhead.cur_block_id, + replayed); + } } } #endif @@ -1321,6 +1332,9 @@ void XLogBlockDataCommonRedo(XLogBlockHead *blockhead, void *blockrecbody, RedoB case RM_SEGPAGE_ID: SegPageRedoDataBlock(blockhead, blockdatarec, bufferinfo); break; + case RM_GENERIC_ID: + GenericRedoDataBlock(blockhead, blockdatarec, bufferinfo); + break; default: ereport(PANIC, (errmsg("XLogBlockDataCommonRedo: unknown rmid %u", rmid))); } @@ -1947,6 +1961,9 @@ static const XLogParseBlock g_xlogParseBlockTable[RM_MAX_ID + 1] = { { UBTree2RedoParseToBlock, RM_UBTREE2_ID }, { segpage_redo_parse_to_block, RM_SEGPAGE_ID }, { NULL, RM_REPLORIGIN_ID }, + { NULL, RM_COMPRESSION_REL_ID }, + { NULL, RM_LOGICALDDLMSG_ID }, + { GenericRedoParseToBlock, RM_GENERIC_ID }, }; inline XLogRecParseState *XLogParseToBlockCommonFunc(XLogReaderState *record, uint32 *blocknum) { diff --git 
a/src/gausskernel/storage/access/table/tableam.cpp b/src/gausskernel/storage/access/table/tableam.cpp index 25b54e8c4f2e8684722d16dda62b04194f60e9ef..1af1bb152556da10e6ae662894742cac5370a8c3 100644 --- a/src/gausskernel/storage/access/table/tableam.cpp +++ b/src/gausskernel/storage/access/table/tableam.cpp @@ -378,9 +378,11 @@ void HeapamScanInitParallelSeqscan(TableScanDesc sscan, int32 dop, ScanDirection } double HeapamIndexBuildScan(Relation heapRelation, Relation indexRelation, IndexInfo *indexInfo, bool allow_sync, - IndexBuildCallback callback, void *callback_state, TableScanDesc scan) + IndexBuildCallback callback, void *callback_state, TableScanDesc scan, + BlockNumber startBlkno, BlockNumber numblocks) { - return IndexBuildHeapScan(heapRelation, indexRelation, indexInfo, allow_sync, callback, callback_state, scan); + return IndexBuildHeapScan(heapRelation, indexRelation, indexInfo, allow_sync, callback, + callback_state, scan, startBlkno, numblocks); } void HeapamIndexValidateScan (Relation heapRelation, Relation indexRelation, @@ -972,7 +974,8 @@ void UHeapamTslotStoreUHeapTuple(Tuple tuple, TupleTableSlot *slot, Buffer buffe */ double UHeapamIndexBuildScan(Relation heapRelation, Relation indexRelation, IndexInfo *indexInfo, bool allowSync, - IndexBuildCallback callback, void *callback_state, TableScanDesc scan) + IndexBuildCallback callback, void *callback_state, TableScanDesc scan, + BlockNumber startBlkno, BlockNumber numblocks) { return IndexBuildUHeapScan(heapRelation, indexRelation, indexInfo, allowSync, callback, callback_state, scan); } diff --git a/src/gausskernel/storage/access/transam/extreme_rto/dispatcher.cpp b/src/gausskernel/storage/access/transam/extreme_rto/dispatcher.cpp index b10e83536285a20abb339f017afc4d2822b38f68..1d481563b80f965acc9a6241071b8867785053ae 100755 --- a/src/gausskernel/storage/access/transam/extreme_rto/dispatcher.cpp +++ b/src/gausskernel/storage/access/transam/extreme_rto/dispatcher.cpp @@ -41,6 +41,7 @@ #include 
"access/gin_private.h" #include "access/xlogutils.h" #include "access/gin.h" +#include "access/generic_xlog.h" #include "catalog/storage_xlog.h" #include "storage/buf/buf_internals.h" @@ -148,6 +149,7 @@ static bool DispatchRepSlotRecord(XLogReaderState *record, List *expectedTLIs, T static bool DispatchHeap3Record(XLogReaderState *record, List *expectedTLIs, TimestampTz recordXTime); static bool DispatchDefaultRecord(XLogReaderState *record, List *expectedTLIs, TimestampTz recordXTime); static bool DispatchBarrierRecord(XLogReaderState *record, List *expectedTLIs, TimestampTz recordXTime); +static bool DispatchGenericRecord(XLogReaderState *record, List *expectedTLIs, TimestampTz recordXTime); #ifdef ENABLE_MOT static bool DispatchMotRecord(XLogReaderState* record, List* expectedTLIs, TimestampTz recordXTime); #endif @@ -225,6 +227,7 @@ static const RmgrDispatchData g_dispatchTable[RM_MAX_ID + 1] = { XLOG_CFS_SHRINK_OPERATION }, { DispatchLogicalDDLMsgRecord, RmgrRecordInfoValid, RM_LOGICALDDLMSG_ID, XLOG_LOGICAL_DDL_MESSAGE, XLOG_LOGICAL_DDL_MESSAGE }, + { DispatchGenericRecord, RmgrRecordInfoValid, RM_GENERIC_ID, XLOG_GENERIC_LOG, XLOG_GENERIC_LOG }, }; const int REDO_WAIT_SLEEP_TIME = 5000; /* 5ms */ @@ -1435,6 +1438,12 @@ static bool DispatchSpgistRecord(XLogReaderState *record, List *expectedTLIs, Ti return false; } +static bool DispatchGenericRecord(XLogReaderState *record, List *expectedTLIs, TimestampTz recordXTime) +{ + DispatchRecordWithPages(record, expectedTLIs); + return false; +} + /** * dispatch record to a specified thread */ diff --git a/src/gausskernel/storage/access/transam/generic_xlog.cpp b/src/gausskernel/storage/access/transam/generic_xlog.cpp index e07a1b722b197febc42c7108976e89a3f8495d36..4a17e112c3ed18c71ad5097523e96589f6c90caf 100644 --- a/src/gausskernel/storage/access/transam/generic_xlog.cpp +++ b/src/gausskernel/storage/access/transam/generic_xlog.cpp @@ -428,3 +428,39 @@ generic_redo(XLogReaderState *record) 
UnlockReleaseBuffer(buffers[block_id].buf); } } + /* Block-level redo for a generic WAL record: when XLogCheckBlockDataRedoAction reports BLK_NEEDS_REDO, apply the block data with applyPageRedo, stamp the page with bufferinfo->lsn and mark the buffer dirty. blockhead is not used. */ +void +GenericRedoDataBlock(XLogBlockHead *blockhead, XLogBlockDataParse *blockdatarec, RedoBufferInfo *bufferinfo) +{ + XLogRedoAction action = XLogCheckBlockDataRedoAction(blockdatarec, bufferinfo); + XLogRecPtr lsn = bufferinfo->lsn; + + if (action == BLK_NEEDS_REDO) { + Pointer blockData; + Page page; + + Size blkdatalen = 0; + page = bufferinfo->pageinfo.page; + blockData = XLogBlockDataGetBlockData(blockdatarec, &blkdatalen); + + applyPageRedo(page, blockData, blkdatalen); + PageSetLSN(page, lsn); + MarkBufferDirty(bufferinfo->buf); + } +} + /* Parse a generic WAL record into a parse state: increments *blocknum first, asks XLogParseBufferAllocListFunc for a state, returns NULL when none was obtained, otherwise fills the state for block id 0 and returns it. NOTE(review): only block id 0 is set up here while generic_redo indexes its buffers by block_id - confirm records touching several pages are covered. */ +XLogRecParseState * +GenericRedoParseToBlock(XLogReaderState *record, uint32 *blocknum) +{ + XLogRecParseState *recordstatehead = NULL; + + (*blocknum)++; + XLogParseBufferAllocListFunc(record, &recordstatehead, NULL); + + if (recordstatehead == NULL) { + return NULL; + } + + XLogRecSetBlockDataState(record, 0, recordstatehead); + return recordstatehead; +} \ No newline at end of file diff --git a/src/gausskernel/storage/access/transam/ondemand_extreme_rto/dispatcher.cpp b/src/gausskernel/storage/access/transam/ondemand_extreme_rto/dispatcher.cpp index 6f4a6da3d7830a2b1d0ffaf0c5f6998b1ad26f73..ee0709b268c83522479185e4791665db57eb18a2 100644 --- a/src/gausskernel/storage/access/transam/ondemand_extreme_rto/dispatcher.cpp +++ b/src/gausskernel/storage/access/transam/ondemand_extreme_rto/dispatcher.cpp @@ -41,6 +41,7 @@ #include "access/gin_private.h" #include "access/xlogutils.h" #include "access/gin.h" +#include "access/generic_xlog.h" #include "catalog/storage_xlog.h" #include "storage/buf/buf_internals.h" @@ -150,6 +151,7 @@ static bool DispatchRepSlotRecord(XLogReaderState *record, List *expectedTLIs, T static bool DispatchHeap3Record(XLogReaderState *record, List *expectedTLIs, TimestampTz recordXTime); static bool DispatchDefaultRecord(XLogReaderState *record, List *expectedTLIs, TimestampTz recordXTime); static bool DispatchBarrierRecord(XLogReaderState *record, List
*expectedTLIs, TimestampTz recordXTime); +static bool DispatchGenericRecord(XLogReaderState *record, List *expectedTLIs, TimestampTz recordXTime); #ifdef ENABLE_MOT static bool DispatchMotRecord(XLogReaderState* record, List* expectedTLIs, TimestampTz recordXTime); #endif @@ -224,6 +226,7 @@ static const RmgrDispatchData g_dispatchTable[RM_MAX_ID + 1] = { XLOG_CFS_SHRINK_OPERATION }, { DispatchLogicalDDLMsgRecord, RmgrRecordInfoValid, RM_LOGICALDDLMSG_ID, XLOG_LOGICAL_DDL_MESSAGE, XLOG_LOGICAL_DDL_MESSAGE }, + { DispatchGenericRecord, RmgrRecordInfoValid, RM_GENERIC_ID, XLOG_GENERIC_LOG, XLOG_GENERIC_LOG }, /* RM_GENERIC_ID row: handler is DispatchGenericRecord; both trailing info fields are XLOG_GENERIC_LOG */ }; const int REDO_WAIT_SLEEP_TIME = 5000; /* 5ms */ @@ -1385,6 +1388,12 @@ static bool DispatchSpgistRecord(XLogReaderState *record, List *expectedTLIs, Ti return false; } +static bool DispatchGenericRecord(XLogReaderState *record, List *expectedTLIs, TimestampTz recordXTime) +{ /* Hands the record to DispatchRecordWithPages and always returns false, the same value the preceding dispatcher returns; recordXTime is not used. */ + DispatchRecordWithPages(record, expectedTLIs); + return false; +} + /** * dispatch record to a specified thread */ diff --git a/src/gausskernel/storage/access/transam/parallel_recovery/dispatcher.cpp b/src/gausskernel/storage/access/transam/parallel_recovery/dispatcher.cpp index 78d04bef55ab2edbf89e10564843eddccc62517f..18a24185b73e6730012942d9b0e2a4d510457879 100755 --- a/src/gausskernel/storage/access/transam/parallel_recovery/dispatcher.cpp +++ b/src/gausskernel/storage/access/transam/parallel_recovery/dispatcher.cpp @@ -42,6 +42,7 @@ #include "access/xlogutils.h" #include "access/gin.h" #include "access/ustore/knl_uredo.h" +#include "access/generic_xlog.h" #include "catalog/storage_xlog.h" #include "storage/buf/buf_internals.h" @@ -160,6 +161,7 @@ static bool DispatchRepSlotRecord(XLogReaderState *record, List *expectedTLIs, T static bool DispatchHeap3Record(XLogReaderState *record, List *expectedTLIs, TimestampTz recordXTime); static bool DispatchDefaultRecord(XLogReaderState *record, List *expectedTLIs, TimestampTz recordXTime); static bool DispatchBarrierRecord(XLogReaderState
*record, List *expectedTLIs, TimestampTz recordXTime); +static bool DispatchGenericRecord(XLogReaderState *record, List *expectedTLIs, TimestampTz recordXTime); static bool DispatchBtreeRecord(XLogReaderState *record, List *expectedTLIs, TimestampTz recordXTime); @@ -232,6 +234,7 @@ static const RmgrDispatchData g_dispatchTable[RM_MAX_ID + 1] = { XLOG_CFS_SHRINK_OPERATION }, { DispatchLogicalDDLMsgRecord, RmgrRecordInfoValid, RM_LOGICALDDLMSG_ID, XLOG_LOGICAL_DDL_MESSAGE, XLOG_LOGICAL_DDL_MESSAGE }, + { DispatchGenericRecord, RmgrRecordInfoValid, RM_GENERIC_ID, XLOG_GENERIC_LOG, XLOG_GENERIC_LOG }, /* RM_GENERIC_ID row: handler is DispatchGenericRecord; both trailing info fields are XLOG_GENERIC_LOG */ }; /* Run from the dispatcher and txn worker thread. */ @@ -1436,6 +1439,12 @@ static bool DispatchUBTree2Record(XLogReaderState *record, List *expectedTLIs, T return false; } +static bool DispatchGenericRecord(XLogReaderState *record, List *expectedTLIs, TimestampTz recordXTime) +{ /* Hands the record to DispatchRecordWithPages with a third argument of true (its meaning is not visible in this hunk) and always returns false; recordXTime is not used. */ + DispatchRecordWithPages(record, expectedTLIs, true); + return false; +} + /* Run from the dispatcher thread. */ static bool DispatchGinRecord(XLogReaderState *record, List *expectedTLIs, TimestampTz recordXTime) { diff --git a/src/gausskernel/storage/access/transam/xlog.cpp b/src/gausskernel/storage/access/transam/xlog.cpp index ac0f438ab7d7b306fd2e37f156d786acc4de74d7..8f9a4f684c188afa103c7e7ae31880b4d86049b2 100755 --- a/src/gausskernel/storage/access/transam/xlog.cpp +++ b/src/gausskernel/storage/access/transam/xlog.cpp @@ -14385,11 +14385,23 @@ void xlog_redo(XLogReaderState *record) * XLOG_FPI and XLOG_FPI_FOR_HINT records, they use a different info * code just to distinguish them for statistics purposes.
*/ - if (XLogReadBufferForRedo(record, 0, &buffer) != BLK_RESTORED) { - ereport(ERROR, (errcode(ERRCODE_CASE_NOT_FOUND), - errmsg("unexpected XLogReadBufferForRedo result when restoring backup block"))); + if ((XLogRecGetInfo(record) & XLR_INFO_MASK) == XLOG_MERGE_RECORD){ /* Merge record: every block id from 0 to max_block_id must come back as BLK_RESTORED and is released right away. NOTE(review): this tests the XLR_INFO_MASK bits of XLogRecGetInfo() directly rather than the info variable compared with XLOG_BACKUP_END below - confirm XLOG_MERGE_RECORD is defined in that bit range. */ + for (uint8 block_id = 0; block_id <= record->max_block_id; block_id++) { + if (XLogReadBufferForRedo(record, block_id, &buffer) != BLK_RESTORED) { + ereport(ERROR, (errcode(ERRCODE_CASE_NOT_FOUND), + errmsg("unexpected XLogReadBufferForRedo result when restoring backup block, block id: %d", block_id))); + } + + UnlockReleaseBuffer(buffer.buf); + } + } else { /* any other record reaching this branch restores only block id 0, as the removed code did */ + if (XLogReadBufferForRedo(record, 0, &buffer) != BLK_RESTORED) { + ereport(ERROR, (errcode(ERRCODE_CASE_NOT_FOUND), + errmsg("unexpected XLogReadBufferForRedo result when restoring backup block"))); + } + + UnlockReleaseBuffer(buffer.buf); } - UnlockReleaseBuffer(buffer.buf); } else if (info == XLOG_BACKUP_END) { XLogRecPtr startpoint; errno_t rc = EOK; diff --git a/src/include/access/amapi.h b/src/include/access/amapi.h index 8a19a6818f6fba3974defbbd63ec795577a2f557..45490b90aad6dd28a2ae296a066ee84b36c22ce3 100644 --- a/src/include/access/amapi.h +++ b/src/include/access/amapi.h @@ -29,6 +29,7 @@ #define PG_AM_VACUUMCLEANUP_ARGS_NUM 2 #define PG_AM_COSTESTIMATE_ARGS_NUM 7 #define PG_AM_OPTIONS_ARGS_NUM 2 +#define PG_AM_DELETE_ARGS_NUM 5 /* presumably the argument count of the new amdelete entry point - confirm; 5 stays below PG_AM_FUNC_MAX_ARGS_NUM, which is PG_AM_COSTESTIMATE_ARGS_NUM (7) */ #define PG_AM_FUNC_MAX_ARGS_NUM PG_AM_COSTESTIMATE_ARGS_NUM struct IndexInfo; @@ -173,6 +174,7 @@ typedef struct IndexAmRoutine char ammarkposfuncname[NAMEDATALEN]; char amrestrposfuncname[NAMEDATALEN]; char ammergefuncname[NAMEDATALEN]; + char amdeletefuncname[NAMEDATALEN]; /* new NAMEDATALEN-sized function-name slot, same layout as the fields above */ } IndexAmRoutine; typedef IndexAmRoutine *AmRoutine; diff --git a/src/include/access/datavec/bitvec.h b/src/include/access/datavec/bitvec.h new file mode 100644 index 0000000000000000000000000000000000000000..ad607849c56057c16d7bba17e35c5175e50a2a0a --- /dev/null +++ b/src/include/access/datavec/bitvec.h @@ -0,0 +1,40 @@ +/* + *
Copyright (c) 2024 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + * ------------------------------------------------------------------------- + * + * bitvec.h + * + * IDENTIFICATION + * src/include/access/datavec/bitvec.h + * + * ------------------------------------------------------------------------- + */ +#ifndef BITVEC_H +#define BITVEC_H + +#include "postgres.h" +#include "utils/varbit.h" + /* Distance kernels are exported as function pointers, not functions; presumably BitvecInit() binds them - confirm in the implementation file. */ +extern uint64 (*BitHammingDistance)(uint32 bytes, unsigned char *ax, unsigned char *bx, uint64 distance); +extern double (*BitJaccardDistance)(uint32 bytes, unsigned char *ax, unsigned char *bx, uint64 ab, uint64 aa, + uint64 bb); + /* Setup hook for the bit-vector code; its body is not visible in this header. */ +void BitvecInit(void); + /* Returns a VarBit for the given dim. NOTE(review): whether dim counts bits or bytes is not visible here. */ +VarBit *InitBitVector(int dim); + /* fmgr-callable entry points (PG_FUNCTION_ARGS in, Datum out). */ +Datum hamming_distance(PG_FUNCTION_ARGS); +Datum jaccard_distance(PG_FUNCTION_ARGS); + +#endif diff --git a/src/include/access/datavec/halfutils.h b/src/include/access/datavec/halfutils.h new file mode 100644 index 0000000000000000000000000000000000000000..b6676db3ff4e10cd6ca0b744b1744ed01ea14b61 --- /dev/null +++ b/src/include/access/datavec/halfutils.h @@ -0,0 +1,251 @@ +/* + * Copyright (c) 2024 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2.
+ * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + * ------------------------------------------------------------------------- + * + * halfutils.h + * + * IDENTIFICATION + * src/include/access/datavec/halfutils.h + * + * ------------------------------------------------------------------------- + */ +#ifndef HALFUTILS_H +#define HALFUTILS_H + +#include <math.h> /* isnan, isinf */ + +#include "access/datavec/halfvec.h" +#include "access/datavec/shortest_dec.h" + +#ifdef F16C_SUPPORT +#include <immintrin.h> /* _cvtsh_ss, _cvtss_sh */ +#endif + /* Distance kernels are exported as function pointers, not functions; presumably HalfvecInit() binds them - confirm in the implementation file. */ +extern float (*HalfvecL2SquaredDistance)(int dim, half *ax, half *bx); +extern float (*HalfvecInnerProduct)(int dim, half *ax, half *bx); +extern double (*HalfvecCosineSimilarity)(int dim, half *ax, half *bx); +extern float (*HalfvecL1Distance)(int dim, half *ax, half *bx); + +void HalfvecInit(void); + +/* + * Check if half is NaN (raw-bit path: exponent bits all ones, mantissa non-zero) + */ +static inline bool HalfIsNan(half num) +{ +#ifdef FLT16_SUPPORT + return isnan(num); +#else + return (num & 0x7C00) == 0x7C00 && (num & 0x7FFF) != 0x7C00; +#endif +} + +/* + * Check if half is infinite (raw-bit path: exponent bits all ones, mantissa zero, either sign) + */ +static inline bool HalfIsInf(half num) +{ +#ifdef FLT16_SUPPORT + return isinf(num); +#else + return (num & 0x7FFF) == 0x7C00; +#endif +} + +/* + * Check if half is zero (raw-bit path: the sign bit is masked off, so +0 and -0 both match) + */ +static inline bool HalfIsZero(half num) +{ +#ifdef FLT16_SUPPORT + return num == 0; +#else + return (num & 0x7FFF) == 0x0000; +#endif +} + +/* + * Convert a half to a float4 (F16C intrinsic, plain cast, or a bit-level fallback) + */ +static inline float HalfToFloat4(half num) +{ +#if defined(F16C_SUPPORT) + return _cvtsh_ss(num); +#elif defined(FLT16_SUPPORT) + return (float)num; +#else + union { + float f; + uint32 i; + } swapfloat; + + union { + half h; + uint16 i; + } swaphalf; + + uint16 bin; + uint32 exponent; + uint32 mantissa; +
uint32 result; + + swaphalf.h = num; + bin = swaphalf.i; + exponent = (bin & 0x7C00) >> 10; + mantissa = bin & 0x03FF; + + /* Sign */ + result = (bin & 0x8000) << 16; + + if (unlikely(exponent == 31)) { + if (mantissa == 0) { + /* Infinite */ + result |= 0x7F800000; + } else { + /* NaN */ + result |= 0x7FC00000; + } + } else if (unlikely(exponent == 0)) { + /* Subnormal: shift the mantissa up to its leading 1; exponent is uint32, so -14 wraps and the +127 below brings it back in range */ + if (mantissa != 0) { + exponent = -14; + + for (int i = 0; i < 10; i++) { + mantissa <<= 1; + exponent -= 1; + + if ((mantissa >> 10) % 2 == 1) { + mantissa &= 0x03ff; + break; + } + } + + result |= (exponent + 127) << 23; + } + } else { + /* Normal: rebias the exponent from 15 to 127 */ + result |= (exponent - 15 + 127) << 23; + } + + result |= mantissa << 13; + + swapfloat.i = result; + return swapfloat.f; +#endif +} + +/* + * Convert a float4 to a half without a range check (the fallback path turns overflow into infinity) + */ +static inline half Float4ToHalfUnchecked(float num) +{ +#if defined(F16C_SUPPORT) + return _cvtss_sh(num, 0); +#elif defined(FLT16_SUPPORT) + return num; +#else + union { + float f; + uint32 i; + } swapfloat; + + union { + half h; + uint16 i; + } swaphalf; + + uint32 bin; + int exponent; + int mantissa; + uint16 result; + + swapfloat.f = num; + bin = swapfloat.i; + exponent = (bin & 0x7F800000) >> 23; + mantissa = bin & 0x007FFFFF; + + /* Sign */ + result = (bin & 0x80000000) >> 16; + + if (isinf(num)) { + /* Infinite */ + result |= 0x7C00; + } else if (isnan(num)) { + /* NaN */ + result |= 0x7E00; + result |= mantissa >> 13; + } else if (exponent > 98) { /* inputs with a biased exponent of 98 or less take no branch and keep only the sign bit */ + int m; + int gr; + int s; + + exponent -= 127; + s = mantissa & 0x00000FFF; + + /* Subnormal: shift right by the exponent deficit and add back the implicit leading 1 */ + if (exponent < -14) { + int diff = -exponent - 14; + + mantissa >>= diff; + mantissa += 1 << (23 - diff); + s |= mantissa & 0x00000FFF; + } + + m = mantissa >> 13; + + /* Round to nearest, ties to even: gr holds the lowest kept bit and the first dropped bit, s the remaining dropped bits */ + gr = (mantissa >> 12) % 4; + if (gr == 3 || (gr == 1 && s != 0)) + m += 1; + + if (m == 1024) { + m = 0; + exponent += 1; + } + + if (exponent > 15) { + /* Infinite */ + result |= 0x7C00; + } else { + if (exponent >= -14) + result |= (exponent +
15) << 10; + + result |= m; + } + } + + swaphalf.i = result; + return swaphalf.h; +#endif +} + +/* + * Convert a float4 to a half, reporting ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE when a finite input converts to infinity + */ +static inline half Float4ToHalf(float num) +{ + half result = Float4ToHalfUnchecked(num); + if (unlikely(HalfIsInf(result)) && !isinf(num)) { + char *buf = (char *)palloc(FLOAT_SHORTEST_DECIMAL_LEN); + /* buf is only used to print the offending value in the error message; ereport(ERROR) follows, so it is not freed here */ + FloatToShortestDecimalBuf(num, buf); + + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), errmsg("\"%s\" is out of range for type halfvec", buf))); + } + + return result; +} + +#endif diff --git a/src/include/access/datavec/halfvec.h b/src/include/access/datavec/halfvec.h new file mode 100644 index 0000000000000000000000000000000000000000..2fe7a0f0edc080eb38b9550bb12285cdd6c5f31b --- /dev/null +++ b/src/include/access/datavec/halfvec.h @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2024 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details.
+ * ------------------------------------------------------------------------- + * + * halfvec.h + * + * IDENTIFICATION + * src/include/access/datavec/halfvec.h + * + * ------------------------------------------------------------------------- + */ +#ifndef HALFVEC_H +#define HALFVEC_H + +#define __STDC_WANT_IEC_60559_TYPES_EXT__ + +#include <float.h> /* FLT16_MAX, exposed through the macro defined just above */ +#include "fmgr.h" + +/* We use two types of dispatching: intrinsics and target_clones */ +/* TODO Move to better place */ +#ifndef DISABLE_DISPATCH +/* Only enable for more recent compilers to keep build process simple */ +#if defined(__x86_64__) && defined(__GNUC__) && __GNUC__ >= 11 +#define USE_DISPATCH +#elif defined(__x86_64__) && defined(__clang_major__) && __clang_major__ >= 7 +#define USE_DISPATCH +#elif defined(_M_AMD64) && defined(_MSC_VER) && _MSC_VER >= 1920 +#define USE_DISPATCH +#endif +#endif + +/* target_clones requires glibc */ +#if defined(USE_DISPATCH) && defined(__gnu_linux__) && defined(__has_attribute) +/* Use separate line for portability */ +#if __has_attribute(target_clones) +#define USE_TARGET_CLONES +#endif +#endif + +/* Apple clang check needed for universal binaries on Mac */ +#if defined(USE_DISPATCH) && (defined(HAVE__GET_CPUID) || defined(__apple_build_version__)) +#define USE__GET_CPUID +#endif + +#if defined(USE_DISPATCH) +#define HALFVEC_DISPATCH +#endif + +/* F16C has better performance than _Float16 (on x86-64) */ +#if defined(__F16C__) +#define F16C_SUPPORT +#elif defined(__FLT16_MAX__) && !defined(HALFVEC_DISPATCH) +#define FLT16_SUPPORT +#endif + +/* TODO support _Float16: with FLT16_SUPPORT half is currently plain float, otherwise it is the raw uint16 bit pattern */ +#ifdef FLT16_SUPPORT +#define half float +#define HALF_MAX FLT16_MAX +#else +#define half uint16 +#define HALF_MAX 65504 +#endif + +#define HALFVEC_MAX_DIM 16000 + +#define HALFVEC_SIZE(_dim) (offsetof(HalfVector, x) + sizeof(half) * (_dim)) +#define DatumGetHalfVector(x) ((HalfVector *)PG_DETOAST_DATUM(x)) +#define PG_GETARG_HALFVEC_P(x) DatumGetHalfVector(PG_GETARG_DATUM(x)) +#define PG_RETURN_HALFVEC_P(x)
PG_RETURN_POINTER(x) + /* On-disk and in-memory layout of a halfvec value: varlena header, dimension count, then the elements. */ +typedef struct HalfVector { + int32 vl_len_; /* varlena header (do not touch directly!) */ + int16 dim; /* number of dimensions */ + int16 unused; /* reserved for future use, always zero */ + half x[FLEXIBLE_ARRAY_MEMBER]; /* element storage; HALFVEC_SIZE(_dim) sizes it as sizeof(half) * _dim */ +} HalfVector; + +HalfVector *InitHalfVector(int dim); + /* fmgr-callable entry points (PG_FUNCTION_ARGS in, Datum out). */ +Datum halfvec_in(PG_FUNCTION_ARGS); +Datum halfvec_out(PG_FUNCTION_ARGS); +Datum halfvec_typmod_in(PG_FUNCTION_ARGS); +Datum halfvec_recv(PG_FUNCTION_ARGS); +Datum halfvec_send(PG_FUNCTION_ARGS); +Datum halfvec_l2_distance(PG_FUNCTION_ARGS); +Datum halfvec_inner_product(PG_FUNCTION_ARGS); +Datum halfvec_cosine_distance(PG_FUNCTION_ARGS); +Datum halfvec_l1_distance(PG_FUNCTION_ARGS); +Datum halfvec_vector_dims(PG_FUNCTION_ARGS); +Datum halfvec_l2_norm(PG_FUNCTION_ARGS); +Datum halfvec_l2_normalize(PG_FUNCTION_ARGS); +Datum halfvec_binary_quantize(PG_FUNCTION_ARGS); +Datum halfvec_subvector(PG_FUNCTION_ARGS); +Datum halfvec_add(PG_FUNCTION_ARGS); +Datum halfvec_sub(PG_FUNCTION_ARGS); +Datum halfvec_mul(PG_FUNCTION_ARGS); +Datum halfvec_concat(PG_FUNCTION_ARGS); +Datum halfvec_lt(PG_FUNCTION_ARGS); +Datum halfvec_le(PG_FUNCTION_ARGS); +Datum halfvec_eq(PG_FUNCTION_ARGS); +Datum halfvec_ne(PG_FUNCTION_ARGS); +Datum halfvec_ge(PG_FUNCTION_ARGS); +Datum halfvec_gt(PG_FUNCTION_ARGS); +Datum halfvec_cmp(PG_FUNCTION_ARGS); +Datum halfvec_l2_squared_distance(PG_FUNCTION_ARGS); +Datum halfvec_negative_inner_product(PG_FUNCTION_ARGS); +Datum halfvec_spherical_distance(PG_FUNCTION_ARGS); +Datum halfvec_accum(PG_FUNCTION_ARGS); +Datum halfvec_avg(PG_FUNCTION_ARGS); +Datum halfvec_combine(PG_FUNCTION_ARGS); +Datum halfvec(PG_FUNCTION_ARGS); +Datum halfvec_to_vector(PG_FUNCTION_ARGS); +Datum vector_to_halfvec(PG_FUNCTION_ARGS); +Datum array_to_halfvec(PG_FUNCTION_ARGS); +Datum array_to_halfvec(PG_FUNCTION_ARGS); +Datum array_to_halfvec(PG_FUNCTION_ARGS); +Datum array_to_halfvec(PG_FUNCTION_ARGS); /* NOTE(review): array_to_halfvec is declared four times in a row; the repeats are redundant and can be dropped together with the hunk line count */ +Datum halfvec_to_float4(PG_FUNCTION_ARGS); + +#endif diff --git
a/src/include/access/datavec/hnsw.h b/src/include/access/datavec/hnsw.h new file mode 100644 index 0000000000000000000000000000000000000000..81b4c5b74a1e2af831131f6b37fccfcbc698b0e8 --- /dev/null +++ b/src/include/access/datavec/hnsw.h @@ -0,0 +1,747 @@ +/* + * Copyright (c) 2024 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + * ------------------------------------------------------------------------- + * + * hnsw.h + * + * IDENTIFICATION + * src/include/access/datavec/hnsw.h + * + * ------------------------------------------------------------------------- + */ +#ifndef HNSW_H +#define HNSW_H + +#include "postgres.h" + +#include "access/genam.h" +#include "lib/pairingheap.h" +#include "nodes/execnodes.h" +#include "port.h" /* for random() */ +#include "access/datavec/vector.h" +#include "access/datavec/vecindex.h" +#include "access/datavec/utils.h" + +#define HNSW_MAX_DIM 2000 +#define HNSW_MAX_NNZ 1000 + +/* Support functions */ +#define HNSW_DISTANCE_PROC 1 +#define HNSW_NORM_PROC 2 +#define HNSW_TYPE_INFO_PROC 3 +#define HNSW_KMEANS_NORMAL_PROC 4 + +#define HNSW_VERSION 1 +#define HNSW_MAGIC_NUMBER 0xA953A953 +#define HNSW_PAGE_ID 0xFF90 + +/* Preserved page numbers */ +#define HNSW_METAPAGE_BLKNO 0 +#define HNSW_HEAD_BLKNO 1 /* first element page */ +#define HNSW_PQTABLE_START_BLKNO 1 /* pqtable start page */ +#define HNSW_PQTABLE_STORAGE_SIZE (uint16)(6 * 1024) /* pqtable storage size in each page */ + +/* Append page slot info */ +#define HNSW_DEFAULT_NPAGES_PER_SLOT 50 +#define 
HNSW_BUFFER_THRESHOLD 4 + +/* Must correspond to page numbers since page lock is used */ +#define HNSW_UPDATE_LOCK 0 +#define HNSW_SCAN_LOCK 1 + +/* HNSW parameters */ +#define HNSW_FUNC_NUM 4 +#define HNSW_DEFAULT_M 16 +#define HNSW_MIN_M 2 +#define HNSW_MAX_M 100 +#define HNSW_DEFAULT_EF_CONSTRUCTION 64 +#define HNSW_MIN_EF_CONSTRUCTION 4 +#define HNSW_MAX_EF_CONSTRUCTION 1000 +#define HNSW_DEFAULT_EF_SEARCH 40 +#define HNSW_MIN_EF_SEARCH 1 +#define HNSW_DEFAULT_THRESHOLD INT32_MAX +#define HNSW_MIN_THRESHOLD 160 +#define HNSW_MAX_THRESHOLD INT32_MAX +#define HNSW_MAX_EF_SEARCH 1000000 + +#define HNSW_PQMODE_ADC 1 +#define HNSW_PQMODE_SDC 2 +#define HNSW_PQMODE_DEFAULT HNSW_PQMODE_ADC +#define HNSW_PQ_DIS_L2 1 +#define HNSW_PQ_DIS_IP 2 +#define HNSW_PQ_DIS_COSINE 3 + +/* Tuple types */ +#define HNSW_ELEMENT_TUPLE_TYPE 1 +#define HNSW_NEIGHBOR_TUPLE_TYPE 2 + +/* page types */ +#define HNSW_DEFAULT_PAGE_TYPE 0 +#define HNSW_ELEMENT_PAGE_TYPE 1 +#define HNSW_NEIGHBOR_PAGE_TYPE 2 +#define HNSW_USTORE_PAGE_TYPE 3 + +/* Make graph robust against non-HOT updates */ +#define HNSW_HEAPTIDS 10 + +#define HNSW_UPDATE_ENTRY_GREATER 1 +#define HNSW_UPDATE_ENTRY_ALWAYS 2 + +/* Build phases */ +/* PROGRESS_CREATEIDX_SUBPHASE_INITIALIZE is 1 */ +#define PROGRESS_HNSW_PHASE_LOAD 2 + +#define PQ_SUCCESS 0 +#define PQ_ERROR (-1) + +#define HNSWPQ_MAX_PATH_LEN 4096 +#ifndef MAX_PATH_LEN +#define MAX_PATH_LEN UWAL_MAX_PATH_LEN +#endif + +#define HNSWPQ_DEFAULT_TARGET_ROWS 300 + +#define PQ_ENV_PATH "DATAVEC_PQ_LIB_PATH" +#define PQ_SO_NAME "libkvecturbo.so" + +#define HNSW_MAX_SIZE \ + (BLCKSZ - MAXALIGN(SizeOfPageHeaderData) - MAXALIGN(sizeof(HnswPageOpaqueData)) - sizeof(ItemIdData)) +#define HNSW_TUPLE_ALLOC_SIZE BLCKSZ + +#define HNSW_ELEMENT_TUPLE_SIZE(size) MAXALIGN(offsetof(HnswElementTupleData, data) + (size)) +#define HNSW_NEIGHBOR_TUPLE_SIZE(level, m) \ + MAXALIGN(offsetof(HnswNeighborTupleData, indextids) + ((level) + 2) * (m) * sizeof(ItemPointerData)) + +#define 
HNSW_NEIGHBOR_ARRAY_SIZE(lm) (offsetof(HnswNeighborArray, items) + sizeof(HnswCandidate) * (lm)) + +#define HnswPageGetOpaque(page) ((HnswPageOpaque)PageGetSpecialPointer(page)) +#define HnswPageGetMeta(page) ((HnswMetaPageData *)PageGetContents(page)) +#define HnswPageGetAppendMeta(page) ((HnswAppendMetaPageData *)PageGetContents(page)) + +#define HnswDefaultMaxItemSize \ + MAXALIGN_DOWN((BLCKSZ - MAXALIGN(SizeOfPageHeaderData + sizeof(ItemIdData) + sizeof(ItemPointerData)) - \ + MAXALIGN(sizeof(HnswPageOpaqueData)))) + +#define RandomDouble() (((double)random()) / MAX_RANDOM_VALUE) +#define SeedRandom(seed) srandom(seed) + +#define list_delete_last(list) list_truncate(list, list_length(list) - 1) +#define list_sort(list, cmp) \ + do { \ + ListCell *cell; \ + int i; \ + int len = list_length(list); \ + ListCell **list_arr; \ + List *new_list; \ + \ + if (len == 0) { \ + list = NIL; \ + return list; \ + } \ + i = 0; \ + list_arr = (ListCell **)palloc(sizeof(ListCell *) * len); \ + foreach (cell, list) \ + list_arr[i++] = cell; \ + \ + qsort(list_arr, len, sizeof(ListCell *), cmp); \ + \ + new_list = (List *)palloc(sizeof(List)); \ + new_list->type = (list->type); \ + new_list->length = len; \ + new_list->head = list_arr[len - 1]; \ + new_list->tail = list_arr[0]; \ + \ + for (i = len - 1; i > 0; i--) \ + list_arr[i]->next = list_arr[i - 1]; \ + \ + list_arr[0]->next = NULL; \ + pfree(list_arr); \ + list = new_list; \ + } while (0) + +#define HnswIsElementTuple(tup) ((tup)->type == HNSW_ELEMENT_TUPLE_TYPE) +#define HnswIsNeighborTuple(tup) ((tup)->type == HNSW_NEIGHBOR_TUPLE_TYPE) + +/* 2 * M connections for ground layer */ +#define HnswGetLayerM(m, layer) ((layer == 0) ? 
(m) * 2 : (m)) + +/* Optimal ML from paper */ +#define HnswGetMl(m) (1 / log(m)) + +/* Ensure fits on page and in uint8 */ +#define HnswGetMaxLevel(m) \ + Min(((BLCKSZ - MAXALIGN(SizeOfPageHeaderData) - MAXALIGN(sizeof(HnswPageOpaqueData)) - \ + offsetof(HnswNeighborTupleData, indextids) - sizeof(ItemIdData)) / \ + (sizeof(ItemPointerData)) / (m)) - \ + 2, \ + 255) + +#define HnswGetValue(base, element) PointerGetDatum(HnswPtrAccess(base, (element)->value)) + +#define relptr_offset(rp) ((rp).relptr_off - 1) + +/* Pointer macros */ +#define HnswPtrAccess(base, hp) ((base) == NULL ? (hp).ptr : relptr_access(base, (hp).relptr)) +#define HnswPtrStore(base, hp, value) \ + ((base) == NULL ? (void)((hp).ptr = (value)) : (void)relptr_store(base, (hp).relptr, value)) +#define HnswPtrIsNull(base, hp) ((base) == NULL ? (hp).ptr == NULL : relptr_is_null((hp).relptr)) +#define HnswPtrEqual(base, hp1, hp2) \ + ((base) == NULL ? (hp1).ptr == (hp2).ptr : relptr_offset((hp1).relptr) == relptr_offset((hp2).relptr)) + +/* For code paths dedicated to each type */ +#define HnswPtrPointer(hp) (hp).ptr +#define HnswPtrOffset(hp) relptr_offset((hp).relptr) + +/* Variables */ +extern int hnsw_lock_tranche_id; + +typedef struct HnswElementData HnswElementData; +typedef struct HnswNeighborArray HnswNeighborArray; + +#define relptr(type) \ + union { \ + type *relptr_type; \ + Size relptr_off; \ + } + +#define relptr_declare(type, relptrtype) typedef relptr(type) (relptrtype) + +#ifdef HAVE__BUILTIN_TYPES_COMPATIBLE_P +#define relptr_access(base, rp) \ + (AssertVariableIsOfTypeMacro(base, char *), \ + (__typeof__((rp).relptr_type))((rp).relptr_off == 0 ? NULL : (base) + (rp).relptr_off - 1)) +#else +/* + * If we don't have __builtin_types_compatible_p, assume we might not have + * __typeof__ either. + */ +#define relptr_access(base, rp) \ + (AssertVariableIsOfTypeMacro(base, char *), (void *)((rp).relptr_off == 0 ? 
NULL : (base) + (rp).relptr_off - 1)) +#endif + +#define relptr_is_null(rp) ((rp).relptr_off == 0) + +#define relptr_offset(rp) ((rp).relptr_off - 1) + +/* We use this inline to avoid double eval of "val" in relptr_store */ +static inline Size relptr_store_eval(char *base, char *val) +{ + if (val == NULL) { + return 0; + } else { + Assert(val >= base); + return val - base + 1; + } +} + +#ifdef HAVE__BUILTIN_TYPES_COMPATIBLE_P +#define relptr_store(base, rp, val) \ + (AssertVariableIsOfTypeMacro(base, char *), AssertVariableIsOfTypeMacro(val, __typeof__((rp).relptr_type)), \ + (rp).relptr_off = relptr_store_eval((base), (char *)(val))) +#else +/* + * If we don't have __builtin_types_compatible_p, assume we might not have + * __typeof__ either. + */ +#define relptr_store(base, rp, val) \ + (AssertVariableIsOfTypeMacro(base, char *), (rp).relptr_off = relptr_store_eval((base), (char *)(val))) +#endif + +#define HnswPtrDeclare(type, relptrtype, ptrtype) \ + relptr_declare(type, relptrtype); \ + typedef union { \ + type *ptr; \ + relptrtype relptr; \ + } (ptrtype); + +/* Pointers that can be absolute or relative */ +/* Use char for HnswDatumPtr so works with Pointer */ +HnswPtrDeclare(HnswElementData, HnswElementRelptr, HnswElementPtr); +HnswPtrDeclare(HnswNeighborArray, HnswNeighborArrayRelptr, HnswNeighborArrayPtr); +HnswPtrDeclare(HnswNeighborArrayPtr, HnswNeighborsRelptr, HnswNeighborsPtr); +HnswPtrDeclare(char, DatumRelptr, HnswDatumPtr); + +struct HnswElementData { + HnswElementPtr next; + ItemPointerData heaptids[HNSW_HEAPTIDS]; + uint8 heaptidsLength; + uint8 level; + uint8 deleted; + uint32 hash; + HnswNeighborsPtr neighbors; + BlockNumber blkno; + OffsetNumber offno; + OffsetNumber neighborOffno; + BlockNumber neighborPage; + HnswDatumPtr value; + HnswDatumPtr pqcodes; + LWLock lock; +}; + +typedef HnswElementData *HnswElement; + +typedef struct HnswCandidate { + HnswElementPtr element; + float distance; + bool closer; +} HnswCandidate; + +struct 
HnswNeighborArray { + int length; + bool closerSet; + HnswCandidate items[FLEXIBLE_ARRAY_MEMBER]; +}; + +typedef struct HnswPairingHeapNode { + HnswCandidate *inner; + pairingheap_node c_node; + pairingheap_node w_node; +} HnswPairingHeapNode; + +/* HNSW index options */ +typedef struct HnswOptions { + int32 vl_len_; /* varlena header (do not touch directly!) */ + int m; /* number of connections */ + int efConstruction; /* size of dynamic candidate list */ + bool enablePQ; + int pqM; /* number of subquantizer */ + int pqKsub; /* number of centroids for each subquantizer */ + char *storage_type; /* table access method kind */ +} HnswOptions; + +typedef struct HnswGraph { + /* Graph state */ + slock_t lock; + HnswElementPtr head; + double indtuples; + + /* Entry state */ + LWLock entryLock; + LWLock entryWaitLock; + HnswElementPtr entryPoint; + + /* Allocations state */ + LWLock allocatorLock; + long memoryUsed; + long memoryTotal; + + /* Flushed state */ + LWLock flushLock; + bool flushed; +} HnswGraph; + +typedef struct HnswShared { + /* Immutable state */ + Oid heaprelid; + Oid indexrelid; + char *pqTable; + float *pqDistanceTable; + + /* Mutex for mutable state */ + slock_t mutex; + + /* Mutable state */ + int nparticipantsdone; + double reltuples; + HnswGraph graphData; + + char *hnswarea; + ParallelHeapScanDescData heapdesc; +} HnswShared; + +typedef struct HnswLeader { + int nparticipanttuplesorts; + HnswShared *hnswshared; +} HnswLeader; + +typedef struct HnswAllocator { + void *(*alloc)(Size size, void *state); + void *state; +} HnswAllocator; + +typedef struct HnswTypeInfo { + int maxDimensions; + bool supportPQ; + Size (*itemSize) (int dimensions); + Datum (*normalize)(PG_FUNCTION_ARGS); + void (*checkValue)(Pointer v); +} HnswTypeInfo; + +typedef struct HnswBuildState { + /* Info */ + Relation heap; + Relation index; + IndexInfo *indexInfo; + ForkNumber forkNum; + const HnswTypeInfo *typeInfo; + + /* Settings */ + int dimensions; + int m; + int 
efConstruction; + + /* Statistics */ + double indtuples; + double reltuples; + + /* Support functions */ + FmgrInfo *procinfo; + FmgrInfo *normprocinfo; + FmgrInfo *kmeansnormprocinfo; + Oid collation; + + /* Variables */ + HnswGraph graphData; + HnswGraph *graph; + double ml; + int maxLevel; + + /* Memory */ + MemoryContext graphCtx; + MemoryContext tmpCtx; + HnswAllocator allocator; + + /* Parallel builds */ + HnswLeader *hnswleader; + HnswShared *hnswshared; + char *hnswarea; + + /* PQ info */ + bool enablePQ; + int pqM; + int pqKsub; + char *pqTable; + Size pqTableSize; + float *pqDistanceTable; + uint16 pqcodeSize; + PQParams *params; + int pqMode; + + VectorArray samples; + BlockSamplerData bs; + double rstate; + int rowstoskip; + + /* storage page info */ + bool isUStore; /* false means astore */ +} HnswBuildState; + +typedef struct HnswMetaPageData { + uint32 magicNumber; + uint32 version; + uint32 dimensions; + uint16 m; + uint16 efConstruction; + BlockNumber entryBlkno; + OffsetNumber entryOffno; + int16 entryLevel; + BlockNumber insertPage; + + /* PQ info */ + bool enablePQ; + uint16 pqM; + uint16 pqKsub; + uint16 pqcodeSize; + uint32 pqTableSize; + uint16 pqTableNblk; + uint32 pqDisTableSize; /* SDC */ + uint16 pqDisTableNblk; +} HnswMetaPageData; + +typedef HnswMetaPageData *HnswMetaPage; + +typedef struct HnswAppendMetaPageData { + uint32 magicNumber; + uint32 version; + uint32 dimensions; + uint16 m; + uint16 efConstruction; + BlockNumber entryBlkno; + OffsetNumber entryOffno; + int16 entryLevel; + + /* PQ info */ + bool enablePQ; + uint16 pqM; /* number of subquantizer */ + uint16 pqKsub; /* number of centroids for each subquantizer */ + uint16 pqcodeSize; /* number of bits per quantization index */ + uint32 pqTableSize; /* dim * pqKsub * sizeof(float) */ + uint16 pqTableNblk; /* total number of blks pqtable */ + uint32 pqDisTableSize; /* SDC */ + uint16 pqDisTableNblk; + + /* slot info */ + int npages; /* number of pages per slot */ + BlockNumber 
slotStartBlkno; + BlockNumber elementInsertSlot; /* the first page of the element type to be inserted into the slot */ + BlockNumber neighborInsertSlot; /* the first page of the neighbor type to be inserted into the slot */ +} HnswAppendMetaPageData; + +typedef HnswAppendMetaPageData *HnswAppendMetaPage; + +typedef struct HnswPageOpaqueData { + BlockNumber nextblkno; + uint8 pageType; /* element or neighbor page */ + uint8 unused; + uint16 page_id; /* for identification of HNSW indexes */ +} HnswPageOpaqueData; + +typedef HnswPageOpaqueData *HnswPageOpaque; + +typedef struct HnswElementTupleData { + uint8 type; + uint8 level; + uint8 deleted; + uint8 unused; + ItemPointerData heaptids[HNSW_HEAPTIDS]; + ItemPointerData neighbortid; + uint16 unused2; + Vector data; +} HnswElementTupleData; + +typedef HnswElementTupleData *HnswElementTuple; + +typedef struct HnswNeighborTupleData { + uint8 type; + uint8 unused; + uint16 count; + ItemPointerData indextids[FLEXIBLE_ARRAY_MEMBER]; +} HnswNeighborTupleData; + +typedef HnswNeighborTupleData *HnswNeighborTuple; + +typedef struct HnswBuildParams { + /* build params */ + char *base; + FmgrInfo *procinfo; + Oid collation; + int m; + int ef; + bool existing; + + /* PQ params */ + bool enablePQ; + int pqM; + int pqKsub; + char *pqTable; + Size pqTableSize; + float *pqDistanceTable; + HnswAllocator *allocator; +} HnswBuildParams; + +typedef struct HnswScanOpaqueData { + const HnswTypeInfo *typeInfo; + bool first; + List *w; + MemoryContext tmpCtx; + + /* Support functions */ + FmgrInfo *procinfo; + FmgrInfo *normprocinfo; + Oid collation; + + bool enablePQ; + PQParams params; + int pqMode; + + /* used in ustore only */ + VectorScanData vs; + int length; + int currentLoc; + Datum value; +} HnswScanOpaqueData; + +typedef HnswScanOpaqueData *HnswScanOpaque; + +typedef struct HnswVacuumState { + /* Info */ + Relation index; + IndexBulkDeleteResult *stats; + IndexBulkDeleteCallback callback; + void *callbackState; + BlockNumber 
hnswHeadBlkno; + + /* Settings */ + int m; + int efConstruction; + + /* Support functions */ + FmgrInfo *procinfo; + Oid collation; + + /* Variables */ + struct tidhash_hash *deleted; + BufferAccessStrategy bas; + HnswNeighborTuple ntup; + HnswElementData highestPoint; + + /* Memory */ + MemoryContext tmpCtx; +} HnswVacuumState; + +typedef struct PQSearchInfo { + PQParams params; + int lc; + int pqMode; + uint8 *qPQCode; + float *pqDistanceTable; +} PQSearchInfo; + +typedef struct Candidate { + float *vector; + float distance; + void *heaptids; + uint8 heaptidsLength; +} Candidate; + +/* Methods */ +int HnswGetM(Relation index); +int HnswGetEfConstruction(Relation index); +bool HnswGetEnablePQ(Relation index); +int HnswGetPqM(Relation index); +int HnswGetPqKsub(Relation index); +FmgrInfo *HnswOptionalProcInfo(Relation index, uint16 procnum); +Datum HnswNormValue(const HnswTypeInfo *typeInfo, Oid collation, Datum value); +bool HnswCheckNorm(FmgrInfo *procinfo, Oid collation, Datum value); +Buffer HnswNewBuffer(Relation index, ForkNumber forkNum); +void HnswInitPage(Buffer buf, Page page); +List *HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, FmgrInfo *procinfo, Oid collation, + int m, bool inserting, HnswElement skipElement, IndexScanDesc scan = NULL, bool enablePQ = false, + PQSearchInfo *pqinfo = NULL); +HnswElement HnswGetEntryPoint(Relation index); +void HnswGetMetaPageInfo(Relation index, int *m, HnswElement *entryPoint); +void *HnswAlloc(HnswAllocator *allocator, Size size); +HnswElement HnswInitElement(char *base, ItemPointer tid, int m, double ml, int maxLevel, HnswAllocator *alloc); +HnswElement HnswInitElementFromBlock(BlockNumber blkno, OffsetNumber offno); +void HnswFindElementNeighbors(char *base, HnswElement element, HnswElement entryPoint, Relation index, + FmgrInfo *procinfo, Oid collation, int m, int efConstruction, bool existing, + bool enablePQ, PQParams *params); +HnswCandidate *HnswEntryCandidate(char *base, 
HnswElement em, Datum q, Relation rel, FmgrInfo *procinfo, Oid collation, + bool loadVec, IndexScanDesc scan = NULL, bool enablePQ = false, + PQSearchInfo *pqinfo = NULL); +void HnswUpdateMetaPage(Relation index, int updateEntry, HnswElement entryPoint, BlockNumber insertPage, + ForkNumber forkNum, bool building); +void HnswSetNeighborTuple(char *base, HnswNeighborTuple ntup, HnswElement e, int m); +void HnswAddHeapTid(HnswElement element, ItemPointer heaptid); +void HnswInitNeighbors(char *base, HnswElement element, int m, HnswAllocator *alloc); +bool HnswInsertTupleOnDisk(Relation index, Datum value, Datum *values, const bool *isnull, ItemPointer heap_tid, + bool building); +void HnswUpdateNeighborsOnDisk(Relation index, FmgrInfo *procinfo, Oid collation, HnswElement e, int m, + bool checkExisting, bool building); +void HnswLoadElementFromTuple(HnswElement element, HnswElementTuple etup, bool loadHeaptids, bool loadVec); +bool HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, + bool loadVec, float *maxDistance, IndexScanDesc scan = NULL, bool enablePQ = false, + PQSearchInfo *pqinfo = NULL); +void HnswSetElementTuple(char *base, HnswElementTuple etup, HnswElement element); +void HnswUpdateConnection(char *base, HnswElement element, HnswCandidate *hc, int lm, int lc, int *updateIdx, + Relation index, FmgrInfo *procinfo, Oid collation); +void HnswLoadNeighbors(HnswElement element, Relation index, int m); +const HnswTypeInfo *HnswGetTypeInfo(Relation index); +bool HnswDelete(Relation index, Datum *values, const bool *isnull, ItemPointer heapTCtid, bool isRollbackIndex); + +void HnswUpdateAppendMetaPage(Relation index, int updateEntry, HnswElement entryPoint, BlockNumber eleInsertPage, + BlockNumber neiInsertPage, ForkNumber forkNum, bool building); +void FlushPQInfo(HnswBuildState *buildstate); +void HnswGetPQInfoFromMetaPage(Relation index, uint16 *pqTableNblk, uint32 *pqTableSize, + uint16 
*pqDisTableNblk, uint32 *pqDisTableSize); + +int ComputePQTable(VectorArray samples, PQParams *params); +int ComputeVectorPQCode(float *vector, const PQParams *params, uint8 *pqCode); +int GetPQDistanceTableSdc(const PQParams *params, float *pqDistanceTable); +int GetPQDistanceTableAdc(float *vector, const PQParams *params, float *pqDistanceTable); +int GetPQDistance(const uint8 *basecode, const uint8 *querycode, const PQParams *params, + const float *pqDistanceTable, float *pqDistance); +int getPQfunctionType(FmgrInfo *procinfo, FmgrInfo *normprocinfo); +void InitPQParamsOnDisk(PQParams *params, Relation index, FmgrInfo *procinfo, int dim, bool *enablePQ); + +Datum hnswhandler(PG_FUNCTION_ARGS); +Datum hnswbuild(PG_FUNCTION_ARGS); +Datum hnswbuildempty(PG_FUNCTION_ARGS); +Datum hnswinsert(PG_FUNCTION_ARGS); +Datum hnswbulkdelete(PG_FUNCTION_ARGS); +Datum hnswvacuumcleanup(PG_FUNCTION_ARGS); +Datum hnswcostestimate(PG_FUNCTION_ARGS); +Datum hnswoptions(PG_FUNCTION_ARGS); +Datum hnswvalidate(PG_FUNCTION_ARGS); +Datum hnswbeginscan(PG_FUNCTION_ARGS); +Datum hnswrescan(PG_FUNCTION_ARGS); +Datum hnswgettuple(PG_FUNCTION_ARGS); +Datum hnswendscan(PG_FUNCTION_ARGS); +Datum hnswdelete(PG_FUNCTION_ARGS); +Datum hnsw_halfvec_support(PG_FUNCTION_ARGS); +Datum hnsw_bit_support(PG_FUNCTION_ARGS); +Datum hnsw_sparsevec_support(PG_FUNCTION_ARGS); + +/* Index access methods */ +IndexBuildResult *hnswbuild_internal(Relation heap, Relation index, IndexInfo *indexInfo); +void hnswbuildempty_internal(Relation index); +bool hnswinsert_internal(Relation index, Datum *values, bool *isnull, ItemPointer heap_tid, Relation heap, + IndexUniqueCheck checkUnique); +IndexBulkDeleteResult *hnswbulkdelete_internal(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, + IndexBulkDeleteCallback callback, void *callbackState); +IndexBulkDeleteResult *hnswvacuumcleanup_internal(IndexVacuumInfo *info, IndexBulkDeleteResult *stats); +IndexScanDesc hnswbeginscan_internal(Relation index, int nkeys, int 
norderbys); +void hnswrescan_internal(IndexScanDesc scan, ScanKey keys, int nkeys, ScanKey orderbys, int norderbys); +bool hnswgettuple_internal(IndexScanDesc scan, ScanDirection dir); +void hnswendscan_internal(IndexScanDesc scan); +bool hnswdelete_internal(Relation index, Datum *values, const bool *isnull, ItemPointer heapTCtid, + bool isRollbackIndex); + +static inline HnswNeighborArray *HnswGetNeighbors(char *base, HnswElement element, int lc) +{ /* Return the neighbor array of element for layer lc; base is passed to HnswPtrAccess to resolve both stored pointers, and element->level >= lc is asserted */ + HnswNeighborArrayPtr *neighborList = (HnswNeighborArrayPtr *)HnswPtrAccess(base, element->neighbors); + + Assert(element->level >= lc); + + return (HnswNeighborArray *)HnswPtrAccess(base, neighborList[lc]); +} + +/* Hash tables: each group below sets the simplehash parameters (prefix, element type, key type, extern scope, SH_DECLARE) and then includes lib/simplehash.h */ +typedef struct TidHashEntry { + ItemPointerData tid; + char status; +} TidHashEntry; + +#define SH_PREFIX tidhash +#define SH_ELEMENT_TYPE TidHashEntry +#define SH_KEY_TYPE ItemPointerData +#define SH_SCOPE extern +#define SH_DECLARE +#include "lib/simplehash.h" + +typedef struct PointerHashEntry { + uintptr_t ptr; + char status; +} PointerHashEntry; + +#define SH_PREFIX pointerhash +#define SH_ELEMENT_TYPE PointerHashEntry +#define SH_KEY_TYPE uintptr_t +#define SH_SCOPE extern +#define SH_DECLARE +#include "lib/simplehash.h" + +typedef struct OffsetHashEntry { + Size offset; + char status; +} OffsetHashEntry; + +#define SH_PREFIX offsethash +#define SH_ELEMENT_TYPE OffsetHashEntry +#define SH_KEY_TYPE Size +#define SH_SCOPE extern +#define SH_DECLARE +#include "lib/simplehash.h" + +#endif diff --git a/src/include/access/datavec/ivfflat.h b/src/include/access/datavec/ivfflat.h new file mode 100644 index 0000000000000000000000000000000000000000..2be80dc57fba47b107c5b84cf39a2e2720ca0a47 --- /dev/null +++ b/src/include/access/datavec/ivfflat.h @@ -0,0 +1,408 @@ +/* + * Copyright (c) 2024 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. 
+ * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + * ------------------------------------------------------------------------- + * + * ivfflat.h + * + * IDENTIFICATION + * src/include/access/datavec/ivfflat.h + * + * ------------------------------------------------------------------------- + */ +#ifndef IVFFLAT_H +#define IVFFLAT_H + +#include "postgres.h" + +#include "access/genam.h" +#include "access/generic_xlog.h" +#include "catalog/pg_operator.h" +#include "lib/pairingheap.h" +#include "nodes/execnodes.h" +#include "port.h" /* for random() */ +#include "sampling.h" +#include "utils/tuplesort.h" +#include "access/datavec/vector.h" +#include "access/datavec/utils.h" +#include "postmaster/bgworker.h" + +#ifdef IVFFLAT_BENCH +#include "portability/instr_time.h" +#endif + +#define IVFFLAT_MAX_DIM 2000 + +/* Support functions */ +#define IVFFLAT_DISTANCE_PROC 1 +#define IVFFLAT_NORM_PROC 2 +#define IVFFLAT_KMEANS_DISTANCE_PROC 3 +#define IVFFLAT_KMEANS_NORM_PROC 4 +#define IVFFLAT_TYPE_INFO_PROC 5 + +#define IVFFLAT_VERSION 1 +#define IVFFLAT_MAGIC_NUMBER 0x14FF1A7 +#define IVFFLAT_PAGE_ID 0xFF84 + +/* Preserved page numbers */ +#define IVFFLAT_METAPAGE_BLKNO 0 +#define IVFPQTABLE_START_BLKNO 1 /* first list page of pqtable start page */ + +/* IVFFlat parameters */ +#define IVFFLAT_DEFAULT_LISTS 100 +#define IVFFLAT_MIN_LISTS 1 +#define IVFFLAT_MAX_LISTS 32768 +#define IVFFLAT_DEFAULT_PROBES 1 + +/* IVFPQ parameters */ +#define IVFPQ_DEFAULT_RESIDUAL false +#define IVFPQ_DIS_L2 1 +#define IVFPQ_DIS_IP 2 +#define IVFPQ_DIS_COSINE 3 +#define IVFPQTABLE_STORAGE_SIZE (uint16)(6 * 1024) /* pqtable storage size in each page */ + +/* Build phases */ +/* 
PROGRESS_CREATEIDX_SUBPHASE_INITIALIZE is 1 */ +#define PROGRESS_IVFFLAT_PHASE_KMEANS 2 +#define PROGRESS_IVFFLAT_PHASE_ASSIGN 3 +#define PROGRESS_IVFFLAT_PHASE_LOAD 4 + +#define IVF_NUM_COLUMNS 4 +#define IVF_LISTID 1 +#define IVF_TID 2 +#define IVF_VECTOR 3 +#define IVF_RESIDUAL 4 + +#define IVFFLAT_LIST_SIZE(size) (offsetof(IvfflatListData, center) + (size)) + +#define IvfflatPageGetOpaque(page) ((IvfflatPageOpaque)PageGetSpecialPointer(page)) +#define IvfflatPageGetMeta(page) ((IvfflatMetaPageData *)PageGetContents(page)) + +#ifdef IVFFLAT_BENCH +#define IvfflatBench(name, code) \ + do { \ + instr_time start; \ + instr_time duration; \ + INSTR_TIME_SET_CURRENT(start); \ + (code); \ + INSTR_TIME_SET_CURRENT(duration); \ + INSTR_TIME_SUBTRACT(duration, start); \ + elog(INFO, "%s: %.3f ms", name, INSTR_TIME_GET_MILLISEC(duration)); \ + } while (0) +#else +#define IvfflatBench(name, code) (code) +#endif + +#define RandomDouble() (((double)random()) / MAX_RANDOM_VALUE) +#define RandomInt() random() + +#define IVF_PQ_DIS_L2 1 /* NOTE(review): the IVF_PQ_DIS_* macros repeat the values of the IVFPQ_DIS_* macros defined earlier in this header; confirm both sets are needed */ +#define IVF_PQ_DIS_IP 2 +#define IVF_PQ_DIS_COSINE 3 + +/* Preserved page numbers. NOTE(review): IVF_METAPAGE_BLKNO, IVF_PQTABLE_START_BLKNO and IVF_PQTABLE_STORAGE_SIZE have the same values as IVFFLAT_METAPAGE_BLKNO, IVFPQTABLE_START_BLKNO and IVFPQTABLE_STORAGE_SIZE defined earlier in this header, and IVF_HEAD_BLKNO and IVF_PQTABLE_START_BLKNO are both 1; confirm the duplication is intended */ +#define IVF_METAPAGE_BLKNO 0 +#define IVF_HEAD_BLKNO 1 /* first element page */ +#define IVF_PQTABLE_START_BLKNO 1 /* pqtable start page */ +#define IVF_PQTABLE_STORAGE_SIZE (uint16)(6 * 1024) /* pqtable storage size in each page */ + +typedef struct ListInfo { + BlockNumber blkno; + OffsetNumber offno; +} ListInfo; + +/* IVFFlat index options */ +typedef struct IvfflatOptions { + int32 vl_len_; /* varlena header (do not touch directly!) 
*/ + int lists; /* number of lists */ + bool enablePQ; /* whether PQ is enabled */ + int pqM; /* presumably number of subquantizers, as documented for HnswOptions.pqM */ + int pqKsub; /* presumably number of centroids for each subquantizer, as documented for HnswOptions.pqKsub */ + bool byResidual; /* whether to quantize by residual */ +} IvfflatOptions; + +typedef struct IvfflatSpool { + Tuplesortstate *sortstate; + Relation heap; + Relation index; +} IvfflatSpool; + +typedef struct IvfflatShared { + /* Immutable state */ + Oid heaprelid; + Oid indexrelid; + int scantuplesortstates; + + /* Mutex for mutable state */ + slock_t mutex; + + /* Mutable state */ + int nparticipantsdone; + double reltuples; + double indtuples; + + Sharedsort *sharedsort; + Vector *ivfcenters; + List *rlist; + int workmem; + + /* Memory */ + MemoryContext tmpCtx; + +#ifdef IVFFLAT_KMEANS_DEBUG + double inertia; +#endif + ParallelHeapScanDescData heapdesc; // must come last +} IvfflatShared; + +#define ParallelTableScanFromIvfflatShared(shared) \ + (ParallelTableScanDesc)((char *)(shared) + BUFFERALIGN(sizeof(IvfflatShared))) + +typedef struct IvfflatLeader { + int nparticipanttuplesorts; + IvfflatShared *ivfshared; +} IvfflatLeader; + +typedef struct IvfflatTypeInfo { + int maxDimensions; + bool supportPQ; + Datum (*normalize)(PG_FUNCTION_ARGS); + Size (*itemSize)(int dimensions); + void (*updateCenter)(Pointer v, int dimensions, const float *x); + void (*sumCenter)(Pointer v, float *x); +} IvfflatTypeInfo; + +typedef struct IvfflatBuildState { + /* Info */ + Relation heap; + Relation index; + IndexInfo *indexInfo; + const IvfflatTypeInfo *typeInfo; + + /* Settings */ + int dimensions; + int lists; + + /* Statistics */ + double indtuples; + double reltuples; + + /* Support functions */ + FmgrInfo *procinfo; + FmgrInfo *normprocinfo; + FmgrInfo *kmeansnormprocinfo; + Oid collation; + + /* Variables */ + VectorArray samples; + VectorArray residuals; + VectorArray centers; + List *rlist; + ListInfo *listInfo; + +#ifdef IVFFLAT_KMEANS_DEBUG + double inertia; + double *listSums; + int *listCounts; +#endif + + /* Sampling */ + BlockSamplerData bs; + double rstate; + int rowstoskip; + + /* 
Sorting */ + Tuplesortstate *sortstate; + TupleDesc tupdesc; + TupleTableSlot *slot; + + /* Memory */ + MemoryContext tmpCtx; + + /* Parallel builds */ + IvfflatLeader *ivfleader; + + /* PQ info */ + bool enablePQ; + int pqM; + int pqKsub; + bool byResidual = false ; /* NOTE(review): the only member of IvfflatBuildState with a default member initializer; confirm this is intended */ + char *pqTable; + Size pqTableSize; + float *pqDistanceTable; + uint16 pqcodeSize; + PQParams *params; + float *preComputeTable; + uint64 preComputeTableSize; +} IvfflatBuildState; + +typedef struct IvfflatMetaPageData { + uint32 magicNumber; + uint32 version; + uint16 dimensions; + uint16 lists; + + /* PQ info */ + bool enablePQ; + bool byResidual; + uint16 pqM; + uint16 pqKsub; + uint16 pqcodeSize; + uint32 pqTableSize; + uint16 pqTableNblk; + uint64 pqPreComputeTableSize; + uint32 pqPreComputeTableNblk; +} IvfflatMetaPageData; + +typedef IvfflatMetaPageData *IvfflatMetaPage; + +typedef struct IvfflatPageOpaqueData { + BlockNumber nextblkno; + uint16 unused; + uint16 page_id; /* for identification of IVFFlat indexes */ +} IvfflatPageOpaqueData; + +typedef IvfflatPageOpaqueData *IvfflatPageOpaque; + +typedef struct IvfflatListData { + BlockNumber startPage; + BlockNumber insertPage; + Vector center; /* last member; IVFFLAT_LIST_SIZE(size) is offsetof(IvfflatListData, center) + size */ +} IvfflatListData; + +typedef IvfflatListData *IvfflatList; + +typedef struct IvfflatScanList { + pairingheap_node ph_node; + BlockNumber startPage; + double distance; + int key; + double pqDistance; +} IvfflatScanList; + +typedef struct IvfflatScanOpaqueData { + const IvfflatTypeInfo *typeInfo; + int listCount; + int probes; + int dimensions; + bool first; + + /* Sorting */ + Tuplesortstate *sortstate; + TupleDesc tupdesc; + TupleTableSlot *slot; + bool isnull; + + /* Support functions */ + FmgrInfo *procinfo; + FmgrInfo *normprocinfo; + Oid collation; + Datum (*distfunc)(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2); + + /* PQ info */ + bool enablePQ; + int pqM; + int pqKsub; + int funcType; + bool byResidual; + int kreorder; + MemoryContext pqCtx; + + /* Lists */ + pairingheap *listQueue; + 
IvfflatScanList lists[FLEXIBLE_ARRAY_MEMBER]; /* must come last */ +} IvfflatScanOpaqueData; + +typedef IvfflatScanOpaqueData *IvfflatScanOpaque; + +typedef struct IvfpqPairingHeapNode { + pairingheap_node ph_node; + double distance; + ItemPointer heapTid; + BlockNumber indexBlk; + OffsetNumber indexOff; +} IvfpqPairingHeapNode; + +/* Methods */ +void IvfflatKmeans(Relation index, VectorArray samples, VectorArray centers, const IvfflatTypeInfo *typeInfo); +FmgrInfo *IvfflatOptionalProcInfo(Relation index, uint16 procnum); +Datum IvfflatNormValue(const IvfflatTypeInfo *typeInfo, Oid collation, Datum value); +bool IvfflatCheckNorm(FmgrInfo *procinfo, Oid collation, Datum value); +int IvfflatGetLists(Relation index); +void IvfflatGetMetaPageInfo(Relation index, int *lists, int *dimensions); +void IvfflatUpdateList(Relation index, ListInfo listInfo, BlockNumber insertPage, BlockNumber originalInsertPage, + BlockNumber startPage, ForkNumber forkNum); +void IvfflatCommitBuffer(Buffer buf, GenericXLogState *state); +void IvfflatAppendPage(Relation index, Buffer *buf, Page *page, GenericXLogState **state, ForkNumber forkNum); +Buffer IvfflatNewBuffer(Relation index, ForkNumber forkNum); +void IvfflatInitPage(Buffer buf, Page page); +void IvfflatInitRegisterPage(Relation index, Buffer *buf, Page *page, GenericXLogState **state); +PGDLLEXPORT void IvfflatParallelBuildMain(const BgWorkerContext *bwc); +const IvfflatTypeInfo *IvfflatGetTypeInfo(Relation index); + +bool IvfGetEnablePQ(Relation index); +int IvfGetPqM(Relation index); +int IvfGetPqKsub(Relation index); +int IvfGetByResidual(Relation index); /* NOTE(review): declared to return int although byResidual is a bool in IvfflatOptions and IvfflatMetaPageData, while IvfGetEnablePQ returns bool; confirm */ + + +void IvfGetPQInfoFromMetaPage(Relation index, uint16 *pqTableNblk, uint32 *pqTableSize, + uint32 *pqPreComputeTableNblk, uint64 *pqPreComputeTableSize); +int getIVFPQfunctionType(FmgrInfo *procinfo, FmgrInfo *normprocinfo); +void IvfFlushPQInfoInternal(Relation index, char* table, BlockNumber startBlkno, uint32 nblks, uint64 totalSize); +void 
IvfFlushPQInfo(IvfflatBuildState *buildstate); + +int IvfComputePQTable(VectorArray samples, PQParams *params); +int IvfComputeVectorPQCode(float *vector, const PQParams *params, uint8 *pqCode); +int IvfGetPQDistanceTableAdc(float *vector, const PQParams *params, float *pqDistanceTable); +int IvfGetPQDistance(const uint8 *basecode, const uint8 *querycode, const PQParams *params, + const float *pqDistanceTable, float *pqDistance); + +void GetPQInfoOnDisk(IvfflatScanOpaque so, Relation index); +void IvfpqComputeQueryRelTables(IvfflatScanOpaque so, Relation index, Datum q, float *simTable); +uint8 *LoadPQCode(IndexTuple itup); +float GetPQDistance(float *pqDistanceTable, uint8 *code, double dis0, int pqM, int pqKsub, bool innerPro); +IvfpqPairingHeapNode * IvfpqCreatePairingHeapNode(float distance, ItemPointer heapTid, + BlockNumber indexBlk, OffsetNumber indexOff); +char* IVFPQLoadPQtable(Relation index); + +Datum ivfflathandler(PG_FUNCTION_ARGS); +Datum ivfflatbuild(PG_FUNCTION_ARGS); +Datum ivfflatbuildempty(PG_FUNCTION_ARGS); +Datum ivfflatinsert(PG_FUNCTION_ARGS); +Datum ivfflatbulkdelete(PG_FUNCTION_ARGS); +Datum ivfflatvacuumcleanup(PG_FUNCTION_ARGS); +Datum ivfflatcostestimate(PG_FUNCTION_ARGS); +Datum ivfflatoptions(PG_FUNCTION_ARGS); +Datum ivfflatvalidate(PG_FUNCTION_ARGS); +Datum ivfflatbeginscan(PG_FUNCTION_ARGS); +Datum ivfflatrescan(PG_FUNCTION_ARGS); +Datum ivfflatgettuple(PG_FUNCTION_ARGS); +Datum ivfflatendscan(PG_FUNCTION_ARGS); +Datum ivfflat_halfvec_support(PG_FUNCTION_ARGS); +Datum ivfflat_bit_support(PG_FUNCTION_ARGS); + +/* Index access methods */ +IndexBuildResult *ivfflatbuild_internal(Relation heap, Relation index, IndexInfo *indexInfo); +void ivfflatbuildempty_internal(Relation index); +bool ivfflatinsert_internal(Relation index, Datum *values, const bool *isnull, ItemPointer heap_tid, Relation heap, + IndexUniqueCheck checkUnique); +IndexBulkDeleteResult *ivfflatbulkdelete_internal(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, + 
IndexBulkDeleteCallback callback, void *callbackState); +IndexBulkDeleteResult *ivfflatvacuumcleanup_internal(IndexVacuumInfo *info, IndexBulkDeleteResult *stats); +IndexScanDesc ivfflatbeginscan_internal(Relation index, int nkeys, int norderbys); +void ivfflatrescan_internal(IndexScanDesc scan, ScanKey keys, int nkeys, ScanKey orderbys, int norderbys); +bool ivfflatgettuple_internal(IndexScanDesc scan, ScanDirection dir); +void ivfflatendscan_internal(IndexScanDesc scan); + +#endif diff --git a/src/include/access/datavec/pg_prng.h b/src/include/access/datavec/pg_prng.h new file mode 100644 index 0000000000000000000000000000000000000000..0ce92ccf6422640b9bc4ead488644de01f8dcb31 --- /dev/null +++ b/src/include/access/datavec/pg_prng.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2024 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + * ------------------------------------------------------------------------- + * + * pg_prng.h + * + * IDENTIFICATION + * src/include/access/datavec/pg_prng.h + * + * ------------------------------------------------------------------------- + */ +#ifndef PG_PRNG_H +#define PG_PRNG_H + +/* + * State vector for PRNG generation. Callers should treat this as an + * opaque typedef, but we expose its definition to allow it to be + * embedded in other structs. + */ +typedef struct pg_prng_state { + uint64 s0, s1; +} pg_prng_state; + +/* + * Callers not needing local PRNG series may use this global state vector, + * after initializing it with one of the pg_prng_...seed functions. 
+ */ +extern PGDLLIMPORT pg_prng_state pg_global_prng_state; + +extern void pg_prng_seed(pg_prng_state *state, uint64 seed); +extern void pg_prng_fseed(pg_prng_state *state, double fseed); +extern bool pg_prng_seed_check(pg_prng_state *state); + +/* + * Initialize the PRNG state from the pg_strong_random source, + * taking care that we don't produce all-zeroes. If this returns false, + * caller should initialize the PRNG state from some other random seed, + * using pg_prng_[f]seed. + * + * We implement this as a macro, so that the pg_strong_random() call is + * in the caller. If it were in pg_prng.c, programs using pg_prng.c + * but not needing strong seeding would nonetheless be forced to pull in + * pg_strong_random.c and thence OpenSSL. + */ +#define pg_prng_strong_seed(state) \ + (pg_strong_random((void *)(state), sizeof(pg_prng_state)) ? pg_prng_seed_check(state) : false) + +extern uint64 pg_prng_uint64(pg_prng_state *state); +extern uint64 pg_prng_uint64_range(pg_prng_state *state, uint64 rmin, uint64 rmax); +extern int64 pg_prng_int64(pg_prng_state *state); +extern int64 pg_prng_int64p(pg_prng_state *state); +extern uint32 pg_prng_uint32(pg_prng_state *state); +extern int32 pg_prng_int32(pg_prng_state *state); +extern int32 pg_prng_int32p(pg_prng_state *state); +extern double pg_prng_double(pg_prng_state *state); +extern double pg_prng_double_normal(pg_prng_state *state); +extern bool pg_prng_bool(pg_prng_state *state); + +#endif /* PG_PRNG_H */ diff --git a/src/include/access/datavec/ryu_common.h b/src/include/access/datavec/ryu_common.h new file mode 100644 index 0000000000000000000000000000000000000000..e309bc2823e8adfcfb9ac7f59a60cfa63eaed146 --- /dev/null +++ b/src/include/access/datavec/ryu_common.h @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2024 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. 
+ * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + * ------------------------------------------------------------------------- + * + * ryu_common.h + * + * IDENTIFICATION + * src/include/access/datavec/ryu_common.h + * + * ------------------------------------------------------------------------- + */ +#ifndef RYU_COMMON_H +#define RYU_COMMON_H + +/* + * Upstream Ryu's output is always the shortest possible. But we adjust that + * slightly to improve portability: we avoid outputting the exact midpoint + * value between two representable floats, since that relies on the reader + * getting the round-to-even rule correct, which seems to be the common + * failure mode. + * + * Defining this to 1 would restore the upstream behavior. + */ +#define STRICTLY_SHORTEST 0 + +#if SIZEOF_SIZE_T < 8 +#define RYU_32_BIT_PLATFORM +#endif + +/* + * A table of all two-digit numbers. This is used to speed up decimal digit + * generation by copying pairs of digits into the final output. 
+ */ +static const char DIGIT_TABLE[200] = { + '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', '0', '7', '0', '8', '0', '9', '1', '0', '1', + '1', '1', '2', '1', '3', '1', '4', '1', '5', '1', '6', '1', '7', '1', '8', '1', '9', '2', '0', '2', '1', '2', '2', + '2', '3', '2', '4', '2', '5', '2', '6', '2', '7', '2', '8', '2', '9', '3', '0', '3', '1', '3', '2', '3', '3', '3', + '4', '3', '5', '3', '6', '3', '7', '3', '8', '3', '9', '4', '0', '4', '1', '4', '2', '4', '3', '4', '4', '4', '5', + '4', '6', '4', '7', '4', '8', '4', '9', '5', '0', '5', '1', '5', '2', '5', '3', '5', '4', '5', '5', '5', '6', '5', + '7', '5', '8', '5', '9', '6', '0', '6', '1', '6', '2', '6', '3', '6', '4', '6', '5', '6', '6', '6', '7', '6', '8', + '6', '9', '7', '0', '7', '1', '7', '2', '7', '3', '7', '4', '7', '5', '7', '6', '7', '7', '7', '8', '7', '9', '8', + '0', '8', '1', '8', '2', '8', '3', '8', '4', '8', '5', '8', '6', '8', '7', '8', '8', '8', '9', '9', '0', '9', '1', + '9', '2', '9', '3', '9', '4', '9', '5', '9', '6', '9', '7', '9', '8', '9', '9'}; + +/* Returns e == 0 ? 1 : ceil(log_2(5^e)). */ +static inline uint32 pow5bits(const int32 e) +{ + /* + * This approximation works up to the point that the multiplication + * overflows at e = 3529. + * + * If the multiplication were done in 64 bits, it would fail at 5^4004 + * which is just greater than 2^9297. + */ + Assert(e >= 0); + Assert(e <= 3528); + return ((((uint32)e) * 1217359) >> 19) + 1; +} + +/* Returns floor(log_10(2^e)). */ +static inline int32 log10Pow2(const int32 e) +{ + /* + * The first value this approximation fails for is 2^1651 which is just + * greater than 10^297. + */ + Assert(e >= 0); + Assert(e <= 1650); + return (int32)((((uint32)e) * 78913) >> 18); +} + +/* Returns floor(log_10(5^e)). */ +static inline int32 log10Pow5(const int32 e) +{ + /* + * The first value this approximation fails for is 5^2621 which is just + * greater than 10^1832. 
+ */ + Assert(e >= 0); + Assert(e <= 2620); + return (int32)((((uint32)e) * 732923) >> 20); +} + +static inline int copy_special_str(char *const result, const bool sign, const bool exponent, const bool mantissa) +{ /* Write the text for a special value into result and return the number of characters written: NaN (3, sign ignored) when mantissa is set, otherwise an optional leading minus followed by Infinity when exponent is set, or by a single 0 digit; no terminating NUL is written here */ + errno_t rc = EOK; + if (mantissa) { + rc = memcpy_s(result, 3, "NaN", 3); + securec_check(rc, "\0", "\0"); + return 3; + } + if (sign) { + result[0] = '-'; + } + if (exponent) { + rc = memcpy_s(result + sign, 8, "Infinity", 8); + securec_check(rc, "\0", "\0"); + return sign + 8; + } + result[sign] = '0'; + return sign + 1; +} + +static inline uint32 float_to_bits(const float f) +{ /* Copy the bytes of f into a zero-initialized uint32 with memcpy_s and return it */ + uint32 bits = 0; + errno_t rc = EOK; + + rc = memcpy_s(&bits, sizeof(float), &f, sizeof(float)); + securec_check(rc, "\0", "\0"); + return bits; +} + +static inline uint64 double_to_bits(const double d) +{ /* Copy the bytes of d into a zero-initialized uint64 with memcpy_s and return it */ + uint64 bits = 0; + errno_t rc = EOK; + + rc = memcpy_s(&bits, sizeof(double), &d, sizeof(double)); + securec_check(rc, "\0", "\0"); + return bits; +} + +#endif /* RYU_COMMON_H */ diff --git a/src/include/access/datavec/sampling.h b/src/include/access/datavec/sampling.h new file mode 100644 index 0000000000000000000000000000000000000000..89b76cd6333bfbd5b691c37aa49cd42c9a1d7f70 --- /dev/null +++ b/src/include/access/datavec/sampling.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2024 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * ------------------------------------------------------------------------- + * + * sampling.h + * + * IDENTIFICATION + * src/include/access/datavec/sampling.h + * + * ------------------------------------------------------------------------- + */ +#ifndef SAMPLING_H +#define SAMPLING_H + +#include "access/datavec/pg_prng.h" +#include "storage/buf/block.h" + +extern void sampler_random_init_state(uint32 seed, pg_prng_state *randstate); +extern double sampler_random_fract(pg_prng_state *randstate); + +typedef struct { + BlockNumber N; /* number of blocks, known in advance */ + uint32 n; /* desired sample size */ + BlockNumber t; /* current block number */ + uint32 m; /* blocks selected so far */ + pg_prng_state randstate; /* random generator state */ +} BlockSamplerData2; + +typedef BlockSamplerData2 *BlockSampler2; + +extern BlockNumber BlockSampler_Init2(BlockSampler2 bs, BlockNumber nblocks, int samplesize, uint32 randseed); +extern bool BlockSampler_HasMore2(BlockSampler2 bs); +extern BlockNumber BlockSampler_Next2(BlockSampler2 bs); + +typedef struct { + double W; + pg_prng_state randstate; /* random generator state */ +} ReservoirStateData; + +typedef ReservoirStateData *ReservoirState; + +extern void reservoir_init_selection_state(ReservoirState rs, int n); +extern double reservoir_get_next_S(ReservoirState rs, double t, int n); + +#endif /* SAMPLING_H */ diff --git a/src/include/access/datavec/shortest_dec.h b/src/include/access/datavec/shortest_dec.h new file mode 100644 index 0000000000000000000000000000000000000000..d734b721442e5a1c3169cc412ead8f3c47e26441 --- /dev/null +++ b/src/include/access/datavec/shortest_dec.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2024 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. 
+ * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + * ------------------------------------------------------------------------- + * + * shortest_dec.h + * + * IDENTIFICATION + * src/include/access/datavec/shortest_dec.h + * + * ------------------------------------------------------------------------- + */ +#ifndef SHORTEST_DEC_H +#define SHORTEST_DEC_H + +#define FLOAT_SHORTEST_DECIMAL_LEN 16 /* NOTE(review): presumably the buffer size, in bytes, that callers must provide for result; confirm against the implementation */ + +int FloatToShortestDecimalBufn(float f, char *result); /* NOTE(review): presumably writes the shortest decimal form of f without a terminating NUL and returns its length; confirm */ +int FloatToShortestDecimalBuf(float f, char *result); /* NOTE(review): presumably the NUL-terminating variant of the function above; confirm */ + +#endif /* SHORTEST_DEC_H */ diff --git a/src/include/access/datavec/sparsevec.h b/src/include/access/datavec/sparsevec.h new file mode 100644 index 0000000000000000000000000000000000000000..573f62db424983b6cb21621192112858faac8e0d --- /dev/null +++ b/src/include/access/datavec/sparsevec.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2024 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * ------------------------------------------------------------------------- + * + * sparsevec.h + * + * IDENTIFICATION + * src/include/access/datavec/sparsevec.h + * + * ------------------------------------------------------------------------- + */ +#ifndef SPARSEVEC_H +#define SPARSEVEC_H + +#define SPARSEVEC_MAX_DIM 1000000000 +#define SPARSEVEC_MAX_NNZ 16000 + +#define DatumGetSparseVector(x) ((SparseVector *)PG_DETOAST_DATUM(x)) +#define PG_GETARG_SPARSEVEC_P(x) DatumGetSparseVector(PG_GETARG_DATUM(x)) +#define PG_RETURN_SPARSEVEC_P(x) PG_RETURN_POINTER(x) + +/* + * Indices use 0-based numbering for the on-disk (and binary) format (consistent with C) + * and are always sorted. Values come after indices. + */ + +Datum sparsevec_in(PG_FUNCTION_ARGS); +Datum sparsevec_out(PG_FUNCTION_ARGS); +Datum sparsevec_typmod_in(PG_FUNCTION_ARGS); +Datum sparsevec_recv(PG_FUNCTION_ARGS); +Datum sparsevec_send(PG_FUNCTION_ARGS); +Datum sparsevec_l2_distance(PG_FUNCTION_ARGS); +Datum sparsevec_inner_product(PG_FUNCTION_ARGS); +Datum sparsevec_cosine_distance(PG_FUNCTION_ARGS); +Datum sparsevec_l1_distance(PG_FUNCTION_ARGS); +Datum sparsevec_l2_norm(PG_FUNCTION_ARGS); +Datum sparsevec_l2_normalize(PG_FUNCTION_ARGS); +Datum sparsevec_lt(PG_FUNCTION_ARGS); +Datum sparsevec_le(PG_FUNCTION_ARGS); +Datum sparsevec_eq(PG_FUNCTION_ARGS); +Datum sparsevec_ne(PG_FUNCTION_ARGS); +Datum sparsevec_ge(PG_FUNCTION_ARGS); +Datum sparsevec_gt(PG_FUNCTION_ARGS); +Datum sparsevec_cmp(PG_FUNCTION_ARGS); +Datum sparsevec_l2_squared_distance(PG_FUNCTION_ARGS); +Datum sparsevec_negative_inner_product(PG_FUNCTION_ARGS); +Datum sparsevec(PG_FUNCTION_ARGS); +Datum vector_to_sparsevec(PG_FUNCTION_ARGS); +Datum sparsevec_to_vector(PG_FUNCTION_ARGS); +Datum halfvec_to_sparsevec(PG_FUNCTION_ARGS); +Datum sparsevec_to_halfvec(PG_FUNCTION_ARGS); + +typedef struct SparseVector { + int32 vl_len_; /* varlena header (do not touch directly!) 
*/ + int32 dim; /* number of dimensions */ + int32 nnz; /* number of non-zero elements */ + int32 unused; /* reserved for future use, always zero */ + int32 indices[FLEXIBLE_ARRAY_MEMBER]; /* nnz indices; the nnz float values follow them, see SPARSEVEC_VALUES */ +} SparseVector; + +/* Use functions instead of macros to avoid double evaluation */ + +static inline Size SPARSEVEC_SIZE(int nnz) /* Total bytes of a SparseVector with nnz entries: fixed header, then nnz int32 indices, then nnz float values. */ +{ + return offsetof(SparseVector, indices) + (nnz * sizeof(int32)) + (nnz * sizeof(float)); +} + +static inline float *SPARSEVEC_VALUES(SparseVector *x) /* Pointer to the float values, which start right after the x->nnz indices. */ +{ + return (float *)(((char *)x) + offsetof(SparseVector, indices) + (x->nnz * sizeof(int32))); +} + +SparseVector *InitSparseVector(int dim, int nnz); + +#endif diff --git a/src/include/access/datavec/utils.h b/src/include/access/datavec/utils.h new file mode 100644 index 0000000000000000000000000000000000000000..3041bc6380e34a81f6ada33f9c75d94bbabf9d2e --- /dev/null +++ b/src/include/access/datavec/utils.h @@ -0,0 +1,74 @@ +#ifndef UTILS_H +#define UTILS_H +#include "postgres.h" +#include "fmgr/fmgr_comp.h" +#include "access/multi_redo_api.h" +#include /* NOTE(review): the header name is missing on this include, probably lost during extraction; restore the original system header */ + +#define GENERIC_DEFAULT_ENABLE_PQ false +#define GENERIC_DEFAULT_PQ_M 8 +#define GENERIC_MIN_PQ_M 1 +#define GENERIC_MAX_PQ_M HNSW_MAX_DIM +#define GENERIC_DEFAULT_PQ_KSUB 256 +#define GENERIC_MIN_PQ_KSUB 1 +#define GENERIC_MAX_PQ_KSUB 256 + +typedef struct VectorArrayData { /* Array of fixed-width slots: VectorArrayGet addresses slot i at items + i * itemsize. */ + int length; + int maxlen; + int dim; + Size itemsize; /* slot stride in bytes */ + char *items; /* base address of the slot storage */ +} VectorArrayData; + +typedef struct PQParams { /* Parameter bundle passed to every pq_func_t entry point. NOTE(review): field meanings are not visible in this header; confirm against the PQ implementation. */ + int pqM; + int pqKsub; + int funcType; + int dim; + size_t subItemSize; + char *pqTable; +} PQParams; + +#define VECTOR_ARRAY_SIZE(_length, _size) (sizeof(VectorArrayData) + (_length) * MAXALIGN(_size)) + +typedef VectorArrayData * VectorArray; + +typedef struct st_pq_func { /* Table of PQ entry points held as function pointers. NOTE(review): presumably filled in by PQInit() from a dynamically loaded library kept in handle; confirm. */ + bool inited; + void *handle; + int (*ComputePQTable)(VectorArray samples, PQParams *params); + int (*ComputeVectorPQCode)(float *vector, const PQParams *params, uint8 *pqCode); + int (*GetPQDistanceTableSdc)(const PQParams *params, float *pqDistanceTable); + int 
(*GetPQDistanceTableAdc)(float *vector, const PQParams *params, float *pqDistanceTable); + int (*GetPQDistance)(const uint8 *basecode, const uint8 *querycode, const PQParams *params, + const float *pqDistanceTable, float *pqDistance); +} pq_func_t; +extern pq_func_t g_pq_func; + +static inline Pointer VectorArrayGet(VectorArray arr, int offset) /* Returns the address of slot offset; no bounds check against length or maxlen is made here. */ +{ + return ((char *) arr->items) + (offset * arr->itemsize); +} + +static inline void VectorArraySet(VectorArray arr, int offset, Pointer val) /* Copies the varlena val (VARSIZE_ANY(val) bytes) into slot offset. The destination limit given to memcpy_s is the slot width arr->itemsize, so a value wider than one slot is reported through securec_check_c instead of overrunning the next slot. */ +{ + errno_t rc = memcpy_s(VectorArrayGet(arr, offset), arr->itemsize, val, VARSIZE_ANY(val)); + securec_check_c(rc, "\0", "\0"); +} + +Size VectorItemSize(int dimensions); +Size HalfvecItemSize(int dimensions); +Size BitItemSize(int dimensions); +void VectorUpdateCenter(Pointer v, int dimensions, const float *x); +void HalfvecUpdateCenter(Pointer v, int dimensions, const float *x); +void BitUpdateCenter(Pointer v, int dimensions, const float *x); +void VectorSumCenter(Pointer v, float *x); +void HalfvecSumCenter(Pointer v, float *x); +void BitSumCenter(Pointer v, float *x); +VectorArray VectorArrayInit(int maxlen, int dimensions, Size itemsize); +void VectorArrayFree(VectorArray arr); + +int PQInit(); +void PQUinit(); +#endif \ No newline at end of file diff --git a/src/include/access/datavec/vecindex.h b/src/include/access/datavec/vecindex.h new file mode 100644 index 0000000000000000000000000000000000000000..16d7659930dde0cdd886ddf52cb8b175ea5f7101 --- /dev/null +++ b/src/include/access/datavec/vecindex.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2024 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. 
+ * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + * ------------------------------------------------------------------------- + * + * vecindex.h + * + * IDENTIFICATION + * src/include/access/datavec/vecindex.h + * + * ------------------------------------------------------------------------- + */ +#ifndef VECINDEX_H +#define VECINDEX_H + +#define MIN(A, B) ((B) < (A) ? (B) : (A)) /* evaluates one of its arguments twice; do not pass expressions with side effects */ +#define MAX(A, B) ((B) > (A) ? (B) : (A)) /* evaluates one of its arguments twice; do not pass expressions with side effects */ + +#define VecIndexTupleGetXid(itup) (((char *)(itup)) + HNSW_ELEMENT_TUPLE_SIZE(VARSIZE_ANY(&(itup)->data))) /* address HNSW_ELEMENT_TUPLE_SIZE(size of itup->data) bytes past itup; NOTE(review): presumably where the tuple xids are stored; confirm */ + +struct VectorScanData { + /* + * Used in ustore only: the last returned index tuple that was modified + * by the current transaction. See VecVisibilityCheckCid() for more information. + */ + char *lastSelfModifiedItup; + uint16 lastSelfModifiedItupBufferSize; + Buffer buf; +}; + +bool VecItupGetXminXmax(Page page, OffsetNumber offnum, TransactionId oldest_xmin, TransactionId *xmin, + TransactionId *xmax, bool *xminCommitted, bool *xmaxCommitted, bool isToast); +bool VecVisibilityCheck(IndexScanDesc scan, Page page, OffsetNumber offnum, bool *needRecheck); + +#endif // VECINDEX_H diff --git a/src/include/access/datavec/vector.h b/src/include/access/datavec/vector.h new file mode 100644 index 0000000000000000000000000000000000000000..c2fcb2bf662c58c2e581b07662067c1537963c61 --- /dev/null +++ b/src/include/access/datavec/vector.h @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2024 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. 
+ * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + * ------------------------------------------------------------------------- + * + * vector.h + * + * IDENTIFICATION + * src/include/access/datavec/vector.h + * + * ------------------------------------------------------------------------- + */ +#ifndef VECTOR_H +#define VECTOR_H + +#define VECTOR_MAX_DIM 16000 +#define MEM_INFO_NUM (1024 * 1024) + +#define VECTOR_SIZE(_dim) (offsetof(Vector, x) + sizeof(float) * (_dim)) /* bytes of a Vector holding _dim floats: fixed header plus the float array */ +#define DatumGetVector(x) ((Vector *)PG_DETOAST_DATUM(x)) +#define PG_GETARG_VECTOR_P(x) DatumGetVector(PG_GETARG_DATUM(x)) +#define PG_RETURN_VECTOR_P(x) PG_RETURN_POINTER(x) +#define UpdateProgress(index, val) ((void)(val)) /* no-op: evaluates val and discards it; index is ignored */ + +typedef struct Vector { + int32 vl_len_; /* varlena header (do not touch directly!) 
*/ + int16 dim; /* number of dimensions */ + int16 unused; /* reserved for future use, always zero */ + float x[FLEXIBLE_ARRAY_MEMBER]; /* element values; VECTOR_SIZE(dim) accounts for dim floats here */ +} Vector; + +#if defined(USE_TARGET_CLONES) && !defined(__FMA__) /* only when target clones are requested and the build does not already target FMA */ +#define VECTOR_TARGET_CLONES __attribute__((target_clones("default", "fma"))) +#else +#define VECTOR_TARGET_CLONES +#endif + +VECTOR_TARGET_CLONES float VectorL2SquaredDistance(int dim, float *ax, float *bx); +VECTOR_TARGET_CLONES float VectorInnerProduct(int dim, float *ax, float *bx); +Vector *InitVector(int dim); +void PrintVector(char *msg, Vector *vector); +int vector_cmp_internal(Vector *a, Vector *b); +void VectorMadd(size_t n, const float *ax, float bf, const float *bx, float *cx); +void VectorL2SquaredDistanceNY(size_t d, size_t ny, float *x, char *pqTable, Size subSize, int offset, float *dis); +void VectorInnerProductNY(size_t d, size_t ny, float *x, char *pqTable, Size subSize, int offset, float *dis); +void LogNewpageRange(Relation rel, ForkNumber forknum, BlockNumber startblk, BlockNumber endblk, bool page_std); +int PlanCreateIndexWorkers(Relation heapRelation, IndexInfo *indexInfo); + +Datum vector_in(PG_FUNCTION_ARGS); +Datum vector_out(PG_FUNCTION_ARGS); +Datum vector_typmod_in(PG_FUNCTION_ARGS); +Datum vector_recv(PG_FUNCTION_ARGS); +Datum vector_send(PG_FUNCTION_ARGS); +Datum vector(PG_FUNCTION_ARGS); +Datum array_to_vector(PG_FUNCTION_ARGS); +Datum vector_to_float4(PG_FUNCTION_ARGS); +Datum vector_to_int4(PG_FUNCTION_ARGS); +Datum vector_to_float8(PG_FUNCTION_ARGS); +Datum vector_to_numeric(PG_FUNCTION_ARGS); +Datum vector_to_text(PG_FUNCTION_ARGS); +Datum vector_to_varchar(PG_FUNCTION_ARGS); +Datum l2_distance(PG_FUNCTION_ARGS); +Datum vector_l2_squared_distance(PG_FUNCTION_ARGS); +Datum inner_product(PG_FUNCTION_ARGS); +Datum vector_negative_inner_product(PG_FUNCTION_ARGS); +Datum cosine_distance(PG_FUNCTION_ARGS); +Datum vector_spherical_distance(PG_FUNCTION_ARGS); +Datum vector_dims(PG_FUNCTION_ARGS); +Datum vector_norm(PG_FUNCTION_ARGS); 
+Datum vector_add(PG_FUNCTION_ARGS); +Datum vector_sub(PG_FUNCTION_ARGS); +Datum vector_le(PG_FUNCTION_ARGS); +Datum vector_lt(PG_FUNCTION_ARGS); +Datum vector_eq(PG_FUNCTION_ARGS); +Datum vector_ne(PG_FUNCTION_ARGS); +Datum vector_ge(PG_FUNCTION_ARGS); +Datum vector_gt(PG_FUNCTION_ARGS); +Datum vector_cmp(PG_FUNCTION_ARGS); +Datum vector_accum(PG_FUNCTION_ARGS); +Datum vector_combine(PG_FUNCTION_ARGS); +Datum vector_avg(PG_FUNCTION_ARGS); +Datum l1_distance(PG_FUNCTION_ARGS); +Datum l2_normalize(PG_FUNCTION_ARGS); +Datum binary_quantize(PG_FUNCTION_ARGS); +Datum subvector(PG_FUNCTION_ARGS); +Datum vector_mul(PG_FUNCTION_ARGS); +Datum vector_concat(PG_FUNCTION_ARGS); + +#endif diff --git a/src/include/access/generic_xlog.h b/src/include/access/generic_xlog.h index c6f5da1d1f5dbedf2569b22e3bebd79f3491ef8e..c2b10a8b29b564d6ef74d58f8e15a7cf1f3a9f05 100644 --- a/src/include/access/generic_xlog.h +++ b/src/include/access/generic_xlog.h @@ -11,6 +11,7 @@ #define GENERIC_XLOG_H #include "access/xlog.h" +#include "access/xlogproc.h" #include "access/xlog_internal.h" #include "access/xloginsert.h" #include "storage/buf/bufpage.h" @@ -19,6 +20,8 @@ #define MAX_GENERIC_XLOG_PAGES XLR_NORMAL_MAX_BLOCK_ID #define GENERIC_XLOG_FULL_IMAGE 0x0001 /* write full-page image */ +#define XLOG_GENERIC_LOG 0x00 + /* state of generic xlog record construction */ struct GenericXLogState; typedef struct GenericXLogState GenericXLogState; @@ -31,6 +34,10 @@ extern void GenericXLogUnregister(GenericXLogState *state, Buffer buffer); extern XLogRecPtr GenericXLogFinish(GenericXLogState *state); extern void GenericXLogAbort(GenericXLogState *state); +extern void GenericRedoDataBlock(XLogBlockHead *blockhead, XLogBlockDataParse *blockdatarec, + RedoBufferInfo *bufferinfo); +extern XLogRecParseState *GenericRedoParseToBlock(XLogReaderState *record, uint32 *blocknum); + /* functions defined for rmgr */ extern void generic_redo(XLogReaderState *record); extern const char *generic_identify(uint8 
info); diff --git a/src/include/access/reloptions.h b/src/include/access/reloptions.h index 85e06682acdeec698f2ac23587254f6493daeab5..7ac4881aed3d70499c3017465f0cb74081659d57 100644 --- a/src/include/access/reloptions.h +++ b/src/include/access/reloptions.h @@ -51,8 +51,10 @@ typedef enum relopt_kind { RELOPT_KIND_NPARSER = (1 << 12), /* text search configuration options defined by ngram */ RELOPT_KIND_CBTREE = (1 << 13), RELOPT_KIND_PPARSER = (1 << 14), /* text search configuration options defined by pound */ + RELOPT_KIND_IVFFLAT = (1 << 15), + RELOPT_KIND_HNSW = (1 << 16), /* if you add a new kind, make sure you update "last_default" too */ - RELOPT_KIND_LAST_DEFAULT = RELOPT_KIND_PPARSER, + RELOPT_KIND_LAST_DEFAULT = RELOPT_KIND_HNSW, /* some compilers treat enums as signed ints, so we can't use 1 << 31 */ RELOPT_KIND_MAX = (1 << 30) } relopt_kind; diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h index d3f40dd24006f8da4f84f92b3b91facf436e30f6..6cec21b4a6ac0384e58c9276c056c097bea719c2 100644 --- a/src/include/access/relscan.h +++ b/src/include/access/relscan.h @@ -173,6 +173,7 @@ typedef struct IndexScanDescData { /* put decompressed heap tuple data into xs_ctbuf_hdr be careful! when malloc memory should give extra mem for *xs_ctbuf_hdr. t_bits which is varlength arr */ + int64 count; HeapTupleHeaderData xs_ctbuf_hdr; /* DO NOT add any other members here. xs_ctbuf_hdr must be the last one. 
*/ } IndexScanDescData; diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index 061a610fbc44f5b3f9470cb093d92e43999747e5..cd63787dd31419d329b5f7a69171f4adbd117f50 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -482,7 +482,8 @@ typedef struct TableAmRoutine { */ double (*index_build_scan)(Relation heapRelation, Relation indexRelation, IndexInfo *indexInfo, bool allow_sync, - IndexBuildCallback callback, void *callback_state, TableScanDesc scan); + IndexBuildCallback callback, void *callback_state, TableScanDesc scan, + BlockNumber startBlkno, BlockNumber numblocks); void (*index_validate_scan)(Relation heapRelation, Relation indexRelation, IndexInfo *indexInfo, Snapshot snapshot, v_i_state *state); @@ -993,10 +994,11 @@ static inline void tableam_scan_init_parallel_seqscan(TableScanDesc sscan, int32 } static inline double tableam_index_build_scan(Relation heapRelation, Relation indexRelation, IndexInfo *indexInfo, - bool allow_sync, IndexBuildCallback callback, void *callback_state, TableScanDesc scan) + bool allow_sync, IndexBuildCallback callback, void *callback_state, TableScanDesc scan, + BlockNumber startBlkno = 0, BlockNumber numblocks = InvalidBlockNumber) { return heapRelation->rd_tam_ops->index_build_scan(heapRelation, indexRelation, indexInfo, - allow_sync, callback, callback_state, scan); + allow_sync, callback, callback_state, scan, startBlkno, numblocks); } static inline void tableam_index_validate_scan(Relation heapRelation, Relation indexRelation, IndexInfo *indexInfo, diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h index 4e322f6f472fc0b99857090f4ff5efdbae14774f..f50127acb7ed423dd5beff6608606df58489002f 100644 --- a/src/include/catalog/index.h +++ b/src/include/catalog/index.h @@ -169,7 +169,8 @@ extern void index_build(Relation heapRelation, bool isTruncGTT = false); extern double IndexBuildHeapScan(Relation heapRelation, Relation indexRelation, IndexInfo *indexInfo, - 
bool allow_sync, IndexBuildCallback callback, void *callback_state, TableScanDesc scan = NULL); + bool allow_sync, IndexBuildCallback callback, void *callback_state, TableScanDesc scan = NULL, + BlockNumber startBlkno = 0, BlockNumber numblocks = InvalidBlockNumber); extern double IndexBuildUHeapScan(Relation heapRelation, Relation indexRelation, IndexInfo *indexInfo, diff --git a/src/include/catalog/pg_aggregate.h b/src/include/catalog/pg_aggregate.h index 6cd0082ebd8be42cb7f9c2aed16a805c7281ff7d..c362b9641120e7052aa3e2c2e52771b6378c36c3 100644 --- a/src/include/catalog/pg_aggregate.h +++ b/src/include/catalog/pg_aggregate.h @@ -485,7 +485,9 @@ DATA(insert ( 9990 tdigest_merge tdigest_merge_to_one calculate_quantile_of DATA(insert ( 9986 tdigest_mergep tdigest_merge_to_one calculate_value_at 0 4406 _null_ _null_ n 0)); #define ADDTDIGESTMERGEPOID 9986 - +/*vector aggregate function*/ +DATA(insert ( 8241 vector_accum vector_combine vector_avg 0 1022 "{0}" "{0}" n 0)); +DATA(insert ( 8242 vector_add vector_add - 0 8305 _null_ _null_ n 0)); /* * prototypes for functions in pg_aggregate.c */ diff --git a/src/include/catalog/pg_am.h b/src/include/catalog/pg_am.h index 45f7c421c95c11f0129d3072f006901685849a8d..8ff4082a8fe59dbbd84655a6fc255552b6929725 100644 --- a/src/include/catalog/pg_am.h +++ b/src/include/catalog/pg_am.h @@ -70,6 +70,7 @@ CATALOG(pg_am,2601) BKI_SCHEMA_MACRO regproc amcostestimate; /* estimate cost of an indexscan */ regproc amoptions; /* parse AM-specific parameters */ regproc amhandler; /* handler function */ + regproc amdelete; /* index delete function */ } FormData_pg_am; /* ---------------- @@ -83,7 +84,7 @@ typedef FormData_pg_am *Form_pg_am; * compiler constants for pg_am * ---------------- */ -#define Natts_pg_am 32 +#define Natts_pg_am 33 #define Anum_pg_am_amname 1 #define Anum_pg_am_amstrategies 2 #define Anum_pg_am_amsupport 3 @@ -116,44 +117,53 @@ typedef FormData_pg_am *Form_pg_am; #define Anum_pg_am_amcostestimate 30 #define 
Anum_pg_am_amoptions 31 #define Anum_pg_am_amhandler 32 +#define Anum_pg_am_amdelete 33 /* ---------------- * initial contents of pg_am * ---------------- */ -DATA(insert OID = 403 ( btree 5 3 t f t t t t t t f t t 0 btinsert btbeginscan btgettuple btgetbitmap btrescan btendscan btmarkpos btrestrpos btmerge btbuild btbuildempty btbulkdelete btvacuumcleanup btcanreturn btcostestimate btoptions -)); +DATA(insert OID = 403 ( btree 5 3 t f t t t t t t f t t 0 btinsert btbeginscan btgettuple btgetbitmap btrescan btendscan btmarkpos btrestrpos btmerge btbuild btbuildempty btbulkdelete btvacuumcleanup btcanreturn btcostestimate btoptions - -)); DESCR("b-tree index access method"); #define BTREE_AM_OID 403 -DATA(insert OID = 405 ( hash 1 1 f f t f f f f f f f f 23 hashinsert hashbeginscan hashgettuple hashgetbitmap hashrescan hashendscan hashmarkpos hashrestrpos hashmerge hashbuild hashbuildempty hashbulkdelete hashvacuumcleanup - hashcostestimate hashoptions -)); +DATA(insert OID = 405 ( hash 1 1 f f t f f f f f f f f 23 hashinsert hashbeginscan hashgettuple hashgetbitmap hashrescan hashendscan hashmarkpos hashrestrpos hashmerge hashbuild hashbuildempty hashbulkdelete hashvacuumcleanup - hashcostestimate hashoptions - -)); DESCR("hash index access method"); #define HASH_AM_OID 405 -DATA(insert OID = 783 ( gist 0 8 f t f f t t f t t t f 0 gistinsert gistbeginscan gistgettuple gistgetbitmap gistrescan gistendscan gistmarkpos gistrestrpos gistmerge gistbuild gistbuildempty gistbulkdelete gistvacuumcleanup - gistcostestimate gistoptions -)); +DATA(insert OID = 783 ( gist 0 8 f t f f t t f t t t f 0 gistinsert gistbeginscan gistgettuple gistgetbitmap gistrescan gistendscan gistmarkpos gistrestrpos gistmerge gistbuild gistbuildempty gistbulkdelete gistvacuumcleanup - gistcostestimate gistoptions - -)); DESCR("GiST index access method"); #define GIST_AM_OID 783 -DATA(insert OID = 2742 ( gin 0 6 f f f f t t f f t f f 0 gininsert ginbeginscan - gingetbitmap ginrescan ginendscan 
ginmarkpos ginrestrpos ginmerge ginbuild ginbuildempty ginbulkdelete ginvacuumcleanup - gincostestimate ginoptions -)); +DATA(insert OID = 2742 ( gin 0 6 f f f f t t f f t f f 0 gininsert ginbeginscan - gingetbitmap ginrescan ginendscan ginmarkpos ginrestrpos ginmerge ginbuild ginbuildempty ginbulkdelete ginvacuumcleanup - gincostestimate ginoptions - -)); DESCR("GIN index access method"); #define GIN_AM_OID 2742 -DATA(insert OID = 4000 ( spgist 0 5 f f f f f t f t f f f 0 spginsert spgbeginscan spggettuple spggetbitmap spgrescan spgendscan spgmarkpos spgrestrpos spgmerge spgbuild spgbuildempty spgbulkdelete spgvacuumcleanup spgcanreturn spgcostestimate spgoptions -)); +DATA(insert OID = 4000 ( spgist 0 5 f f f f f t f t f f f 0 spginsert spgbeginscan spggettuple spggetbitmap spgrescan spgendscan spgmarkpos spgrestrpos spgmerge spgbuild spgbuildempty spgbulkdelete spgvacuumcleanup spgcanreturn spgcostestimate spgoptions - -)); DESCR("SP-GiST index access method"); #define SPGIST_AM_OID 4000 -DATA(insert OID = 4039 ( psort 5 1 f f f f t t f t f f f 0 - - psortgettuple psortgetbitmap - - - - - psortbuild - - - psortcanreturn psortcostestimate psortoptions -)); +DATA(insert OID = 4039 ( psort 5 1 f f f f t t f t f f f 0 - - psortgettuple psortgetbitmap - - - - - psortbuild - - - psortcanreturn psortcostestimate psortoptions - -)); DESCR("psort index access method"); #define PSORT_AM_OID 4039 -DATA(insert OID = 4239 ( cbtree 5 1 f f f t t t f t f f t 0 btinsert btbeginscan cbtreegettuple cbtreegetbitmap btrescan btendscan - - - cbtreebuild btbuildempty - - cbtreecanreturn cbtreecostestimate cbtreeoptions -)); +DATA(insert OID = 4239 ( cbtree 5 1 f f f t t t f t f f t 0 btinsert btbeginscan cbtreegettuple cbtreegetbitmap btrescan btendscan - - - cbtreebuild btbuildempty - - cbtreecanreturn cbtreecostestimate cbtreeoptions - -)); DESCR("cstore btree index access method"); #define CBTREE_AM_OID 4239 -DATA(insert OID = 4444 ( cgin 0 6 f f f f t t f f t f f 0 gininsert 
ginbeginscan - cgingetbitmap ginrescan ginendscan ginmarkpos ginrestrpos ginmerge cginbuild ginbuildempty ginbulkdelete ginvacuumcleanup - gincostestimate ginoptions -)); +DATA(insert OID = 4444 ( cgin 0 6 f f f f t t f f t f f 0 gininsert ginbeginscan - cgingetbitmap ginrescan ginendscan ginmarkpos ginrestrpos ginmerge cginbuild ginbuildempty ginbulkdelete ginvacuumcleanup - gincostestimate ginoptions - -)); DESCR("cstore GIN index access method"); #define CGIN_AM_OID 4444 -DATA(insert OID = 4439 ( ubtree 5 3 t f t t t t t t f t t 0 ubtinsert ubtbeginscan ubtgettuple ubtgetbitmap ubtrescan ubtendscan ubtmarkpos ubtrestrpos ubtmerge ubtbuild ubtbuildempty ubtbulkdelete ubtvacuumcleanup ubtcanreturn ubtcostestimate ubtoptions -)); +DATA(insert OID = 4439 ( ubtree 5 3 t f t t t t t t f t t 0 ubtinsert ubtbeginscan ubtgettuple ubtgetbitmap ubtrescan ubtendscan ubtmarkpos ubtrestrpos ubtmerge ubtbuild ubtbuildempty ubtbulkdelete ubtvacuumcleanup ubtcanreturn ubtcostestimate ubtoptions - -)); DESCR("ustore b-tree index access method"); #define UBTREE_AM_OID 4439 +DATA(insert OID = 8300 ( hnsw 0 4 f t f f f t f f f f f 0 hnswinsert hnswbeginscan hnswgettuple - hnswrescan hnswendscan - - - hnswbuild hnswbuildempty hnswbulkdelete hnswvacuumcleanup - hnswcostestimate hnswoptions - hnswdelete)); +DESCR("hnsw index access method"); +#define HNSW_AM_OID 8300 + +DATA(insert OID = 8301 ( ivfflat 0 5 f t f f f t f f f f f 0 ivfflatinsert ivfflatbeginscan ivfflatgettuple - ivfflatrescan ivfflatendscan - - - ivfflatbuild ivfflatbuildempty ivfflatbulkdelete ivfflatvacuumcleanup - ivfflatcostestimate ivfflatoptions - -)); +DESCR("ivfflat index access method"); +#define IVFFLAT_AM_OID 8301 + #define OID_IS_BTREE(oid) ((oid) == BTREE_AM_OID || (oid) == UBTREE_AM_OID) #endif /* PG_AM_H */ diff --git a/src/include/catalog/pg_amop.data b/src/include/catalog/pg_amop.data index ae6fdb342ecbbaabe61b65a3f19bab82136e4fd9..51bfc7637f000dd55cd3977f86646ac1943f69cb 100644 --- 
a/src/include/catalog/pg_amop.data +++ b/src/include/catalog/pg_amop.data @@ -1594,3 +1594,41 @@ DATA(insert OID = 7272 ( 9570 9003 9003 2 s 5553 4439 0 )); DATA(insert OID = 7273 ( 9570 9003 9003 3 s 5550 4439 0 )); DATA(insert OID = 7274 ( 9570 9003 9003 4 s 5549 4439 0 )); DATA(insert OID = 7275 ( 9570 9003 9003 5 s 5554 4439 0 )); + +/* vector */ +DATA(insert ( 8371 8305 8305 1 o 8311 8300 1970 )); +DATA(insert ( 8372 8305 8305 1 o 8312 8300 1970 )); +DATA(insert ( 8373 8305 8305 1 o 8313 8300 1970 )); +DATA(insert ( 8374 8305 8305 1 o 8314 8300 1970 )); +DATA(insert ( 8381 8307 8307 1 o 8319 8300 1970 )); +DATA(insert ( 8382 8307 8307 1 o 8320 8300 1970 )); +DATA(insert ( 8383 8307 8307 1 o 8321 8300 1970 )); +DATA(insert ( 8384 8307 8307 1 o 8322 8300 1970 )); +DATA(insert ( 8379 1560 1560 1 o 8324 8300 1970 )); +DATA(insert ( 8380 1560 1560 1 o 8323 8300 1970 )); +DATA(insert ( 8385 8305 8305 1 o 8311 8301 1970 )); +DATA(insert ( 8386 8305 8305 1 o 8312 8301 1970 )); +DATA(insert ( 8387 8305 8305 1 o 8313 8301 1970 )); +DATA(insert ( 8394 1560 1560 1 o 8323 8301 1970 )); +DATA(insert ( 8392 8305 8305 1 s 8327 403 0 )); +DATA(insert ( 8392 8305 8305 2 s 8328 403 0 )); +DATA(insert ( 8392 8305 8305 3 s 8331 403 0 )); +DATA(insert ( 8392 8305 8305 4 s 8330 403 0 )); +DATA(insert ( 8392 8305 8305 5 s 8329 403 0 )); +DATA(insert ( 8397 8307 8307 1 s 8333 403 0 )); +DATA(insert ( 8397 8307 8307 2 s 8334 403 0 )); +DATA(insert ( 8397 8307 8307 3 s 8337 403 0 )); +DATA(insert ( 8397 8307 8307 4 s 8336 403 0 )); +DATA(insert ( 8397 8307 8307 5 s 8335 403 0 )); + +DATA(insert ( 8375 8305 8305 1 s 8327 4439 0 )); +DATA(insert ( 8375 8305 8305 2 s 8328 4439 0 )); +DATA(insert ( 8375 8305 8305 3 s 8331 4439 0 )); +DATA(insert ( 8375 8305 8305 4 s 8330 4439 0 )); +DATA(insert ( 8375 8305 8305 5 s 8329 4439 0 )); + +DATA(insert ( 8376 8307 8307 1 s 8333 4439 0 )); +DATA(insert ( 8376 8307 8307 2 s 8334 4439 0 )); +DATA(insert ( 8376 8307 8307 3 s 8337 4439 0 )); 
+DATA(insert ( 8376 8307 8307 4 s 8336 4439 0 )); +DATA(insert ( 8376 8307 8307 5 s 8335 4439 0 )); \ No newline at end of file diff --git a/src/include/catalog/pg_amproc.h b/src/include/catalog/pg_amproc.h index 138c7d54ea780cee2dfcee5652649e5df16b9de7..7b54d6b4b200bd45a17e032e4df8c26c21c675f8 100644 --- a/src/include/catalog/pg_amproc.h +++ b/src/include/catalog/pg_amproc.h @@ -655,4 +655,51 @@ DATA(insert ( 8901 3831 3831 1 3870 )); DATA(insert ( 8626 3614 3614 1 3622 )); DATA(insert ( 8683 3615 3615 1 3668 )); +DATA(insert (8371 8305 8305 1 8431)); +DATA(insert (8372 8305 8305 1 8434)); +DATA(insert (8372 8305 8305 4 8438)); +DATA(insert (8373 8305 8305 1 8434)); +DATA(insert (8373 8305 8305 2 8438)); +DATA(insert (8373 8305 8305 4 8438)); +DATA(insert (8374 8305 8305 1 8436)); + +DATA(insert (8379 1560 1560 1 8468)); +DATA(insert (8379 1560 1560 3 8209)); + +DATA(insert (8380 1560 1560 1 8469)); +DATA(insert (8380 1560 1560 3 8209)); + +DATA(insert (8381 8307 8307 1 8470)); +DATA(insert (8381 8307 8307 3 8479)); + +DATA(insert (8382 8307 8307 1 8463)); +DATA(insert (8382 8307 8307 3 8479)); + +DATA(insert (8383 8307 8307 1 8463)); +DATA(insert (8383 8307 8307 2 8478)); +DATA(insert (8383 8307 8307 3 8479)); + +DATA(insert (8384 8307 8307 1 8467)); +DATA(insert (8384 8307 8307 3 8479)); + +DATA(insert (8385 8305 8305 1 8431)); +DATA(insert (8385 8305 8305 3 8433)); + +DATA(insert (8386 8305 8305 1 8434)); +DATA(insert (8386 8305 8305 3 8432)); +DATA(insert (8386 8305 8305 4 8438)); + +DATA(insert (8387 8305 8305 1 8434)); +DATA(insert (8387 8305 8305 2 8438)); +DATA(insert (8387 8305 8305 3 8432)); +DATA(insert (8387 8305 8305 4 8438)); + +DATA(insert (8394 1560 1560 1 8469)); +DATA(insert (8394 1560 1560 3 8469)); +DATA(insert (8394 1560 1560 5 8210)); +DATA(insert (8392 8305 8305 1 8450)); +DATA(insert (8397 8307 8307 1 8464)); + +DATA(insert (8375 8305 8305 1 8450)); +DATA(insert (8376 8307 8307 1 8464)); #endif /* PG_AMPROC_H */ diff --git 
a/src/include/catalog/pg_cast.h b/src/include/catalog/pg_cast.h index e2532011f3e22426757d6245142158162fc1c5b0..86e1fd221a144744a34df6cfa62f426848cb193d 100644 --- a/src/include/catalog/pg_cast.h +++ b/src/include/catalog/pg_cast.h @@ -596,4 +596,21 @@ DATA(insert ( 1042 3272 3314 i f _null_)); DATA(insert ( 3272 3969 3323 i f _null_)); DATA(insert ( 3969 3272 3321 i f _null_)); +/* vector <-> int[] float4[] float8[] numeric[] text[] varchar[] */ +DATA(insert OID = 8299 (8305 8305 8214 i f _null_)); +DATA(insert OID = 8298 (1007 8305 8215 a f _null_)); +DATA(insert OID = 8297 (1021 8305 8216 a f _null_)); +DATA(insert OID = 8296 (1022 8305 8217 a f _null_)); +DATA(insert OID = 8295 (1231 8305 8218 a f _null_)); +DATA(insert OID = 8294 (8305 1021 8219 i f _null_)); +DATA(insert OID = 8293 (8305 1007 8212 i f _null_)); +DATA(insert OID = 8292 (8305 1022 8213 i f _null_)); +DATA(insert OID = 8291 (8305 1231 8221 i f _null_)); +DATA(insert OID = 8290 (8305 1009 8222 i f _null_)); +DATA(insert OID = 8289 (8305 1015 8223 i f _null_)); + +/* sparsevec -> sparsevec (length coercion), vector <-> sparsevec */ +DATA(insert OID = 8285 (8307 8307 8228 i f _null_)); +DATA(insert OID = 8284 (8305 8307 8229 i f _null_)); +DATA(insert OID = 8283 (8307 8305 8230 a f _null_)); #endif /* PG_CAST_H */ diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h index 30dc9575eb1d4d2e4ba59313a7e4dbe1d92883f0..72b7f7fcb77f081be3fef667be619ac241d1b1c0 100644 --- a/src/include/catalog/pg_control.h +++ b/src/include/catalog/pg_control.h @@ -95,6 +95,7 @@ typedef struct CheckPointUndo #define XLOG_FPI 0xB0 /* not use in mppdb*/ #define XLOG_DELAY_XLOG_RECYCLE 0xC0 +#define XLOG_MERGE_RECORD 0x01 /* * System status indicator. 
Note this is stored in pg_control; if you change diff --git a/src/include/catalog/pg_opclass.h b/src/include/catalog/pg_opclass.h index 7f83a4c8e20d08b0ea284b9a211818135e73fa92..552d2c959d1b9fd751cc05f7d0f4df9133371900 100644 --- a/src/include/catalog/pg_opclass.h +++ b/src/include/catalog/pg_opclass.h @@ -375,5 +375,29 @@ DATA(insert ( 405 settext_ops PGNSP PGUID 1995 3272 f 0 )); DATA(insert ( 4439 setasint_ops PGNSP PGUID 6976 3272 t 0 )); DATA(insert ( 405 set_ops PGNSP PGUID 8646 3272 t 0 )); +DATA(insert OID = 8900 (8300 vector_l2_ops PGNSP PGUID 8371 8305 f 0)); +DATA(insert OID = 8999 (8300 vector_ip_ops PGNSP PGUID 8372 8305 f 0)); +DATA(insert OID = 8902 (8300 vector_cosine_ops PGNSP PGUID 8373 8305 f 0)); +DATA(insert OID = 8903 (8300 vector_l1_ops PGNSP PGUID 8374 8305 f 0)); + +DATA(insert OID = 8908 (8300 bit_jaccard_ops PGNSP PGUID 8379 1560 f 0)); +DATA(insert OID = 8909 (8300 bit_hamming_ops PGNSP PGUID 8380 1560 f 0)); + +DATA(insert OID = 8910 (8300 sparsevec_l2_ops PGNSP PGUID 8381 8307 f 0)); +DATA(insert OID = 8911 (8300 sparsevec_ip_ops PGNSP PGUID 8382 8307 f 0)); +DATA(insert OID = 8912 (8300 sparsevec_cosine_ops PGNSP PGUID 8383 8307 f 0)); +DATA(insert OID = 8913 (8300 sparsevec_l1_ops PGNSP PGUID 8384 8307 f 0)); + +DATA(insert OID = 8914 (8301 vector_l2_ops PGNSP PGUID 8385 8305 t 0)); +DATA(insert OID = 8915 (8301 vector_ip_ops PGNSP PGUID 8386 8305 f 0)); +DATA(insert OID = 8916 (8301 vector_cosine_ops PGNSP PGUID 8387 8305 f 0)); + +DATA(insert OID = 8923 (8301 bit_hamming_ops PGNSP PGUID 8394 1560 f 0)); + +DATA(insert OID = 8977 (403 vector_ops PGNSP PGUID 8392 8305 t 0)); +DATA(insert OID = 8979 (403 sparsevec_ops PGNSP PGUID 8397 8307 t 0)); + +DATA(insert OID = 8951 (4439 vector_ops PGNSP PGUID 8375 8305 t 0)); +DATA(insert OID = 8952 (4439 sparsevec_ops PGNSP PGUID 8376 8307 t 0)); #endif /* PG_OPCLASS_H */ diff --git a/src/include/catalog/pg_operator.data b/src/include/catalog/pg_operator.data index 
67e260f31083e31d332283c0075eca2f3a38bcc4..71d9af566f1cd27b55e54ba27d98bb2a04ebc020 100644 --- a/src/include/catalog/pg_operator.data +++ b/src/include/catalog/pg_operator.data @@ -1918,6 +1918,63 @@ DESCR("greater than or equal"); DATA(insert OID = 6565 ("-" PGNSP PGUID l f f 0 16 16 0 0 boolum - -)); DESCR("negate"); +DATA(insert OID = 8311 ("<->" PGNSP PGUID b f f 8305 8305 701 8311 0 8433 - -)); +DESCR("l2_distance"); +DATA(insert OID = 8312 ("<#>" PGNSP PGUID b f f 8305 8305 701 8312 0 vector_negative_inner_product - -)); +DESCR("vector_negative_inner_product"); +DATA(insert OID = 8313 ("<=>" PGNSP PGUID b f f 8305 8305 701 8313 0 8435 - -)); +DESCR("cosine_distance"); +DATA(insert OID = 8314 ("<+>" PGNSP PGUID b f f 8305 8305 701 8314 0 8436 - -)); +DESCR("l1_distance"); +DATA(insert OID = 8339 ("||" PGNSP PGUID b f f 8305 8305 8305 0 0 vector_concat - -)); +DESCR("vector_concat"); + +DATA(insert OID = 8319 ("<->" PGNSP PGUID b f f 8307 8307 701 8319 0 8465 - -)); +DESCR("sparsevec_l2_distance"); +DATA(insert OID = 8320 ("<#>" PGNSP PGUID b f f 8307 8307 701 8320 0 sparsevec_negative_inner_product - -)); +DESCR("sparsevec_negative_inner_product"); +DATA(insert OID = 8321 ("<=>" PGNSP PGUID b f f 8307 8307 701 8321 0 8466 - -)); +DESCR("sparsevec_cosine_distance"); +DATA(insert OID = 8322 ("<+>" PGNSP PGUID b f f 8307 8307 701 8322 0 8467 - -)); +DESCR("sparsevec_l1_distance"); + +DATA(insert OID = 8323 ("<~>" PGNSP PGUID b f f 1560 1560 701 8323 0 hamming_distance - -)); +DESCR("hamming_distance"); +DATA(insert OID = 8324 ("<%>" PGNSP PGUID b f f 1560 1560 701 8324 0 jaccard_distance - -)); +DESCR("jaccard_distance"); + +DATA(insert OID = 8325 ("+" PGNSP PGUID b f f 8305 8305 8305 8325 0 vector_add 0 0)); +DESCR("vector_add"); +DATA(insert OID = 8326 ("-" PGNSP PGUID b f f 8305 8305 8305 8326 0 vector_sub 0 0)); +DESCR("vector_sub"); +DATA(insert OID = 8349 ("*" PGNSP PGUID b f f 8305 8305 8305 8349 0 8203 0 0)); +DESCR("vector_mul"); +DATA(insert OID = 8327 
("<" PGNSP PGUID b f f 8305 8305 16 8329 8330 vector_lt scalarltsel scalarltjoinsel)); +DESCR("vector less than"); +DATA(insert OID = 8328 ("<=" PGNSP PGUID b f f 8305 8305 16 8330 8329 vector_le scalarltsel scalarltjoinsel)); +DESCR("vector less than or equal"); +DATA(insert OID = 8329 (">" PGNSP PGUID b f f 8305 8305 16 8327 8328 vector_gt scalargtsel scalargtjoinsel)); +DESCR("vector greater than"); +DATA(insert OID = 8330 (">=" PGNSP PGUID b f f 8305 8305 16 8328 8327 vector_ge scalargtsel scalargtjoinsel)); +DESCR("vector greater than or equal"); +DATA(insert OID = 8331 ("=" PGNSP PGUID b f t 8305 8305 16 8331 8332 vector_eq eqsel eqjoinsel)); +DESCR("vector equal"); +DATA(insert OID = 8332 ("<>" PGNSP PGUID b f f 8305 8305 16 8332 8331 vector_ne neqsel neqjoinsel)); +DESCR("vector unequal"); + +DATA(insert OID = 8333 ("<" PGNSP PGUID b f f 8307 8307 16 8335 8336 sparsevec_lt scalarltsel scalarltjoinsel)); +DESCR("sparsevec less than"); +DATA(insert OID = 8334 ("<=" PGNSP PGUID b f f 8307 8307 16 8336 8335 sparsevec_le scalarltsel scalarltjoinsel)); +DESCR("sparsevec less than or equal"); +DATA(insert OID = 8335 (">" PGNSP PGUID b f f 8307 8307 16 8333 8334 sparsevec_gt scalargtsel scalargtjoinsel)); +DESCR("sparsevec greater than"); +DATA(insert OID = 8336 (">=" PGNSP PGUID b f f 8307 8307 16 8334 8333 sparsevec_ge scalargtsel scalargtjoinsel)); +DESCR("sparsevec greater than or equal"); +DATA(insert OID = 8337 ("=" PGNSP PGUID b f t 8307 8307 16 8337 8338 sparsevec_eq eqsel eqjoinsel)); +DESCR("sparsevec equal"); +DATA(insert OID = 8338 ("<>" PGNSP PGUID b f f 8307 8307 16 8338 8337 sparsevec_ne neqsel neqjoinsel)); +DESCR("sparsevec unequal"); + /* * function prototypes */ \ No newline at end of file diff --git a/src/include/catalog/pg_opfamily.h b/src/include/catalog/pg_opfamily.h index 740f061d18a79a46be630cebba1a81543d1633e8..72741f312aaca6d7da6b25bb5bf9751a9d774f9d 100644 --- a/src/include/catalog/pg_opfamily.h +++ b/src/include/catalog/pg_opfamily.h @@ 
-199,6 +199,30 @@ DATA(insert OID = 4262 (4239 int1_ops PGNSP PGUID)); DATA(insert OID = 4263 (4239 bool_ops PGNSP PGUID)); DATA(insert OID = 4264 (4239 smalldatetime_ops PGNSP PGUID)); +/* datavec index ops */ +DATA(insert OID = 8371 (8300 vector_l2_ops PGNSP PGUID)); +DATA(insert OID = 8372 (8300 vector_ip_ops PGNSP PGUID)); +DATA(insert OID = 8373 (8300 vector_cosine_ops PGNSP PGUID)); +DATA(insert OID = 8374 (8300 vector_l1_ops PGNSP PGUID)); + +DATA(insert OID = 8379 (8300 bit_jaccard_ops PGNSP PGUID)); +DATA(insert OID = 8380 (8300 bit_hamming_ops PGNSP PGUID)); + +DATA(insert OID = 8381 (8300 sparsevec_l2_ops PGNSP PGUID)); +DATA(insert OID = 8382 (8300 sparsevec_ip_ops PGNSP PGUID)); +DATA(insert OID = 8383 (8300 sparsevec_cosine_ops PGNSP PGUID)); +DATA(insert OID = 8384 (8300 sparsevec_l1_ops PGNSP PGUID)); + +DATA(insert OID = 8385 (8301 vector_l2_ops PGNSP PGUID)); +DATA(insert OID = 8386 (8301 vector_ip_ops PGNSP PGUID)); +DATA(insert OID = 8387 (8301 vector_cosine_ops PGNSP PGUID)); + +DATA(insert OID = 8394 (8301 bit_hamming_ops PGNSP PGUID)); +DATA(insert OID = 8392 (403 vector_ops PGNSP PGUID)); +DATA(insert OID = 8397 (403 sparsevec_ops PGNSP PGUID)); + +DATA(insert OID = 8375 (4439 vector_ops PGNSP PGUID)); +DATA(insert OID = 8376 (4439 sparsevec_ops PGNSP PGUID)); /* ubtree index */ #define BTREE_UBTREE_FAM_OID_DIFF 5000 #define BTREE_UBTREE_FAM_OID_SPECIAL_DIFF 4000 diff --git a/src/include/catalog/pg_type.h b/src/include/catalog/pg_type.h index 6f8fdc18733066a098ff322e61f056ea5f240510..6605b1a26555eece1e46bf167a85e4cb46cd469e 100644 --- a/src/include/catalog/pg_type.h +++ b/src/include/catalog/pg_type.h @@ -821,6 +821,19 @@ DATA(insert OID = 3272 ( anyset PGNSP PGUID -1 f s H t t \054 0 0 0 anyset_in DATA(insert OID = 4408 ( undefined PGNSP PGUID -2 f u W f t \054 0 0 0 undefinedin undefinedout undefinedrecv undefinedsend - - - c p f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("undefined objects at PLSQL compilation time"); #define UNDEFINEDOID 
4408 + +DATA(insert OID = 8305 (vector PGNSP PGUID -1 f b U f t \054 0 0 8308 vector_in vector_out vector_recv vector_send vector_typmod_in - - i e f 0 -1 0 0 _null_ _null_ _null_)); +#define VECTOROID 8305 + +DATA(insert OID = 8307 (sparsevec PGNSP PGUID -1 f b U f t \054 0 0 8310 sparsevec_in sparsevec_out sparsevec_recv sparsevec_send sparsevec_typmod_in - - i e f 0 -1 0 0 _null_ _null_ _null_)); +#define SPARSEVECTOROID 8307 + +DATA(insert OID = 8308 ( _vector PGNSP PGUID -1 f b A f t \054 0 8305 0 array_in array_out array_recv array_send vector_typmod_in - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); +#define VECTORARRAYOID 8308 + +DATA(insert OID = 8310 ( _sparsevec PGNSP PGUID -1 f b A f t \054 0 8307 0 array_in array_out array_recv array_send sparsevec_typmod_in - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); +#define SPARSEVECARRAYOID 8310 + /* * macros */ diff --git a/src/include/catalog/upgrade_sql/rollback_catalog_maindb/rollback-post_catalog_maindb_92_901.sql b/src/include/catalog/upgrade_sql/rollback_catalog_maindb/rollback-post_catalog_maindb_92_901.sql index 58370c295ce4502f7c7a5d3f4fe7fd441007d712..d9bd2596d15c0af240f72af24da12923926a386c 100644 --- a/src/include/catalog/upgrade_sql/rollback_catalog_maindb/rollback-post_catalog_maindb_92_901.sql +++ b/src/include/catalog/upgrade_sql/rollback_catalog_maindb/rollback-post_catalog_maindb_92_901.sql @@ -19,10 +19,10 @@ RETURNS void AS $$ DECLARE query_str text; BEGIN -query_str := 'update pg_catalog.pg_am set amsupport = 2, amhandler = 0 where amname = ''btree'' or amname = ''ubtree'''; +query_str := 'update pg_catalog.pg_am set amsupport = 2, amhandler = 0, amdelete = 0 where amname = ''btree'' or amname = ''ubtree'''; EXECUTE(query_str); return; END; $$ LANGUAGE 'plpgsql'; SELECT Update_pg_amproc_temp(); -DROP FUNCTION Update_pg_amproc_temp(); \ No newline at end of file +DROP FUNCTION Update_pg_amproc_temp(); diff --git 
a/src/include/catalog/upgrade_sql/rollback_catalog_maindb/rollback-post_catalog_maindb_93_019.sql b/src/include/catalog/upgrade_sql/rollback_catalog_maindb/rollback-post_catalog_maindb_93_019.sql new file mode 100644 index 0000000000000000000000000000000000000000..1bd356568009c0a53969ff6f1d77a6c84754a697 --- /dev/null +++ b/src/include/catalog/upgrade_sql/rollback_catalog_maindb/rollback-post_catalog_maindb_93_019.sql @@ -0,0 +1,223 @@ +DROP OPERATOR FAMILY IF EXISTS pg_catalog.vector_ops USING btree CASCADE; +DROP OPERATOR CLASS IF EXISTS pg_catalog.vector_ops USING btree CASCADE; +DROP OPERATOR FAMILY IF EXISTS pg_catalog.vector_ops USING ubtree CASCADE; +DROP OPERATOR CLASS IF EXISTS pg_catalog.vector_ops USING ubtree CASCADE; +DROP OPERATOR FAMILY IF EXISTS pg_catalog.sparsevec_ops USING btree CASCADE; +DROP OPERATOR CLASS IF EXISTS pg_catalog.sparsevec_ops USING btree CASCADE; +DROP OPERATOR FAMILY IF EXISTS pg_catalog.sparsevec_ops USING ubtree CASCADE; +DROP OPERATOR CLASS IF EXISTS pg_catalog.sparsevec_ops USING ubtree CASCADE; + +DO $$ +DECLARE + cnt int; +BEGIN + select count(*) into cnt from pg_am where amname = 'ivfflat'; + if cnt = 1 then + DROP OPERATOR FAMILY IF EXISTS pg_catalog.vector_l2_ops USING ivfflat CASCADE; + DROP OPERATOR CLASS IF EXISTS pg_catalog.vector_l2_ops USING ivfflat CASCADE; + DROP OPERATOR FAMILY IF EXISTS pg_catalog.vector_ip_ops USING ivfflat CASCADE; + DROP OPERATOR CLASS IF EXISTS pg_catalog.vector_ip_ops USING ivfflat CASCADE; + DROP OPERATOR FAMILY IF EXISTS pg_catalog.vector_cosine_ops USING ivfflat CASCADE; + DROP OPERATOR CLASS IF EXISTS pg_catalog.vector_cosine_ops USING ivfflat CASCADE; + DROP OPERATOR FAMILY IF EXISTS pg_catalog.bit_hamming_ops USING ivfflat CASCADE; + DROP OPERATOR CLASS IF EXISTS pg_catalog.bit_hamming_ops USING ivfflat CASCADE; + end if; +END$$; + +DO $$ +DECLARE + cnt int; +BEGIN + select count(*) into cnt from pg_am where amname = 'hnsw'; + if cnt = 1 then + DROP OPERATOR FAMILY IF EXISTS 
pg_catalog.vector_l2_ops USING hnsw CASCADE; + DROP OPERATOR CLASS IF EXISTS pg_catalog.vector_l2_ops USING hnsw CASCADE; + DROP OPERATOR FAMILY IF EXISTS pg_catalog.vector_ip_ops USING hnsw CASCADE; + DROP OPERATOR CLASS IF EXISTS pg_catalog.vector_ip_ops USING hnsw CASCADE; + DROP OPERATOR FAMILY IF EXISTS pg_catalog.vector_cosine_ops USING hnsw CASCADE; + DROP OPERATOR CLASS IF EXISTS pg_catalog.vector_cosine_ops USING hnsw CASCADE; + DROP OPERATOR FAMILY IF EXISTS pg_catalog.vector_l1_ops USING hnsw CASCADE; + DROP OPERATOR CLASS IF EXISTS pg_catalog.vector_l1_ops USING hnsw CASCADE; + DROP OPERATOR FAMILY IF EXISTS pg_catalog.bit_hamming_ops USING hnsw CASCADE; + DROP OPERATOR CLASS IF EXISTS pg_catalog.bit_hamming_ops USING hnsw CASCADE; + DROP OPERATOR FAMILY IF EXISTS pg_catalog.bit_jaccard_ops USING hnsw CASCADE; + DROP OPERATOR CLASS IF EXISTS pg_catalog.bit_jaccard_ops USING hnsw CASCADE; + DROP OPERATOR FAMILY IF EXISTS pg_catalog.sparsevec_l2_ops USING hnsw CASCADE; + DROP OPERATOR CLASS IF EXISTS pg_catalog.sparsevec_l2_ops USING hnsw CASCADE; + DROP OPERATOR FAMILY IF EXISTS pg_catalog.sparsevec_ip_ops USING hnsw CASCADE; + DROP OPERATOR CLASS IF EXISTS pg_catalog.sparsevec_ip_ops USING hnsw CASCADE; + DROP OPERATOR FAMILY IF EXISTS pg_catalog.sparsevec_cosine_ops USING hnsw CASCADE; + DROP OPERATOR CLASS IF EXISTS pg_catalog.sparsevec_cosine_ops USING hnsw CASCADE; + DROP OPERATOR FAMILY IF EXISTS pg_catalog.sparsevec_l1_ops USING hnsw CASCADE; + DROP OPERATOR CLASS IF EXISTS pg_catalog.sparsevec_l1_ops USING hnsw CASCADE; + end if; +END$$; + +DO $$ +DECLARE +ans boolean; +BEGIN + select case when count(*)=1 then true else false end as ans from (select * from pg_type where typname = 'sparsevec' limit 1) into ans; + if ans = true then + DROP FUNCTION IF EXISTS pg_catalog.l2_distance(sparsevec, sparsevec) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.inner_product(sparsevec, sparsevec) CASCADE; + DROP FUNCTION IF EXISTS 
pg_catalog.cosine_distance(sparsevec, sparsevec) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.l1_distance(sparsevec, sparsevec) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.l2_norm(sparsevec) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.l2_normalize(sparsevec) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.sparsevec_lt(sparsevec, sparsevec) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.sparsevec_le(sparsevec, sparsevec) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.sparsevec_eq(sparsevec, sparsevec) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.sparsevec_ne(sparsevec, sparsevec) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.sparsevec_ge(sparsevec, sparsevec) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.sparsevec_gt(sparsevec, sparsevec) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.sparsevec_cmp(sparsevec, sparsevec) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.sparsevec_l2_squared_distance(sparsevec, sparsevec) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.sparsevec_negative_inner_product(sparsevec, sparsevec) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.sparsevec(sparsevec, int4, boolean) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.sparsevec_to_vector(sparsevec, int4, boolean) CASCADE; + DROP OPERATOR IF EXISTS pg_catalog.<->(sparsevec, sparsevec) CASCADE; + DROP OPERATOR IF EXISTS pg_catalog.<#>(sparsevec, sparsevec) CASCADE; + DROP OPERATOR IF EXISTS pg_catalog.<=>(sparsevec, sparsevec) CASCADE; + DROP OPERATOR IF EXISTS pg_catalog.<+>(sparsevec, sparsevec) CASCADE; + DROP OPERATOR IF EXISTS pg_catalog.<(sparsevec, sparsevec) CASCADE; + DROP OPERATOR IF EXISTS pg_catalog.<=(sparsevec, sparsevec) CASCADE; + DROP OPERATOR IF EXISTS pg_catalog.=(sparsevec, sparsevec) CASCADE; + DROP OPERATOR IF EXISTS pg_catalog.<>(sparsevec, sparsevec) CASCADE; + DROP OPERATOR IF EXISTS pg_catalog.>=(sparsevec, sparsevec) CASCADE; + DROP OPERATOR IF EXISTS pg_catalog.>(sparsevec, sparsevec) CASCADE; + end if; +END$$; + +DROP FUNCTION IF EXISTS 
pg_catalog.sparsevec_in(cstring, oid, int4) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.sparsevec_typmod_in(_cstring) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.sparsevec_recv(internal, oid, int4) CASCADE; +DO $$ +DECLARE +ans boolean; +BEGIN + select case when count(*)=1 then true else false end as ans from (select * from pg_type where typname = 'sparsevec' limit 1) into ans; + if ans = true then + DROP FUNCTION IF EXISTS pg_catalog.sparsevec_out(pg_catalog.sparsevec) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.sparsevec_send(pg_catalog.sparsevec) CASCADE; + end if; +END$$; +DROP TYPE IF EXISTS pg_catalog.sparsevec CASCADE; +DROP TYPE IF EXISTS pg_catalog._sparsevec CASCADE; + +DO $$ +DECLARE +ans boolean; +BEGIN + select case when count(*)=1 then true else false end as ans from (select * from pg_type where typname = 'vector' limit 1) into ans; + if ans = true then + DROP FUNCTION IF EXISTS pg_catalog.inner_product(vector, vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.cosine_distance(vector, vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.l1_distance(vector, vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.l2_distance(vector, vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.vector_dims(vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.vector_norm(vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.l2_normalize(vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.binary_quantize(vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.subvector(vector, int4, int4) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.vector_add(vector, vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.vector_sub(vector, vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.vector_mul(vector, vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.vector_concat(vector, vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.vector_lt(vector, vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.vector_le(vector, vector) CASCADE; + DROP FUNCTION IF EXISTS 
pg_catalog.vector_eq(vector, vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.vector_ne(vector, vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.vector_ge(vector, vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.vector_gt(vector, vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.vector_cmp(vector, vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.vector_l2_squared_distance(vector, vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.vector_negative_inner_product(vector, vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.vector_spherical_distance(vector, vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.vector_accum(_float8, vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.vector(vector, int4, boolean) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.vector_to_float4(vector, int4, boolean) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.vector_to_int4(vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.vector_to_float8(vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.vector_to_numeric(vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.vector_to_text(vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.vector_to_varchar(vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.vector_to_sparsevec(vector, int4, boolean) CASCADE; + DROP OPERATOR IF EXISTS pg_catalog.<->(vector, vector) CASCADE; + DROP OPERATOR IF EXISTS pg_catalog.<#>(vector, vector) CASCADE; + DROP OPERATOR IF EXISTS pg_catalog.<=>(vector, vector) CASCADE; + DROP OPERATOR IF EXISTS pg_catalog.<+>(vector, vector) CASCADE; + DROP OPERATOR IF EXISTS pg_catalog.+(vector, vector) CASCADE; + DROP OPERATOR IF EXISTS pg_catalog.-(vector, vector) CASCADE; + DROP OPERATOR IF EXISTS pg_catalog.*(vector, vector) CASCADE; + DROP OPERATOR IF EXISTS pg_catalog.||(vector, vector) CASCADE; + DROP OPERATOR IF EXISTS pg_catalog.<(vector, vector) CASCADE; + DROP OPERATOR IF EXISTS pg_catalog.<=(vector, vector) CASCADE; + DROP OPERATOR IF EXISTS pg_catalog.=(vector, vector) CASCADE; + 
DROP OPERATOR IF EXISTS pg_catalog.<>(vector, vector) CASCADE; + DROP OPERATOR IF EXISTS pg_catalog.>=(vector, vector) CASCADE; + DROP OPERATOR IF EXISTS pg_catalog.>(vector, vector) CASCADE; + drop aggregate if exists pg_catalog.avg(vector) CASCADE; + drop aggregate if exists pg_catalog.sum(vector) CASCADE; + end if; +END$$; + +DROP FUNCTION IF EXISTS pg_catalog.vector_in(cstring, oid, int4) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.vector_typmod_in(_cstring) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.vector_recv(internal, oid, int4) CASCADE; +DO $$ +DECLARE +ans boolean; +BEGIN + select case when count(*)=1 then true else false end as ans from (select * from pg_type where typname = 'vector' limit 1) into ans; + if ans = true then + DROP FUNCTION IF EXISTS pg_catalog.vector_out(pg_catalog.vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.vector_send(pg_catalog.vector) CASCADE; + end if; +END$$; +DROP TYPE IF EXISTS pg_catalog.vector CASCADE; +DROP TYPE IF EXISTS pg_catalog._vector CASCADE; + +DROP OPERATOR IF EXISTS pg_catalog.<~>(bit, bit) CASCADE; +DROP OPERATOR IF EXISTS pg_catalog.<%>(bit, bit) CASCADE; +DROP ACCESS METHOD IF EXISTS ivfflat CASCADE; +DROP ACCESS METHOD IF EXISTS hnsw CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.vector_avg(_float8) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.vector_combine(_float8, _float8) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.array_to_vector(_int4, int4, boolean) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.array_to_vector(_float4, int4, boolean) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.array_to_vector(_float8, int4, boolean) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.array_to_vector(_numeric, int4, boolean) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.ivfflatbuild(internal, internal, internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.ivfflatbuildempty(internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.ivfflatinsert(internal, internal, internal, internal, internal, internal) CASCADE; +DROP 
FUNCTION IF EXISTS pg_catalog.ivfflatbulkdelete(internal, internal, internal, internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.ivfflatvacuumcleanup(internal, internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.ivfflatcostestimate(internal, internal, internal, internal, internal, internal, internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.ivfflatoptions(internal, internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.ivfflatvalidate(internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.ivfflatbeginscan(internal, internal, internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.ivfflatrescan(internal, internal, internal, internal, internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.ivfflatgettuple(internal, internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.ivfflatendscan(internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.ivfflathandler(internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.ivfflat_bit_support(internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.hnswbuild(internal, internal, internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.hnswbuildempty(internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.hnswinsert(internal, internal, internal, internal, internal, internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.hnswbulkdelete(internal, internal, internal, internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.hnswvacuumcleanup(internal, internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.hnswcostestimate(internal, internal, internal, internal, internal, internal, internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.hnswoptions(internal, internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.hnswvalidate(internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.hnswbeginscan(internal, internal, internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.hnswrescan(internal, internal, internal, internal, internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.hnswdelete(internal, internal, internal, internal, internal) CASCADE; 
+DROP FUNCTION IF EXISTS pg_catalog.hnswgettuple(internal, internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.hnswendscan(internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.hnswhandler(internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.hnsw_bit_support(internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.hnsw_sparsevec_support(internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.hamming_distance(bit, bit) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.jaccard_distance(bit, bit) CASCADE; + diff --git a/src/include/catalog/upgrade_sql/rollback_catalog_otherdb/rollback-post_catalog_otherdb_92_901.sql b/src/include/catalog/upgrade_sql/rollback_catalog_otherdb/rollback-post_catalog_otherdb_92_901.sql index 58370c295ce4502f7c7a5d3f4fe7fd441007d712..d9bd2596d15c0af240f72af24da12923926a386c 100644 --- a/src/include/catalog/upgrade_sql/rollback_catalog_otherdb/rollback-post_catalog_otherdb_92_901.sql +++ b/src/include/catalog/upgrade_sql/rollback_catalog_otherdb/rollback-post_catalog_otherdb_92_901.sql @@ -19,10 +19,10 @@ RETURNS void AS $$ DECLARE query_str text; BEGIN -query_str := 'update pg_catalog.pg_am set amsupport = 2, amhandler = 0 where amname = ''btree'' or amname = ''ubtree'''; +query_str := 'update pg_catalog.pg_am set amsupport = 2, amhandler = 0, amdelete = 0 where amname = ''btree'' or amname = ''ubtree'''; EXECUTE(query_str); return; END; $$ LANGUAGE 'plpgsql'; SELECT Update_pg_amproc_temp(); -DROP FUNCTION Update_pg_amproc_temp(); \ No newline at end of file +DROP FUNCTION Update_pg_amproc_temp(); diff --git a/src/include/catalog/upgrade_sql/rollback_catalog_otherdb/rollback-post_catalog_otherdb_93_019.sql b/src/include/catalog/upgrade_sql/rollback_catalog_otherdb/rollback-post_catalog_otherdb_93_019.sql new file mode 100644 index 0000000000000000000000000000000000000000..1bd356568009c0a53969ff6f1d77a6c84754a697 --- /dev/null +++ b/src/include/catalog/upgrade_sql/rollback_catalog_otherdb/rollback-post_catalog_otherdb_93_019.sql @@ -0,0 +1,223 
@@ +DROP OPERATOR FAMILY IF EXISTS pg_catalog.vector_ops USING btree CASCADE; +DROP OPERATOR CLASS IF EXISTS pg_catalog.vector_ops USING btree CASCADE; +DROP OPERATOR FAMILY IF EXISTS pg_catalog.vector_ops USING ubtree CASCADE; +DROP OPERATOR CLASS IF EXISTS pg_catalog.vector_ops USING ubtree CASCADE; +DROP OPERATOR FAMILY IF EXISTS pg_catalog.sparsevec_ops USING btree CASCADE; +DROP OPERATOR CLASS IF EXISTS pg_catalog.sparsevec_ops USING btree CASCADE; +DROP OPERATOR FAMILY IF EXISTS pg_catalog.sparsevec_ops USING ubtree CASCADE; +DROP OPERATOR CLASS IF EXISTS pg_catalog.sparsevec_ops USING ubtree CASCADE; + +DO $$ +DECLARE + cnt int; +BEGIN + select count(*) into cnt from pg_am where amname = 'ivfflat'; + if cnt = 1 then + DROP OPERATOR FAMILY IF EXISTS pg_catalog.vector_l2_ops USING ivfflat CASCADE; + DROP OPERATOR CLASS IF EXISTS pg_catalog.vector_l2_ops USING ivfflat CASCADE; + DROP OPERATOR FAMILY IF EXISTS pg_catalog.vector_ip_ops USING ivfflat CASCADE; + DROP OPERATOR CLASS IF EXISTS pg_catalog.vector_ip_ops USING ivfflat CASCADE; + DROP OPERATOR FAMILY IF EXISTS pg_catalog.vector_cosine_ops USING ivfflat CASCADE; + DROP OPERATOR CLASS IF EXISTS pg_catalog.vector_cosine_ops USING ivfflat CASCADE; + DROP OPERATOR FAMILY IF EXISTS pg_catalog.bit_hamming_ops USING ivfflat CASCADE; + DROP OPERATOR CLASS IF EXISTS pg_catalog.bit_hamming_ops USING ivfflat CASCADE; + end if; +END$$; + +DO $$ +DECLARE + cnt int; +BEGIN + select count(*) into cnt from pg_am where amname = 'hnsw'; + if cnt = 1 then + DROP OPERATOR FAMILY IF EXISTS pg_catalog.vector_l2_ops USING hnsw CASCADE; + DROP OPERATOR CLASS IF EXISTS pg_catalog.vector_l2_ops USING hnsw CASCADE; + DROP OPERATOR FAMILY IF EXISTS pg_catalog.vector_ip_ops USING hnsw CASCADE; + DROP OPERATOR CLASS IF EXISTS pg_catalog.vector_ip_ops USING hnsw CASCADE; + DROP OPERATOR FAMILY IF EXISTS pg_catalog.vector_cosine_ops USING hnsw CASCADE; + DROP OPERATOR CLASS IF EXISTS pg_catalog.vector_cosine_ops USING hnsw CASCADE; + 
DROP OPERATOR FAMILY IF EXISTS pg_catalog.vector_l1_ops USING hnsw CASCADE; + DROP OPERATOR CLASS IF EXISTS pg_catalog.vector_l1_ops USING hnsw CASCADE; + DROP OPERATOR FAMILY IF EXISTS pg_catalog.bit_hamming_ops USING hnsw CASCADE; + DROP OPERATOR CLASS IF EXISTS pg_catalog.bit_hamming_ops USING hnsw CASCADE; + DROP OPERATOR FAMILY IF EXISTS pg_catalog.bit_jaccard_ops USING hnsw CASCADE; + DROP OPERATOR CLASS IF EXISTS pg_catalog.bit_jaccard_ops USING hnsw CASCADE; + DROP OPERATOR FAMILY IF EXISTS pg_catalog.sparsevec_l2_ops USING hnsw CASCADE; + DROP OPERATOR CLASS IF EXISTS pg_catalog.sparsevec_l2_ops USING hnsw CASCADE; + DROP OPERATOR FAMILY IF EXISTS pg_catalog.sparsevec_ip_ops USING hnsw CASCADE; + DROP OPERATOR CLASS IF EXISTS pg_catalog.sparsevec_ip_ops USING hnsw CASCADE; + DROP OPERATOR FAMILY IF EXISTS pg_catalog.sparsevec_cosine_ops USING hnsw CASCADE; + DROP OPERATOR CLASS IF EXISTS pg_catalog.sparsevec_cosine_ops USING hnsw CASCADE; + DROP OPERATOR FAMILY IF EXISTS pg_catalog.sparsevec_l1_ops USING hnsw CASCADE; + DROP OPERATOR CLASS IF EXISTS pg_catalog.sparsevec_l1_ops USING hnsw CASCADE; + end if; +END$$; + +DO $$ +DECLARE +ans boolean; +BEGIN + select case when count(*)=1 then true else false end as ans from (select * from pg_type where typname = 'sparsevec' limit 1) into ans; + if ans = true then + DROP FUNCTION IF EXISTS pg_catalog.l2_distance(sparsevec, sparsevec) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.inner_product(sparsevec, sparsevec) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.cosine_distance(sparsevec, sparsevec) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.l1_distance(sparsevec, sparsevec) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.l2_norm(sparsevec) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.l2_normalize(sparsevec) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.sparsevec_lt(sparsevec, sparsevec) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.sparsevec_le(sparsevec, sparsevec) CASCADE; + DROP FUNCTION IF EXISTS 
pg_catalog.sparsevec_eq(sparsevec, sparsevec) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.sparsevec_ne(sparsevec, sparsevec) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.sparsevec_ge(sparsevec, sparsevec) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.sparsevec_gt(sparsevec, sparsevec) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.sparsevec_cmp(sparsevec, sparsevec) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.sparsevec_l2_squared_distance(sparsevec, sparsevec) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.sparsevec_negative_inner_product(sparsevec, sparsevec) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.sparsevec(sparsevec, int4, boolean) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.sparsevec_to_vector(sparsevec, int4, boolean) CASCADE; + DROP OPERATOR IF EXISTS pg_catalog.<->(sparsevec, sparsevec) CASCADE; + DROP OPERATOR IF EXISTS pg_catalog.<#>(sparsevec, sparsevec) CASCADE; + DROP OPERATOR IF EXISTS pg_catalog.<=>(sparsevec, sparsevec) CASCADE; + DROP OPERATOR IF EXISTS pg_catalog.<+>(sparsevec, sparsevec) CASCADE; + DROP OPERATOR IF EXISTS pg_catalog.<(sparsevec, sparsevec) CASCADE; + DROP OPERATOR IF EXISTS pg_catalog.<=(sparsevec, sparsevec) CASCADE; + DROP OPERATOR IF EXISTS pg_catalog.=(sparsevec, sparsevec) CASCADE; + DROP OPERATOR IF EXISTS pg_catalog.<>(sparsevec, sparsevec) CASCADE; + DROP OPERATOR IF EXISTS pg_catalog.>=(sparsevec, sparsevec) CASCADE; + DROP OPERATOR IF EXISTS pg_catalog.>(sparsevec, sparsevec) CASCADE; + end if; +END$$; + +DROP FUNCTION IF EXISTS pg_catalog.sparsevec_in(cstring, oid, int4) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.sparsevec_typmod_in(_cstring) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.sparsevec_recv(internal, oid, int4) CASCADE; +DO $$ +DECLARE +ans boolean; +BEGIN + select case when count(*)=1 then true else false end as ans from (select * from pg_type where typname = 'sparsevec' limit 1) into ans; + if ans = true then + DROP FUNCTION IF EXISTS pg_catalog.sparsevec_out(pg_catalog.sparsevec) CASCADE; + 
DROP FUNCTION IF EXISTS pg_catalog.sparsevec_send(pg_catalog.sparsevec) CASCADE; + end if; +END$$; +DROP TYPE IF EXISTS pg_catalog.sparsevec CASCADE; +DROP TYPE IF EXISTS pg_catalog._sparsevec CASCADE; + +DO $$ +DECLARE +ans boolean; +BEGIN + select case when count(*)=1 then true else false end as ans from (select * from pg_type where typname = 'vector' limit 1) into ans; + if ans = true then + DROP FUNCTION IF EXISTS pg_catalog.inner_product(vector, vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.cosine_distance(vector, vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.l1_distance(vector, vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.l2_distance(vector, vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.vector_dims(vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.vector_norm(vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.l2_normalize(vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.binary_quantize(vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.subvector(vector, int4, int4) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.vector_add(vector, vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.vector_sub(vector, vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.vector_mul(vector, vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.vector_concat(vector, vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.vector_lt(vector, vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.vector_le(vector, vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.vector_eq(vector, vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.vector_ne(vector, vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.vector_ge(vector, vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.vector_gt(vector, vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.vector_cmp(vector, vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.vector_l2_squared_distance(vector, vector) CASCADE; + DROP FUNCTION IF EXISTS 
pg_catalog.vector_negative_inner_product(vector, vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.vector_spherical_distance(vector, vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.vector_accum(_float8, vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.vector(vector, int4, boolean) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.vector_to_float4(vector, int4, boolean) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.vector_to_int4(vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.vector_to_float8(vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.vector_to_numeric(vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.vector_to_text(vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.vector_to_varchar(vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.vector_to_sparsevec(vector, int4, boolean) CASCADE; + DROP OPERATOR IF EXISTS pg_catalog.<->(vector, vector) CASCADE; + DROP OPERATOR IF EXISTS pg_catalog.<#>(vector, vector) CASCADE; + DROP OPERATOR IF EXISTS pg_catalog.<=>(vector, vector) CASCADE; + DROP OPERATOR IF EXISTS pg_catalog.<+>(vector, vector) CASCADE; + DROP OPERATOR IF EXISTS pg_catalog.+(vector, vector) CASCADE; + DROP OPERATOR IF EXISTS pg_catalog.-(vector, vector) CASCADE; + DROP OPERATOR IF EXISTS pg_catalog.*(vector, vector) CASCADE; + DROP OPERATOR IF EXISTS pg_catalog.||(vector, vector) CASCADE; + DROP OPERATOR IF EXISTS pg_catalog.<(vector, vector) CASCADE; + DROP OPERATOR IF EXISTS pg_catalog.<=(vector, vector) CASCADE; + DROP OPERATOR IF EXISTS pg_catalog.=(vector, vector) CASCADE; + DROP OPERATOR IF EXISTS pg_catalog.<>(vector, vector) CASCADE; + DROP OPERATOR IF EXISTS pg_catalog.>=(vector, vector) CASCADE; + DROP OPERATOR IF EXISTS pg_catalog.>(vector, vector) CASCADE; + drop aggregate if exists pg_catalog.avg(vector) CASCADE; + drop aggregate if exists pg_catalog.sum(vector) CASCADE; + end if; +END$$; + +DROP FUNCTION IF EXISTS pg_catalog.vector_in(cstring, oid, int4) CASCADE; +DROP FUNCTION IF EXISTS 
pg_catalog.vector_typmod_in(_cstring) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.vector_recv(internal, oid, int4) CASCADE; +DO $$ +DECLARE +ans boolean; +BEGIN + select case when count(*)=1 then true else false end as ans from (select * from pg_type where typname = 'vector' limit 1) into ans; + if ans = true then + DROP FUNCTION IF EXISTS pg_catalog.vector_out(pg_catalog.vector) CASCADE; + DROP FUNCTION IF EXISTS pg_catalog.vector_send(pg_catalog.vector) CASCADE; + end if; +END$$; +DROP TYPE IF EXISTS pg_catalog.vector CASCADE; +DROP TYPE IF EXISTS pg_catalog._vector CASCADE; + +DROP OPERATOR IF EXISTS pg_catalog.<~>(bit, bit) CASCADE; +DROP OPERATOR IF EXISTS pg_catalog.<%>(bit, bit) CASCADE; +DROP ACCESS METHOD IF EXISTS ivfflat CASCADE; +DROP ACCESS METHOD IF EXISTS hnsw CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.vector_avg(_float8) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.vector_combine(_float8, _float8) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.array_to_vector(_int4, int4, boolean) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.array_to_vector(_float4, int4, boolean) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.array_to_vector(_float8, int4, boolean) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.array_to_vector(_numeric, int4, boolean) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.ivfflatbuild(internal, internal, internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.ivfflatbuildempty(internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.ivfflatinsert(internal, internal, internal, internal, internal, internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.ivfflatbulkdelete(internal, internal, internal, internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.ivfflatvacuumcleanup(internal, internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.ivfflatcostestimate(internal, internal, internal, internal, internal, internal, internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.ivfflatoptions(internal, internal) CASCADE; +DROP FUNCTION IF EXISTS 
pg_catalog.ivfflatvalidate(internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.ivfflatbeginscan(internal, internal, internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.ivfflatrescan(internal, internal, internal, internal, internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.ivfflatgettuple(internal, internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.ivfflatendscan(internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.ivfflathandler(internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.ivfflat_bit_support(internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.hnswbuild(internal, internal, internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.hnswbuildempty(internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.hnswinsert(internal, internal, internal, internal, internal, internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.hnswbulkdelete(internal, internal, internal, internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.hnswvacuumcleanup(internal, internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.hnswcostestimate(internal, internal, internal, internal, internal, internal, internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.hnswoptions(internal, internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.hnswvalidate(internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.hnswbeginscan(internal, internal, internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.hnswrescan(internal, internal, internal, internal, internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.hnswdelete(internal, internal, internal, internal, internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.hnswgettuple(internal, internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.hnswendscan(internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.hnswhandler(internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.hnsw_bit_support(internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.hnsw_sparsevec_support(internal) CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.hamming_distance(bit, bit) 
CASCADE; +DROP FUNCTION IF EXISTS pg_catalog.jaccard_distance(bit, bit) CASCADE; + diff --git a/src/include/catalog/upgrade_sql/upgrade_catalog_maindb/upgrade-post_catalog_maindb_92_901.sql b/src/include/catalog/upgrade_sql/upgrade_catalog_maindb/upgrade-post_catalog_maindb_92_901.sql index ff791870274dc48356824cf8334e13af3a02d113..ff2d3320b04a4207231637f8e8367c569a31d0f3 100644 --- a/src/include/catalog/upgrade_sql/upgrade_catalog_maindb/upgrade-post_catalog_maindb_92_901.sql +++ b/src/include/catalog/upgrade_sql/upgrade_catalog_maindb/upgrade-post_catalog_maindb_92_901.sql @@ -3,7 +3,7 @@ RETURNS void AS $$ DECLARE query_str text; BEGIN -query_str := 'update pg_catalog.pg_am set amsupport = 3, amhandler = 0 where amname = ''btree'' or amname = ''ubtree'''; +query_str := 'update pg_catalog.pg_am set amsupport = 3, amhandler = 0, amdelete = 0 where amname = ''btree'' or amname = ''ubtree'''; EXECUTE(query_str); return; END; $$ LANGUAGE 'plpgsql'; @@ -83,4 +83,4 @@ return; END; $$ LANGUAGE 'plpgsql'; SELECT Insert_pg_amproc_temp(); -DROP FUNCTION Insert_pg_amproc_temp(); \ No newline at end of file +DROP FUNCTION Insert_pg_amproc_temp(); diff --git a/src/include/catalog/upgrade_sql/upgrade_catalog_maindb/upgrade-post_catalog_maindb_93_019.sql b/src/include/catalog/upgrade_sql/upgrade_catalog_maindb/upgrade-post_catalog_maindb_93_019.sql new file mode 100644 index 0000000000000000000000000000000000000000..c0036af1bca1ef57f48767f5868f6e1a61cba93b --- /dev/null +++ b/src/include/catalog/upgrade_sql/upgrade_catalog_maindb/upgrade-post_catalog_maindb_93_019.sql @@ -0,0 +1,1437 @@ +DROP TYPE IF EXISTS pg_catalog.vector CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_TYPE, 8305, 8308, b; +CREATE TYPE pg_catalog.vector; + +DROP FUNCTION IF EXISTS pg_catalog.vector_in(cstring, oid, int4) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8423; +CREATE FUNCTION pg_catalog.vector_in(cstring, oid, int4) +RETURNS vector +AS 'vector_in' 
+LANGUAGE INTERNAL +STABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_out(vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8424; +CREATE FUNCTION pg_catalog.vector_out(vector) +RETURNS cstring +AS 'vector_out' +LANGUAGE INTERNAL +STABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_typmod_in(_cstring) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8425; +CREATE FUNCTION pg_catalog.vector_typmod_in(_cstring) +RETURNS int4 +AS 'vector_typmod_in' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_recv(internal, oid, int4) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8426; +CREATE FUNCTION pg_catalog.vector_recv(internal, oid, int4) +RETURNS vector +AS 'vector_recv' +LANGUAGE INTERNAL +STABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_send(vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8427; +CREATE FUNCTION pg_catalog.vector_send(vector) +RETURNS bytea +AS 'vector_send' +LANGUAGE INTERNAL +STABLE STRICT; + +CREATE TYPE pg_catalog.vector ( + INPUT = vector_in, + OUTPUT = vector_out, + TYPMOD_IN = vector_typmod_in, + RECEIVE = vector_recv, + SEND = vector_send, + STORAGE = external +); + +DROP FUNCTION IF EXISTS pg_catalog.l2_distance(vector, vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8433; +CREATE FUNCTION pg_catalog.l2_distance(vector, vector) +RETURNS float8 +AS 'l2_distance' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.inner_product(vector, vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8437; +CREATE FUNCTION pg_catalog.inner_product(vector, vector) +RETURNS float8 +AS 'inner_product' +LANGUAGE INTERNAL +STABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.cosine_distance(vector, vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8435; +CREATE FUNCTION 
pg_catalog.cosine_distance(vector, vector) +RETURNS float8 +AS 'cosine_distance' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.l1_distance(vector, vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8436; +CREATE FUNCTION pg_catalog.l1_distance(vector, vector) +RETURNS float8 +AS 'l1_distance' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_dims(vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8428; +CREATE FUNCTION pg_catalog.vector_dims(vector) +RETURNS int4 +AS 'vector_dims' +LANGUAGE INTERNAL +STABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_norm(vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8438; +CREATE FUNCTION pg_catalog.vector_norm(vector) +RETURNS float8 +AS 'vector_norm' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.l2_normalize(vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8200; +CREATE FUNCTION pg_catalog.l2_normalize(vector) +RETURNS vector +AS 'l2_normalize' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.binary_quantize(vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8201; +CREATE FUNCTION pg_catalog.binary_quantize(vector) +RETURNS varbit +AS 'binary_quantize' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.subvector(vector, int, int) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8202; +CREATE FUNCTION pg_catalog.subvector(vector, int, int) +RETURNS vector +AS 'subvector' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_add(vector, vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8439; +CREATE FUNCTION pg_catalog.vector_add(vector, vector) +RETURNS vector +AS 'vector_add' +LANGUAGE INTERNAL +STABLE STRICT; + +DROP FUNCTION IF EXISTS 
pg_catalog.vector_sub(vector, vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8440; +CREATE FUNCTION pg_catalog.vector_sub(vector, vector) +RETURNS vector +AS 'vector_sub' +LANGUAGE INTERNAL +STABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_mul(vector, vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8203; +CREATE FUNCTION pg_catalog.vector_mul(vector, vector) +RETURNS vector +AS 'vector_mul' +LANGUAGE INTERNAL +STABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_concat(vector, vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8204; +CREATE FUNCTION pg_catalog.vector_concat(vector, vector) +RETURNS vector +AS 'vector_concat' +LANGUAGE INTERNAL +STABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_lt(vector, vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8441; +CREATE FUNCTION pg_catalog.vector_lt(vector, vector) +RETURNS bool +AS 'vector_lt' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_le(vector, vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8442; +CREATE FUNCTION pg_catalog.vector_le(vector, vector) +RETURNS bool +AS 'vector_le' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_eq(vector, vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8443; +CREATE FUNCTION pg_catalog.vector_eq(vector, vector) +RETURNS bool +AS 'vector_eq' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_ne(vector, vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8444; +CREATE FUNCTION pg_catalog.vector_ne(vector, vector) +RETURNS bool +AS 'vector_ne' +LANGUAGE INTERNAL +STABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_ge(vector, vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8445; +CREATE FUNCTION 
pg_catalog.vector_ge(vector, vector) +RETURNS bool +AS 'vector_ge' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_gt(vector, vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8446; +CREATE FUNCTION pg_catalog.vector_gt(vector, vector) +RETURNS bool +AS 'vector_gt' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_cmp(vector, vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8450; +CREATE FUNCTION pg_catalog.vector_cmp(vector, vector) +RETURNS int4 +AS 'vector_cmp' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_l2_squared_distance(vector, vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8431; +CREATE FUNCTION pg_catalog.vector_l2_squared_distance(vector, vector) +RETURNS float8 +AS 'vector_l2_squared_distance' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_negative_inner_product(vector, vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8434; +CREATE FUNCTION pg_catalog.vector_negative_inner_product(vector, vector) +RETURNS float8 +AS 'vector_negative_inner_product' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_spherical_distance(vector, vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8432; +CREATE FUNCTION pg_catalog.vector_spherical_distance(vector, vector) +RETURNS float8 +AS 'vector_spherical_distance' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_accum(_float8, vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8447; +CREATE FUNCTION pg_catalog.vector_accum(_float8, vector) +RETURNS _float8 +AS 'vector_accum' +LANGUAGE INTERNAL +STABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_avg(_float8) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8449; +CREATE 
FUNCTION pg_catalog.vector_avg(_float8) +RETURNS vector +AS 'vector_avg' +LANGUAGE INTERNAL +STABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_combine(_float8, _float8) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8448; +CREATE FUNCTION pg_catalog.vector_combine(_float8, _float8) +RETURNS _float8 +AS 'vector_combine' +LANGUAGE INTERNAL +STABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector(vector, int4, boolean) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8214; +CREATE FUNCTION pg_catalog.vector(vector, int4, boolean) +RETURNS vector +AS 'vector' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.array_to_vector(_int4, int4, boolean) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8215; +CREATE FUNCTION pg_catalog.array_to_vector(_int4, int4, boolean) +RETURNS vector +AS 'array_to_vector' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.array_to_vector(_float4, int4, boolean) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8216; +CREATE FUNCTION pg_catalog.array_to_vector(_float4, int4, boolean) +RETURNS vector +AS 'array_to_vector' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.array_to_vector(_float8, int4, boolean) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8217; +CREATE FUNCTION pg_catalog.array_to_vector(_float8, int4, boolean) +RETURNS vector +AS 'array_to_vector' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.array_to_vector(_numeric, int4, boolean) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8218; +CREATE FUNCTION pg_catalog.array_to_vector(_numeric, int4, boolean) +RETURNS vector +AS 'array_to_vector' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_to_float4(vector, int4, boolean) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 
8219; +CREATE FUNCTION pg_catalog.vector_to_float4(vector, int4, boolean) +RETURNS _float4 +AS 'vector_to_float4' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_to_int4(vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8212; +CREATE FUNCTION pg_catalog.vector_to_int4(vector) +RETURNS _int4 +AS 'vector_to_int4' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_to_float8(vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8213; +CREATE FUNCTION pg_catalog.vector_to_float8(vector) +RETURNS _float8 +AS 'vector_to_float8' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_to_numeric(vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8221; +CREATE FUNCTION pg_catalog.vector_to_numeric(vector) +RETURNS _numeric +AS 'vector_to_numeric' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_to_text(vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8222; +CREATE FUNCTION pg_catalog.vector_to_text(vector) +RETURNS _text +AS 'vector_to_text' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_to_varchar(vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8223; +CREATE FUNCTION pg_catalog.vector_to_varchar(vector) +RETURNS _varchar +AS 'vector_to_varchar' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.ivfflatbuild(internal, internal, internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8417; +CREATE FUNCTION pg_catalog.ivfflatbuild(internal, internal, internal) +RETURNS internal +AS 'ivfflatbuild' +LANGUAGE INTERNAL +STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.ivfflatbuildempty(internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8418; +CREATE FUNCTION pg_catalog.ivfflatbuildempty(internal) +RETURNS void 
+AS 'ivfflatbuildempty' +LANGUAGE INTERNAL +STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.ivfflatinsert(internal, internal, internal, internal, internal, internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8412; +CREATE FUNCTION pg_catalog.ivfflatinsert(internal, internal, internal, internal, internal, internal) +RETURNS boolean +AS 'ivfflatinsert' +LANGUAGE INTERNAL +STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.ivfflatbulkdelete(internal, internal, internal, internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8419; +CREATE FUNCTION pg_catalog.ivfflatbulkdelete(internal, internal, internal, internal) +RETURNS internal +AS 'ivfflatbulkdelete' +LANGUAGE INTERNAL +STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.ivfflatvacuumcleanup(internal, internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8420; +CREATE FUNCTION pg_catalog.ivfflatvacuumcleanup(internal, internal) +RETURNS internal +AS 'ivfflatvacuumcleanup' +LANGUAGE INTERNAL +STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.ivfflatcostestimate(internal, internal, internal, internal, internal, internal, internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8421; +CREATE FUNCTION pg_catalog.ivfflatcostestimate(internal, internal, internal, internal, internal, internal, internal) +RETURNS void +AS 'ivfflatcostestimate' +LANGUAGE INTERNAL +STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.ivfflatoptions(internal, internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8422; +CREATE FUNCTION pg_catalog.ivfflatoptions(internal, internal) +RETURNS internal +AS 'ivfflatoptions' +LANGUAGE INTERNAL +STABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.ivfflatvalidate(internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8205; +CREATE FUNCTION pg_catalog.ivfflatvalidate(internal) +RETURNS boolean +AS 'ivfflatvalidate' +LANGUAGE INTERNAL +STRICT; + +DROP FUNCTION IF EXISTS 
pg_catalog.ivfflatbeginscan(internal, internal, internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8413; +CREATE FUNCTION pg_catalog.ivfflatbeginscan(internal, internal, internal) +RETURNS internal +AS 'ivfflatbeginscan' +LANGUAGE INTERNAL +STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.ivfflatrescan(internal, internal, internal, internal, internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8415; +CREATE FUNCTION pg_catalog.ivfflatrescan(internal, internal, internal, internal, internal) +RETURNS void +AS 'ivfflatrescan' +LANGUAGE INTERNAL +STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.ivfflatgettuple(internal, internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8414; +CREATE FUNCTION pg_catalog.ivfflatgettuple(internal, internal) +RETURNS boolean +AS 'ivfflatgettuple' +LANGUAGE INTERNAL +STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.ivfflatendscan(internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8416; +CREATE FUNCTION pg_catalog.ivfflatendscan(internal) +RETURNS void +AS 'ivfflatendscan' +LANGUAGE INTERNAL +STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.ivfflathandler(internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8206; +CREATE FUNCTION pg_catalog.ivfflathandler(internal) +RETURNS internal +AS 'ivfflathandler' +LANGUAGE INTERNAL +STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.hnswbuild(internal, internal, internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8406; +CREATE FUNCTION pg_catalog.hnswbuild(internal, internal, internal) +RETURNS internal +AS 'hnswbuild' +LANGUAGE INTERNAL +STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.hnswbuildempty(internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8407; +CREATE FUNCTION pg_catalog.hnswbuildempty(internal) +RETURNS void +AS 'hnswbuildempty' +LANGUAGE INTERNAL +STRICT; + +DROP FUNCTION IF EXISTS 
pg_catalog.hnswinsert(internal, internal, internal, internal, internal, internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8401; +CREATE FUNCTION pg_catalog.hnswinsert(internal, internal, internal, internal, internal, internal) +RETURNS boolean +AS 'hnswinsert' +LANGUAGE INTERNAL +STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.hnswbulkdelete(internal, internal, internal, internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8408; +CREATE FUNCTION pg_catalog.hnswbulkdelete(internal, internal, internal, internal) +RETURNS internal +AS 'hnswbulkdelete' +LANGUAGE INTERNAL +STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.hnswvacuumcleanup(internal, internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8409; +CREATE FUNCTION pg_catalog.hnswvacuumcleanup(internal, internal) +RETURNS internal +AS 'hnswvacuumcleanup' +LANGUAGE INTERNAL +STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.hnswcostestimate(internal, internal, internal, internal, internal, internal, internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8410; +CREATE FUNCTION pg_catalog.hnswcostestimate(internal, internal, internal, internal, internal, internal, internal) +RETURNS void +AS 'hnswcostestimate' +LANGUAGE INTERNAL +STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.hnswoptions(internal, internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8411; +CREATE FUNCTION pg_catalog.hnswoptions(internal, internal) +RETURNS internal +AS 'hnswoptions' +LANGUAGE INTERNAL +STABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.hnswvalidate(internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8207; +CREATE FUNCTION pg_catalog.hnswvalidate(internal) +RETURNS boolean +AS 'hnswvalidate' +LANGUAGE INTERNAL +STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.hnswbeginscan(internal, internal, internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8402; 
+CREATE FUNCTION pg_catalog.hnswbeginscan(internal, internal, internal) +RETURNS internal +AS 'hnswbeginscan' +LANGUAGE INTERNAL +STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.hnswrescan(internal, internal, internal, internal, internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8404; +CREATE FUNCTION pg_catalog.hnswrescan(internal, internal, internal, internal, internal) +RETURNS void +AS 'hnswrescan' +LANGUAGE INTERNAL +STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.hnswdelete(internal, internal, internal, internal, internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8429; +CREATE FUNCTION pg_catalog.hnswdelete(internal, internal, internal, internal, internal) +RETURNS boolean +AS 'hnswdelete' +LANGUAGE INTERNAL +STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.hnswgettuple(internal, internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8403; +CREATE FUNCTION pg_catalog.hnswgettuple(internal, internal) +RETURNS boolean +AS 'hnswgettuple' +LANGUAGE INTERNAL +STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.hnswendscan(internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8405; +CREATE FUNCTION pg_catalog.hnswendscan(internal) +RETURNS void +AS 'hnswendscan' +LANGUAGE INTERNAL +STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.hnswhandler(internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8208; +CREATE FUNCTION pg_catalog.hnswhandler(internal) +RETURNS internal +AS 'hnswhandler' +LANGUAGE INTERNAL +STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.ivfflat_bit_support(internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8210; +CREATE FUNCTION pg_catalog.ivfflat_bit_support(internal) +RETURNS internal +AS 'ivfflat_bit_support' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.hnsw_bit_support(internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8209; +CREATE FUNCTION 
pg_catalog.hnsw_bit_support(internal) +RETURNS internal +AS 'hnsw_bit_support' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.hnsw_sparsevec_support(internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8479; +CREATE FUNCTION pg_catalog.hnsw_sparsevec_support(internal) +RETURNS internal +AS 'hnsw_sparsevec_support' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.hamming_distance(bit, bit) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8469; +CREATE FUNCTION pg_catalog.hamming_distance(bit, bit) +RETURNS float8 +AS 'hamming_distance' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.jaccard_distance(bit, bit) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8468; +CREATE FUNCTION pg_catalog.jaccard_distance(bit, bit) +RETURNS float8 +AS 'jaccard_distance' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP TYPE IF EXISTS pg_catalog.sparsevec CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_TYPE, 8307, 8310, b; +CREATE TYPE pg_catalog.sparsevec; + +DROP FUNCTION IF EXISTS pg_catalog.sparsevec_in(cstring, oid, int4) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8458; +CREATE FUNCTION pg_catalog.sparsevec_in(cstring, oid, int4) +RETURNS sparsevec +AS 'sparsevec_in' +LANGUAGE INTERNAL +STABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.sparsevec_out(sparsevec) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8459; +CREATE FUNCTION pg_catalog.sparsevec_out(sparsevec) +RETURNS cstring +AS 'sparsevec_out' +LANGUAGE INTERNAL +STABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.sparsevec_typmod_in(_cstring) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8460; +CREATE FUNCTION pg_catalog.sparsevec_typmod_in(_cstring) +RETURNS int4 +AS 'sparsevec_typmod_in' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS 
pg_catalog.sparsevec_recv(internal, oid, int4) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8461; +CREATE FUNCTION pg_catalog.sparsevec_recv(internal, oid, int4) +RETURNS sparsevec +AS 'sparsevec_recv' +LANGUAGE INTERNAL +STABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.sparsevec_send(sparsevec) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8462; +CREATE FUNCTION pg_catalog.sparsevec_send(sparsevec) +RETURNS bytea +AS 'sparsevec_send' +LANGUAGE INTERNAL +STABLE STRICT; + +CREATE TYPE pg_catalog.sparsevec ( + INPUT = sparsevec_in, + OUTPUT = sparsevec_out, + TYPMOD_IN = sparsevec_typmod_in, + RECEIVE = sparsevec_recv, + SEND = sparsevec_send, + STORAGE = external +); + +DROP FUNCTION IF EXISTS pg_catalog.l2_distance(sparsevec, sparsevec) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8465; +CREATE FUNCTION pg_catalog.l2_distance(sparsevec, sparsevec) +RETURNS float8 +AS 'sparsevec_l2_distance' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.inner_product(sparsevec, sparsevec) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8471; +CREATE FUNCTION pg_catalog.inner_product(sparsevec, sparsevec) +RETURNS float8 +AS 'sparsevec_inner_product' +LANGUAGE INTERNAL +STABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.cosine_distance(sparsevec, sparsevec) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8466; +CREATE FUNCTION pg_catalog.cosine_distance(sparsevec, sparsevec) +RETURNS float8 +AS 'sparsevec_cosine_distance' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.l1_distance(sparsevec, sparsevec) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8467; +CREATE FUNCTION pg_catalog.l1_distance(sparsevec, sparsevec) +RETURNS float8 +AS 'sparsevec_l1_distance' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.l2_norm(sparsevec) CASCADE; +SET LOCAL 
inplace_upgrade_next_system_object_oids=IUO_PROC, 8478; +CREATE FUNCTION pg_catalog.l2_norm(sparsevec) +RETURNS float8 +AS 'sparsevec_l2_norm' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.l2_normalize(sparsevec) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8211; +CREATE FUNCTION pg_catalog.l2_normalize(sparsevec) +RETURNS sparsevec +AS 'sparsevec_l2_normalize' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.sparsevec_lt(sparsevec, sparsevec) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8472; +CREATE FUNCTION pg_catalog.sparsevec_lt(sparsevec, sparsevec) +RETURNS bool +AS 'sparsevec_lt' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.sparsevec_le(sparsevec, sparsevec) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8473; +CREATE FUNCTION pg_catalog.sparsevec_le(sparsevec, sparsevec) +RETURNS bool +AS 'sparsevec_le' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.sparsevec_eq(sparsevec, sparsevec) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8474; +CREATE FUNCTION pg_catalog.sparsevec_eq(sparsevec, sparsevec) +RETURNS bool +AS 'sparsevec_eq' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.sparsevec_ne(sparsevec, sparsevec) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8475; +CREATE FUNCTION pg_catalog.sparsevec_ne(sparsevec, sparsevec) +RETURNS bool +AS 'sparsevec_ne' +LANGUAGE INTERNAL +STABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.sparsevec_ge(sparsevec, sparsevec) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8476; +CREATE FUNCTION pg_catalog.sparsevec_ge(sparsevec, sparsevec) +RETURNS bool +AS 'sparsevec_ge' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.sparsevec_gt(sparsevec, sparsevec) CASCADE; +SET LOCAL 
inplace_upgrade_next_system_object_oids=IUO_PROC, 8477; +CREATE FUNCTION pg_catalog.sparsevec_gt(sparsevec, sparsevec) +RETURNS bool +AS 'sparsevec_gt' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.sparsevec_cmp(sparsevec, sparsevec) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8464; +CREATE FUNCTION pg_catalog.sparsevec_cmp(sparsevec, sparsevec) +RETURNS int4 +AS 'sparsevec_cmp' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.sparsevec_l2_squared_distance(sparsevec, sparsevec) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8470; +CREATE FUNCTION pg_catalog.sparsevec_l2_squared_distance(sparsevec, sparsevec) +RETURNS float8 +AS 'sparsevec_l2_squared_distance' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.sparsevec_negative_inner_product(sparsevec, sparsevec) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8463; +CREATE FUNCTION pg_catalog.sparsevec_negative_inner_product(sparsevec, sparsevec) +RETURNS float8 +AS 'sparsevec_negative_inner_product' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.sparsevec(sparsevec, int4, boolean) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8228; +CREATE FUNCTION pg_catalog.sparsevec(sparsevec, int4, boolean) +RETURNS sparsevec +AS 'sparsevec' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_to_sparsevec(vector, int4, boolean) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8229; +CREATE FUNCTION pg_catalog.vector_to_sparsevec(vector, int4, boolean) +RETURNS sparsevec +AS 'vector_to_sparsevec' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.sparsevec_to_vector(sparsevec, int4, boolean) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8230; +CREATE FUNCTION pg_catalog.sparsevec_to_vector(sparsevec, int4, boolean) +RETURNS 
vector
+AS 'sparsevec_to_vector'
+LANGUAGE INTERNAL
+IMMUTABLE STRICT;
+
+COMMENT ON FUNCTION pg_catalog.vector_in(cstring, oid, int4) IS 'I/O'; -- descriptions for the functions created above; type I/O routines are tagged 'I/O'
+COMMENT ON FUNCTION pg_catalog.vector_out(vector) IS 'I/O';
+COMMENT ON FUNCTION pg_catalog.vector_typmod_in(_cstring) IS 'NULL'; -- note: 'NULL' here is a quoted string literal, not SQL NULL
+COMMENT ON FUNCTION pg_catalog.vector_recv(internal, oid, int4) IS 'I/O';
+COMMENT ON FUNCTION pg_catalog.vector_send(vector) IS 'I/O';
+COMMENT ON FUNCTION pg_catalog.l2_distance(vector, vector) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.inner_product(vector, vector) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.cosine_distance(vector, vector) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.l1_distance(vector, vector) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.vector_dims(vector) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.vector_norm(vector) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.l2_normalize(vector) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.binary_quantize(vector) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.subvector(vector, int, int) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.vector_add(vector, vector) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.vector_sub(vector, vector) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.vector_mul(vector, vector) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.vector_concat(vector, vector) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.vector_lt(vector, vector) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.vector_le(vector, vector) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.vector_eq(vector, vector) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.vector_ne(vector, vector) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.vector_ge(vector, vector) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.vector_gt(vector, vector) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.vector_cmp(vector, vector) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.vector_l2_squared_distance(vector, vector) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.vector_negative_inner_product(vector, vector) IS 'NULL';
+COMMENT ON FUNCTION
pg_catalog.vector_spherical_distance(vector, vector) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.vector_accum(_float8, vector) IS 'NULL'; -- vector_accum / vector_avg / vector_combine are the support functions of the avg(vector) aggregate defined later
+COMMENT ON FUNCTION pg_catalog.vector_avg(_float8) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.vector_combine(_float8,_float8) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.vector(vector, int4, boolean) IS 'NULL'; -- cast functions (referenced by the CREATE CAST statements later)
+COMMENT ON FUNCTION pg_catalog.array_to_vector(_int4, int4, boolean) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.array_to_vector(_float4, int4, boolean) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.array_to_vector(_float8, int4, boolean) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.array_to_vector(_numeric, int4, boolean) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.vector_to_float4(vector, int4, boolean) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.vector_to_int4(vector) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.vector_to_float8(vector) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.vector_to_numeric(vector) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.vector_to_text(vector) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.vector_to_varchar(vector) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.ivfflatbuild(internal, internal, internal) IS 'NULL'; -- ivfflat* entries: functions of the ivfflat access method (handler: ivfflathandler)
+COMMENT ON FUNCTION pg_catalog.ivfflatbuildempty(internal) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.ivfflatinsert(internal, internal, internal, internal, internal, internal) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.ivfflatbulkdelete(internal, internal, internal, internal) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.ivfflatvacuumcleanup(internal, internal) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.ivfflatcostestimate(internal, internal, internal, internal, internal, internal, internal) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.ivfflatoptions(internal, internal) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.ivfflatvalidate(internal) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.ivfflatbeginscan(internal, internal, internal) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.ivfflatrescan(internal, internal,
internal, internal, internal) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.ivfflatgettuple(internal, internal) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.ivfflatendscan(internal) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.ivfflathandler(internal) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.hnswbuild(internal, internal, internal) IS 'NULL'; -- hnsw* entries: functions of the hnsw access method (handler: hnswhandler)
+COMMENT ON FUNCTION pg_catalog.hnswbuildempty(internal) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.hnswinsert(internal, internal, internal, internal, internal, internal) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.hnswbulkdelete(internal, internal, internal, internal) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.hnswvacuumcleanup(internal, internal) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.hnswcostestimate(internal, internal, internal, internal, internal, internal, internal) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.hnswoptions(internal, internal) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.hnswvalidate(internal) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.hnswbeginscan(internal, internal, internal) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.hnswrescan(internal, internal, internal, internal, internal) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.hnswdelete(internal, internal, internal, internal, internal) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.hnswgettuple(internal, internal) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.hnswendscan(internal) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.hnswhandler(internal) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.ivfflat_bit_support(internal) IS 'NULL'; -- *_support functions are registered as operator-class support functions later in this script
+COMMENT ON FUNCTION pg_catalog.hnsw_bit_support(internal) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.hnsw_sparsevec_support(internal) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.hamming_distance(bit, bit) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.jaccard_distance(bit, bit) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.sparsevec_in(cstring, oid, int4) IS 'I/O'; -- sparsevec type I/O routines are tagged 'I/O', like the vector ones
+COMMENT ON FUNCTION pg_catalog.sparsevec_out(sparsevec) IS 'I/O';
+COMMENT ON FUNCTION
pg_catalog.sparsevec_typmod_in(_cstring) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.sparsevec_recv(internal, oid, int4) IS 'I/O';
+COMMENT ON FUNCTION pg_catalog.sparsevec_send(sparsevec) IS 'I/O';
+COMMENT ON FUNCTION pg_catalog.l2_distance(sparsevec, sparsevec) IS 'NULL'; -- sparsevec overloads of the distance functions
+COMMENT ON FUNCTION pg_catalog.inner_product(sparsevec, sparsevec) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.cosine_distance(sparsevec, sparsevec) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.l1_distance(sparsevec, sparsevec) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.l2_norm(sparsevec) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.l2_normalize(sparsevec) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.sparsevec_lt(sparsevec, sparsevec) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.sparsevec_le(sparsevec, sparsevec) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.sparsevec_eq(sparsevec, sparsevec) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.sparsevec_ne(sparsevec, sparsevec) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.sparsevec_ge(sparsevec, sparsevec) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.sparsevec_gt(sparsevec, sparsevec) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.sparsevec_cmp(sparsevec, sparsevec) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.sparsevec_l2_squared_distance(sparsevec, sparsevec) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.sparsevec_negative_inner_product(sparsevec, sparsevec) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.sparsevec(sparsevec, int4, boolean) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.vector_to_sparsevec(vector, int4, boolean) IS 'NULL';
+COMMENT ON FUNCTION pg_catalog.sparsevec_to_vector(sparsevec, int4, boolean) IS 'NULL';
+
+drop aggregate if exists pg_catalog.avg(vector) CASCADE; -- avg(vector): state is a float8 array built by vector_accum, finalized by vector_avg
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8241;
+create aggregate pg_catalog.avg(vector) (SFUNC=vector_accum, STYPE= _float8, finalfunc = vector_avg,CFUNC = vector_combine,INITCOND = '{0}', INITCOLLECT='{0}');
+COMMENT ON aggregate pg_catalog.avg(vector) IS 'concatenate aggregate
input into an array'; -- NOTE(review): this avg(vector) description reads like an array-building aggregate's text; confirm it is intended
+
+drop aggregate if exists pg_catalog.sum(vector) CASCADE; -- sum(vector): vector_add is both the transition and the collect function
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8242;
+create aggregate pg_catalog.sum(vector) (SFUNC=vector_add, STYPE= vector, CFUNC = vector_add);
+COMMENT ON aggregate pg_catalog.sum(vector) IS 'the average (arithmetic mean) as numeric of all bigint values'; -- NOTE(review): text describes an average over bigint, not sum(vector); confirm against the builtin catalog before changing
+
+DROP CAST IF EXISTS (vector AS vector) CASCADE; -- casts use IUO_GENERAL (not IUO_PROC) for the next-OID GUC
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8299;
+CREATE CAST (vector AS vector)
+    WITH FUNCTION vector(vector, int4, boolean) AS IMPLICIT;
+
+DROP CAST IF EXISTS (vector AS _varchar) CASCADE; -- vector -> array casts below are all declared IMPLICIT
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8289;
+CREATE CAST (vector AS _varchar)
+    WITH FUNCTION vector_to_varchar(vector) AS IMPLICIT;
+
+DROP CAST IF EXISTS (vector AS _text) CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8290;
+CREATE CAST (vector AS _text)
+    WITH FUNCTION vector_to_text(vector) AS IMPLICIT;
+
+DROP CAST IF EXISTS (vector AS _numeric) CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8291;
+CREATE CAST (vector AS _numeric)
+    WITH FUNCTION vector_to_numeric(vector) AS IMPLICIT;
+
+DROP CAST IF EXISTS (vector AS _float8) CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8292;
+CREATE CAST (vector AS _float8)
+    WITH FUNCTION vector_to_float8(vector) AS IMPLICIT;
+
+DROP CAST IF EXISTS (vector AS _int4) CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8293;
+CREATE CAST (vector AS _int4)
+    WITH FUNCTION vector_to_int4(vector) AS IMPLICIT;
+
+DROP CAST IF EXISTS (vector AS _float4) CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8294;
+CREATE CAST (vector AS _float4)
+    WITH FUNCTION vector_to_float4(vector, int4, boolean) AS IMPLICIT;
+
+DROP CAST IF EXISTS (_int4 AS vector) CASCADE; -- array -> vector casts are declared AS ASSIGNMENT
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8298;
+CREATE CAST (_int4 AS vector)
+    WITH FUNCTION
array_to_vector(_int4, int4, boolean) AS ASSIGNMENT;
+
+DROP CAST IF EXISTS (_float4 AS vector) CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8297;
+CREATE CAST (_float4 AS vector)
+    WITH FUNCTION array_to_vector(_float4, int4, boolean) AS ASSIGNMENT;
+
+DROP CAST IF EXISTS (_float8 AS vector) CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8296;
+CREATE CAST (_float8 AS vector)
+    WITH FUNCTION array_to_vector(_float8, int4, boolean) AS ASSIGNMENT;
+
+DROP CAST IF EXISTS (_numeric AS vector) CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8295;
+CREATE CAST (_numeric AS vector)
+    WITH FUNCTION array_to_vector(_numeric, int4, boolean) AS ASSIGNMENT;
+
+DROP CAST IF EXISTS (sparsevec AS sparsevec) CASCADE; -- sparsevec casts: self-cast and vector->sparsevec are IMPLICIT, sparsevec->vector is ASSIGNMENT
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8285;
+CREATE CAST (sparsevec AS sparsevec)
+    WITH FUNCTION sparsevec(sparsevec, int4, boolean) AS IMPLICIT;
+
+DROP CAST IF EXISTS (sparsevec AS vector) CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8283;
+CREATE CAST (sparsevec AS vector)
+    WITH FUNCTION sparsevec_to_vector(sparsevec, int4, boolean) AS ASSIGNMENT;
+
+DROP CAST IF EXISTS (vector AS sparsevec) CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8284;
+CREATE CAST (vector AS sparsevec)
+    WITH FUNCTION vector_to_sparsevec(vector, int4, boolean) AS IMPLICIT;
+
+DROP ACCESS METHOD IF EXISTS ivfflat CASCADE; -- index access methods, each bound to its handler function
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8301;
+CREATE ACCESS METHOD ivfflat TYPE INDEX HANDLER ivfflathandler;
+
+DROP ACCESS METHOD IF EXISTS hnsw CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8300;
+CREATE ACCESS METHOD hnsw TYPE INDEX HANDLER hnswhandler;
+
+SET search_path = 'pg_catalog'; -- unqualified names in the operator / operator-class DDL below resolve in pg_catalog
+
+DROP OPERATOR IF EXISTS pg_catalog.<->(vector, vector) CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8311;
+CREATE OPERATOR pg_catalog.<->(
+
LEFTARG = vector, RIGHTARG = vector, PROCEDURE = pg_catalog.l2_distance,
+    COMMUTATOR = '<->'
+);
+
+DROP OPERATOR IF EXISTS pg_catalog.<#>(vector, vector) CASCADE; -- distance operators are declared self-commutative
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8312;
+CREATE OPERATOR pg_catalog.<#>(
+    LEFTARG = vector, RIGHTARG = vector, PROCEDURE = pg_catalog.vector_negative_inner_product,
+    COMMUTATOR = '<#>'
+);
+
+DROP OPERATOR IF EXISTS pg_catalog.<=>(vector, vector) CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8313;
+CREATE OPERATOR pg_catalog.<=>(
+    LEFTARG = vector, RIGHTARG = vector, PROCEDURE = pg_catalog.cosine_distance,
+    COMMUTATOR = '<=>'
+);
+
+DROP OPERATOR IF EXISTS pg_catalog.<+>(vector, vector) CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8314;
+CREATE OPERATOR pg_catalog.<+>(
+    LEFTARG = vector, RIGHTARG = vector, PROCEDURE = pg_catalog.l1_distance,
+    COMMUTATOR = '<+>'
+);
+
+DROP OPERATOR IF EXISTS pg_catalog.+(vector, vector) CASCADE; -- element-wise arithmetic operators on vector
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8325;
+CREATE OPERATOR pg_catalog.+(
+    LEFTARG = vector, RIGHTARG = vector, PROCEDURE = pg_catalog.vector_add,
+    COMMUTATOR = '+'
+);
+
+DROP OPERATOR IF EXISTS pg_catalog.-(vector, vector) CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8326;
+CREATE OPERATOR pg_catalog.-(
+    LEFTARG = vector, RIGHTARG = vector, -- no COMMUTATOR: a - b is not b - a, so '-' must not be declared self-commutative
+    PROCEDURE = pg_catalog.vector_sub
+);
+
+DROP OPERATOR IF EXISTS pg_catalog.*(vector, vector) CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8349;
+CREATE OPERATOR pg_catalog.*(
+    LEFTARG = vector, RIGHTARG = vector, PROCEDURE = pg_catalog.vector_mul,
+    COMMUTATOR = '*'
+);
+
+DROP OPERATOR IF EXISTS pg_catalog.||(vector, vector) CASCADE; -- concatenation is order-dependent, so no COMMUTATOR is declared
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8339;
+CREATE OPERATOR pg_catalog.||(
+    LEFTARG = vector, RIGHTARG = vector, PROCEDURE = pg_catalog.vector_concat
+);
+
+DROP OPERATOR IF EXISTS
pg_catalog.<(vector, vector) CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8327;
+CREATE OPERATOR pg_catalog.<( -- '<' and '<=' declare no COMMUTATOR/NEGATOR themselves; '>' and '>=' below name them as their commutator/negator
+    LEFTARG = vector, RIGHTARG = vector, PROCEDURE = pg_catalog.vector_lt,
+    RESTRICT = scalarltsel, JOIN = scalarltjoinsel
+);
+
+DROP OPERATOR IF EXISTS pg_catalog.<=(vector, vector) CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8328;
+CREATE OPERATOR pg_catalog.<=(
+    LEFTARG = vector, RIGHTARG = vector, PROCEDURE = pg_catalog.vector_le,
+    RESTRICT = scalarltsel, JOIN = scalarltjoinsel
+);
+
+DROP OPERATOR IF EXISTS pg_catalog.=(vector, vector) CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8331;
+CREATE OPERATOR pg_catalog.=(
+    LEFTARG = vector, RIGHTARG = vector, PROCEDURE = pg_catalog.vector_eq,
+    COMMUTATOR = '=' ,
+    RESTRICT = eqsel, JOIN = eqjoinsel, HASHES -- NOTE(review): HASHES is declared, but no hash operator class for vector appears among the classes created later in this section; confirm one exists
+);
+
+DROP OPERATOR IF EXISTS pg_catalog.<>(vector, vector) CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8332;
+CREATE OPERATOR pg_catalog.<>(
+    LEFTARG = vector, RIGHTARG = vector, PROCEDURE = pg_catalog.vector_ne,
+    COMMUTATOR = '<>' , NEGATOR = '=' ,
+    RESTRICT = neqsel, JOIN = neqjoinsel
+);
+
+DROP OPERATOR IF EXISTS pg_catalog.>=(vector, vector) CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8330;
+CREATE OPERATOR pg_catalog.>=(
+    LEFTARG = vector, RIGHTARG = vector, PROCEDURE = pg_catalog.vector_ge,
+    COMMUTATOR = '<=' , NEGATOR = '<' ,
+    RESTRICT = scalargtsel, JOIN = scalargtjoinsel
+);
+
+DROP OPERATOR IF EXISTS pg_catalog.>(vector, vector) CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8329;
+CREATE OPERATOR pg_catalog.>(
+    LEFTARG = vector, RIGHTARG = vector, PROCEDURE = pg_catalog.vector_gt,
+    COMMUTATOR = '<' , NEGATOR = '<=' ,
+    RESTRICT = scalargtsel, JOIN = scalargtjoinsel
+);
+
+DROP OPERATOR IF EXISTS pg_catalog.<~>(bit, bit) CASCADE; -- bit-string distance operators start here
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8323;
+CREATE OPERATOR
pg_catalog.<~>(
+    LEFTARG = bit, RIGHTARG = bit, PROCEDURE = pg_catalog.hamming_distance,
+    COMMUTATOR = '<~>'
+);
+
+DROP OPERATOR IF EXISTS pg_catalog.<%>(bit, bit) CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8324;
+CREATE OPERATOR pg_catalog.<%>(
+    LEFTARG = bit, RIGHTARG = bit, PROCEDURE = pg_catalog.jaccard_distance,
+    COMMUTATOR = '<%>'
+);
+
+DROP OPERATOR IF EXISTS pg_catalog.<->(sparsevec, sparsevec) CASCADE; -- sparsevec overloads of the four distance operators; <->, <=>, <+> reuse the overloaded l2/cosine/l1 function names
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8319;
+CREATE OPERATOR pg_catalog.<->(
+    LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = pg_catalog.l2_distance,
+    COMMUTATOR = '<->'
+);
+
+DROP OPERATOR IF EXISTS pg_catalog.<#>(sparsevec, sparsevec) CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8320;
+CREATE OPERATOR pg_catalog.<#>(
+    LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = pg_catalog.sparsevec_negative_inner_product,
+    COMMUTATOR = '<#>'
+);
+
+DROP OPERATOR IF EXISTS pg_catalog.<=>(sparsevec, sparsevec) CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8321;
+CREATE OPERATOR pg_catalog.<=>(
+    LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = pg_catalog.cosine_distance,
+    COMMUTATOR = '<=>'
+);
+
+DROP OPERATOR IF EXISTS pg_catalog.<+>(sparsevec, sparsevec) CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8322;
+CREATE OPERATOR pg_catalog.<+>(
+    LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = pg_catalog.l1_distance,
+    COMMUTATOR = '<+>'
+);
+
+DROP OPERATOR IF EXISTS pg_catalog.<(sparsevec, sparsevec) CASCADE; -- sparsevec comparison operators follow the same layout as the vector ones
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8333;
+CREATE OPERATOR pg_catalog.<(
+    LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = pg_catalog.sparsevec_lt,
+    RESTRICT = scalarltsel, JOIN = scalarltjoinsel
+);
+
+DROP OPERATOR IF EXISTS pg_catalog.<=(sparsevec, sparsevec) CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8334;
+CREATE OPERATOR
pg_catalog.<=(
+    LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = pg_catalog.sparsevec_le,
+    RESTRICT = scalarltsel, JOIN = scalarltjoinsel
+);
+
+DROP OPERATOR IF EXISTS pg_catalog.=(sparsevec, sparsevec) CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8337;
+CREATE OPERATOR pg_catalog.=(
+    LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = pg_catalog.sparsevec_eq,
+    COMMUTATOR = '=' ,
+    RESTRICT = eqsel, JOIN = eqjoinsel, HASHES -- NOTE(review): HASHES is declared, but no hash operator class for sparsevec appears among the classes created later in this section; confirm one exists
+);
+
+DROP OPERATOR IF EXISTS pg_catalog.<>(sparsevec, sparsevec) CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8338;
+CREATE OPERATOR pg_catalog.<>(
+    LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = pg_catalog.sparsevec_ne,
+    COMMUTATOR = '<>' , NEGATOR = '=' ,
+    RESTRICT = neqsel, JOIN = neqjoinsel
+);
+
+DROP OPERATOR IF EXISTS pg_catalog.>=(sparsevec, sparsevec) CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8336;
+CREATE OPERATOR pg_catalog.>=(
+    LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = pg_catalog.sparsevec_ge,
+    COMMUTATOR = '<=' , NEGATOR = '<' ,
+    RESTRICT = scalargtsel, JOIN = scalargtjoinsel
+);
+
+DROP OPERATOR IF EXISTS pg_catalog.>(sparsevec, sparsevec) CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8335;
+CREATE OPERATOR pg_catalog.>(
+    LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = pg_catalog.sparsevec_gt,
+    COMMUTATOR = '<' , NEGATOR = '<=' ,
+    RESTRICT = scalargtsel, JOIN = scalargtjoinsel
+);
+
+COMMENT ON OPERATOR pg_catalog.<->(vector,vector) IS 'l2_distance'; -- operator descriptions: distance/arithmetic operators are described by their function name
+COMMENT ON OPERATOR pg_catalog.<#>(vector,vector) IS 'vector_negative_inner_product';
+COMMENT ON OPERATOR pg_catalog.<=>(vector,vector) IS 'cosine_distance';
+COMMENT ON OPERATOR pg_catalog.<+>(vector,vector) IS 'l1_distance';
+COMMENT ON OPERATOR pg_catalog.||(vector,vector) IS 'vector_concat';
+COMMENT ON OPERATOR pg_catalog.+(vector,vector) IS 'vector_add';
+COMMENT ON OPERATOR pg_catalog.-(vector,vector) IS
'vector_sub';
+COMMENT ON OPERATOR pg_catalog.*(vector,vector) IS 'vector_mul';
+COMMENT ON OPERATOR pg_catalog.<(vector,vector) IS 'vector less than'; -- comparison operators get a plain-English description
+COMMENT ON OPERATOR pg_catalog.<=(vector,vector) IS 'vector less than or equal';
+COMMENT ON OPERATOR pg_catalog.>(vector,vector) IS 'vector greater than';
+COMMENT ON OPERATOR pg_catalog.>=(vector,vector) IS 'vector greater than or equal';
+COMMENT ON OPERATOR pg_catalog.=(vector,vector) IS 'vector equal';
+COMMENT ON OPERATOR pg_catalog.<>(vector,vector) IS 'vector unequal';
+COMMENT ON OPERATOR pg_catalog.<~>(bit,bit) IS 'hamming_distance';
+COMMENT ON OPERATOR pg_catalog.<%>(bit,bit) IS 'jaccard_distance';
+COMMENT ON OPERATOR pg_catalog.<->(sparsevec,sparsevec) IS 'sparsevec_l2_distance';
+COMMENT ON OPERATOR pg_catalog.<#>(sparsevec,sparsevec) IS 'sparsevec_negative_inner_product';
+COMMENT ON OPERATOR pg_catalog.<=>(sparsevec,sparsevec) IS 'sparsevec_cosine_distance';
+COMMENT ON OPERATOR pg_catalog.<+>(sparsevec,sparsevec) IS 'sparsevec_l1_distance';
+COMMENT ON OPERATOR pg_catalog.<(sparsevec,sparsevec) IS 'sparsevec less than';
+COMMENT ON OPERATOR pg_catalog.<=(sparsevec,sparsevec) IS 'sparsevec less than or equal';
+COMMENT ON OPERATOR pg_catalog.>(sparsevec,sparsevec) IS 'sparsevec greater than';
+COMMENT ON OPERATOR pg_catalog.>=(sparsevec,sparsevec) IS 'sparsevec greater than or equal';
+COMMENT ON OPERATOR pg_catalog.=(sparsevec,sparsevec) IS 'sparsevec equal';
+COMMENT ON OPERATOR pg_catalog.<>(sparsevec,sparsevec) IS 'sparsevec unequal';
+
+DROP OPERATOR FAMILY IF EXISTS pg_catalog.vector_ops USING btree CASCADE; -- each index opclass is created as a pair: operator family first, then the class of the same name
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8392;
+CREATE OPERATOR FAMILY pg_catalog.vector_ops USING btree;
+
+DROP OPERATOR CLASS IF EXISTS pg_catalog.vector_ops USING btree CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8977;
+CREATE OPERATOR CLASS pg_catalog.vector_ops DEFAULT
+    FOR TYPE vector USING btree as
+    OPERATOR 1
pg_catalog.<(vector, vector),
+    OPERATOR 2 pg_catalog.<=(vector, vector),
+    OPERATOR 3 pg_catalog.=(vector, vector),
+    OPERATOR 4 pg_catalog.>=(vector, vector),
+    OPERATOR 5 pg_catalog.>(vector, vector),
+    FUNCTION 1 pg_catalog.vector_cmp(vector,vector);
+
+DROP OPERATOR FAMILY IF EXISTS pg_catalog.vector_ops USING ubtree CASCADE; -- same strategies and support function as the btree class, registered for the ubtree access method
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8375;
+CREATE OPERATOR FAMILY pg_catalog.vector_ops USING ubtree;
+
+DROP OPERATOR CLASS IF EXISTS pg_catalog.vector_ops USING ubtree CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8951;
+CREATE OPERATOR CLASS pg_catalog.vector_ops DEFAULT
+    FOR TYPE vector USING ubtree AS
+    OPERATOR 1 pg_catalog.<(vector, vector),
+    OPERATOR 2 pg_catalog.<=(vector, vector),
+    OPERATOR 3 pg_catalog.=(vector, vector),
+    OPERATOR 4 pg_catalog.>=(vector, vector),
+    OPERATOR 5 pg_catalog.>(vector, vector),
+    FUNCTION 1 pg_catalog.vector_cmp(vector, vector);
+
+DROP OPERATOR FAMILY IF EXISTS pg_catalog.vector_l2_ops USING ivfflat CASCADE; -- ivfflat classes: one ordering operator (FOR ORDER BY float_ops) plus numbered support functions
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8385;
+CREATE OPERATOR FAMILY pg_catalog.vector_l2_ops USING ivfflat;
+
+DROP OPERATOR CLASS IF EXISTS pg_catalog.vector_l2_ops USING ivfflat CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8914;
+CREATE OPERATOR CLASS pg_catalog.vector_l2_ops
+    DEFAULT FOR TYPE vector USING ivfflat AS
+    OPERATOR 1 pg_catalog.<->(vector, vector) FOR ORDER BY float_ops,
+    FUNCTION 1 pg_catalog.vector_l2_squared_distance(vector, vector),
+    FUNCTION 3 pg_catalog.l2_distance(vector, vector);
+
+DROP OPERATOR FAMILY IF EXISTS pg_catalog.vector_ip_ops USING ivfflat CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8386;
+CREATE OPERATOR FAMILY pg_catalog.vector_ip_ops USING ivfflat;
+
+DROP OPERATOR CLASS IF EXISTS pg_catalog.vector_ip_ops USING ivfflat CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8915;
+CREATE OPERATOR
CLASS pg_catalog.vector_ip_ops
+    FOR TYPE vector USING ivfflat AS
+    OPERATOR 1 pg_catalog.<#>(vector, vector) FOR ORDER BY float_ops,
+    FUNCTION 1 pg_catalog.vector_negative_inner_product(vector, vector),
+    FUNCTION 3 pg_catalog.vector_spherical_distance(vector, vector),
+    FUNCTION 4 pg_catalog.vector_norm(vector);
+
+DROP OPERATOR FAMILY IF EXISTS pg_catalog.vector_cosine_ops USING ivfflat CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8387;
+CREATE OPERATOR FAMILY pg_catalog.vector_cosine_ops USING ivfflat;
+
+DROP OPERATOR CLASS IF EXISTS pg_catalog.vector_cosine_ops USING ivfflat CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8916;
+CREATE OPERATOR CLASS pg_catalog.vector_cosine_ops
+    FOR TYPE vector USING ivfflat AS
+    OPERATOR 1 pg_catalog.<=>(vector, vector) FOR ORDER BY float_ops,
+    FUNCTION 1 pg_catalog.vector_negative_inner_product(vector, vector),
+    FUNCTION 2 pg_catalog.vector_norm(vector), -- vector_norm is registered under both support numbers 2 and 4 in this class
+    FUNCTION 3 pg_catalog.vector_spherical_distance(vector, vector),
+    FUNCTION 4 pg_catalog.vector_norm(vector);
+
+DROP OPERATOR FAMILY IF EXISTS pg_catalog.vector_l2_ops USING hnsw CASCADE; -- hnsw operator classes for vector start here (none is marked DEFAULT)
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8371;
+CREATE OPERATOR FAMILY pg_catalog.vector_l2_ops USING hnsw;
+
+DROP OPERATOR CLASS IF EXISTS pg_catalog.vector_l2_ops USING hnsw CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8900;
+CREATE OPERATOR CLASS pg_catalog.vector_l2_ops
+    FOR TYPE vector USING hnsw AS
+    OPERATOR 1 pg_catalog.<->(vector, vector) FOR ORDER BY float_ops,
+    FUNCTION 1 pg_catalog.vector_l2_squared_distance(vector, vector);
+
+DROP OPERATOR FAMILY IF EXISTS pg_catalog.vector_ip_ops USING hnsw CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8372;
+CREATE OPERATOR FAMILY pg_catalog.vector_ip_ops USING hnsw;
+
+DROP OPERATOR CLASS IF EXISTS pg_catalog.vector_ip_ops USING hnsw CASCADE;
+SET LOCAL
inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8999; -- NOTE(review): the sibling hnsw vector classes use 8900/8902/8903; confirm 8999 is the intended OID
+CREATE OPERATOR CLASS pg_catalog.vector_ip_ops
+    FOR TYPE vector USING hnsw AS
+    OPERATOR 1 pg_catalog.<#>(vector, vector) FOR ORDER BY float_ops,
+    FUNCTION 1 pg_catalog.vector_negative_inner_product(vector, vector),
+    FUNCTION 4 pg_catalog.vector_norm(vector);
+
+DROP OPERATOR FAMILY IF EXISTS pg_catalog.vector_cosine_ops USING hnsw CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8373;
+CREATE OPERATOR FAMILY pg_catalog.vector_cosine_ops USING hnsw;
+
+DROP OPERATOR CLASS IF EXISTS pg_catalog.vector_cosine_ops USING hnsw CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8902;
+CREATE OPERATOR CLASS pg_catalog.vector_cosine_ops
+    FOR TYPE vector USING hnsw AS
+    OPERATOR 1 pg_catalog.<=>(vector, vector) FOR ORDER BY float_ops,
+    FUNCTION 1 pg_catalog.vector_negative_inner_product(vector, vector),
+    FUNCTION 2 pg_catalog.vector_norm(vector),
+    FUNCTION 4 pg_catalog.vector_norm(vector);
+
+DROP OPERATOR FAMILY IF EXISTS pg_catalog.vector_l1_ops USING hnsw CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8374;
+CREATE OPERATOR FAMILY pg_catalog.vector_l1_ops USING hnsw;
+
+DROP OPERATOR CLASS IF EXISTS pg_catalog.vector_l1_ops USING hnsw CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8903;
+CREATE OPERATOR CLASS pg_catalog.vector_l1_ops
+    FOR TYPE vector USING hnsw AS
+    OPERATOR 1 pg_catalog.<+>(vector, vector) FOR ORDER BY float_ops,
+    FUNCTION 1 pg_catalog.l1_distance(vector, vector);
+
+DROP OPERATOR FAMILY IF EXISTS pg_catalog.bit_hamming_ops USING ivfflat CASCADE; -- operator classes for the bit type start here
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8394;
+CREATE OPERATOR FAMILY pg_catalog.bit_hamming_ops USING ivfflat;
+
+DROP OPERATOR CLASS IF EXISTS pg_catalog.bit_hamming_ops USING ivfflat CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8923;
+CREATE OPERATOR CLASS pg_catalog.bit_hamming_ops
+    FOR TYPE
bit USING ivfflat AS
+    OPERATOR 1 pg_catalog.<~>(bit, bit) FOR ORDER BY float_ops,
+    FUNCTION 1 pg_catalog.hamming_distance(bit, bit), -- hamming_distance is registered under both support numbers 1 and 3
+    FUNCTION 3 pg_catalog.hamming_distance(bit, bit),
+    FUNCTION 5 pg_catalog.ivfflat_bit_support(internal);
+
+DROP OPERATOR FAMILY IF EXISTS pg_catalog.bit_hamming_ops USING hnsw CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8380;
+CREATE OPERATOR FAMILY pg_catalog.bit_hamming_ops USING hnsw;
+
+DROP OPERATOR CLASS IF EXISTS pg_catalog.bit_hamming_ops USING hnsw CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8909;
+CREATE OPERATOR CLASS pg_catalog.bit_hamming_ops
+    FOR TYPE bit USING hnsw AS
+    OPERATOR 1 pg_catalog.<~>(bit, bit) FOR ORDER BY float_ops,
+    FUNCTION 1 pg_catalog.hamming_distance(bit, bit),
+    FUNCTION 3 pg_catalog.hnsw_bit_support(internal);
+
+DROP OPERATOR FAMILY IF EXISTS pg_catalog.bit_jaccard_ops USING hnsw CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8379;
+CREATE OPERATOR FAMILY pg_catalog.bit_jaccard_ops USING hnsw;
+
+DROP OPERATOR CLASS IF EXISTS pg_catalog.bit_jaccard_ops USING hnsw CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8908;
+CREATE OPERATOR CLASS pg_catalog.bit_jaccard_ops
+    FOR TYPE bit USING hnsw AS
+    OPERATOR 1 pg_catalog.<%>(bit, bit) FOR ORDER BY float_ops,
+    FUNCTION 1 pg_catalog.jaccard_distance(bit, bit),
+    FUNCTION 3 pg_catalog.hnsw_bit_support(internal);
+
+DROP OPERATOR FAMILY IF EXISTS pg_catalog.sparsevec_ops USING btree CASCADE; -- sparsevec operator classes start here; btree class is the DEFAULT for the type
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8397;
+CREATE OPERATOR FAMILY pg_catalog.sparsevec_ops USING btree;
+
+DROP OPERATOR CLASS IF EXISTS pg_catalog.sparsevec_ops USING btree CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8979;
+CREATE OPERATOR CLASS pg_catalog.sparsevec_ops
+    DEFAULT FOR TYPE sparsevec USING btree AS
+    OPERATOR 1 pg_catalog.<(sparsevec, sparsevec),
+    OPERATOR 2 pg_catalog.<=(sparsevec,
sparsevec),
+    OPERATOR 3 pg_catalog.=(sparsevec, sparsevec),
+    OPERATOR 4 pg_catalog.>=(sparsevec, sparsevec),
+    OPERATOR 5 pg_catalog.>(sparsevec, sparsevec),
+    FUNCTION 1 pg_catalog.sparsevec_cmp(sparsevec, sparsevec);
+
+DROP OPERATOR FAMILY IF EXISTS pg_catalog.sparsevec_ops USING ubtree CASCADE; -- same strategies and support function as the btree class, registered for ubtree
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8376;
+CREATE OPERATOR FAMILY pg_catalog.sparsevec_ops USING ubtree;
+
+DROP OPERATOR CLASS IF EXISTS pg_catalog.sparsevec_ops USING ubtree CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8952;
+CREATE OPERATOR CLASS pg_catalog.sparsevec_ops DEFAULT
+    FOR TYPE sparsevec USING ubtree AS
+    OPERATOR 1 pg_catalog.<(sparsevec, sparsevec),
+    OPERATOR 2 pg_catalog.<=(sparsevec, sparsevec),
+    OPERATOR 3 pg_catalog.=(sparsevec, sparsevec),
+    OPERATOR 4 pg_catalog.>=(sparsevec, sparsevec),
+    OPERATOR 5 pg_catalog.>(sparsevec, sparsevec),
+    FUNCTION 1 pg_catalog.sparsevec_cmp(sparsevec, sparsevec); -- schema-qualified like every other support function in this script
+
+DROP OPERATOR FAMILY IF EXISTS pg_catalog.sparsevec_l2_ops USING hnsw CASCADE; -- hnsw classes for sparsevec: each also registers hnsw_sparsevec_support as FUNCTION 3
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8381;
+CREATE OPERATOR FAMILY pg_catalog.sparsevec_l2_ops USING hnsw;
+
+DROP OPERATOR CLASS IF EXISTS pg_catalog.sparsevec_l2_ops USING hnsw CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8910;
+CREATE OPERATOR CLASS pg_catalog.sparsevec_l2_ops
+    FOR TYPE sparsevec USING hnsw AS
+    OPERATOR 1 pg_catalog.<->(sparsevec, sparsevec) FOR ORDER BY float_ops,
+    FUNCTION 1 pg_catalog.sparsevec_l2_squared_distance(sparsevec, sparsevec),
+    FUNCTION 3 pg_catalog.hnsw_sparsevec_support(internal);
+
+DROP OPERATOR FAMILY IF EXISTS pg_catalog.sparsevec_ip_ops USING hnsw CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8382;
+CREATE OPERATOR FAMILY pg_catalog.sparsevec_ip_ops USING hnsw;
+
+DROP OPERATOR CLASS IF EXISTS pg_catalog.sparsevec_ip_ops USING hnsw CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL,
8911;
+CREATE OPERATOR CLASS pg_catalog.sparsevec_ip_ops
+    FOR TYPE sparsevec USING hnsw AS
+    OPERATOR 1 pg_catalog.<#>(sparsevec, sparsevec) FOR ORDER BY float_ops,
+    FUNCTION 1 pg_catalog.sparsevec_negative_inner_product(sparsevec, sparsevec),
+    FUNCTION 3 pg_catalog.hnsw_sparsevec_support(internal);
+
+DROP OPERATOR FAMILY IF EXISTS pg_catalog.sparsevec_cosine_ops USING hnsw CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8383;
+CREATE OPERATOR FAMILY pg_catalog.sparsevec_cosine_ops USING hnsw;
+
+DROP OPERATOR CLASS IF EXISTS pg_catalog.sparsevec_cosine_ops USING hnsw CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8912;
+CREATE OPERATOR CLASS pg_catalog.sparsevec_cosine_ops
+    FOR TYPE sparsevec USING hnsw AS
+    OPERATOR 1 pg_catalog.<=>(sparsevec, sparsevec) FOR ORDER BY float_ops,
+    FUNCTION 1 pg_catalog.sparsevec_negative_inner_product(sparsevec, sparsevec),
+    FUNCTION 2 pg_catalog.l2_norm(sparsevec), -- the cosine class additionally registers the norm function as FUNCTION 2
+    FUNCTION 3 pg_catalog.hnsw_sparsevec_support(internal);
+
+DROP OPERATOR FAMILY IF EXISTS pg_catalog.sparsevec_l1_ops USING hnsw CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8384;
+CREATE OPERATOR FAMILY pg_catalog.sparsevec_l1_ops USING hnsw;
+
+DROP OPERATOR CLASS IF EXISTS pg_catalog.sparsevec_l1_ops USING hnsw CASCADE;
+SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8913;
+CREATE OPERATOR CLASS pg_catalog.sparsevec_l1_ops
+    FOR TYPE sparsevec USING hnsw AS
+    OPERATOR 1 pg_catalog.<+>(sparsevec, sparsevec) FOR ORDER BY float_ops,
+    FUNCTION 1 pg_catalog.l1_distance(sparsevec, sparsevec),
+    FUNCTION 3 pg_catalog.hnsw_sparsevec_support(internal);
diff --git a/src/include/catalog/upgrade_sql/upgrade_catalog_otherdb/upgrade-post_catalog_otherdb_93_019.sql b/src/include/catalog/upgrade_sql/upgrade_catalog_otherdb/upgrade-post_catalog_otherdb_93_019.sql
new file mode 100644
index 0000000000000000000000000000000000000000..c0036af1bca1ef57f48767f5868f6e1a61cba93b
---
/dev/null +++ b/src/include/catalog/upgrade_sql/upgrade_catalog_otherdb/upgrade-post_catalog_otherdb_93_019.sql @@ -0,0 +1,1437 @@ +DROP TYPE IF EXISTS pg_catalog.vector CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_TYPE, 8305, 8308, b; +CREATE TYPE pg_catalog.vector; + +DROP FUNCTION IF EXISTS pg_catalog.vector_in(cstring, oid, int4) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8423; +CREATE FUNCTION pg_catalog.vector_in(cstring, oid, int4) +RETURNS vector +AS 'vector_in' +LANGUAGE INTERNAL +STABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_out(vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8424; +CREATE FUNCTION pg_catalog.vector_out(vector) +RETURNS cstring +AS 'vector_out' +LANGUAGE INTERNAL +STABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_typmod_in(_cstring) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8425; +CREATE FUNCTION pg_catalog.vector_typmod_in(_cstring) +RETURNS int4 +AS 'vector_typmod_in' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_recv(internal, oid, int4) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8426; +CREATE FUNCTION pg_catalog.vector_recv(internal, oid, int4) +RETURNS vector +AS 'vector_recv' +LANGUAGE INTERNAL +STABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_send(vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8427; +CREATE FUNCTION pg_catalog.vector_send(vector) +RETURNS bytea +AS 'vector_send' +LANGUAGE INTERNAL +STABLE STRICT; + +CREATE TYPE pg_catalog.vector ( + INPUT = vector_in, + OUTPUT = vector_out, + TYPMOD_IN = vector_typmod_in, + RECEIVE = vector_recv, + SEND = vector_send, + STORAGE = external +); + +DROP FUNCTION IF EXISTS pg_catalog.l2_distance(vector, vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8433; +CREATE FUNCTION pg_catalog.l2_distance(vector, vector) +RETURNS float8 
+AS 'l2_distance' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.inner_product(vector, vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8437; +CREATE FUNCTION pg_catalog.inner_product(vector, vector) +RETURNS float8 +AS 'inner_product' +LANGUAGE INTERNAL +STABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.cosine_distance(vector, vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8435; +CREATE FUNCTION pg_catalog.cosine_distance(vector, vector) +RETURNS float8 +AS 'cosine_distance' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.l1_distance(vector, vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8436; +CREATE FUNCTION pg_catalog.l1_distance(vector, vector) +RETURNS float8 +AS 'l1_distance' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_dims(vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8428; +CREATE FUNCTION pg_catalog.vector_dims(vector) +RETURNS int4 +AS 'vector_dims' +LANGUAGE INTERNAL +STABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_norm(vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8438; +CREATE FUNCTION pg_catalog.vector_norm(vector) +RETURNS float8 +AS 'vector_norm' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.l2_normalize(vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8200; +CREATE FUNCTION pg_catalog.l2_normalize(vector) +RETURNS vector +AS 'l2_normalize' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.binary_quantize(vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8201; +CREATE FUNCTION pg_catalog.binary_quantize(vector) +RETURNS varbit +AS 'binary_quantize' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.subvector(vector, int, int) CASCADE; +SET LOCAL 
inplace_upgrade_next_system_object_oids=IUO_PROC, 8202; +CREATE FUNCTION pg_catalog.subvector(vector, int, int) +RETURNS vector +AS 'subvector' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_add(vector, vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8439; +CREATE FUNCTION pg_catalog.vector_add(vector, vector) +RETURNS vector +AS 'vector_add' +LANGUAGE INTERNAL +STABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_sub(vector, vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8440; +CREATE FUNCTION pg_catalog.vector_sub(vector, vector) +RETURNS vector +AS 'vector_sub' +LANGUAGE INTERNAL +STABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_mul(vector, vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8203; +CREATE FUNCTION pg_catalog.vector_mul(vector, vector) +RETURNS vector +AS 'vector_mul' +LANGUAGE INTERNAL +STABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_concat(vector, vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8204; +CREATE FUNCTION pg_catalog.vector_concat(vector, vector) +RETURNS vector +AS 'vector_concat' +LANGUAGE INTERNAL +STABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_lt(vector, vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8441; +CREATE FUNCTION pg_catalog.vector_lt(vector, vector) +RETURNS bool +AS 'vector_lt' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_le(vector, vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8442; +CREATE FUNCTION pg_catalog.vector_le(vector, vector) +RETURNS bool +AS 'vector_le' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_eq(vector, vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8443; +CREATE FUNCTION pg_catalog.vector_eq(vector, vector) +RETURNS bool +AS 'vector_eq' 
+LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_ne(vector, vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8444; +CREATE FUNCTION pg_catalog.vector_ne(vector, vector) +RETURNS bool +AS 'vector_ne' +LANGUAGE INTERNAL +STABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_ge(vector, vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8445; +CREATE FUNCTION pg_catalog.vector_ge(vector, vector) +RETURNS bool +AS 'vector_ge' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_gt(vector, vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8446; +CREATE FUNCTION pg_catalog.vector_gt(vector, vector) +RETURNS bool +AS 'vector_gt' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_cmp(vector, vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8450; +CREATE FUNCTION pg_catalog.vector_cmp(vector, vector) +RETURNS int4 +AS 'vector_cmp' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_l2_squared_distance(vector, vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8431; +CREATE FUNCTION pg_catalog.vector_l2_squared_distance(vector, vector) +RETURNS float8 +AS 'vector_l2_squared_distance' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_negative_inner_product(vector, vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8434; +CREATE FUNCTION pg_catalog.vector_negative_inner_product(vector, vector) +RETURNS float8 +AS 'vector_negative_inner_product' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_spherical_distance(vector, vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8432; +CREATE FUNCTION pg_catalog.vector_spherical_distance(vector, vector) +RETURNS float8 +AS 'vector_spherical_distance' 
+LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_accum(_float8, vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8447; +CREATE FUNCTION pg_catalog.vector_accum(_float8, vector) +RETURNS _float8 +AS 'vector_accum' +LANGUAGE INTERNAL +STABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_avg(_float8) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8449; +CREATE FUNCTION pg_catalog.vector_avg(_float8) +RETURNS vector +AS 'vector_avg' +LANGUAGE INTERNAL +STABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_combine(_float8, _float8) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8448; +CREATE FUNCTION pg_catalog.vector_combine(_float8, _float8) +RETURNS _float8 +AS 'vector_combine' +LANGUAGE INTERNAL +STABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector(vector, int4, boolean) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8214; +CREATE FUNCTION pg_catalog.vector(vector, int4, boolean) +RETURNS vector +AS 'vector' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.array_to_vector(_int4, int4, boolean) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8215; +CREATE FUNCTION pg_catalog.array_to_vector(_int4, int4, boolean) +RETURNS vector +AS 'array_to_vector' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.array_to_vector(_float4, int4, boolean) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8216; +CREATE FUNCTION pg_catalog.array_to_vector(_float4, int4, boolean) +RETURNS vector +AS 'array_to_vector' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.array_to_vector(_float8, int4, boolean) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8217; +CREATE FUNCTION pg_catalog.array_to_vector(_float8, int4, boolean) +RETURNS vector +AS 'array_to_vector' +LANGUAGE INTERNAL +IMMUTABLE 
STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.array_to_vector(_numeric, int4, boolean) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8218; +CREATE FUNCTION pg_catalog.array_to_vector(_numeric, int4, boolean) +RETURNS vector +AS 'array_to_vector' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_to_float4(vector, int4, boolean) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8219; +CREATE FUNCTION pg_catalog.vector_to_float4(vector, int4, boolean) +RETURNS _float4 +AS 'vector_to_float4' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_to_int4(vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8212; +CREATE FUNCTION pg_catalog.vector_to_int4(vector) +RETURNS _int4 +AS 'vector_to_int4' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_to_float8(vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8213; +CREATE FUNCTION pg_catalog.vector_to_float8(vector) +RETURNS _float8 +AS 'vector_to_float8' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_to_numeric(vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8221; +CREATE FUNCTION pg_catalog.vector_to_numeric(vector) +RETURNS _numeric +AS 'vector_to_numeric' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_to_text(vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8222; +CREATE FUNCTION pg_catalog.vector_to_text(vector) +RETURNS _text +AS 'vector_to_text' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_to_varchar(vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8223; +CREATE FUNCTION pg_catalog.vector_to_varchar(vector) +RETURNS _varchar +AS 'vector_to_varchar' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS 
pg_catalog.ivfflatbuild(internal, internal, internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8417; +CREATE FUNCTION pg_catalog.ivfflatbuild(internal, internal, internal) +RETURNS internal +AS 'ivfflatbuild' +LANGUAGE INTERNAL +STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.ivfflatbuildempty(internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8418; +CREATE FUNCTION pg_catalog.ivfflatbuildempty(internal) +RETURNS void +AS 'ivfflatbuildempty' +LANGUAGE INTERNAL +STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.ivfflatinsert(internal, internal, internal, internal, internal, internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8412; +CREATE FUNCTION pg_catalog.ivfflatinsert(internal, internal, internal, internal, internal, internal) +RETURNS boolean +AS 'ivfflatinsert' +LANGUAGE INTERNAL +STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.ivfflatbulkdelete(internal, internal, internal, internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8419; +CREATE FUNCTION pg_catalog.ivfflatbulkdelete(internal, internal, internal, internal) +RETURNS internal +AS 'ivfflatbulkdelete' +LANGUAGE INTERNAL +STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.ivfflatvacuumcleanup(internal, internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8420; +CREATE FUNCTION pg_catalog.ivfflatvacuumcleanup(internal, internal) +RETURNS internal +AS 'ivfflatvacuumcleanup' +LANGUAGE INTERNAL +STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.ivfflatcostestimate(internal, internal, internal, internal, internal, internal, internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8421; +CREATE FUNCTION pg_catalog.ivfflatcostestimate(internal, internal, internal, internal, internal, internal, internal) +RETURNS void +AS 'ivfflatcostestimate' +LANGUAGE INTERNAL +STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.ivfflatoptions(internal, internal) CASCADE; +SET LOCAL 
inplace_upgrade_next_system_object_oids=IUO_PROC, 8422; +CREATE FUNCTION pg_catalog.ivfflatoptions(internal, internal) +RETURNS internal +AS 'ivfflatoptions' +LANGUAGE INTERNAL +STABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.ivfflatvalidate(internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8205; +CREATE FUNCTION pg_catalog.ivfflatvalidate(internal) +RETURNS boolean +AS 'ivfflatvalidate' +LANGUAGE INTERNAL +STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.ivfflatbeginscan(internal, internal, internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8413; +CREATE FUNCTION pg_catalog.ivfflatbeginscan(internal, internal, internal) +RETURNS internal +AS 'ivfflatbeginscan' +LANGUAGE INTERNAL +STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.ivfflatrescan(internal, internal, internal, internal, internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8415; +CREATE FUNCTION pg_catalog.ivfflatrescan(internal, internal, internal, internal, internal) +RETURNS void +AS 'ivfflatrescan' +LANGUAGE INTERNAL +STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.ivfflatgettuple(internal, internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8414; +CREATE FUNCTION pg_catalog.ivfflatgettuple(internal, internal) +RETURNS boolean +AS 'ivfflatgettuple' +LANGUAGE INTERNAL +STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.ivfflatendscan(internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8416; +CREATE FUNCTION pg_catalog.ivfflatendscan(internal) +RETURNS void +AS 'ivfflatendscan' +LANGUAGE INTERNAL +STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.ivfflathandler(internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8206; +CREATE FUNCTION pg_catalog.ivfflathandler(internal) +RETURNS internal +AS 'ivfflathandler' +LANGUAGE INTERNAL +STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.hnswbuild(internal, internal, internal) CASCADE; +SET LOCAL 
inplace_upgrade_next_system_object_oids=IUO_PROC, 8406; +CREATE FUNCTION pg_catalog.hnswbuild(internal, internal, internal) +RETURNS internal +AS 'hnswbuild' +LANGUAGE INTERNAL +STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.hnswbuildempty(internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8407; +CREATE FUNCTION pg_catalog.hnswbuildempty(internal) +RETURNS void +AS 'hnswbuildempty' +LANGUAGE INTERNAL +STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.hnswinsert(internal, internal, internal, internal, internal, internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8401; +CREATE FUNCTION pg_catalog.hnswinsert(internal, internal, internal, internal, internal, internal) +RETURNS boolean +AS 'hnswinsert' +LANGUAGE INTERNAL +STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.hnswbulkdelete(internal, internal, internal, internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8408; +CREATE FUNCTION pg_catalog.hnswbulkdelete(internal, internal, internal, internal) +RETURNS internal +AS 'hnswbulkdelete' +LANGUAGE INTERNAL +STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.hnswvacuumcleanup(internal, internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8409; +CREATE FUNCTION pg_catalog.hnswvacuumcleanup(internal, internal) +RETURNS internal +AS 'hnswvacuumcleanup' +LANGUAGE INTERNAL +STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.hnswcostestimate(internal, internal, internal, internal, internal, internal, internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8410; +CREATE FUNCTION pg_catalog.hnswcostestimate(internal, internal, internal, internal, internal, internal, internal) +RETURNS void +AS 'hnswcostestimate' +LANGUAGE INTERNAL +STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.hnswoptions(internal, internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8411; +CREATE FUNCTION pg_catalog.hnswoptions(internal, internal) +RETURNS internal 
+AS 'hnswoptions' +LANGUAGE INTERNAL +STABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.hnswvalidate(internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8207; +CREATE FUNCTION pg_catalog.hnswvalidate(internal) +RETURNS boolean +AS 'hnswvalidate' +LANGUAGE INTERNAL +STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.hnswbeginscan(internal, internal, internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8402; +CREATE FUNCTION pg_catalog.hnswbeginscan(internal, internal, internal) +RETURNS internal +AS 'hnswbeginscan' +LANGUAGE INTERNAL +STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.hnswrescan(internal, internal, internal, internal, internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8404; +CREATE FUNCTION pg_catalog.hnswrescan(internal, internal, internal, internal, internal) +RETURNS void +AS 'hnswrescan' +LANGUAGE INTERNAL +STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.hnswdelete(internal, internal, internal, internal, internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8429; +CREATE FUNCTION pg_catalog.hnswdelete(internal, internal, internal, internal, internal) +RETURNS boolean +AS 'hnswdelete' +LANGUAGE INTERNAL +STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.hnswgettuple(internal, internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8403; +CREATE FUNCTION pg_catalog.hnswgettuple(internal, internal) +RETURNS boolean +AS 'hnswgettuple' +LANGUAGE INTERNAL +STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.hnswendscan(internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8405; +CREATE FUNCTION pg_catalog.hnswendscan(internal) +RETURNS void +AS 'hnswendscan' +LANGUAGE INTERNAL +STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.hnswhandler(internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8208; +CREATE FUNCTION pg_catalog.hnswhandler(internal) +RETURNS internal +AS 'hnswhandler' +LANGUAGE 
INTERNAL +STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.ivfflat_bit_support(internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8210; +CREATE FUNCTION pg_catalog.ivfflat_bit_support(internal) +RETURNS internal +AS 'ivfflat_bit_support' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.hnsw_bit_support(internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8209; +CREATE FUNCTION pg_catalog.hnsw_bit_support(internal) +RETURNS internal +AS 'hnsw_bit_support' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.hnsw_sparsevec_support(internal) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8479; +CREATE FUNCTION pg_catalog.hnsw_sparsevec_support(internal) +RETURNS internal +AS 'hnsw_sparsevec_support' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.hamming_distance(bit, bit) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8469; +CREATE FUNCTION pg_catalog.hamming_distance(bit, bit) +RETURNS float8 +AS 'hamming_distance' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.jaccard_distance(bit, bit) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8468; +CREATE FUNCTION pg_catalog.jaccard_distance(bit, bit) +RETURNS float8 +AS 'jaccard_distance' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP TYPE IF EXISTS pg_catalog.sparsevec CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_TYPE, 8307, 8310, b; +CREATE TYPE pg_catalog.sparsevec; + +DROP FUNCTION IF EXISTS pg_catalog.sparsevec_in(cstring, oid, int4) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8458; +CREATE FUNCTION pg_catalog.sparsevec_in(cstring, oid, int4) +RETURNS sparsevec +AS 'sparsevec_in' +LANGUAGE INTERNAL +STABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.sparsevec_out(sparsevec) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8459; 
+/* sparsevec output, typmod, receive and send functions, then the full sparsevec type definition, the sparsevec function set, and the first batch of function descriptions. Each function is registered by three consecutive statements: DROP FUNCTION IF EXISTS ... CASCADE, then SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, <oid>, then CREATE FUNCTION ... LANGUAGE INTERNAL. NOTE(review): the SET LOCAL presumably fixes the OID given to the object created next, so it has to stay directly ahead of its CREATE - confirm. */CREATE FUNCTION pg_catalog.sparsevec_out(sparsevec) +RETURNS cstring +AS 'sparsevec_out' +LANGUAGE INTERNAL +STABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.sparsevec_typmod_in(_cstring) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8460; +CREATE FUNCTION pg_catalog.sparsevec_typmod_in(_cstring) +RETURNS int4 +AS 'sparsevec_typmod_in' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.sparsevec_recv(internal, oid, int4) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8461; +CREATE FUNCTION pg_catalog.sparsevec_recv(internal, oid, int4) +RETURNS sparsevec +AS 'sparsevec_recv' +LANGUAGE INTERNAL +STABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.sparsevec_send(sparsevec) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8462; +CREATE FUNCTION pg_catalog.sparsevec_send(sparsevec) +RETURNS bytea +AS 'sparsevec_send' +LANGUAGE INTERNAL +STABLE STRICT; +/* Full definition of sparsevec: names the input, output, typmod-in, receive and send functions and sets STORAGE = external. This statement has no DROP or SET LOCAL of its own here - a shell CREATE TYPE pg_catalog.sparsevec with an IUO_TYPE OID assignment appears earlier in this script. */ +CREATE TYPE pg_catalog.sparsevec ( + INPUT = sparsevec_in, + OUTPUT = sparsevec_out, + TYPMOD_IN = sparsevec_typmod_in, + RECEIVE = sparsevec_recv, + SEND = sparsevec_send, + STORAGE = external +); +/* Distance functions for sparsevec, all returning float8. The SQL names (l2_distance, inner_product, cosine_distance, l1_distance) are also defined for vector earlier in this script - the AS clause binds each sparsevec overload to a sparsevec_-prefixed internal symbol. NOTE(review): inner_product(sparsevec, sparsevec) is declared STABLE while l2_distance, cosine_distance and l1_distance are IMMUTABLE - confirm this difference is intentional. */ +DROP FUNCTION IF EXISTS pg_catalog.l2_distance(sparsevec, sparsevec) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8465; +CREATE FUNCTION pg_catalog.l2_distance(sparsevec, sparsevec) +RETURNS float8 +AS 'sparsevec_l2_distance' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.inner_product(sparsevec, sparsevec) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8471; +CREATE FUNCTION pg_catalog.inner_product(sparsevec, sparsevec) +RETURNS float8 +AS 'sparsevec_inner_product' +LANGUAGE INTERNAL +STABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.cosine_distance(sparsevec, sparsevec) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8466; +CREATE FUNCTION pg_catalog.cosine_distance(sparsevec, sparsevec) +RETURNS float8 +AS 
'sparsevec_cosine_distance' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.l1_distance(sparsevec, sparsevec) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8467; +CREATE FUNCTION pg_catalog.l1_distance(sparsevec, sparsevec) +RETURNS float8 +AS 'sparsevec_l1_distance' +LANGUAGE INTERNAL +IMMUTABLE STRICT; +/* l2_norm returns float8 and l2_normalize returns sparsevec - both are sparsevec overloads bound to sparsevec_-prefixed internal symbols. */ +DROP FUNCTION IF EXISTS pg_catalog.l2_norm(sparsevec) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8478; +CREATE FUNCTION pg_catalog.l2_norm(sparsevec) +RETURNS float8 +AS 'sparsevec_l2_norm' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.l2_normalize(sparsevec) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8211; +CREATE FUNCTION pg_catalog.l2_normalize(sparsevec) +RETURNS sparsevec +AS 'sparsevec_l2_normalize' +LANGUAGE INTERNAL +IMMUTABLE STRICT; +/* Comparison functions sparsevec_lt, _le, _eq, _ne, _ge and _gt, each taking two sparsevec values and returning bool. NOTE(review): sparsevec_ne is declared STABLE while the other five are IMMUTABLE - confirm this difference is intentional. */ +DROP FUNCTION IF EXISTS pg_catalog.sparsevec_lt(sparsevec, sparsevec) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8472; +CREATE FUNCTION pg_catalog.sparsevec_lt(sparsevec, sparsevec) +RETURNS bool +AS 'sparsevec_lt' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.sparsevec_le(sparsevec, sparsevec) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8473; +CREATE FUNCTION pg_catalog.sparsevec_le(sparsevec, sparsevec) +RETURNS bool +AS 'sparsevec_le' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.sparsevec_eq(sparsevec, sparsevec) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8474; +CREATE FUNCTION pg_catalog.sparsevec_eq(sparsevec, sparsevec) +RETURNS bool +AS 'sparsevec_eq' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.sparsevec_ne(sparsevec, sparsevec) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8475; +CREATE FUNCTION pg_catalog.sparsevec_ne(sparsevec, sparsevec) +RETURNS bool +AS 'sparsevec_ne' +LANGUAGE 
INTERNAL +STABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.sparsevec_ge(sparsevec, sparsevec) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8476; +CREATE FUNCTION pg_catalog.sparsevec_ge(sparsevec, sparsevec) +RETURNS bool +AS 'sparsevec_ge' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.sparsevec_gt(sparsevec, sparsevec) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8477; +CREATE FUNCTION pg_catalog.sparsevec_gt(sparsevec, sparsevec) +RETURNS bool +AS 'sparsevec_gt' +LANGUAGE INTERNAL +IMMUTABLE STRICT; +/* sparsevec_cmp takes two sparsevec values and returns int4 rather than bool. */ +DROP FUNCTION IF EXISTS pg_catalog.sparsevec_cmp(sparsevec, sparsevec) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8464; +CREATE FUNCTION pg_catalog.sparsevec_cmp(sparsevec, sparsevec) +RETURNS int4 +AS 'sparsevec_cmp' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.sparsevec_l2_squared_distance(sparsevec, sparsevec) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8470; +CREATE FUNCTION pg_catalog.sparsevec_l2_squared_distance(sparsevec, sparsevec) +RETURNS float8 +AS 'sparsevec_l2_squared_distance' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.sparsevec_negative_inner_product(sparsevec, sparsevec) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8463; +CREATE FUNCTION pg_catalog.sparsevec_negative_inner_product(sparsevec, sparsevec) +RETURNS float8 +AS 'sparsevec_negative_inner_product' +LANGUAGE INTERNAL +IMMUTABLE STRICT; +/* Conversion functions with the signature (source, int4, boolean): sparsevec to sparsevec, vector to sparsevec, and sparsevec to vector. NOTE(review): this argument list matches the vector(vector, int4, boolean) function that a later CREATE CAST in this script uses, so these are presumably cast functions as well - the corresponding CREATE CAST statements are not in view, confirm. */ +DROP FUNCTION IF EXISTS pg_catalog.sparsevec(sparsevec, int4, boolean) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8228; +CREATE FUNCTION pg_catalog.sparsevec(sparsevec, int4, boolean) +RETURNS sparsevec +AS 'sparsevec' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.vector_to_sparsevec(vector, int4, boolean) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 
8229; +CREATE FUNCTION pg_catalog.vector_to_sparsevec(vector, int4, boolean) +RETURNS sparsevec +AS 'vector_to_sparsevec' +LANGUAGE INTERNAL +IMMUTABLE STRICT; + +DROP FUNCTION IF EXISTS pg_catalog.sparsevec_to_vector(sparsevec, int4, boolean) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8230; +CREATE FUNCTION pg_catalog.sparsevec_to_vector(sparsevec, int4, boolean) +RETURNS vector +AS 'sparsevec_to_vector' +LANGUAGE INTERNAL +IMMUTABLE STRICT; +/* Descriptions for the functions created by this script, in creation order. vector_in, vector_out, vector_recv and vector_send get the text I/O - every other function listed here gets the quoted literal NULL. NOTE(review): a quoted NULL is stored as four characters of text, whereas an unquoted NULL would remove the description - confirm the quoted form is intended. */ +COMMENT ON FUNCTION pg_catalog.vector_in(cstring, oid, int4) IS 'I/O'; +COMMENT ON FUNCTION pg_catalog.vector_out(vector) IS 'I/O'; +COMMENT ON FUNCTION pg_catalog.vector_typmod_in(_cstring) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.vector_recv(internal, oid, int4) IS 'I/O'; +COMMENT ON FUNCTION pg_catalog.vector_send(vector) IS 'I/O'; +COMMENT ON FUNCTION pg_catalog.l2_distance(vector, vector) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.inner_product(vector, vector) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.cosine_distance(vector, vector) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.l1_distance(vector, vector) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.vector_dims(vector) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.vector_norm(vector) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.l2_normalize(vector) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.binary_quantize(vector) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.subvector(vector, int, int) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.vector_add(vector, vector) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.vector_sub(vector, vector) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.vector_mul(vector, vector) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.vector_concat(vector, vector) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.vector_lt(vector, vector) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.vector_le(vector, vector) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.vector_eq(vector, vector) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.vector_ne(vector, vector) IS 'NULL'; 
+COMMENT ON FUNCTION pg_catalog.vector_ge(vector, vector) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.vector_gt(vector, vector) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.vector_cmp(vector, vector) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.vector_l2_squared_distance(vector, vector) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.vector_negative_inner_product(vector, vector) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.vector_spherical_distance(vector, vector) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.vector_accum(_float8, vector) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.vector_avg(_float8) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.vector_combine(_float8,_float8) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.vector(vector, int4, boolean) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.array_to_vector(_int4, int4, boolean) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.array_to_vector(_float4, int4, boolean) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.array_to_vector(_float8, int4, boolean) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.array_to_vector(_numeric, int4, boolean) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.vector_to_float4(vector, int4, boolean) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.vector_to_int4(vector) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.vector_to_float8(vector) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.vector_to_numeric(vector) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.vector_to_text(vector) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.vector_to_varchar(vector) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.ivfflatbuild(internal, internal, internal) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.ivfflatbuildempty(internal) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.ivfflatinsert(internal, internal, internal, internal, internal, internal) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.ivfflatbulkdelete(internal, internal, internal, internal) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.ivfflatvacuumcleanup(internal, internal) IS 'NULL'; +COMMENT ON FUNCTION 
pg_catalog.ivfflatcostestimate(internal, internal, internal, internal, internal, internal, internal) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.ivfflatoptions(internal, internal) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.ivfflatvalidate(internal) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.ivfflatbeginscan(internal, internal, internal) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.ivfflatrescan(internal, internal, internal, internal, internal) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.ivfflatgettuple(internal, internal) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.ivfflatendscan(internal) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.ivfflathandler(internal) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.hnswbuild(internal, internal, internal) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.hnswbuildempty(internal) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.hnswinsert(internal, internal, internal, internal, internal, internal) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.hnswbulkdelete(internal, internal, internal, internal) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.hnswvacuumcleanup(internal, internal) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.hnswcostestimate(internal, internal, internal, internal, internal, internal, internal) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.hnswoptions(internal, internal) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.hnswvalidate(internal) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.hnswbeginscan(internal, internal, internal) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.hnswrescan(internal, internal, internal, internal, internal) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.hnswdelete(internal, internal, internal, internal, internal) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.hnswgettuple(internal, internal) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.hnswendscan(internal) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.hnswhandler(internal) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.ivfflat_bit_support(internal) IS 'NULL'; +COMMENT ON FUNCTION 
pg_catalog.hnsw_bit_support(internal) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.hnsw_sparsevec_support(internal) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.hamming_distance(bit, bit) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.jaccard_distance(bit, bit) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.sparsevec_in(cstring, oid, int4) IS 'I/O'; +COMMENT ON FUNCTION pg_catalog.sparsevec_out(sparsevec) IS 'I/O'; +COMMENT ON FUNCTION pg_catalog.sparsevec_typmod_in(_cstring) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.sparsevec_recv(internal, oid, int4) IS 'I/O'; +COMMENT ON FUNCTION pg_catalog.sparsevec_send(sparsevec) IS 'I/O'; +COMMENT ON FUNCTION pg_catalog.l2_distance(sparsevec, sparsevec) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.inner_product(sparsevec, sparsevec) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.cosine_distance(sparsevec, sparsevec) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.l1_distance(sparsevec, sparsevec) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.l2_norm(sparsevec) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.l2_normalize(sparsevec) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.sparsevec_lt(sparsevec, sparsevec) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.sparsevec_le(sparsevec, sparsevec) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.sparsevec_eq(sparsevec, sparsevec) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.sparsevec_ne(sparsevec, sparsevec) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.sparsevec_ge(sparsevec, sparsevec) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.sparsevec_gt(sparsevec, sparsevec) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.sparsevec_cmp(sparsevec, sparsevec) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.sparsevec_l2_squared_distance(sparsevec, sparsevec) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.sparsevec_negative_inner_product(sparsevec, sparsevec) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.sparsevec(sparsevec, int4, boolean) IS 'NULL'; +COMMENT ON FUNCTION pg_catalog.vector_to_sparsevec(vector, int4, boolean) IS 'NULL'; +COMMENT ON FUNCTION 
pg_catalog.sparsevec_to_vector(sparsevec, int4, boolean) IS 'NULL'; + +drop aggregate if exists pg_catalog.avg(vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8241; +create aggregate pg_catalog.avg(vector) (SFUNC=vector_accum, STYPE= _float8, finalfunc = vector_avg,CFUNC = vector_combine,INITCOND = '{0}', INITCOLLECT='{0}'); +/* catalog description of the avg(vector) aggregate created above */ +COMMENT ON aggregate pg_catalog.avg(vector) IS 'the average (arithmetic mean) of all vector input values'; + +drop aggregate if exists pg_catalog.sum(vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_PROC, 8242; +create aggregate pg_catalog.sum(vector) (SFUNC=vector_add, STYPE= vector, CFUNC = vector_add); +/* catalog description of the sum(vector) aggregate created above */ +COMMENT ON aggregate pg_catalog.sum(vector) IS 'sum of all vector input values'; + +DROP CAST IF EXISTS (vector AS vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8299; +CREATE CAST (vector AS vector) + WITH FUNCTION vector(vector, int4, boolean) AS IMPLICIT; + +DROP CAST IF EXISTS (vector AS _varchar) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8289; +CREATE CAST (vector AS _varchar) + WITH FUNCTION vector_to_varchar(vector) AS IMPLICIT; + +DROP CAST IF EXISTS (vector AS _text) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8290; +CREATE CAST (vector AS _text) + WITH FUNCTION vector_to_text(vector) AS IMPLICIT; + +DROP CAST IF EXISTS (vector AS _numeric) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8291; +CREATE CAST (vector AS _numeric) + WITH FUNCTION vector_to_numeric(vector) AS IMPLICIT; + +DROP CAST IF EXISTS (vector AS _float8) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8292; +CREATE CAST (vector AS _float8) + WITH FUNCTION vector_to_float8(vector) AS IMPLICIT; + +DROP CAST IF EXISTS (vector AS _int4) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8293; +CREATE CAST (vector AS _int4) + WITH 
FUNCTION vector_to_int4(vector) AS IMPLICIT; + +DROP CAST IF EXISTS (vector AS _float4) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8294; +CREATE CAST (vector AS _float4) + WITH FUNCTION vector_to_float4(vector, int4, boolean) AS IMPLICIT; + +DROP CAST IF EXISTS (_int4 AS vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8298; +CREATE CAST (_int4 AS vector) + WITH FUNCTION array_to_vector(_int4, int4, boolean) AS ASSIGNMENT; + +DROP CAST IF EXISTS (_float4 AS vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8297; +CREATE CAST (_float4 AS vector) + WITH FUNCTION array_to_vector(_float4, int4, boolean) AS ASSIGNMENT; + +DROP CAST IF EXISTS (_float8 AS vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8296; +CREATE CAST (_float8 AS vector) + WITH FUNCTION array_to_vector(_float8, int4, boolean) AS ASSIGNMENT; + +DROP CAST IF EXISTS (_numeric AS vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8295; +CREATE CAST (_numeric AS vector) + WITH FUNCTION array_to_vector(_numeric, int4, boolean) AS ASSIGNMENT; + +DROP CAST IF EXISTS (sparsevec AS sparsevec) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8285; +CREATE CAST (sparsevec AS sparsevec) + WITH FUNCTION sparsevec(sparsevec, int4, boolean) AS IMPLICIT; + +DROP CAST IF EXISTS (sparsevec AS vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8283; +CREATE CAST (sparsevec AS vector) + WITH FUNCTION sparsevec_to_vector(sparsevec, int4, boolean) AS ASSIGNMENT; + +DROP CAST IF EXISTS (vector AS sparsevec) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8284; +CREATE CAST (vector AS sparsevec) + WITH FUNCTION vector_to_sparsevec(vector, int4, boolean) AS IMPLICIT; + +DROP ACCESS METHOD IF EXISTS ivfflat CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8301; +CREATE 
ACCESS METHOD ivfflat TYPE INDEX HANDLER ivfflathandler; + +DROP ACCESS METHOD IF EXISTS hnsw CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8300; +CREATE ACCESS METHOD hnsw TYPE INDEX HANDLER hnswhandler; + +SET search_path = 'pg_catalog'; + +DROP OPERATOR IF EXISTS pg_catalog.<->(vector, vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8311; +CREATE OPERATOR pg_catalog.<->( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = pg_catalog.l2_distance, + COMMUTATOR = '<->' +); + +DROP OPERATOR IF EXISTS pg_catalog.<#>(vector, vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8312; +CREATE OPERATOR pg_catalog.<#>( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = pg_catalog.vector_negative_inner_product, + COMMUTATOR = '<#>' +); + +DROP OPERATOR IF EXISTS pg_catalog.<=>(vector, vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8313; +CREATE OPERATOR pg_catalog.<=>( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = pg_catalog.cosine_distance, + COMMUTATOR = '<=>' +); + +DROP OPERATOR IF EXISTS pg_catalog.<+>(vector, vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8314; +CREATE OPERATOR pg_catalog.<+>( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = pg_catalog.l1_distance, + COMMUTATOR = '<+>' +); + +DROP OPERATOR IF EXISTS pg_catalog.+(vector, vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8325; +CREATE OPERATOR pg_catalog.+( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = pg_catalog.vector_add, + COMMUTATOR = '+' +); + +DROP OPERATOR IF EXISTS pg_catalog.-(vector, vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8326; +/* no COMMUTATOR on subtraction: (a - b) is not (b - a), so the operator must not be declared self-commutative */ +CREATE OPERATOR pg_catalog.-( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = pg_catalog.vector_sub +); + +DROP OPERATOR IF EXISTS pg_catalog.*(vector, vector) CASCADE; +SET LOCAL 
inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8349; +CREATE OPERATOR pg_catalog.*( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = pg_catalog.vector_mul, + COMMUTATOR = '*' +); + +DROP OPERATOR IF EXISTS pg_catalog.||(vector, vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8339; +CREATE OPERATOR pg_catalog.||( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = pg_catalog.vector_concat +); + +DROP OPERATOR IF EXISTS pg_catalog.<(vector, vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8327; +CREATE OPERATOR pg_catalog.<( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = pg_catalog.vector_lt, + RESTRICT = scalarltsel, JOIN = scalarltjoinsel +); + +DROP OPERATOR IF EXISTS pg_catalog.<=(vector, vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8328; +CREATE OPERATOR pg_catalog.<=( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = pg_catalog.vector_le, + RESTRICT = scalarltsel, JOIN = scalarltjoinsel +); + +DROP OPERATOR IF EXISTS pg_catalog.=(vector, vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8331; +/* HASHES is deliberately not set: this script creates no hash operator family for vector, and a hash join on a HASHES operator without one fails at run time */ +CREATE OPERATOR pg_catalog.=( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = pg_catalog.vector_eq, + COMMUTATOR = '=' , + RESTRICT = eqsel, JOIN = eqjoinsel +); + +DROP OPERATOR IF EXISTS pg_catalog.<>(vector, vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8332; +CREATE OPERATOR pg_catalog.<>( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = pg_catalog.vector_ne, + COMMUTATOR = '<>' , NEGATOR = '=' , + RESTRICT = neqsel, JOIN = neqjoinsel +); + +DROP OPERATOR IF EXISTS pg_catalog.>=(vector, vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8330; +CREATE OPERATOR pg_catalog.>=( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = pg_catalog.vector_ge, + COMMUTATOR = '<=' , NEGATOR = '<' , + RESTRICT = scalargtsel, JOIN = scalargtjoinsel +); + +DROP 
OPERATOR IF EXISTS pg_catalog.>(vector, vector) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8329; +CREATE OPERATOR pg_catalog.>( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = pg_catalog.vector_gt, + COMMUTATOR = '<' , NEGATOR = '<=' , + RESTRICT = scalargtsel, JOIN = scalargtjoinsel +); + +DROP OPERATOR IF EXISTS pg_catalog.<~>(bit, bit) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8323; +CREATE OPERATOR pg_catalog.<~>( + LEFTARG = bit, RIGHTARG = bit, PROCEDURE = pg_catalog.hamming_distance, + COMMUTATOR = '<~>' +); + +DROP OPERATOR IF EXISTS pg_catalog.<%>(bit, bit) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8324; +CREATE OPERATOR pg_catalog.<%>( + LEFTARG = bit, RIGHTARG = bit, PROCEDURE = pg_catalog.jaccard_distance, + COMMUTATOR = '<%>' +); + +DROP OPERATOR IF EXISTS pg_catalog.<->(sparsevec, sparsevec) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8319; +CREATE OPERATOR pg_catalog.<->( + LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = pg_catalog.l2_distance, + COMMUTATOR = '<->' +); + +DROP OPERATOR IF EXISTS pg_catalog.<#>(sparsevec, sparsevec) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8320; +CREATE OPERATOR pg_catalog.<#>( + LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = pg_catalog.sparsevec_negative_inner_product, + COMMUTATOR = '<#>' +); + +DROP OPERATOR IF EXISTS pg_catalog.<=>(sparsevec, sparsevec) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8321; +CREATE OPERATOR pg_catalog.<=>( + LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = pg_catalog.cosine_distance, + COMMUTATOR = '<=>' +); + +DROP OPERATOR IF EXISTS pg_catalog.<+>(sparsevec, sparsevec) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8322; +CREATE OPERATOR pg_catalog.<+>( + LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = pg_catalog.l1_distance, + COMMUTATOR = '<+>' 
+); + +DROP OPERATOR IF EXISTS pg_catalog.<(sparsevec, sparsevec) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8333; +CREATE OPERATOR pg_catalog.<( + LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = pg_catalog.sparsevec_lt, + RESTRICT = scalarltsel, JOIN = scalarltjoinsel +); + +DROP OPERATOR IF EXISTS pg_catalog.<=(sparsevec, sparsevec) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8334; +CREATE OPERATOR pg_catalog.<=( + LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = pg_catalog.sparsevec_le, + RESTRICT = scalarltsel, JOIN = scalarltjoinsel +); + +DROP OPERATOR IF EXISTS pg_catalog.=(sparsevec, sparsevec) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8337; +/* HASHES is deliberately not set: this script creates no hash operator family for sparsevec, and a hash join on a HASHES operator without one fails at run time */ +CREATE OPERATOR pg_catalog.=( + LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = pg_catalog.sparsevec_eq, + COMMUTATOR = '=' , + RESTRICT = eqsel, JOIN = eqjoinsel +); + +DROP OPERATOR IF EXISTS pg_catalog.<>(sparsevec, sparsevec) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8338; +CREATE OPERATOR pg_catalog.<>( + LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = pg_catalog.sparsevec_ne, + COMMUTATOR = '<>' , NEGATOR = '=' , + RESTRICT = neqsel, JOIN = neqjoinsel +); + +DROP OPERATOR IF EXISTS pg_catalog.>=(sparsevec, sparsevec) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8336; +CREATE OPERATOR pg_catalog.>=( + LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = pg_catalog.sparsevec_ge, + COMMUTATOR = '<=' , NEGATOR = '<' , + RESTRICT = scalargtsel, JOIN = scalargtjoinsel +); + +DROP OPERATOR IF EXISTS pg_catalog.>(sparsevec, sparsevec) CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8335; +CREATE OPERATOR pg_catalog.>( + LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = pg_catalog.sparsevec_gt, + COMMUTATOR = '<' , NEGATOR = '<=' , + RESTRICT = scalargtsel, JOIN = scalargtjoinsel +); + +COMMENT ON OPERATOR 
pg_catalog.<->(vector,vector) IS 'l2_distance'; +COMMENT ON OPERATOR pg_catalog.<#>(vector,vector) IS 'vector_negative_inner_product'; +COMMENT ON OPERATOR pg_catalog.<=>(vector,vector) IS 'cosine_distance'; +COMMENT ON OPERATOR pg_catalog.<+>(vector,vector) IS 'l1_distance'; +COMMENT ON OPERATOR pg_catalog.||(vector,vector) IS 'vector_concat'; +COMMENT ON OPERATOR pg_catalog.+(vector,vector) IS 'vector_add'; +COMMENT ON OPERATOR pg_catalog.-(vector,vector) IS 'vector_sub'; +COMMENT ON OPERATOR pg_catalog.*(vector,vector) IS 'vector_mul'; +COMMENT ON OPERATOR pg_catalog.<(vector,vector) IS 'vector less than'; +COMMENT ON OPERATOR pg_catalog.<=(vector,vector) IS 'vector less than or equal'; +COMMENT ON OPERATOR pg_catalog.>(vector,vector) IS 'vector greater than'; +COMMENT ON OPERATOR pg_catalog.>=(vector,vector) IS 'vector greater than or equal'; +COMMENT ON OPERATOR pg_catalog.=(vector,vector) IS 'vector equal'; +COMMENT ON OPERATOR pg_catalog.<>(vector,vector) IS 'vector unequal'; +COMMENT ON OPERATOR pg_catalog.<~>(bit,bit) IS 'hamming_distance'; +COMMENT ON OPERATOR pg_catalog.<%>(bit,bit) IS 'jaccard_distance'; +COMMENT ON OPERATOR pg_catalog.<->(sparsevec,sparsevec) IS 'sparsevec_l2_distance'; +COMMENT ON OPERATOR pg_catalog.<#>(sparsevec,sparsevec) IS 'sparsevec_negative_inner_product'; +COMMENT ON OPERATOR pg_catalog.<=>(sparsevec,sparsevec) IS 'sparsevec_cosine_distance'; +COMMENT ON OPERATOR pg_catalog.<+>(sparsevec,sparsevec) IS 'sparsevec_l1_distance'; +COMMENT ON OPERATOR pg_catalog.<(sparsevec,sparsevec) IS 'sparsevec less than'; +COMMENT ON OPERATOR pg_catalog.<=(sparsevec,sparsevec) IS 'sparsevec less than or equal'; +COMMENT ON OPERATOR pg_catalog.>(sparsevec,sparsevec) IS 'sparsevec greater than'; +COMMENT ON OPERATOR pg_catalog.>=(sparsevec,sparsevec) IS 'sparsevec greater than or equal'; +COMMENT ON OPERATOR pg_catalog.=(sparsevec,sparsevec) IS 'sparsevec equal'; +COMMENT ON OPERATOR pg_catalog.<>(sparsevec,sparsevec) IS 'sparsevec unequal'; + 
+DROP OPERATOR FAMILY IF EXISTS pg_catalog.vector_ops USING btree CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8392; +CREATE OPERATOR FAMILY pg_catalog.vector_ops USING btree; + +DROP OPERATOR CLASS IF EXISTS pg_catalog.vector_ops USING btree CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8977; +CREATE OPERATOR CLASS pg_catalog.vector_ops DEFAULT + FOR TYPE vector USING btree as + OPERATOR 1 pg_catalog.<(vector, vector), + OPERATOR 2 pg_catalog.<=(vector, vector), + OPERATOR 3 pg_catalog.=(vector, vector), + OPERATOR 4 pg_catalog.>=(vector, vector), + OPERATOR 5 pg_catalog.>(vector, vector), + FUNCTION 1 pg_catalog.vector_cmp(vector,vector); + +DROP OPERATOR FAMILY IF EXISTS pg_catalog.vector_ops USING ubtree CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8375; +CREATE OPERATOR FAMILY pg_catalog.vector_ops USING ubtree; + +DROP OPERATOR CLASS IF EXISTS pg_catalog.vector_ops USING ubtree CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8951; +CREATE OPERATOR CLASS pg_catalog.vector_ops DEFAULT + FOR TYPE vector USING ubtree AS + OPERATOR 1 pg_catalog.<(vector, vector), + OPERATOR 2 pg_catalog.<=(vector, vector), + OPERATOR 3 pg_catalog.=(vector, vector), + OPERATOR 4 pg_catalog.>=(vector, vector), + OPERATOR 5 pg_catalog.>(vector, vector), + FUNCTION 1 pg_catalog.vector_cmp(vector, vector); + +DROP OPERATOR FAMILY IF EXISTS pg_catalog.vector_l2_ops USING ivfflat CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8385; +CREATE OPERATOR FAMILY pg_catalog.vector_l2_ops USING ivfflat; + +DROP OPERATOR CLASS IF EXISTS pg_catalog.vector_l2_ops USING ivfflat CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8914; +CREATE OPERATOR CLASS pg_catalog.vector_l2_ops + DEFAULT FOR TYPE vector USING ivfflat AS + OPERATOR 1 pg_catalog.<->(vector, vector) FOR ORDER BY float_ops, + FUNCTION 1 pg_catalog.vector_l2_squared_distance(vector, 
vector), + FUNCTION 3 pg_catalog.l2_distance(vector, vector); + +DROP OPERATOR FAMILY IF EXISTS pg_catalog.vector_ip_ops USING ivfflat CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8386; +CREATE OPERATOR FAMILY pg_catalog.vector_ip_ops USING ivfflat; + +DROP OPERATOR CLASS IF EXISTS pg_catalog.vector_ip_ops USING ivfflat CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8915; +CREATE OPERATOR CLASS pg_catalog.vector_ip_ops + FOR TYPE vector USING ivfflat AS + OPERATOR 1 pg_catalog.<#>(vector, vector) FOR ORDER BY float_ops, + FUNCTION 1 pg_catalog.vector_negative_inner_product(vector, vector), + FUNCTION 3 pg_catalog.vector_spherical_distance(vector, vector), + FUNCTION 4 pg_catalog.vector_norm(vector); + +DROP OPERATOR FAMILY IF EXISTS pg_catalog.vector_cosine_ops USING ivfflat CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8387; +CREATE OPERATOR FAMILY pg_catalog.vector_cosine_ops USING ivfflat; + +DROP OPERATOR CLASS IF EXISTS pg_catalog.vector_cosine_ops USING ivfflat CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8916; +CREATE OPERATOR CLASS pg_catalog.vector_cosine_ops + FOR TYPE vector USING ivfflat AS + OPERATOR 1 pg_catalog.<=>(vector, vector) FOR ORDER BY float_ops, + FUNCTION 1 pg_catalog.vector_negative_inner_product(vector, vector), + FUNCTION 2 pg_catalog.vector_norm(vector), + FUNCTION 3 pg_catalog.vector_spherical_distance(vector, vector), + FUNCTION 4 pg_catalog.vector_norm(vector); + +DROP OPERATOR FAMILY IF EXISTS pg_catalog.vector_l2_ops USING hnsw CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8371; +CREATE OPERATOR FAMILY pg_catalog.vector_l2_ops USING hnsw; + +DROP OPERATOR CLASS IF EXISTS pg_catalog.vector_l2_ops USING hnsw CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8900; +CREATE OPERATOR CLASS pg_catalog.vector_l2_ops + FOR TYPE vector USING hnsw AS + OPERATOR 1 pg_catalog.<->(vector, 
vector) FOR ORDER BY float_ops, + FUNCTION 1 pg_catalog.vector_l2_squared_distance(vector, vector); + +DROP OPERATOR FAMILY IF EXISTS pg_catalog.vector_ip_ops USING hnsw CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8372; +CREATE OPERATOR FAMILY pg_catalog.vector_ip_ops USING hnsw; + +DROP OPERATOR CLASS IF EXISTS pg_catalog.vector_ip_ops USING hnsw CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8999; +CREATE OPERATOR CLASS pg_catalog.vector_ip_ops + FOR TYPE vector USING hnsw AS + OPERATOR 1 pg_catalog.<#>(vector, vector) FOR ORDER BY float_ops, + FUNCTION 1 pg_catalog.vector_negative_inner_product(vector, vector), + FUNCTION 4 pg_catalog.vector_norm(vector); + +DROP OPERATOR FAMILY IF EXISTS pg_catalog.vector_cosine_ops USING hnsw CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8373; +CREATE OPERATOR FAMILY pg_catalog.vector_cosine_ops USING hnsw; + +DROP OPERATOR CLASS IF EXISTS pg_catalog.vector_cosine_ops USING hnsw CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8902; +CREATE OPERATOR CLASS pg_catalog.vector_cosine_ops + FOR TYPE vector USING hnsw AS + OPERATOR 1 pg_catalog.<=>(vector, vector) FOR ORDER BY float_ops, + FUNCTION 1 pg_catalog.vector_negative_inner_product(vector, vector), + FUNCTION 2 pg_catalog.vector_norm(vector), + FUNCTION 4 pg_catalog.vector_norm(vector); + +DROP OPERATOR FAMILY IF EXISTS pg_catalog.vector_l1_ops USING hnsw CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8374; +CREATE OPERATOR FAMILY pg_catalog.vector_l1_ops USING hnsw; + +DROP OPERATOR CLASS IF EXISTS pg_catalog.vector_l1_ops USING hnsw CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8903; +CREATE OPERATOR CLASS pg_catalog.vector_l1_ops + FOR TYPE vector USING hnsw AS + OPERATOR 1 pg_catalog.<+>(vector, vector) FOR ORDER BY float_ops, + FUNCTION 1 pg_catalog.l1_distance(vector, vector); + +DROP OPERATOR FAMILY IF EXISTS 
pg_catalog.bit_hamming_ops USING ivfflat CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8394; +CREATE OPERATOR FAMILY pg_catalog.bit_hamming_ops USING ivfflat; + +DROP OPERATOR CLASS IF EXISTS pg_catalog.bit_hamming_ops USING ivfflat CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8923; +CREATE OPERATOR CLASS pg_catalog.bit_hamming_ops + FOR TYPE bit USING ivfflat AS + OPERATOR 1 pg_catalog.<~>(bit, bit) FOR ORDER BY float_ops, + FUNCTION 1 pg_catalog.hamming_distance(bit, bit), + FUNCTION 3 pg_catalog.hamming_distance(bit, bit), + FUNCTION 5 pg_catalog.ivfflat_bit_support(internal); + +DROP OPERATOR FAMILY IF EXISTS pg_catalog.bit_hamming_ops USING hnsw CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8380; +CREATE OPERATOR FAMILY pg_catalog.bit_hamming_ops USING hnsw; + +DROP OPERATOR CLASS IF EXISTS pg_catalog.bit_hamming_ops USING hnsw CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8909; +CREATE OPERATOR CLASS pg_catalog.bit_hamming_ops + FOR TYPE bit USING hnsw AS + OPERATOR 1 pg_catalog.<~>(bit, bit) FOR ORDER BY float_ops, + FUNCTION 1 pg_catalog.hamming_distance(bit, bit), + FUNCTION 3 pg_catalog.hnsw_bit_support(internal); + +DROP OPERATOR FAMILY IF EXISTS pg_catalog.bit_jaccard_ops USING hnsw CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8379; +CREATE OPERATOR FAMILY pg_catalog.bit_jaccard_ops USING hnsw; + +DROP OPERATOR CLASS IF EXISTS pg_catalog.bit_jaccard_ops USING hnsw CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8908; +CREATE OPERATOR CLASS pg_catalog.bit_jaccard_ops + FOR TYPE bit USING hnsw AS + OPERATOR 1 pg_catalog.<%>(bit, bit) FOR ORDER BY float_ops, + FUNCTION 1 pg_catalog.jaccard_distance(bit, bit), + FUNCTION 3 pg_catalog.hnsw_bit_support(internal); + +DROP OPERATOR FAMILY IF EXISTS pg_catalog.sparsevec_ops USING btree CASCADE; +SET LOCAL 
inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8397; +CREATE OPERATOR FAMILY pg_catalog.sparsevec_ops USING btree; + +DROP OPERATOR CLASS IF EXISTS pg_catalog.sparsevec_ops USING btree CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8979; +CREATE OPERATOR CLASS pg_catalog.sparsevec_ops + DEFAULT FOR TYPE sparsevec USING btree AS + OPERATOR 1 pg_catalog.<(sparsevec, sparsevec), + OPERATOR 2 pg_catalog.<=(sparsevec, sparsevec), + OPERATOR 3 pg_catalog.=(sparsevec, sparsevec), + OPERATOR 4 pg_catalog.>=(sparsevec, sparsevec), + OPERATOR 5 pg_catalog.>(sparsevec, sparsevec), + FUNCTION 1 pg_catalog.sparsevec_cmp(sparsevec, sparsevec); + +DROP OPERATOR FAMILY IF EXISTS pg_catalog.sparsevec_ops USING ubtree CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8376; +CREATE OPERATOR FAMILY pg_catalog.sparsevec_ops USING ubtree; + +DROP OPERATOR CLASS IF EXISTS pg_catalog.sparsevec_ops USING ubtree CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8952; +/* support function is schema-qualified like every other operator-class member in this script, so it does not depend on search_path */ +CREATE OPERATOR CLASS pg_catalog.sparsevec_ops DEFAULT + FOR TYPE sparsevec USING ubtree AS + OPERATOR 1 pg_catalog.<(sparsevec, sparsevec), + OPERATOR 2 pg_catalog.<=(sparsevec, sparsevec), + OPERATOR 3 pg_catalog.=(sparsevec, sparsevec), + OPERATOR 4 pg_catalog.>=(sparsevec, sparsevec), + OPERATOR 5 pg_catalog.>(sparsevec, sparsevec), + FUNCTION 1 pg_catalog.sparsevec_cmp(sparsevec, sparsevec); + +DROP OPERATOR FAMILY IF EXISTS pg_catalog.sparsevec_l2_ops USING hnsw CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8381; +CREATE OPERATOR FAMILY pg_catalog.sparsevec_l2_ops USING hnsw; + +DROP OPERATOR CLASS IF EXISTS pg_catalog.sparsevec_l2_ops USING hnsw CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8910; +CREATE OPERATOR CLASS pg_catalog.sparsevec_l2_ops + FOR TYPE sparsevec USING hnsw AS + OPERATOR 1 pg_catalog.<->(sparsevec, sparsevec) FOR ORDER BY float_ops, + FUNCTION 1 
pg_catalog.sparsevec_l2_squared_distance(sparsevec, sparsevec), + FUNCTION 3 pg_catalog.hnsw_sparsevec_support(internal); + +DROP OPERATOR FAMILY IF EXISTS pg_catalog.sparsevec_ip_ops USING hnsw CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8382; +CREATE OPERATOR FAMILY pg_catalog.sparsevec_ip_ops USING hnsw; + +DROP OPERATOR CLASS IF EXISTS pg_catalog.sparsevec_ip_ops USING hnsw CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8911; +CREATE OPERATOR CLASS pg_catalog.sparsevec_ip_ops + FOR TYPE sparsevec USING hnsw AS + OPERATOR 1 pg_catalog.<#>(sparsevec, sparsevec) FOR ORDER BY float_ops, + FUNCTION 1 pg_catalog.sparsevec_negative_inner_product(sparsevec, sparsevec), + FUNCTION 3 pg_catalog.hnsw_sparsevec_support(internal); + +DROP OPERATOR FAMILY IF EXISTS pg_catalog.sparsevec_cosine_ops USING hnsw CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8383; +CREATE OPERATOR FAMILY pg_catalog.sparsevec_cosine_ops USING hnsw; + +DROP OPERATOR CLASS IF EXISTS pg_catalog.sparsevec_cosine_ops USING hnsw CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8912; +CREATE OPERATOR CLASS pg_catalog.sparsevec_cosine_ops + FOR TYPE sparsevec USING hnsw AS + OPERATOR 1 pg_catalog.<=>(sparsevec, sparsevec) FOR ORDER BY float_ops, + FUNCTION 1 pg_catalog.sparsevec_negative_inner_product(sparsevec, sparsevec), + FUNCTION 2 pg_catalog.l2_norm(sparsevec), + FUNCTION 3 pg_catalog.hnsw_sparsevec_support(internal); + +DROP OPERATOR FAMILY IF EXISTS pg_catalog.sparsevec_l1_ops USING hnsw CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8384; +CREATE OPERATOR FAMILY pg_catalog.sparsevec_l1_ops USING hnsw; + +DROP OPERATOR CLASS IF EXISTS pg_catalog.sparsevec_l1_ops USING hnsw CASCADE; +SET LOCAL inplace_upgrade_next_system_object_oids=IUO_GENERAL, 8913; +CREATE OPERATOR CLASS pg_catalog.sparsevec_l1_ops + FOR TYPE sparsevec USING hnsw AS + OPERATOR 1 
pg_catalog.<+>(sparsevec, sparsevec) FOR ORDER BY float_ops, + FUNCTION 1 pg_catalog.l1_distance(sparsevec, sparsevec), + FUNCTION 3 pg_catalog.hnsw_sparsevec_support(internal); diff --git a/src/include/executor/node/nodeAnnIndexscan.h b/src/include/executor/node/nodeAnnIndexscan.h new file mode 100644 index 0000000000000000000000000000000000000000..5bba25d792c3954d0123d5b5e306bb8624f30c4c --- /dev/null +++ b/src/include/executor/node/nodeAnnIndexscan.h @@ -0,0 +1,31 @@ +/* ------------------------------------------------------------------------- + * Copyright (c) 2024 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * ------------------------------------------------------------------------- + * src/include/executor/node/nodeAnnIndexscan.h + */ +#ifndef NODEANNINDEXSCAN_H +#define NODEANNINDEXSCAN_H + +#include "executor/exec/execStream.h" +#include "nodes/execnodes.h" + +extern AnnIndexScanState* ExecInitAnnIndexScan(AnnIndexScan* node, EState* estate, int eflags); +extern void ExecEndAnnIndexScan(AnnIndexScanState* node); +extern void ExecAnnIndexMarkPos(AnnIndexScanState* node); +extern void ExecAnnIndexRestrPos(AnnIndexScanState* node); +extern void ExecReScanAnnIndexScan(AnnIndexScanState* node); + +extern void ExecInitPartitionForAnnIndexScan(AnnIndexScanState* indexstate, EState* estate); + +#endif /* NODEANNINDEXSCAN_H */ diff --git a/src/include/knl/knl_guc/knl_instance_attr_storage.h b/src/include/knl/knl_guc/knl_instance_attr_storage.h index e3f7c3318c5e0ba7e95328e265b1c97531d52700..19697f92b62dd61f17b2290a5e1c3e3a919b1706 100755 --- a/src/include/knl/knl_guc/knl_instance_attr_storage.h +++ b/src/include/knl/knl_guc/knl_instance_attr_storage.h @@ -246,6 +246,7 @@ typedef struct knl_instance_attr_storage { int uwal_truncate_interval; bool uwal_async_append_switch; + bool enable_pq; int parallel_recovery_dispatch_algorithm; } knl_instance_attr_storage; diff --git a/src/include/knl/knl_instance.h b/src/include/knl/knl_instance.h index 95f092f0cdb938d5d38c0fea0444530f20206644..d43937843657159fd55b1fb6667c32fed282c10b 100755 --- a/src/include/knl/knl_instance.h +++ b/src/include/knl/knl_instance.h @@ -1451,6 +1451,7 @@ typedef struct knl_instance_context { knl_g_listen_context listen_cxt; knl_g_datadir_context datadir_cxt; knl_g_dms_context dms_cxt; + bool pq_inited; #ifdef USE_SPQ knl_g_spq_context spq_cxt; #endif diff --git a/src/include/knl/knl_session.h b/src/include/knl/knl_session.h index 90aa611167b54bab3a7f2dd35550ac51a2c1992b..2a7f33474d5a987449cbde66813c129e56b5f526 100644 --- a/src/include/knl/knl_session.h +++ b/src/include/knl/knl_session.h @@ -3006,6 +3006,13 
@@ typedef struct knl_u_ndp_context { char *crl_path; } knl_u_ndp_context; +typedef struct knl_u_datavec_context { + int hnsw_ef_search; + int hnsw_earlystop_threshold; + int ivfflat_probes; + int ivfpq_kreorder; +} knl_u_datavec_context; + typedef struct knl_session_context { volatile knl_session_status status; /* used for threadworker, elem in m_readySessionList */ @@ -3160,6 +3167,8 @@ typedef struct knl_session_context { /* standby write. */ knl_u_libsw_context libsw_cxt; + knl_u_datavec_context datavec_ctx; + } knl_session_context; enum stp_xact_err_type { diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 3240dc40b827ee342f25f1c0fd60b7f5da2fc9f8..06656add548a04dc13745abae6a5790e10e7327a 100755 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -2080,6 +2080,41 @@ typedef struct BitmapHeapScanState { CBIScanDesc cbi_scan; /* for crossbucket index scan */ } BitmapHeapScanState; +/* ---------------- + * AnnIndexScanState information + * + * indexqualorig execution state for indexqualorig expressions + * ScanKeys Skey structures for index quals + * NumScanKeys number of ScanKeys + * OrderByKeys Skey structures for index ordering operators + * NumOrderByKeys number of OrderByKeys + * RuntimeKeys info about Skeys that must be evaluated at runtime + * NumRuntimeKeys number of RuntimeKeys + * RuntimeKeysReady true if runtime Skeys have been computed + * RuntimeContext expr context for evaling runtime Skeys + * RelationDesc index relation descriptor + * ScanDesc index scan descriptor + * ---------------- + */ +typedef struct AnnIndexScanState { + ScanState ss; /* its first field is NodeTag */ + List* indexqualorig; + ScanKey iss_ScanKeys; + int iss_NumScanKeys; + ScanKey iss_OrderByKeys; + int iss_NumOrderByKeys; + IndexRuntimeKeyInfo* iss_RuntimeKeys; + int iss_NumRuntimeKeys; + bool iss_RuntimeKeysReady; + ExprContext* iss_RuntimeContext; + Relation iss_RelationDesc; + IndexScanDesc iss_ScanDesc; + List* 
iss_IndexPartitionList; + LOCKMODE lockMode; + Relation iss_CurrentIndexPartition; + double annCount; // limitValue/selectivity +} AnnIndexScanState; + /* ---------------- * TidScanState information * diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index 447e758ba0d2fa4e9d7d873d6f82a7ee4fbc7a6d..09bb28f148e061de7a275287ae6a858e67debe37 100755 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -69,6 +69,7 @@ typedef enum NodeTag { #endif T_IndexScan, T_IndexOnlyScan, + T_AnnIndexScan, T_BitmapIndexScan, T_BitmapHeapScan, T_TidScan, @@ -188,6 +189,7 @@ typedef enum NodeTag { #endif T_IndexScanState, T_IndexOnlyScanState, + T_AnnIndexScanState, T_BitmapIndexScanState, T_BitmapHeapScanState, T_TidScanState, diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 86aa0cc067b5b2c1335dc54e4c9de0bed7eb791b..9ae7ddc48d58edb97205fee3cef6d700305c89f3 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -861,6 +861,31 @@ typedef struct CStoreIndexAnd : public BitmapAnd { typedef struct CStoreIndexOr : public BitmapOr { } CStoreIndexOr; +/* ---------------- + * Vector ann index scan node + * use vector index scan and list of qual expressions to scan data.
+ */ +typedef struct AnnIndexScan { + Scan scan; + Oid indexid; /* OID of index to scan */ + char* indexname; /* Index name of index to scan */ + List* indexqual; /* list of index quals (usually OpExprs) */ + List* indexqualorig; /* the same in original form */ + List* indexorderby; /* list of index ORDER BY exprs */ + List* indexorderbyorig; /* the same in original form */ + ScanDirection indexorderdir; /* forward or backward or don't care */ + Index indexscan_relid; /* Hack for column store index, treat the index as normal relation */ + List* idx_cstorequal; /* For column store, this contains only quals pushdownable to + storage engine */ + List* cstorequal; /* quals that can be pushdown to cstore base table */ + List* targetlist; /* Hack for column store index, target list to be computed at this node */ + bool index_only_scan; + bool is_ustore; + double selectivity; + bool is_partial; + double annCount; +} AnnIndexScan; + /* ---------------- * tid scan node * diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 855c207da85279d7c27d81de31a8caa9c827df20..d233ac0cf4bf9453376017f7403cb9472b158ced 100755 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -929,6 +929,7 @@ typedef struct IndexOptInfo { List* indextlist; /* targetlist representing index columns */ bool isGlobal; /* true if index is global partition index */ + bool isAnnIndex; /* true if index is vector index */ bool crossbucket; /* true if index is crossbucket */ bool predOK; /* true if predicate matches query */ bool unique; /* true if a unique index */ @@ -1226,6 +1227,13 @@ typedef struct IndexPath { ScanDirection indexscandir; Cost indextotalcost; Selectivity indexselectivity; + bool isAnnIndex; + List* annQuals; + List* annQualCols; + Cost annQualTotalCost; + Selectivity annQualSelectivity; + double annCount; // stores limitValue / selectivity + Cost allcost; // index cost + qual cost bool is_ustore; } IndexPath; diff --git a/src/include/optimizer/cost.h
b/src/include/optimizer/cost.h index bc74c7f3032a5f146b2bf976e8b0a4e1df7470bb..1b489af74612f1053fd2ee2eb066d36784782e8b 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -24,6 +24,10 @@ #define STREAM_COST_THRESHOLD 100000.0 +/* + * ANN index cost factor, used together with LIMIT: cost = limit / ANN_INDEX_COST + */ +#define ANN_INDEX_COST 0.25 /* * For columnar scan, the cpu cost is less than seq scan, so * we are definition that the cost of scanning one tuple is 1/10 times. diff --git a/src/include/optimizer/learn.h b/src/include/optimizer/learn.h index 24f6c50ded8e7c7916d1432af17e6c18f214bdc6..9f23fae434bccbe9c91088c581bf68d2c0b0a086 100644 --- a/src/include/optimizer/learn.h +++ b/src/include/optimizer/learn.h @@ -110,6 +110,7 @@ #define TEXT_STRATEGY_SCAN_SEQ "SEQ" #define TEXT_STRATEGY_SCAN_INDEX "INDEX" #define TEXT_STRATEGY_SCAN_INDEX_ONLY "INDEX_ONLY" +#define TEXT_STRATEGY_SCAN_ANN_INDEX "ANN_INDEX" #define TEXT_STRATEGY_SCAN_BITMAP_INDEX "BITMAP_INDEX" #define TEXT_STRATEGY_SCAN_BITMAP_HEAP "BITMAP_HEAP" #define TEXT_STRATEGY_SCAN_TID "TID" diff --git a/src/include/utils/numeric.h b/src/include/utils/numeric.h index 3c82cec89fa21b8d92ad77c0f39eeeb5a56eb7ac..0a3d53fb960d02736c4ec6364dee7f3d3016954f 100644 --- a/src/include/utils/numeric.h +++ b/src/include/utils/numeric.h @@ -157,6 +157,7 @@ typedef struct NumericData* Numeric; #define NUMERIC_MAX_SCALE 1000 #define NUMERIC_MIN_SCALE -84 #define IS_FLOAT_AS_NUMERIC(scale) ((scale) == INT16_MIN) +#define NUMERIC_TYPEMOD_MASK 0xffff /* * Internal limits on the scales chosen for calculation results diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index 3474b977592c51248382192c5c03bf1d1be1bb9c..f6b2988e6546958ed91af65deaa203ead5dea41b 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -299,6 +299,10 @@ typedef struct RelationData { bool come_from_partrel; /* used only for gsc, keep it preserved if you modify the rel, otherwise set it null */ struct
LocalRelationEntry *entry; + + /* used only for datavec pq */ + char *pqTable; + float *pqDistanceTable; } RelationData; /* diff --git a/src/test/regress/expected/ai_vector.out b/src/test/regress/expected/ai_vector.out new file mode 100644 index 0000000000000000000000000000000000000000..447a73c68e0fe1ef5847afc1d2212195cdea25e9 --- /dev/null +++ b/src/test/regress/expected/ai_vector.out @@ -0,0 +1,159 @@ +create database test; +\c test +CREATE TABLE dimens3_scan_l2_100 ( + id serial PRIMARY KEY, + name text, + embedding vector(3) -- 假设向量的维度为512 +); +NOTICE: CREATE TABLE will create implicit sequence "dimens3_scan_l2_100_id_seq" for serial column "dimens3_scan_l2_100.id" +NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "dimens3_scan_l2_100_pkey" for table "dimens3_scan_l2_100" +CREATE INDEX ON dimens3_scan_l2_100 USING hnsw (embedding vector_l2_ops); +INSERT INTO dimens3_scan_l2_100 (name, embedding) VALUES ('item1', '[0.211557005, 0.076130312, 0.048887434]'); +INSERT INTO dimens3_scan_l2_100 (name, embedding) VALUES ('item2', '[0.822932576, 0.487093015, 0.748730227]'); +INSERT INTO dimens3_scan_l2_100 (name, embedding) VALUES ('item3', '[0.564357002, 0.926498683, 0.818280197]'); +INSERT INTO dimens3_scan_l2_100 (name, embedding) VALUES ('item4', '[0.791316587, 0.460895557, 0.067207831]'); +INSERT INTO dimens3_scan_l2_100 (name, embedding) VALUES ('item5', '[0.974898792, 0.371204436, 0.090201541]'); +INSERT INTO dimens3_scan_l2_100 (name, embedding) VALUES ('item6', '[0.972023039, 0.902695732, 0.104805143]'); +INSERT INTO dimens3_scan_l2_100 (name, embedding) VALUES ('item7', '[0.340851511, 0.661115819, 0.979039013]'); +INSERT INTO dimens3_scan_l2_100 (name, embedding) VALUES ('item8', '[0.492086968, 0.671430133, 0.715509519]'); +INSERT INTO dimens3_scan_l2_100 (name, embedding) VALUES ('item9', '[0.815762744, 0.638665527, 0.940596042]'); +INSERT INTO dimens3_scan_l2_100 (name, embedding) VALUES ('item10', '[0.598264407, 0.626925064, 0.837062887]'); 
+INSERT INTO dimens3_scan_l2_100 (name, embedding) VALUES ('item11', '[0.605249576, 0.62827664, 0.776847171]'); +INSERT INTO dimens3_scan_l2_100 (name, embedding) VALUES ('item12', '[0.608682007, 0.608462876, 0.816056557]'); +INSERT INTO dimens3_scan_l2_100 (name, embedding) VALUES ('item13', '[0.361436461, 0.552522446, 0.010793276]'); +INSERT INTO dimens3_scan_l2_100 (name, embedding) VALUES ('item14', '[0.856674254, 0.395471228, 0.988986555]'); +INSERT INTO dimens3_scan_l2_100 (name, embedding) VALUES ('item15', '[0.694758094, 0.875162429, 0.232631982]'); +INSERT INTO dimens3_scan_l2_100 (name, embedding) VALUES ('item16', '[0.021518759, 0.565872631, 0.643064199]'); +INSERT INTO dimens3_scan_l2_100 (name, embedding) VALUES ('item17', '[0.766803278, 0.164139074, 0.516602078]'); +INSERT INTO dimens3_scan_l2_100 (name, embedding) VALUES ('item18', '[0.764914268, 0.523333259, 0.449667669]'); +INSERT INTO dimens3_scan_l2_100 (name, embedding) VALUES ('item19', '[0.212920548, 0.985840962, 0.093416858]'); +INSERT INTO dimens3_scan_l2_100 (name, embedding) VALUES ('item20', '[0.789230643, 0.564697507, 0.134702261]'); +INSERT INTO dimens3_scan_l2_100 (name, embedding) VALUES ('item21', '[0.717458865, 0.267429064, 0.178979577]'); +INSERT INTO dimens3_scan_l2_100 (name, embedding) VALUES ('item22', '[0.509153041, 0.641964123, 0.920561135]'); +INSERT INTO dimens3_scan_l2_100 (name, embedding) VALUES ('item23', '[0.431243764, 0.963082203, 0.727807302]'); +INSERT INTO dimens3_scan_l2_100 (name, embedding) VALUES ('item24', '[0.827596123, 0.134412498, 0.004161737]'); +INSERT INTO dimens3_scan_l2_100 (name, embedding) VALUES ('item25', '[0.258203395, 0.659945327, 0.597459666]'); +INSERT INTO dimens3_scan_l2_100 (name, embedding) VALUES ('item26', '[0.944380157, 0.702332123, 0.545305512]'); +set hnsw_ef_search=10; +analyze dimens3_scan_l2_100; +SELECT * FROM dimens3_scan_l2_100 ORDER BY embedding <-> '[3,1,2]' LIMIT 5; + id | name | embedding 
+----+--------+------------------------------------ + 14 | item14 | [0.85667425,0.39547122,0.98898655] + 9 | item9 | [0.81576276,0.6386655,0.94059604] + 26 | item26 | [0.94438016,0.70233214,0.5453055] + 2 | item2 | [0.8229326,0.487093,0.74873024] + 10 | item10 | [0.5982644,0.62692505,0.8370629] +(5 rows) + +SELECT * FROM dimens3_scan_l2_100 ORDER BY embedding <-> '[3,1,2]' LIMIT 20; + id | name | embedding +----+--------+------------------------------------ + 14 | item14 | [0.85667425,0.39547122,0.98898655] + 9 | item9 | [0.81576276,0.6386655,0.94059604] + 26 | item26 | [0.94438016,0.70233214,0.5453055] + 2 | item2 | [0.8229326,0.487093,0.74873024] + 10 | item10 | [0.5982644,0.62692505,0.8370629] + 12 | item12 | [0.60868204,0.60846287,0.81605655] + 3 | item3 | [0.564357,0.9264987,0.8182802] + 11 | item11 | [0.6052496,0.62827665,0.7768472] + 22 | item22 | [0.50915307,0.64196414,0.92056113] + 18 | item18 | [0.7649143,0.52333325,0.44966766] + 6 | item6 | [0.972023,0.9026957,0.10480514] + 17 | item17 | [0.76680326,0.16413908,0.5166021] + 8 | item8 | [0.49208698,0.6714301,0.71550953] + 5 | item5 | [0.9748988,0.37120444,0.09020154] + 23 | item23 | [0.43124378,0.9630822,0.7278073] + 7 | item7 | [0.34085152,0.6611158,0.979039] + 15 | item15 | [0.6947581,0.8751624,0.23263198] + 20 | item20 | [0.78923064,0.5646975,0.13470227] + 4 | item4 | [0.79131657,0.46089557,0.06720783] + 21 | item21 | [0.71745884,0.26742905,0.17897958] +(20 rows) + +SELECT * FROM dimens3_scan_l2_100 ORDER BY embedding <-> '[3,1,2]'; + id | name | embedding +----+--------+------------------------------------ + 14 | item14 | [0.85667425,0.39547122,0.98898655] + 9 | item9 | [0.81576276,0.6386655,0.94059604] + 26 | item26 | [0.94438016,0.70233214,0.5453055] + 2 | item2 | [0.8229326,0.487093,0.74873024] + 10 | item10 | [0.5982644,0.62692505,0.8370629] + 12 | item12 | [0.60868204,0.60846287,0.81605655] + 3 | item3 | [0.564357,0.9264987,0.8182802] + 11 | item11 | [0.6052496,0.62827665,0.7768472] + 22 | item22 
| [0.50915307,0.64196414,0.92056113] + 18 | item18 | [0.7649143,0.52333325,0.44966766] + 6 | item6 | [0.972023,0.9026957,0.10480514] + 17 | item17 | [0.76680326,0.16413908,0.5166021] + 8 | item8 | [0.49208698,0.6714301,0.71550953] + 5 | item5 | [0.9748988,0.37120444,0.09020154] + 23 | item23 | [0.43124378,0.9630822,0.7278073] + 7 | item7 | [0.34085152,0.6611158,0.979039] + 15 | item15 | [0.6947581,0.8751624,0.23263198] + 20 | item20 | [0.78923064,0.5646975,0.13470227] + 4 | item4 | [0.79131657,0.46089557,0.06720783] + 21 | item21 | [0.71745884,0.26742905,0.17897958] + 24 | item24 | [0.8275961,0.1344125,0.004161737] + 25 | item25 | [0.2582034,0.6599453,0.5974597] + 16 | item16 | [0.02151876,0.5658726,0.6430642] + 13 | item13 | [0.36143646,0.5525224,0.010793276] + 19 | item19 | [0.21292055,0.985841,0.093416855] + 1 | item1 | [0.211557,0.076130316,0.048887435] +(26 rows) + +CREATE TABLE items (id int, embedding vector(3)); +INSERT INTO items VALUES (1, '[1,2,3]'), (2, '[4,5,6]'), (3, '[7,8,9]'), (4, '[10,11,12]'), (5, '[13,14,15]'); +CREATE INDEX ON items USING ivfflat (embedding vector_l2_ops) WITH (lists = 100); +NOTICE: ivfflat index created with little data +DETAIL: This will cause low recall. +HINT: Drop the index until the table has more data. 
+set enable_seqscan=off; +SELECT /*+ indexscan(embedding, items_embedding_idx) */embedding FROM items ORDER BY embedding <-> '[3,1,2]' LIMIT 5; + embedding +------------ + [1,2,3] + [4,5,6] + [7,8,9] + [10,11,12] + [13,14,15] +(5 rows) + +set ivfflat_probes = 3; +SELECT /*+ indexscan(embedding, items_embedding_idx) */embedding FROM items ORDER BY embedding <-> '[3,1,2]' LIMIT 5; + embedding +------------ + [1,2,3] + [4,5,6] + [7,8,9] + [10,11,12] + [13,14,15] +(5 rows) + +CREATE TABLE t1 (val vector(4)); +INSERT INTO t1 (val) VALUES ('[0,0,0,0]'), ('[1,2,3,0]'), ('[1,1,1,0]'); +INSERT INTO t1 (val) VALUES ('[4,0,0,0]'), ('[4,2,3,0]'), ('[4,1,1,0]'); +INSERT INTO t1 (val) VALUES ('[25,20,20,22]'), ('[25,22,23,26]'), ('[25,21,21,21]'); +INSERT INTO t1 (val) VALUES ('[24,20,20,20]'), ('[24,22,23,20]'), ('[24,21,21,20]'); +CREATE INDEX ON t1 USING ivfflat (val vector_l2_ops) WITH (lists=2); +set enable_seqscan=off; +set ivfflat_probes = 1; +SELECT * FROM t1 ORDER BY val <-> '[1,2,3,4]'; + val +--------------- + [1,2,3,0] + [1,1,1,0] + [4,2,3,0] + [0,0,0,0] + [4,1,1,0] + [4,0,0,0] + [24,20,20,20] + [24,21,21,20] + [25,20,20,22] + [25,21,21,21] + [24,22,23,20] + [25,22,23,26] +(12 rows) + +\c regression +drop database test; diff --git a/src/test/regress/expected/insert_update_001_ustore_1.out b/src/test/regress/expected/insert_update_001_ustore_1.out index 0513028b2b6a35401d99cc3dbb8b2c3087a7ee4d..d6089a18373f3f7a26e1d4effba02900d20841c3 100644 --- a/src/test/regress/expected/insert_update_001_ustore_1.out +++ b/src/test/regress/expected/insert_update_001_ustore_1.out @@ -260,9 +260,6 @@ WITH RECURSIVE rq AS ) INSERT INTO t03 SELECT * FROM rq ON DUPLICATE KEY UPDATE col1 = rq.col1; ERROR: WITH clause is not yet supported whithin INSERT ON DUPLICATE KEY UPDATE statement. 
---- error: test returning clause -INSERT INTO t01 VALUES (1) ON DUPLICATE KEY UPDATE col1 = 1 RETURNING NOT(1::bool); -ERROR: RETURNING clause is not yet supported whithin INSERT ON DUPLICATE KEY UPDATE statement. --- error: distribute key are not allowed to UPDATE CREATE TABLE t04 (col1 INT, col2 INT) with(storage_type=ustore); INSERT INTO t04 VALUES (1) ON DUPLICATE KEY UPDATE col1 = 5; diff --git a/src/test/regress/expected/opr_sanity_2.out b/src/test/regress/expected/opr_sanity_2.out index c065a4418307cc4128383150280ab5341fd6d002..84422e9476d92cddfa0a0b91744f6dbe64088075 100644 --- a/src/test/regress/expected/opr_sanity_2.out +++ b/src/test/regress/expected/opr_sanity_2.out @@ -261,7 +261,17 @@ ORDER BY 1, 2, 3; 4439 | 5 | ~>~ 4444 | 1 | @@ 4444 | 2 | @@@ -(80 rows) + 8300 | 1 | <#> + 8300 | 1 | <%> + 8300 | 1 | <+> + 8300 | 1 | <-> + 8300 | 1 | <=> + 8300 | 1 | <~> + 8301 | 1 | <#> + 8301 | 1 | <-> + 8301 | 1 | <=> + 8301 | 1 | <~> +(90 rows) -- Check that all opclass search operators have selectivity estimators. 
-- This is not absolutely required, but it seems a reasonable thing @@ -420,6 +430,7 @@ WHERE p2.opfmethod = p1.oid AND p3.amprocfamily = p2.oid AND NOT BETWEEN (CASE WHEN p1.amname IN ('gist', 'gin') THEN p1.amsupport - 1 WHEN p1.amname IN ('btree', 'ubtree') THEN p1.amsupport - 2 + WHEN p1.amname IN ('ivfflat', 'hnsw') THEN p1.amsupport - 3 ELSE p1.amsupport END) AND p1.amsupport; amname | opfname | amproclefttype | amprocrighttype @@ -434,6 +445,7 @@ FROM pg_am am JOIN pg_opclass op ON opcmethod = am.oid LEFT JOIN pg_amproc p ON amprocfamily = opcfamily AND amproclefttype = amprocrighttype AND amproclefttype = opcintype WHERE am.amname <> 'btree' AND am.amname <> 'gist' AND am.amname <> 'gin' AND am.amname <> 'ubtree' + AND am.amname <> 'hnsw' AND am.amname <> 'ivfflat' GROUP BY amname, amsupport, opcname, amprocfamily HAVING count(*) != amsupport OR amprocfamily IS NULL; amname | opcname | count diff --git a/src/test/regress/expected/single_node_test_null_operator.out b/src/test/regress/expected/single_node_test_null_operator.out index bc2b1f34047c797eaa50c233439664ab79ca7332..6218306bf646a35c2e2173e0de66d962a7058315 100644 --- a/src/test/regress/expected/single_node_test_null_operator.out +++ b/src/test/regress/expected/single_node_test_null_operator.out @@ -5,15 +5,15 @@ LINE 1: SELECT 1 <=> 1; ^ HINT: No operator matches the given name and argument type(s). You might need to add explicit type casts. SELECT '' <=> NULL; -ERROR: operator does not exist: unknown <=> unknown +ERROR: operator is not unique: unknown <=> unknown LINE 1: SELECT '' <=> NULL; ^ -HINT: No operator matches the given name and argument type(s). You might need to add explicit type casts. +HINT: Could not choose a best candidate operator. You might need to add explicit type casts. 
SELECT NULL <=> NULL; -ERROR: operator does not exist: unknown <=> unknown +ERROR: operator is not unique: unknown <=> unknown LINE 1: SELECT NULL <=> NULL; ^ -HINT: No operator matches the given name and argument type(s). You might need to add explicit type casts. +HINT: Could not choose a best candidate operator. You might need to add explicit type casts. SELECT (1,2) <=> (1,2); ERROR: operator does not exist: integer <=> integer LINE 1: SELECT (1,2) <=> (1,2); diff --git a/src/test/regress/expected/upsert_001.out b/src/test/regress/expected/upsert_001.out index 4b9d909621db471b2267852fafe4f1537366a01c..08b2d666f4a9ad26bd5e78cac1470d895f53f744 100755 --- a/src/test/regress/expected/upsert_001.out +++ b/src/test/regress/expected/upsert_001.out @@ -279,9 +279,6 @@ WITH RECURSIVE rq AS ) INSERT INTO t03 SELECT * FROM rq ON DUPLICATE KEY UPDATE col1 = rq.col1; ERROR: WITH clause is not yet supported whithin INSERT ON DUPLICATE KEY UPDATE statement. ---- error: test returning clause -INSERT INTO t01 VALUES (1) ON DUPLICATE KEY UPDATE col1 = 1 RETURNING NOT(1::bool); -ERROR: RETURNING clause is not yet supported whithin INSERT ON DUPLICATE KEY UPDATE statement. 
--- error: distribute key are not allowed to UPDATE CREATE TABLE t04 (col1 INT, col2 INT) ; INSERT INTO t04 VALUES (1) ON DUPLICATE KEY UPDATE col1 = 5; diff --git a/src/test/regress/expected/upsert_restriction.out b/src/test/regress/expected/upsert_restriction.out index eb88a58be1b641273a0b9387ff2ef13f00083d68..2a69cb8b46369a6ce0433f6304b9d6583bbf805b 100644 --- a/src/test/regress/expected/upsert_restriction.out +++ b/src/test/regress/expected/upsert_restriction.out @@ -9,7 +9,6 @@ index DEFERABLE index insert stmt - returning with with recur update stmt @@ -55,9 +54,6 @@ ERROR: INSERT ON DUPLICATE KEY UPDATE does not support deferrable unique constr insert into up_neg_03 values(1) on duplicate key update c1 = 1; ERROR: INSERT ON DUPLICATE KEY UPDATE does not support deferrable unique constraints/exclusion constraints. -- insert stmt -----returning -insert into up_neg_04 values(1,1,1) on duplicate key update c1 = 1 returning c1; -ERROR: RETURNING clause is not yet supported whithin INSERT ON DUPLICATE KEY UPDATE statement. ----with with sub as (select *from up_neg_04) insert into up_neg_04 select *from sub on duplicate key update c1 =1; @@ -66,9 +62,6 @@ ERROR: WITH clause is not yet supported whithin INSERT ON DUPLICATE KEY UPDATE with RECURSIVE sub as (select *from up_neg_04) insert into up_neg_04 select *from sub on duplicate key update c1 =1; ERROR: WITH clause is not yet supported whithin INSERT ON DUPLICATE KEY UPDATE statement. -with sub as (select *from up_neg_04) -insert into up_neg_04 select *from sub on duplicate key update c1 =1 returning c1; -ERROR: RETURNING clause is not yet supported whithin INSERT ON DUPLICATE KEY UPDATE statement. 
-- update stmt ---- VALUES with expr insert into up_neg_05 values(1,1,1,1) on duplicate key update c3 = values(1+100); diff --git a/src/test/regress/expected/upsert_returning.out b/src/test/regress/expected/upsert_returning.out new file mode 100644 index 0000000000000000000000000000000000000000..50db044075d016667487a321fc832500cccaf85c --- /dev/null +++ b/src/test/regress/expected/upsert_returning.out @@ -0,0 +1,99 @@ +DROP SCHEMA test_upsert_returning CASCADE; +ERROR: schema "test_upsert_returning" does not exist +CREATE SCHEMA test_upsert_returning; +CREATE SCHEMA +SET CURRENT_SCHEMA TO test_upsert_returning; +CREATE TEMP TABLE foo (f1 int unique, f2 text, f3 int default 42); +NOTICE: CREATE TABLE / UNIQUE will create implicit index "foo_f1_key" for table "foo" +CREATE TEMP TABLE foo_2 (a int, b int); +INSERT INTO foo_2 select generate_series(1, 5), generate_series(1, 2); +INSERT INTO foo (f1, f2, f3) + VALUES (1, 'test', DEFAULT), (2, 'More', 11), (3, upper('more'), 7+9) ON DUPLICATE KEY UPDATE f3 = f3+1 + RETURNING *, f1+f3 AS sum; + f1 | f2 | f3 | sum +----+------+----+----- + 1 | test | 42 | 43 + 2 | More | 11 | 13 + 3 | MORE | 16 | 19 +(3 rows) + +SELECT * FROM foo ORDER BY f1; + f1 | f2 | f3 +----+------+---- + 1 | test | 42 + 2 | More | 11 + 3 | MORE | 16 +(3 rows) + +with t as +( + INSERT INTO foo SELECT f1+1, f2, f3+2 FROM foo order by f3 ON DUPLICATE KEY UPDATE f3 = f3+10 RETURNING foo.*, '1' +) +select * from t; + f1 | f2 | f3 | ?column? +----+------+----+---------- + 3 | MORE | 26 | 1 + 4 | MORE | 18 | 1 + 2 | More | 21 | 1 +(3 rows) + +-- error +INSERT INTO foo SELECT f1+1, f2, f3+2 FROM foo order by f3 ON DUPLICATE KEY UPDATE f1 = f1+1 RETURNING f3, f2, f1, least(f1,f3); +ERROR: INSERT ON DUPLICATE KEY UPDATE don't allow update on primary key or unique key.
+INSERT INTO foo SELECT f1+1, f2, f3+2 FROM foo order by f3 ON DUPLICATE KEY UPDATE f3 = f3+1 RETURNING f3, f2, f1, least(f1,f3); + f3 | f2 | f1 | least +----+------+----+------- + 20 | MORE | 5 | 5 + 27 | MORE | 3 | 3 + 19 | MORE | 4 | 4 + 22 | More | 2 | 2 +(4 rows) + +-- update nothing +INSERT INTO foo SELECT f1+1, f2, f3+2 FROM foo order by f3 ON DUPLICATE KEY UPDATE f3 = f3 RETURNING f3, f2, f1, least(f1,f3); + f3 | f2 | f1 | least +----+------+----+------- + 20 | MORE | 5 | 5 + 22 | MORE | 6 | 6 + 27 | MORE | 3 | 3 + 19 | MORE | 4 | 4 + 22 | More | 2 | 2 +(5 rows) + +SELECT * FROM foo ORDER BY f1; + f1 | f2 | f3 +----+------+---- + 1 | test | 42 + 2 | More | 22 + 3 | MORE | 27 + 4 | MORE | 19 + 5 | MORE | 20 + 6 | MORE | 22 +(6 rows) + +-- Subplans and initplans in the RETURNING list +INSERT INTO foo SELECT f1+1, f2, f3+99 FROM foo order by 1, 2, 3 ON DUPLICATE KEY UPDATE f3 =f3+10 RETURNING *, f1 - 3 IN +(SELECT b FROM foo_2) AS subplan, EXISTS(SELECT * FROM foo_2) AS initplan; + f1 | f2 | f3 | subplan | initplan +----+------+-----+---------+---------- + 2 | More | 32 | f | t + 3 | MORE | 37 | f | t + 4 | MORE | 29 | t | t + 5 | MORE | 30 | t | t + 6 | MORE | 32 | f | t + 7 | MORE | 121 | f | t +(6 rows) + +SELECT * FROM foo order by 1,2,3; + f1 | f2 | f3 +----+------+----- + 1 | test | 42 + 2 | More | 32 + 3 | MORE | 37 + 4 | MORE | 29 + 5 | MORE | 30 + 6 | MORE | 32 + 7 | MORE | 121 +(7 rows) + +DROP SCHEMA test_upsert_returning CASCADE; \ No newline at end of file diff --git a/src/test/regress/parallel_schedule0 b/src/test/regress/parallel_schedule0 index df0d8fb07cae0f872efd32443390ab18a7bd7073..522eb7131673f8c2d1f5fcd07375cee73d6b3703 100644 --- a/src/test/regress/parallel_schedule0 +++ b/src/test/regress/parallel_schedule0 @@ -10,6 +10,7 @@ # usecases for improving coverage # -------------------------- # these use cases may affect other usecases, please keep these usecases unique in their parallel groups +test: ai_vector test: 
timecapsule_partition_ustore_test_1 test: timecapsule_partition_ustore_test_2 test: ddl diff --git a/src/test/regress/pg_regress.cpp b/src/test/regress/pg_regress.cpp index c4bc35f71299b6c77c11fd56ff4d31ffcfba2ecb..e0b1980d9dc94e1d716237b5268d0461706bf0e2 100644 --- a/src/test/regress/pg_regress.cpp +++ b/src/test/regress/pg_regress.cpp @@ -5516,7 +5516,7 @@ static void check_global_variables() } } -#define BASE_PGXC_LIKE_MACRO_NUM 1382 +#define BASE_PGXC_LIKE_MACRO_NUM 1383 static void check_pgxc_like_macros() { #ifdef BUILD_BY_CMAKE diff --git a/src/test/regress/sql/ai_vector.sql b/src/test/regress/sql/ai_vector.sql new file mode 100644 index 0000000000000000000000000000000000000000..ce069defc61ad7f8db4f8f0d9b1e433cff6866af --- /dev/null +++ b/src/test/regress/sql/ai_vector.sql @@ -0,0 +1,59 @@ +create database test; +\c test +CREATE TABLE dimens3_scan_l2_100 ( + id serial PRIMARY KEY, + name text, + embedding vector(3) -- 假设向量的维度为512 +); +CREATE INDEX ON dimens3_scan_l2_100 USING hnsw (embedding vector_l2_ops); +INSERT INTO dimens3_scan_l2_100 (name, embedding) VALUES ('item1', '[0.211557005, 0.076130312, 0.048887434]'); +INSERT INTO dimens3_scan_l2_100 (name, embedding) VALUES ('item2', '[0.822932576, 0.487093015, 0.748730227]'); +INSERT INTO dimens3_scan_l2_100 (name, embedding) VALUES ('item3', '[0.564357002, 0.926498683, 0.818280197]'); +INSERT INTO dimens3_scan_l2_100 (name, embedding) VALUES ('item4', '[0.791316587, 0.460895557, 0.067207831]'); +INSERT INTO dimens3_scan_l2_100 (name, embedding) VALUES ('item5', '[0.974898792, 0.371204436, 0.090201541]'); +INSERT INTO dimens3_scan_l2_100 (name, embedding) VALUES ('item6', '[0.972023039, 0.902695732, 0.104805143]'); +INSERT INTO dimens3_scan_l2_100 (name, embedding) VALUES ('item7', '[0.340851511, 0.661115819, 0.979039013]'); +INSERT INTO dimens3_scan_l2_100 (name, embedding) VALUES ('item8', '[0.492086968, 0.671430133, 0.715509519]'); +INSERT INTO dimens3_scan_l2_100 (name, embedding) VALUES ('item9', 
'[0.815762744, 0.638665527, 0.940596042]'); +INSERT INTO dimens3_scan_l2_100 (name, embedding) VALUES ('item10', '[0.598264407, 0.626925064, 0.837062887]'); +INSERT INTO dimens3_scan_l2_100 (name, embedding) VALUES ('item11', '[0.605249576, 0.62827664, 0.776847171]'); +INSERT INTO dimens3_scan_l2_100 (name, embedding) VALUES ('item12', '[0.608682007, 0.608462876, 0.816056557]'); +INSERT INTO dimens3_scan_l2_100 (name, embedding) VALUES ('item13', '[0.361436461, 0.552522446, 0.010793276]'); +INSERT INTO dimens3_scan_l2_100 (name, embedding) VALUES ('item14', '[0.856674254, 0.395471228, 0.988986555]'); +INSERT INTO dimens3_scan_l2_100 (name, embedding) VALUES ('item15', '[0.694758094, 0.875162429, 0.232631982]'); +INSERT INTO dimens3_scan_l2_100 (name, embedding) VALUES ('item16', '[0.021518759, 0.565872631, 0.643064199]'); +INSERT INTO dimens3_scan_l2_100 (name, embedding) VALUES ('item17', '[0.766803278, 0.164139074, 0.516602078]'); +INSERT INTO dimens3_scan_l2_100 (name, embedding) VALUES ('item18', '[0.764914268, 0.523333259, 0.449667669]'); +INSERT INTO dimens3_scan_l2_100 (name, embedding) VALUES ('item19', '[0.212920548, 0.985840962, 0.093416858]'); +INSERT INTO dimens3_scan_l2_100 (name, embedding) VALUES ('item20', '[0.789230643, 0.564697507, 0.134702261]'); +INSERT INTO dimens3_scan_l2_100 (name, embedding) VALUES ('item21', '[0.717458865, 0.267429064, 0.178979577]'); +INSERT INTO dimens3_scan_l2_100 (name, embedding) VALUES ('item22', '[0.509153041, 0.641964123, 0.920561135]'); +INSERT INTO dimens3_scan_l2_100 (name, embedding) VALUES ('item23', '[0.431243764, 0.963082203, 0.727807302]'); +INSERT INTO dimens3_scan_l2_100 (name, embedding) VALUES ('item24', '[0.827596123, 0.134412498, 0.004161737]'); +INSERT INTO dimens3_scan_l2_100 (name, embedding) VALUES ('item25', '[0.258203395, 0.659945327, 0.597459666]'); +INSERT INTO dimens3_scan_l2_100 (name, embedding) VALUES ('item26', '[0.944380157, 0.702332123, 0.545305512]'); +set hnsw_ef_search=10; +analyze 
dimens3_scan_l2_100; +SELECT * FROM dimens3_scan_l2_100 ORDER BY embedding <-> '[3,1,2]' LIMIT 5; +SELECT * FROM dimens3_scan_l2_100 ORDER BY embedding <-> '[3,1,2]' LIMIT 20; +SELECT * FROM dimens3_scan_l2_100 ORDER BY embedding <-> '[3,1,2]'; + +CREATE TABLE items (id int, embedding vector(3)); +INSERT INTO items VALUES (1, '[1,2,3]'), (2, '[4,5,6]'), (3, '[7,8,9]'), (4, '[10,11,12]'), (5, '[13,14,15]'); +CREATE INDEX ON items USING ivfflat (embedding vector_l2_ops) WITH (lists = 100); +set enable_seqscan=off; +SELECT /*+ indexscan(embedding, items_embedding_idx) */embedding FROM items ORDER BY embedding <-> '[3,1,2]' LIMIT 5; +set ivfflat_probes = 3; +SELECT /*+ indexscan(embedding, items_embedding_idx) */embedding FROM items ORDER BY embedding <-> '[3,1,2]' LIMIT 5; + +CREATE TABLE t1 (val vector(4)); +INSERT INTO t1 (val) VALUES ('[0,0,0,0]'), ('[1,2,3,0]'), ('[1,1,1,0]'); +INSERT INTO t1 (val) VALUES ('[4,0,0,0]'), ('[4,2,3,0]'), ('[4,1,1,0]'); +INSERT INTO t1 (val) VALUES ('[25,20,20,22]'), ('[25,22,23,26]'), ('[25,21,21,21]'); +INSERT INTO t1 (val) VALUES ('[24,20,20,20]'), ('[24,22,23,20]'), ('[24,21,21,20]'); +CREATE INDEX ON t1 USING ivfflat (val vector_l2_ops) WITH (lists=2); +set enable_seqscan=off; +set ivfflat_probes = 1; +SELECT * FROM t1 ORDER BY val <-> '[1,2,3,4]'; +\c regression +drop database test; diff --git a/src/test/regress/sql/opr_sanity_2.sql b/src/test/regress/sql/opr_sanity_2.sql index 8fc4d2a7aacbbbde86b5ffe6ebcd7be4223d0216..73c13da9534e97af3703abac558269fba1edb74a 100644 --- a/src/test/regress/sql/opr_sanity_2.sql +++ b/src/test/regress/sql/opr_sanity_2.sql @@ -287,6 +287,7 @@ WHERE p2.opfmethod = p1.oid AND p3.amprocfamily = p2.oid AND NOT BETWEEN (CASE WHEN p1.amname IN ('gist', 'gin') THEN p1.amsupport - 1 WHEN p1.amname IN ('btree', 'ubtree') THEN p1.amsupport - 2 + WHEN p1.amname IN ('ivfflat', 'hnsw') THEN p1.amsupport - 3 ELSE p1.amsupport END) AND p1.amsupport; @@ -299,6 +300,7 @@ FROM pg_am am JOIN pg_opclass op ON opcmethod 
= am.oid LEFT JOIN pg_amproc p ON amprocfamily = opcfamily AND amproclefttype = amprocrighttype AND amproclefttype = opcintype WHERE am.amname <> 'btree' AND am.amname <> 'gist' AND am.amname <> 'gin' AND am.amname <> 'ubtree' + AND am.amname <> 'hnsw' AND am.amname <> 'ivfflat' GROUP BY amname, amsupport, opcname, amprocfamily HAVING count(*) != amsupport OR amprocfamily IS NULL; diff --git a/src/test/regress/sql/upsert_001.sql b/src/test/regress/sql/upsert_001.sql index 0205e4b8ae878b51e9e7fa5f99864f1563b39ff6..2d510229cc51d61552c936f137ded2bc0f9a8792 100644 --- a/src/test/regress/sql/upsert_001.sql +++ b/src/test/regress/sql/upsert_001.sql @@ -145,9 +145,6 @@ WITH RECURSIVE rq AS ) INSERT INTO t03 SELECT * FROM rq ON DUPLICATE KEY UPDATE col1 = rq.col1; ---- error: test returning clause -INSERT INTO t01 VALUES (1) ON DUPLICATE KEY UPDATE col1 = 1 RETURNING NOT(1::bool); - --- error: distribute key are not allowed to UPDATE CREATE TABLE t04 (col1 INT, col2 INT) ; INSERT INTO t04 VALUES (1) ON DUPLICATE KEY UPDATE col1 = 5; diff --git a/src/test/regress/sql/upsert_restriction.sql b/src/test/regress/sql/upsert_restriction.sql index 7c5c1012db596c26d916bf6484d03dfb458f6432..2d5bed7c105d87db99fb17caee06137cfb7521e1 100644 --- a/src/test/regress/sql/upsert_restriction.sql +++ b/src/test/regress/sql/upsert_restriction.sql @@ -9,7 +9,6 @@ index DEFERABLE index insert stmt - returning with with recur update stmt @@ -55,8 +54,6 @@ insert into up_neg_03 values(1,2,3) on duplicate key update c1 = 1; insert into up_neg_03 values(1) on duplicate key update c1 = 1; -- insert stmt -----returning -insert into up_neg_04 values(1,1,1) on duplicate key update c1 = 1 returning c1; ----with with sub as (select *from up_neg_04) insert into up_neg_04 select *from sub on duplicate key update c1 =1; @@ -65,9 +62,6 @@ insert into up_neg_04 select *from sub on duplicate key update c1 =1; with RECURSIVE sub as (select *from up_neg_04) insert into up_neg_04 select *from sub on duplicate key 
update c1 =1; -with sub as (select *from up_neg_04) -insert into up_neg_04 select *from sub on duplicate key update c1 =1 returning c1; - -- update stmt ---- VALUES with expr insert into up_neg_05 values(1,1,1,1) on duplicate key update c3 = values(1+100); diff --git a/src/test/regress/sql/upsert_returning.sql b/src/test/regress/sql/upsert_returning.sql new file mode 100644 index 0000000000000000000000000000000000000000..069b901ab67dd48a483d8a8f385ae6987fcfec3d --- /dev/null +++ b/src/test/regress/sql/upsert_returning.sql @@ -0,0 +1,40 @@ +DROP SCHEMA test_upsert_returning CASCADE; +CREATE SCHEMA test_upsert_returning; +SET CURRENT_SCHEMA TO test_upsert_returning; + + +CREATE TEMP TABLE foo (f1 int unique, f2 text, f3 int default 42); +CREATE TEMP TABLE foo_2 (a int, b int); +INSERT INTO foo_2 select generate_series(1, 5), generate_series(1, 2); + +INSERT INTO foo (f1, f2, f3) + VALUES (1, 'test', DEFAULT), (2, 'More', 11), (3, upper('more'), 7+9) ON DUPLICATE KEY UPDATE f3 = f3+1 + RETURNING *, f1+f3 AS sum; + +SELECT * FROM foo ORDER BY f1; + +with t as +( + INSERT INTO foo SELECT f1+1, f2, f3+2 FROM foo order by f3 ON DUPLICATE KEY UPDATE f3 = f3+10 RETURNING foo.*, '1' +) +select * from t; + + +-- error +INSERT INTO foo SELECT f1+1, f2, f3+2 FROM foo order by f3 ON DUPLICATE KEY UPDATE f1 = f1+1 RETURNING f3, f2, f1, least(f1,f3); + +INSERT INTO foo SELECT f1+1, f2, f3+2 FROM foo order by f3 ON DUPLICATE KEY UPDATE f3 = f3+1 RETURNING f3, f2, f1, least(f1,f3); + +-- update nothing +INSERT INTO foo SELECT f1+1, f2, f3+2 FROM foo order by f3 ON DUPLICATE KEY UPDATE f3 = f3 RETURNING f3, f2, f1, least(f1,f3); + +SELECT * FROM foo ORDER BY f1; + +-- Subplans and initplans in the RETURNING list + +INSERT INTO foo SELECT f1+1, f2, f3+99 FROM foo order by 1, 2, 3 ON DUPLICATE KEY UPDATE f3 =f3+10 RETURNING *, f1 - 3 IN +(SELECT b FROM foo_2) AS subplan, EXISTS(SELECT * FROM foo_2) AS initplan; + +SELECT * FROM foo order by 1,2,3; + +DROP SCHEMA test_upsert_returning 
CASCADE; \ No newline at end of file diff --git a/src/test/regress/wastebin/expected/insert_update_001.out b/src/test/regress/wastebin/expected/insert_update_001.out index 138d68059fb71d6c486e4e1808db48a734d0e2e4..69cea0349dd854f12b10eef722fa2eeb139fa86d 100644 --- a/src/test/regress/wastebin/expected/insert_update_001.out +++ b/src/test/regress/wastebin/expected/insert_update_001.out @@ -271,9 +271,6 @@ WITH RECURSIVE rq AS ) INSERT INTO t03 SELECT * FROM rq ON DUPLICATE KEY UPDATE col1 = rq.col1; ERROR: WITH clause is not yet supported whithin INSERT ON DUPLICATE KEY UPDATE statement. ---- error: test returning clause -INSERT INTO t01 VALUES (1) ON DUPLICATE KEY UPDATE col1 = 1 RETURNING NOT(1::bool); -ERROR: RETURNING clause is not yet supported whithin INSERT ON DUPLICATE KEY UPDATE statement. --- error: distribute key are not allowed to UPDATE CREATE TABLE t04 (col1 INT, col2 INT) ; INSERT INTO t04 VALUES (1) ON DUPLICATE KEY UPDATE col1 = 5; diff --git a/src/test/regress/wastebin/expected/insert_update_001_ustore.out b/src/test/regress/wastebin/expected/insert_update_001_ustore.out index 4390c6d958c83d685d521354a089b5ddad50ba24..f49d098323af8935fc63f2df1a45b22c90fe99b8 100644 --- a/src/test/regress/wastebin/expected/insert_update_001_ustore.out +++ b/src/test/regress/wastebin/expected/insert_update_001_ustore.out @@ -252,9 +252,6 @@ WITH RECURSIVE rq AS ) INSERT INTO t03 SELECT * FROM rq ON DUPLICATE KEY UPDATE col1 = rq.col1; ERROR: WITH clause is not yet supported whithin INSERT ON DUPLICATE KEY UPDATE statement. ---- error: test returning clause -INSERT INTO t01 VALUES (1) ON DUPLICATE KEY UPDATE col1 = 1 RETURNING NOT(1::bool); -ERROR: RETURNING clause is not yet supported whithin INSERT ON DUPLICATE KEY UPDATE statement. 
--- error: distribute key are not allowed to UPDATE CREATE TABLE t04 (col1 INT, col2 INT) with(storage_type=ustore); INSERT INTO t04 VALUES (1) ON DUPLICATE KEY UPDATE col1 = 5; diff --git a/src/test/regress/wastebin/sql/insert_update_001.sql b/src/test/regress/wastebin/sql/insert_update_001.sql index 031c4976c82f00aa06a803128233da3581d7d559..4e66781be0e81869115410bbe5c708ab15aa76cc 100644 --- a/src/test/regress/wastebin/sql/insert_update_001.sql +++ b/src/test/regress/wastebin/sql/insert_update_001.sql @@ -150,9 +150,6 @@ WITH RECURSIVE rq AS ) INSERT INTO t03 SELECT * FROM rq ON DUPLICATE KEY UPDATE col1 = rq.col1; ---- error: test returning clause -INSERT INTO t01 VALUES (1) ON DUPLICATE KEY UPDATE col1 = 1 RETURNING NOT(1::bool); - --- error: distribute key are not allowed to UPDATE CREATE TABLE t04 (col1 INT, col2 INT) ; INSERT INTO t04 VALUES (1) ON DUPLICATE KEY UPDATE col1 = 5; diff --git a/src/test/regress/wastebin/sql/insert_update_001_ustore.sql b/src/test/regress/wastebin/sql/insert_update_001_ustore.sql index 629bb0527dfd92baffb0721dc8e6bda5d8931576..6d29ebf3deec259d5a1a717ef14fa36b4fdaa2da 100644 --- a/src/test/regress/wastebin/sql/insert_update_001_ustore.sql +++ b/src/test/regress/wastebin/sql/insert_update_001_ustore.sql @@ -146,9 +146,6 @@ WITH RECURSIVE rq AS ) INSERT INTO t03 SELECT * FROM rq ON DUPLICATE KEY UPDATE col1 = rq.col1; ---- error: test returning clause -INSERT INTO t01 VALUES (1) ON DUPLICATE KEY UPDATE col1 = 1 RETURNING NOT(1::bool); - --- error: distribute key are not allowed to UPDATE CREATE TABLE t04 (col1 INT, col2 INT) with(storage_type=ustore); INSERT INTO t04 VALUES (1) ON DUPLICATE KEY UPDATE col1 = 5; diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index a63c8616263a2447067341f649aca93ea6bafd80..e7b989c49a16460b16b4a14a0466a94674a3cbb3 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -83,6 +83,8 @@ AnalyzeAttrComputeStatsFunc AnalyzeAttrFetchFunc 
AnalyzeForeignTable_function AnlIndexData +AnnIndexScan +AnnIndexScanState Append AppendPath AppendRelInfo