diff --git a/common_interfaces/base/common.h b/common_interfaces/base/common.h index 74ecf13ce1cbd8a7be2f5e51b04c1ff6d755a502..d2f908cf173894959fd20dc99c8693933758255d 100755 --- a/common_interfaces/base/common.h +++ b/common_interfaces/base/common.h @@ -96,56 +96,56 @@ namespace panda { NO_MOVE_OPERATOR_CC(TypeName) // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) -#define DEFAULT_MOVE_CTOR_CC(TypeName) \ -/* NOLINTNEXTLINE(misc-macro-parentheses) */ \ -TypeName(TypeName &&) = default +#define DEFAULT_MOVE_CTOR_CC(TypeName) \ + /* NOLINTNEXTLINE(misc-macro-parentheses) */ \ + TypeName(TypeName &&) = default // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) -#define DEFAULT_MOVE_OPERATOR_CC(TypeName) \ -/* NOLINTNEXTLINE(misc-macro-parentheses) */ \ -TypeName &operator=(TypeName &&) = default +#define DEFAULT_MOVE_OPERATOR_CC(TypeName) \ + /* NOLINTNEXTLINE(misc-macro-parentheses) */ \ + TypeName &operator=(TypeName &&) = default // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) -#define DEFAULT_MOVE_SEMANTIC_CC(TypeName) \ -DEFAULT_MOVE_CTOR_CC(TypeName); \ -DEFAULT_MOVE_OPERATOR_CC(TypeName) +#define DEFAULT_MOVE_SEMANTIC_CC(TypeName) \ + DEFAULT_MOVE_CTOR_CC(TypeName); \ + DEFAULT_MOVE_OPERATOR_CC(TypeName) // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) #define DEFAULT_COPY_CTOR_CC(TypeName) TypeName(const TypeName &) = default // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) -#define DEFAULT_COPY_OPERATOR_CC(TypeName) \ -/* NOLINTNEXTLINE(misc-macro-parentheses) */ \ -TypeName &operator=(const TypeName &) = default +#define DEFAULT_COPY_OPERATOR_CC(TypeName) \ + /* NOLINTNEXTLINE(misc-macro-parentheses) */ \ + TypeName &operator=(const TypeName &) = default // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) -#define DEFAULT_COPY_SEMANTIC_CC(TypeName) \ -DEFAULT_COPY_CTOR_CC(TypeName); \ -DEFAULT_COPY_OPERATOR_CC(TypeName) +#define DEFAULT_COPY_SEMANTIC_CC(TypeName) \ + DEFAULT_COPY_CTOR_CC(TypeName); \ + DEFAULT_COPY_OPERATOR_CC(TypeName) // 
NOLINTNEXTLINE(cppcoreguidelines-macro-usage) -#define DEFAULT_NOEXCEPT_MOVE_CTOR_CC(TypeName) \ -/* NOLINTNEXTLINE(misc-macro-parentheses) */ \ -TypeName(TypeName &&) noexcept = default +#define DEFAULT_NOEXCEPT_MOVE_CTOR_CC(TypeName) \ + /* NOLINTNEXTLINE(misc-macro-parentheses) */ \ + TypeName(TypeName &&) noexcept = default // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) #define DEFAULT_NOEXCEPT_MOVE_OPERATOR_CC(TypeName) \ -/* NOLINTNEXTLINE(misc-macro-parentheses) */ \ -TypeName &operator=(TypeName &&) noexcept = default + /* NOLINTNEXTLINE(misc-macro-parentheses) */ \ + TypeName &operator=(TypeName &&) noexcept = default // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) #define DEFAULT_NOEXCEPT_MOVE_SEMANTIC_CC(TypeName) \ -DEFAULT_NOEXCEPT_MOVE_CTOR_CC(TypeName); \ -DEFAULT_NOEXCEPT_MOVE_OPERATOR_CC(TypeName) + DEFAULT_NOEXCEPT_MOVE_CTOR_CC(TypeName); \ + DEFAULT_NOEXCEPT_MOVE_OPERATOR_CC(TypeName) #endif // defined(__cplusplus) // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) -#define ABORT_AND_UNREACHABLE_COMMON() \ - do { \ - std::cerr << "This line should be unreachable" << std::endl; \ - std::abort(); \ - __builtin_unreachable(); \ +#define ABORT_AND_UNREACHABLE_COMMON() \ + do { \ + std::cerr << "This line should be unreachable" << std::endl; \ + std::abort(); \ + __builtin_unreachable(); \ } while (0) // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) @@ -154,11 +154,11 @@ DEFAULT_NOEXCEPT_MOVE_OPERATOR_CC(TypeName) #if !defined(NDEBUG) // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) -#define ASSERT_FAIL_COMMON(expr) \ - do { \ - /* CC-OFFNXT(G.PRE.02) code readability */ \ - std::cerr << "ASSERT_FAILED: " << (expr) << std::endl; /* NOLINT(misc-static-assert) */ \ - __builtin_unreachable(); \ +#define ASSERT_FAIL_COMMON(expr) \ + do { \ + /* CC-OFFNXT(G.PRE.02) code readability */ \ + std::cerr << "ASSERT_FAILED: " << (expr) << std::endl; /* NOLINT(misc-static-assert) */ \ + __builtin_unreachable(); \ } while (0) // CC-OFFNXT(G.PRE.06) code 
readability @@ -191,28 +191,28 @@ DEFAULT_NOEXCEPT_MOVE_OPERATOR_CC(TypeName) // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) #define ASSERT_COMMON(cond) \ - if (UNLIKELY_CC(!(cond))) { \ - ASSERT_FAIL_COMMON(#cond); \ + if (UNLIKELY_CC(!(cond))) { \ + ASSERT_FAIL_COMMON(#cond); \ } // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) -#define ASSERT_DO_COMMON(cond, func) \ - do { \ - /* CC-OFFNXT(G.PRE.02) code readability */ \ - if (auto cond_val = cond; UNLIKELY_CC(!(cond_val))) { \ - func; \ - ASSERT_FAIL_COMMON(#cond); \ - } \ +#define ASSERT_DO_COMMON(cond, func) \ + do { \ + /* CC-OFFNXT(G.PRE.02) code readability */ \ + if (auto cond_val = cond; UNLIKELY_CC(!(cond_val))) { \ + func; \ + ASSERT_FAIL_COMMON(#cond); \ + } \ } while (0) // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) -#define ASSERT_PRINT_COMMON(cond, message) \ - do { \ - /* CC-OFFNXT(G.PRE.02) code readability */ \ - if (auto cond_val = cond; UNLIKELY_CC(!(cond_val))) { \ - /* CC-OFFNXT(G.PRE.02) code readability */ \ - std::cerr << (message) << std::endl; \ - ASSERT_FAIL_COMMON(#cond); \ - } \ +#define ASSERT_PRINT_COMMON(cond, message) \ + do { \ + /* CC-OFFNXT(G.PRE.02) code readability */ \ + if (auto cond_val = cond; UNLIKELY_CC(!(cond_val))) { \ + /* CC-OFFNXT(G.PRE.02) code readability */ \ + std::cerr << (message) << std::endl; \ + ASSERT_FAIL_COMMON(#cond); \ + } \ } while (0) // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) #define ASSERT_RETURN_COMMON(cond) assert(cond) @@ -223,20 +223,20 @@ DEFAULT_NOEXCEPT_MOVE_OPERATOR_CC(TypeName) ASSERT_PRINT_COMMON(false, "This line should be unreachable"); /* NOLINT(misc-static-assert) */ \ __builtin_unreachable(); \ } while (0) -#else // NDEBUG -#define ASSERT_COMMON(cond) static_cast(0) // NOLINT(cppcoreguidelines-macro-usage) -#define ASSERT_DO_COMMON(cond, func) static_cast(0) // NOLINT(cppcoreguidelines-macro-usage) +#else // NDEBUG +#define ASSERT_COMMON(cond) static_cast(0) // NOLINT(cppcoreguidelines-macro-usage) +#define 
ASSERT_DO_COMMON(cond, func) static_cast(0) // NOLINT(cppcoreguidelines-macro-usage) #define ASSERT_PRINT_COMMON(cond, message) static_cast(0) // NOLINT(cppcoreguidelines-macro-usage) #define ASSERT_RETURN_COMMON(cond) static_cast(cond) // NOLINT(cppcoreguidelines-macro-usage) #define UNREACHABLE_COMMON() ABORT_AND_UNREACHABLE_COMMON() // NOLINT(cppcoreguidelines-macro-usage) -#define ASSERT_OP_COMMON(lhs, op, rhs) // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) -#define CHECK_LE_COMMON(lhs, rhs) // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) -#define CHECK_LT_COMMON(lhs, rhs) // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) -#define CHECK_GE_COMMON(lhs, rhs) // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) -#define CHECK_GT_COMMON(lhs, rhs) // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) -#define CHECK_EQ_COMMON(lhs, rhs) // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) -#define CHECK_NE_COMMON(lhs, rhs) // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) -#endif // !NDEBUG +#define ASSERT_OP_COMMON(lhs, op, rhs) // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) +#define CHECK_LE_COMMON(lhs, rhs) // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) +#define CHECK_LT_COMMON(lhs, rhs) // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) +#define CHECK_GE_COMMON(lhs, rhs) // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) +#define CHECK_GT_COMMON(lhs, rhs) // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) +#define CHECK_EQ_COMMON(lhs, rhs) // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) +#define CHECK_NE_COMMON(lhs, rhs) // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) +#endif // !NDEBUG #ifdef __clang__ // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) @@ -247,7 +247,7 @@ DEFAULT_NOEXCEPT_MOVE_OPERATOR_CC(TypeName) #endif // NOLINTNEXTLINE(readability-identifier-naming) -enum class PUBLIC_API LOG_LEVEL : uint8_t { +enum class LOG_LEVEL : uint8_t { DEBUG = 0, INFO = 1, WARN = 2, @@ -257,7 +257,7 @@ enum class PUBLIC_API LOG_LEVEL : uint8_t { }; namespace common { - static 
constexpr size_t BITS_PER_BYTE = 8; +static constexpr size_t BITS_PER_BYTE = 8; } // namespace common } // namespace panda diff --git a/common_interfaces/base/runtime_param.h b/common_interfaces/base/runtime_param.h index 18bb4916b954fc6942be0976160a6866d85c9a1e..7b5399fe06020c8edb65f026db3363a6fdeb6563 100644 --- a/common_interfaces/base/runtime_param.h +++ b/common_interfaces/base/runtime_param.h @@ -20,161 +20,161 @@ namespace panda { /* -* @struct HeapParam -* @brief Data structure for Arkcommon heap configuration parameters,\n -* including the heap size, region size at runtime, and etc. -*/ + * @struct HeapParam + * @brief Data structure for Arkcommon heap configuration parameters,\n + * including the heap size, region size at runtime, and etc. + */ struct HeapParam { /* - * The reference value of region size, measured in KB, default to 64 KB, must be in range [4KB, 64KB]. - * It will be set to default value if assigned with 0. - */ + * The reference value of region size, measured in KB, default to 64 KB, must be in range [4KB, 64KB]. + * It will be set to default value if assigned with 0. + */ size_t regionSize; /* - * The maximum size of arkcommon heap, measured in KB, default to 256 * 1024 KB, must >= 4MB. - * It will be set to default value if assigned with 0. - */ + * The maximum size of arkcommon heap, measured in KB, default to 256 * 1024 KB, must >= 4MB. + * It will be set to default value if assigned with 0. + */ size_t heapSize; /* - * Threshold used to determine whether a region is exempted (i.e., will not be forwarded). - * If the percentage of live objects in a region is greater than this value, this region will not be exempted. - * Default to 0.8, must be in range (0, 1]. - * It will be set to default value if assigned with 0. - */ + * Threshold used to determine whether a region is exempted (i.e., will not be forwarded). + * If the percentage of live objects in a region is greater than this value, this region will not be exempted. 
+ * Default to 0.8, must be in range (0, 1]. + * It will be set to default value if assigned with 0. + */ double exemptionThreshold; /* - * A hint to guide collector to release physical memory to OS. - * heap utilization = heap-used-memory / total-heap-memory. - * During each gc, collector determines how much memory should be cached, - * and let the heap utilization be close to this value. - * Default to 0.80, must be in range (0, 1]. - * It will be set to default value if assigned with 0. - */ + * A hint to guide collector to release physical memory to OS. + * heap utilization = heap-used-memory / total-heap-memory. + * During each gc, collector determines how much memory should be cached, + * and let the heap utilization be close to this value. + * Default to 0.80, must be in range (0, 1]. + * It will be set to default value if assigned with 0. + */ double heapUtilization; /* - * The ratio to expand heap after each GC. - * GC is probably triggered more often if this value is set to an improperly small number. - * Default to 0.15, must > 0. - * It will be set to default value if assigned with 0. - */ + * The ratio to expand heap after each GC. + * GC is probably triggered more often if this value is set to an improperly small number. + * Default to 0.15, must > 0. + * It will be set to default value if assigned with 0. + */ double heapGrowth; /* - * The rate of allocating memory from heap. - * this value is the lower bound of the real allocation rate. - * allocator maybe wait some time if this value is set with an improperly small number. - * Mesured in MB/s, default to 10240 MB/s, must be > 0 MB/s. - * It will be set to default value if assigned with 0. - */ + * The rate of allocating memory from heap. + * this value is the lower bound of the real allocation rate. + * allocator maybe wait some time if this value is set with an improperly small number. + * Mesured in MB/s, default to 10240 MB/s, must be > 0 MB/s. 
+ * It will be set to default value if assigned with 0. + */ double allocationRate; /* - * The maximum wait time when allocating memory from heap. - * The latter alloction will wait a number of time if the two alloction interval is less than the wait time. - * The real wait time is the minimum of allocationWaitTime and the wait time calculated from real alloction rate. - * Measured in ns, default to 1000 ns, must > 0 ns. - * It will be set to default value if assigned with 0. - */ + * The maximum wait time when allocating memory from heap. + * The latter alloction will wait a number of time if the two alloction interval is less than the wait time. + * The real wait time is the minimum of allocationWaitTime and the wait time calculated from real alloction rate. + * Measured in ns, default to 1000 ns, must > 0 ns. + * It will be set to default value if assigned with 0. + */ size_t allocationWaitTime; }; /* -* @struct GCParam -* @brief Data structure for Arkcommon garbage collection configuration parameters,\n -* including the garbage ratio, garbage collection interval and etc. -*/ + * @struct GCParam + * @brief Data structure for Arkcommon garbage collection configuration parameters,\n + * including the garbage ratio, garbage collection interval and etc. + */ struct GCParam { /* - * Set false to disable GC, default is true - */ + * Set false to disable GC, default is true + */ bool enableGC; /* - * Set true swicth to stop-the-world GC, set false swicth to concurrent-copying GC, default is false - */ + * Set true swicth to stop-the-world GC, set false swicth to concurrent-copying GC, default is false + */ bool enableStwGC; /* - * GC will be triggered when heap allocated size is greater than this threshold. - * Measured in KB, must be > 0. - */ + * GC will be triggered when heap allocated size is greater than this threshold. + * Measured in KB, must be > 0. + */ size_t gcThreshold; /* - * The threshold used to determine whether to collect from-space during GC. 
- * The from-space will be collected if the percentage of the garbage in from space is greater than this threshold. - * default to 0.5, must be in range [0.1, 1.0]. - */ + * The threshold used to determine whether to collect from-space during GC. + * The from-space will be collected if the percentage of the garbage in from space is greater than this threshold. + * default to 0.5, must be in range [0.1, 1.0]. + */ double garbageThreshold; /* - * Minimum interval each GC request will be responded. If two adjacent GC requests with - * interval less than this value, the latter one is ignored. - * Measured in ns, default to 150 ms, must be > 0 ms. - * It will be set default value if the value is 0. - */ + * Minimum interval each GC request will be responded. If two adjacent GC requests with + * interval less than this value, the latter one is ignored. + * Measured in ns, default to 150 ms, must be > 0 ms. + * It will be set default value if the value is 0. + */ uint64_t gcInterval; /* - * Minimum interval each backup GC request will be responded. - * Backup GC will be triggered if there is no GC during this interval. - * Measured in ns, default to 240 s, must be > 0 s. - * It will be set default value if the value is 0. - */ + * Minimum interval each backup GC request will be responded. + * Backup GC will be triggered if there is no GC during this interval. + * Measured in ns, default to 240 s, must be > 0 s. + * It will be set default value if the value is 0. + */ uint64_t backupGCInterval; /* - * Parameters for adjusting the number of GC threads. - * The number of gc threads is ((the hardware concurrency / this value) - 1). - * default to 8, must be > 0. - * It will be set default value if the value is 0. - */ + * Parameters for adjusting the number of GC threads. + * The number of gc threads is ((the hardware concurrency / this value) - 1). + * default to 8, must be > 0. + * It will be set default value if the value is 0. 
+ */ uint32_t gcThreads; /* - * The maximum grow bytes of next heuristic gc threshold, the default value is 32MB; - */ + * The maximum grow bytes of next heuristic gc threshold, the default value is 32MB; + */ size_t maxGrowBytes; /* - * The minimum grow bytes of next heuristic gc threshold, the default value is 8MB; - */ + * The minimum grow bytes of next heuristic gc threshold, the default value is 8MB; + */ size_t minGrowBytes; /* - * Heruistic gc grow bytes multiplier, The default value of foreground is 1.0, background is 2.0. - */ + * Heruistic gc grow bytes multiplier, The default value of foreground is 1.0, background is 2.0. + */ double multiplier; /* - * Young gc throughput adjustment factor, the default value is 0.5. - */ + * Young gc throughput adjustment factor, the default value is 0.5. + */ double ygcRateAdjustment; /* - * The minimum remaining bytes for next heuristic gc, the default value is 128KB. - */ + * The minimum remaining bytes for next heuristic gc, the default value is 128KB. + */ size_t kMinConcurrentRemainingBytes; /* - * The maximum remaining bytes for next heuristic gc, the default value is 512KB. - */ + * The maximum remaining bytes for next heuristic gc, the default value is 512KB. + */ size_t kMaxConcurrentRemainingBytes; }; /* -* @struct RuntimeParam -* @brief Data structure for Arkcommon runtime parameters,\n -* including the config information of heap, garbage collection, thread and log. -*/ + * @struct RuntimeParam + * @brief Data structure for Arkcommon runtime parameters,\n + * including the config information of heap, garbage collection, thread and log. 
+ */ struct RuntimeParam { struct HeapParam heapParam; struct GCParam gcParam; }; -} // panda +} // namespace panda #endif // COMMON_INTERFACES_BASE_RUNTIME_PARAM_H diff --git a/common_interfaces/base_runtime.h b/common_interfaces/base_runtime.h index 6da45882952a192550f78da28676675aa48ff632..942f9b17cd74ba2a5c6abdead06c82bc6613a3a8 100644 --- a/common_interfaces/base_runtime.h +++ b/common_interfaces/base_runtime.h @@ -35,11 +35,11 @@ enum class GcType : uint8_t { SYNC, FULL, // Waiting finish APPSPAWN, - FULL_WITH_XREF, // Waiting finish + FULL_WITH_XREF, // Waiting finish }; -using HeapVisitor = const std::function; +using HeapVisitor = const std::function; -class PUBLIC_API BaseRuntime { +class PUBLIC_API BaseRuntime { // NOLINT(cppcoreguidelines-special-member-functions) public: BaseRuntime() = default; ~BaseRuntime() = default; @@ -51,19 +51,19 @@ public: void PostFork(); bool HasBeenInitialized(); - void Init(const RuntimeParam ¶m); // Support setting custom parameters - void Init(); // Use default parameters + void Init(const RuntimeParam ¶m); // Support setting custom parameters + void Init(); // Use default parameters void InitFromDynamic(const RuntimeParam ¶m); void Fini(); void FiniFromDynamic(); // Need refactor, move to other file - static void WriteBarrier(void* obj, void* field, void* ref); - static void* ReadBarrier(void* obj, void* field); - static void* ReadBarrier(void* field); - static void* AtomicReadBarrier(void* obj, void* field, std::memory_order order); + static void WriteBarrier(void *obj, void *field, void *ref); + static void *ReadBarrier(void *obj, void *field); + static void *ReadBarrier(void *field); + static void *AtomicReadBarrier(void *obj, void *field, std::memory_order order); static void RequestGC(GcType type); - static bool ForEachObj(HeapVisitor& visitor, bool safe); + static bool ForEachObj(HeapVisitor &visitor, bool safe); static void WaitForGCFinish(); HeapParam &GetHeapParam() @@ -90,17 +90,18 @@ public: { return 
*heapManager_; } + private: RuntimeParam param_ {}; - HeapManager* heapManager_ = nullptr; - LogManager* logManager_ = nullptr; - MutatorManager* mutatorManager_ = nullptr; - ThreadHolderManager* threadHolderManager_ = nullptr; + HeapManager *heapManager_ = nullptr; + LogManager *logManager_ = nullptr; + MutatorManager *mutatorManager_ = nullptr; + ThreadHolderManager *threadHolderManager_ = nullptr; static std::mutex vmCreationLock_; static BaseRuntime *baseRuntimeInstance_; static bool initialized_; }; } // namespace panda -#endif // COMMON_INTERFACES_BASE_RUNTIME_H +#endif // COMMON_INTERFACES_BASE_RUNTIME_H diff --git a/common_interfaces/objects/readonly_handle.h b/common_interfaces/objects/readonly_handle.h index ec5955a6e34f473a9cd3ca84c3f79f88df13fbf5..ba8c71228bff387be50ec86f68458f218d4c7560 100644 --- a/common_interfaces/objects/readonly_handle.h +++ b/common_interfaces/objects/readonly_handle.h @@ -1,5 +1,5 @@ /* -* Copyright (c) 2025 Huawei Device Co., Ltd. + * Copyright (c) 2025 Huawei Device Co., Ltd. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at @@ -50,7 +50,7 @@ public: return ReadOnlyHandle(handle.GetAddress()); } - BaseObject* GetBaseObject() const + BaseObject *GetBaseObject() const { if (GetAddress() == 0U) { return nullptr; @@ -89,9 +89,10 @@ public: { return reinterpret_cast(GetBaseObject()); } + private: uintptr_t address_; }; -} +} // namespace panda -#endif //COMMON_INTERFACES_OBJECTS_READONLY_HANDLE_H +#endif // COMMON_INTERFACES_OBJECTS_READONLY_HANDLE_H diff --git a/common_interfaces/objects/string/base_string-inl.h b/common_interfaces/objects/string/base_string-inl.h index 2d26298961885b8812e5a3d39866ea5b0716844e..dda0bc7516da38222b4b5943715609411e162418 100644 --- a/common_interfaces/objects/string/base_string-inl.h +++ b/common_interfaces/objects/string/base_string-inl.h @@ -24,12 +24,13 @@ #include "objects/utils/span.h" #include "securec.h" +// NOLINTBEGIN(cppcoreguidelines-pro-bounds-pointer-arithmetic, readability-magic-numbers) namespace panda { template size_t BaseString::GetUtf8Length(ReadBarrier &&readBarrier, bool modify, bool isGetBufferSize) const { if (!IsUtf16()) { - return GetLength() + 1; // add place for zero in the end + return GetLength() + 1; // add place for zero in the end } std::vector tmpBuf; const uint16_t *data = GetUtf16DataFlat(std::forward(readBarrier), this, tmpBuf); @@ -71,22 +72,22 @@ inline ObjectType BaseString::GetStringType() const return type; } -template +template uint16_t BaseString::At(ReadBarrier &&readBarrier, int32_t index) const { - int32_t length = static_cast(GetLength()); - if constexpr (verify) { + int32_t length = static_cast(GetLength()); // NOLINT(modernize-use-auto) + if constexpr (VERIFY) { if ((index < 0) || (index >= length)) { return 0; } } switch (GetStringType()) { case ObjectType::LINE_STRING: - return LineString::ConstCast(this)->Get(index); + return LineString::ConstCast(this)->Get(index); case ObjectType::SLICED_STRING: - return SlicedString::ConstCast(this)->Get(std::forward(readBarrier), 
index); + return SlicedString::ConstCast(this)->Get(std::forward(readBarrier), index); case ObjectType::TREE_STRING: - return TreeString::ConstCast(this)->Get(std::forward(readBarrier), index); + return TreeString::ConstCast(this)->Get(std::forward(readBarrier), index); default: UNREACHABLE_COMMON(); } @@ -153,10 +154,10 @@ void BaseString::WriteToFlat(ReadBarrier &&readBarrier, const BaseString *src, C } case ObjectType::TREE_STRING: { const TreeString *treeSrc = TreeString::ConstCast(src); - BaseString *left = BaseString::Cast( - treeSrc->GetLeftSubString(std::forward(readBarrier))); - BaseString *right = BaseString::Cast( - treeSrc->GetRightSubString(std::forward(readBarrier))); + BaseString *left = + BaseString::Cast(treeSrc->GetLeftSubString(std::forward(readBarrier))); + BaseString *right = + BaseString::Cast(treeSrc->GetRightSubString(std::forward(readBarrier))); uint32_t leftLength = left->GetLength(); uint32_t rightLength = right->GetLength(); // NOLINTNEXTLINE(C_RULE_ID_FUNCTION_NESTING_LEVEL) @@ -181,8 +182,8 @@ void BaseString::WriteToFlat(ReadBarrier &&readBarrier, const BaseString *src, C // CC-OFFNXT(G.FUN.01-CPP) solid logic if (length > leftLength) { if (rightLength == 1) { - buf[leftLength] = static_cast(right->At( - std::forward(readBarrier), 0)); + buf[leftLength] = + static_cast(right->At(std::forward(readBarrier), 0)); } else if ((right->IsLineString()) && right->IsUtf8()) { CopyChars(buf + leftLength, LineString::Cast(right)->GetDataUtf8(), rightLength); } else { @@ -200,13 +201,13 @@ void BaseString::WriteToFlat(ReadBarrier &&readBarrier, const BaseString *src, C BaseString *parent = BaseString::Cast( SlicedString::ConstCast(src)->GetParent(std::forward(readBarrier))); if (src->IsUtf8()) { - CopyChars( - buf, LineString::Cast(parent)->GetDataUtf8() + SlicedString::ConstCast(src)->GetStartIndex(), - length); + CopyChars(buf, + LineString::Cast(parent)->GetDataUtf8() + SlicedString::ConstCast(src)->GetStartIndex(), + length); } else { - 
CopyChars( - buf, LineString::Cast(parent)->GetDataUtf16() + SlicedString::ConstCast(src)->GetStartIndex(), - length); + CopyChars(buf, + LineString::Cast(parent)->GetDataUtf16() + SlicedString::ConstCast(src)->GetStartIndex(), + length); } return; } @@ -216,13 +217,12 @@ void BaseString::WriteToFlat(ReadBarrier &&readBarrier, const BaseString *src, C } } - template void BaseString::WriteToFlatWithPos(ReadBarrier &&readBarrier, BaseString *src, Char *buf, uint32_t length, uint32_t pos) { // DISALLOW_GARBAGE_COLLECTION; - [[ maybe_unused ]] uint32_t maxLength = src->GetLength(); + [[maybe_unused]] uint32_t maxLength = src->GetLength(); if (length == 0) { return; } @@ -241,8 +241,8 @@ void BaseString::WriteToFlatWithPos(ReadBarrier &&readBarrier, BaseString *src, } case ObjectType::TREE_STRING: { TreeString *treeSrc = TreeString::Cast(src); - BaseString *left = BaseString::Cast( - treeSrc->GetLeftSubString(std::forward(readBarrier))); + BaseString *left = + BaseString::Cast(treeSrc->GetLeftSubString(std::forward(readBarrier))); ASSERT_COMMON(left->IsLineString()); src = left; continue; @@ -251,13 +251,13 @@ void BaseString::WriteToFlatWithPos(ReadBarrier &&readBarrier, BaseString *src, BaseString *parent = BaseString::Cast( SlicedString::Cast(src)->GetParent(std::forward(readBarrier))); if (src->IsUtf8()) { - CopyChars( - buf, LineString::Cast(parent)->GetDataUtf8() + SlicedString::Cast(src)->GetStartIndex() + pos, - length); + CopyChars(buf, + LineString::Cast(parent)->GetDataUtf8() + SlicedString::Cast(src)->GetStartIndex() + pos, + length); } else { - CopyChars( - buf, LineString::Cast(parent)->GetDataUtf16() + SlicedString::Cast(src)->GetStartIndex() + pos, - length); + CopyChars(buf, + LineString::Cast(parent)->GetDataUtf16() + SlicedString::Cast(src)->GetStartIndex() + pos, + length); } return; } @@ -272,18 +272,17 @@ template size_t BaseString::WriteUtf8(ReadBarrier &&readBarrier, uint8_t *buf, size_t maxLength, bool isWriteBuffer) const { if (maxLength == 0) 
{ - return 1; // maxLength was -1 at napi + return 1; // maxLength was -1 at napi } - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) buf[maxLength - 1] = '\0'; return CopyDataRegionUtf8(std::forward(readBarrier), buf, 0, GetLength(), maxLength, true, - isWriteBuffer) + 1; + isWriteBuffer) + + 1; } // It allows user to copy into buffer even if maxLength < length template -size_t BaseString::WriteUtf16(ReadBarrier &&readBarrier, uint16_t *buf, uint32_t targetLength, - uint32_t bufLength) const +size_t BaseString::WriteUtf16(ReadBarrier &&readBarrier, uint16_t *buf, uint32_t targetLength, uint32_t bufLength) const { if (bufLength == 0) { return 0; @@ -292,14 +291,12 @@ size_t BaseString::WriteUtf16(ReadBarrier &&readBarrier, uint16_t *buf, uint32_t return CopyDataToUtf16(std::forward(readBarrier), buf, targetLength, bufLength); } - template size_t BaseString::WriteOneByte(ReadBarrier &&readBarrier, uint8_t *buf, size_t maxLength) const { if (maxLength == 0) { return 0; } - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) buf[maxLength - 1] = '\0'; uint32_t length = GetLength(); if (!IsUtf16()) { @@ -322,7 +319,6 @@ size_t BaseString::WriteOneByte(ReadBarrier &&readBarrier, uint8_t *buf, size_t return panda::utf_utils::ConvertRegionUtf16ToLatin1(data, buf, length, maxLength); } - template uint32_t BaseString::CopyDataUtf16(ReadBarrier &&readBarrier, uint16_t *buf, uint32_t maxLength) const { @@ -331,7 +327,6 @@ uint32_t BaseString::CopyDataUtf16(ReadBarrier &&readBarrier, uint16_t *buf, uin return 0; } if (IsUtf16()) { - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) std::vector tmpBuf; const uint16_t *data = GetUtf16DataFlat(std::forward(readBarrier), this, tmpBuf); if (memcpy_s(buf, maxLength * sizeof(uint16_t), data, length * sizeof(uint16_t)) != EOK) { @@ -344,7 +339,6 @@ uint32_t BaseString::CopyDataUtf16(ReadBarrier &&readBarrier, uint16_t *buf, uin return panda::utf_utils::ConvertRegionUtf8ToUtf16(data, 
buf, length, maxLength); } - template , uint8_t>, int>> common::Span BaseString::ToUtf8Span(ReadBarrier &&readBarrier, Vec &buf, bool modify, bool cesu8) @@ -367,7 +361,6 @@ common::Span BaseString::ToUtf8Span(ReadBarrier &&readBarrier, Ve return str; } - template , uint8_t>, int>> common::Span BaseString::DebuggerToUtf8Span(ReadBarrier &&readBarrier, Vec &buf, bool modify) @@ -423,7 +416,6 @@ const uint8_t *BaseString::GetUtf8DataFlat(ReadBarrier &&readBarrier, const Base return LineString::ConstCast(src)->GetDataUtf8(); } - template size_t BaseString::CopyDataRegionUtf8(ReadBarrier &&readBarrier, uint8_t *buf, size_t start, size_t length, size_t maxLength, bool modify, bool isWriteBuffer) const @@ -448,16 +440,14 @@ size_t BaseString::CopyDataRegionUtf8(ReadBarrier &&readBarrier, uint8_t *buf, s std::vector tmpBuf; const uint16_t *data = GetUtf16DataFlat(std::forward(readBarrier), this, tmpBuf); if (length > maxLength) { - return panda::utf_utils::ConvertRegionUtf16ToUtf8(data, buf, maxLength, maxLength, start, - modify, isWriteBuffer); + return panda::utf_utils::ConvertRegionUtf16ToUtf8(data, buf, maxLength, maxLength, start, modify, + isWriteBuffer); } - return panda::utf_utils::ConvertRegionUtf16ToUtf8(data, buf, length, maxLength, start, - modify, isWriteBuffer); + return panda::utf_utils::ConvertRegionUtf16ToUtf8(data, buf, length, maxLength, start, modify, isWriteBuffer); } template -size_t BaseString::CopyDataToUtf16(ReadBarrier &&readBarrier, uint16_t *buf, uint32_t length, - uint32_t bufLength) const +size_t BaseString::CopyDataToUtf16(ReadBarrier &&readBarrier, uint16_t *buf, uint32_t length, uint32_t bufLength) const { if (IsUtf16()) { std::vector tmpBuf; @@ -542,7 +532,7 @@ uint32_t BaseString::ComputeRawHashcode(ReadBarrier &&readBarrier) const const uint8_t *data = BaseString::GetUtf8DataFlat(std::forward(readBarrier), this, buf); // String can not convert to integer number, using normal hashcode computing algorithm. 
return ComputeHashForData(data, length, 0); - } else { + } else { // NOLINT(readability-else-after-return) std::vector buf; const uint16_t *data = BaseString::GetUtf16DataFlat(std::forward(readBarrier), this, buf); // If rawSeed has certain value, and second string uses UTF16 encoding, @@ -551,7 +541,6 @@ uint32_t BaseString::ComputeRawHashcode(ReadBarrier &&readBarrier) const } } - template bool BaseString::EqualToSplicedString(ReadBarrier &&readBarrier, const BaseString *str1, const BaseString *str2) { @@ -573,8 +562,7 @@ bool BaseString::EqualToSplicedString(ReadBarrier &&readBarrier, const BaseStrin if (BaseString::StringIsEqualUint8Data(std::forward(readBarrier), str1, data, str1->GetLength(), this->IsUtf8())) { return BaseString::StringIsEqualUint8Data(std::forward(readBarrier), str2, - data + str1->GetLength(), - str2->GetLength(), this->IsUtf8()); + data + str1->GetLength(), str2->GetLength(), this->IsUtf8()); } } return false; @@ -611,7 +599,6 @@ const uint8_t *BaseString::GetNonTreeUtf8Data(ReadBarrier &&readBarrier, const B return LineString::ConstCast(src)->GetDataUtf8(); } - template const uint16_t *BaseString::GetNonTreeUtf16Data(ReadBarrier &&readBarrier, const BaseString *src) { @@ -626,7 +613,6 @@ const uint16_t *BaseString::GetNonTreeUtf16Data(ReadBarrier &&readBarrier, const return LineString::ConstCast(src)->GetDataUtf16(); } - /* static */ template bool BaseString::StringsAreEqualDiffUtfEncoding(ReadBarrier &&readBarrier, BaseString *left, BaseString *right) @@ -635,33 +621,33 @@ bool BaseString::StringsAreEqualDiffUtfEncoding(ReadBarrier &&readBarrier, BaseS std::vector bufRightUft16; std::vector bufLeftUft8; std::vector bufRightUft8; - int32_t lhsCount = static_cast(left->GetLength()); - int32_t rhsCount = static_cast(right->GetLength()); + int32_t lhsCount = static_cast(left->GetLength()); // NOLINT(modernize-use-auto) + int32_t rhsCount = static_cast(right->GetLength()); // NOLINT(modernize-use-auto) if (!left->IsUtf16() && 
!right->IsUtf16()) { const uint8_t *data1 = BaseString::GetUtf8DataFlat(std::forward(readBarrier), left, bufLeftUft8); const uint8_t *data2 = BaseString::GetUtf8DataFlat(std::forward(readBarrier), right, bufRightUft8); common::Span lhsSp(data1, lhsCount); common::Span rhsSp(data2, rhsCount); return BaseString::StringsAreEquals(lhsSp, rhsSp); - } else if (!left->IsUtf16()) { + } else if (!left->IsUtf16()) { // NOLINT(readability-else-after-return) const uint8_t *data1 = BaseString::GetUtf8DataFlat(std::forward(readBarrier), left, bufLeftUft8); - const uint16_t *data2 = BaseString::GetUtf16DataFlat(std::forward(readBarrier), right, - bufRightUft16); + const uint16_t *data2 = + BaseString::GetUtf16DataFlat(std::forward(readBarrier), right, bufRightUft16); common::Span lhsSp(data1, lhsCount); common::Span rhsSp(data2, rhsCount); return BaseString::StringsAreEquals(lhsSp, rhsSp); - } else if (!right->IsUtf16()) { + } else if (!right->IsUtf16()) { // NOLINT(readability-else-after-return) const uint16_t *data1 = BaseString::GetUtf16DataFlat(std::forward(readBarrier), left, bufLeftUft16); const uint8_t *data2 = BaseString::GetUtf8DataFlat(std::forward(readBarrier), right, bufRightUft8); common::Span lhsSp(data1, lhsCount); common::Span rhsSp(data2, rhsCount); return BaseString::StringsAreEquals(lhsSp, rhsSp); - } else { + } else { // NOLINT(readability-else-after-return) const uint16_t *data1 = BaseString::GetUtf16DataFlat(std::forward(readBarrier), left, bufLeftUft16); - const uint16_t *data2 = BaseString::GetUtf16DataFlat(std::forward(readBarrier), right, - bufRightUft16); + const uint16_t *data2 = + BaseString::GetUtf16DataFlat(std::forward(readBarrier), right, bufRightUft16); common::Span lhsSp(data1, lhsCount); common::Span rhsSp(data2, rhsCount); return BaseString::StringsAreEquals(lhsSp, rhsSp); @@ -695,7 +681,6 @@ bool BaseString::StringsAreEqual(ReadBarrier &&readBarrier, BaseString *str1, Ba return StringsAreEqualDiffUtfEncoding(std::forward(readBarrier), str1, 
str2); } - /* static */ template bool BaseString::StringIsEqualUint8Data(ReadBarrier &&readBarrier, const BaseString *str1, const uint8_t *dataAddr, @@ -710,8 +695,7 @@ bool BaseString::StringIsEqualUint8Data(ReadBarrier &&readBarrier, const BaseStr if (str1->IsUtf8()) { std::vector buf; common::Span data1( - BaseString::GetUtf8DataFlat(std::forward(readBarrier), str1, buf), - dataLen); + BaseString::GetUtf8DataFlat(std::forward(readBarrier), str1, buf), dataLen); common::Span data2(dataAddr, dataLen); return BaseString::StringsAreEquals(data1, data2); } @@ -734,7 +718,7 @@ bool BaseString::StringsAreEqualUtf16(ReadBarrier &&readBarrier, const BaseStrin std::vector buf; const uint8_t *data = BaseString::GetUtf8DataFlat(std::forward(readBarrier), str1, buf); return IsUtf8EqualsUtf16(data, length, utf16Data, utf16Len); - } else { + } else { // NOLINT(readability-else-after-return) std::vector buf; common::Span data1( BaseString::GetUtf16DataFlat(std::forward(readBarrier), str1, buf), length); @@ -784,7 +768,7 @@ int32_t BaseString::LastIndexOf(common::Span &lhsSp, common::Span int32_t BaseString::IndexOf(common::Span &lhsSp, common::Span &rhsSp, int32_t pos, int32_t max) { - ASSERT_COMMON(rhsSp.size() > 0); + ASSERT_COMMON(rhsSp.size() > 0); // NOLINT(readability-container-size-empty) auto first = static_cast(rhsSp[0]); for (int32_t i = pos; i <= max; i++) { if (static_cast(lhsSp[i]) != first) { @@ -815,12 +799,12 @@ int32_t BaseString::IndexOf(common::Span &lhsSp, common::Span tmpBuffer(utf16Len); - [[maybe_unused]] auto len = panda::utf_utils::ConvertRegionUtf8ToUtf16(utf8Data, tmpBuffer.data(), utf8Len, - utf16Len); + [[maybe_unused]] auto len = + panda::utf_utils::ConvertRegionUtf8ToUtf16(utf8Data, tmpBuffer.data(), utf8Len, utf16Len); ASSERT_COMMON(len == utf16Len); return ComputeHashForData(tmpBuffer.data(), utf16Len, 0); } @@ -915,7 +900,6 @@ static size_t FixUtf8Len(const uint8_t *utf8, size_t utf8Len) return utf8Len - trimSize; } - /* static */ // 
CC-OFFNXT(C_RULE_ID_INLINE_FUNCTION_SIZE) Perf critical common runtime code stub // CC-OFFNXT(G.FUD.06) perf critical @@ -940,7 +924,7 @@ inline bool BaseString::IsUtf8EqualsUtf16(const uint8_t *utf8Data, size_t utf8Le const uint16_t *utf16End = utf16Data + utf16Len; while (utf8Data < utf8SafeEnd && utf16Data < utf16End) { uint8_t src = *utf8Data; - switch (src & 0xF0) { + switch (src & 0xF0) { // NOLINT(hicpp-signed-bitwise) case 0xF0: { const uint8_t c2 = *(++utf8Data); const uint8_t c3 = *(++utf8Data); @@ -957,6 +941,7 @@ inline bool BaseString::IsUtf8EqualsUtf16(const uint8_t *utf8Data, size_t utf8Le if (*utf16Data++ != static_cast((codePoint >> OFFSET_10POS) | H_SURROGATE_START)) { return false; // CC-OFFNXT(G.FUN.01-CPP) solid logic + // NOLINTNEXTLINE(hicpp-signed-bitwise, readability-else-after-return) } else if (*utf16Data++ != static_cast((codePoint & 0x3FF) | L_SURROGATE_START)) { return false; } @@ -1007,6 +992,8 @@ inline bool BaseString::IsUtf8EqualsUtf16(const uint8_t *utf8Data, size_t utf8Le } return utf8Data == utf8End && utf16Data == utf16End; } -} // namespace panda::ecmascript +} // namespace panda + +#endif // COMMON_INTERFACES_OBJECTS_STRING_BASE_STRING_IMPL_H -#endif //COMMON_INTERFACES_OBJECTS_STRING_BASE_STRING_IMPL_H \ No newline at end of file +// NOLINTEND(cppcoreguidelines-pro-bounds-pointer-arithmetic, readability-magic-numbers) \ No newline at end of file diff --git a/common_interfaces/objects/string/base_string.h b/common_interfaces/objects/string/base_string.h index 3d5eeb637cba6130e6d961db9dab16be54e6cbf4..45f72eee97e6f444164a254c9507223a1f3cbbc5 100644 --- a/common_interfaces/objects/string/base_string.h +++ b/common_interfaces/objects/string/base_string.h @@ -57,6 +57,7 @@ class SlicedString; * * Provides common interface for string types like LineString, TreeString, and SlicedString. 
*/ +// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) class BaseString : public BaseObject { public: BASE_CAST_CHECK(BaseString, IsString); @@ -221,7 +222,7 @@ public: * @param index The index to retrieve. * @return UTF-16 code unit. */ - template + template uint16_t At(ReadBarrier &&readBarrier, int32_t index) const; /** @@ -486,7 +487,7 @@ public: * @return true if the strings are equal in character content; false otherwise. */ template - static bool StringsAreEqualDiffUtfEncoding(ReadBarrier &&readBarrier, BaseString *str1, BaseString *str2); + static bool StringsAreEqualDiffUtfEncoding(ReadBarrier &&readBarrier, BaseString *left, BaseString *right); /** * @brief Compare a BaseString with raw UTF-8 data for equality. diff --git a/common_interfaces/objects/string/line_string-inl.h b/common_interfaces/objects/string/line_string-inl.h index 65bc52f376b8e6ef44cc143e84864566f01557b7..87144b86a923b00f91314b59f40e85918b4d78d1 100644 --- a/common_interfaces/objects/string/line_string-inl.h +++ b/common_interfaces/objects/string/line_string-inl.h @@ -1,5 +1,5 @@ /* -* Copyright (c) 2025 Huawei Device Co., Ltd. + * Copyright (c) 2025 Huawei Device Co., Ltd. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at @@ -20,6 +20,7 @@ #include "objects/string/line_string.h" #include "objects/utils/utf_utils.h" #include "securec.h" +#include namespace panda { template > @@ -30,14 +31,14 @@ LineString *LineString::CreateFromUtf8(Allocator &&allocator, const uint8_t *utf if (canBeCompress) { string = Create(std::forward(allocator), utf8Len, true); ASSERT_COMMON(string != nullptr); + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) std::copy(utf8Data, utf8Data + utf8Len, LineString::Cast(string)->GetDataUtf8Writable()); } else { auto utf16Len = panda::utf_utils::Utf8ToUtf16Size(utf8Data, utf8Len); string = Create(allocator, utf16Len, false); ASSERT_COMMON(string != nullptr); - [[maybe_unused]] auto len = - panda::utf_utils::ConvertRegionUtf8ToUtf16(utf8Data, LineString::Cast(string)->GetDataUtf16Writable(), - utf8Len, utf16Len); + [[maybe_unused]] auto len = panda::utf_utils::ConvertRegionUtf8ToUtf16( + utf8Data, LineString::Cast(string)->GetDataUtf16Writable(), utf8Len, utf16Len); ASSERT_COMMON(len == utf16Len); } @@ -47,13 +48,14 @@ LineString *LineString::CreateFromUtf8(Allocator &&allocator, const uint8_t *utf template > LineString *LineString::CreateFromUtf8CompressedSubString(Allocator &&allocator, - const ReadOnlyHandle string, - uint32_t offset, uint32_t utf8Len) + const ReadOnlyHandle string, uint32_t offset, + uint32_t utf8Len) { LineString *subString = Create(std::forward(allocator), utf8Len, true); ASSERT_COMMON(subString != nullptr); - + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) auto *utf8Data = ReadOnlyHandle::Cast(string)->GetDataUtf8() + offset; + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) std::copy(utf8Data, utf8Data + utf8Len, subString->GetDataUtf8Writable()); ASSERT_PRINT_COMMON(LineString::CanBeCompressed(subString), "String cannot be compressed!"); return subString; @@ -112,11 +114,11 @@ inline size_t LineString::DataSize(BaseString *str) return 
str->IsUtf16() ? length * sizeof(uint16_t) : length; } -template +template uint16_t LineString::Get(int32_t index) const { - int32_t length = static_cast(GetLength()); - if constexpr (verify) { + auto length = static_cast(GetLength()); + if constexpr (VERIFY) { if ((index < 0) || (index >= length)) { return 0; } @@ -155,9 +157,9 @@ void LineString::WriteData(ReadBarrier &&readBarrier, BaseString *src, uint32_t UNREACHABLE_COMMON(); } } else if (src->IsUtf8()) { - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) std::vector buf; const uint8_t *data = BaseString::GetUtf8DataFlat(std::forward(readBarrier), src, buf); + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) common::Span to(GetDataUtf16Writable() + start, length); common::Span from(data, length); for (uint32_t i = 0; i < length; i++) { @@ -167,8 +169,8 @@ void LineString::WriteData(ReadBarrier &&readBarrier, BaseString *src, uint32_t std::vector buf; const uint16_t *data = BaseString::GetUtf16DataFlat(std::forward(readBarrier), src, buf); // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - if (length != 0 && memcpy_s(GetDataUtf16Writable() + start, - destSize * sizeof(uint16_t), data, length * sizeof(uint16_t)) != EOK) { + if (length != 0 && memcpy_s(GetDataUtf16Writable() + start, destSize * sizeof(uint16_t), data, + length * sizeof(uint16_t)) != EOK) { UNREACHABLE_COMMON(); } } @@ -181,5 +183,6 @@ inline bool LineString::CanBeCompressed(const LineString *string) } return BaseString::CanBeCompressed(string->GetDataUtf16(), string->GetLength()); } -} -#endif //COMMON_INTERFACES_OBJECTS_STRING_LINE_STRING_INL_H \ No newline at end of file +} // namespace panda + +#endif // COMMON_INTERFACES_OBJECTS_STRING_LINE_STRING_INL_H \ No newline at end of file diff --git a/common_interfaces/objects/string/line_string.h b/common_interfaces/objects/string/line_string.h index 2ab355214056a2234133c0ab353f035296b05531..2a34645f2f2e6a9c3aa1af9f33798d84be5b0ba5 100644 --- 
a/common_interfaces/objects/string/line_string.h +++ b/common_interfaces/objects/string/line_string.h @@ -42,16 +42,16 @@ namespace panda { * Derived from BaseString, this class is used to store and manage character sequences * directly within the object memory layout. It supports UTF-8 and UTF-16 encodings. */ -class LineString : public BaseString { +class LineString : public BaseString { // NOLINT(cppcoreguidelines-special-member-functions) public: BASE_CAST_CHECK(LineString, IsLineString); NO_MOVE_SEMANTIC_CC(LineString); NO_COPY_SEMANTIC_CC(LineString); - static constexpr uint32_t MAX_LENGTH = (1 << 28) - 16; + static constexpr uint32_t MAX_LENGTH = (1 << 28) - 16; // qNOLINT(hicpp-signed-bitwise) // DATA_OFFSET: the string data stored after the string header. // Data can be stored in utf8 or utf16 form according to compressed bit. - static constexpr size_t DATA_OFFSET = BaseString::SIZE; // DATA_OFFSET equal to Empty String size + static constexpr size_t DATA_OFFSET = BaseString::SIZE; // DATA_OFFSET equal to Empty String size /** * @brief Create a line string from UTF-8 encoded data. @@ -62,8 +62,8 @@ public: * @param canBeCompress Whether data is ASCII-only and compressible. * @return Pointer to created LineString instance. */ - template = 0> - static LineString *CreateFromUtf8(Allocator &&allocate, const uint8_t *utf8Data, uint32_t utf8Len, + template = 0> + static LineString *CreateFromUtf8(Allocator &&allocator, const uint8_t *utf8Data, uint32_t utf8Len, bool canBeCompress); /** @@ -75,7 +75,7 @@ public: * @param utf8Len Number of bytes to extract. * @return Pointer to created LineString instance. */ - template = 0> + template = 0> static LineString *CreateFromUtf8CompressedSubString(Allocator &&allocator, const ReadOnlyHandle string, uint32_t offset, uint32_t utf8Len); @@ -87,7 +87,7 @@ public: * @param compressed Whether to use UTF-8 compression. * @return LineString pointer. 
*/ - template = 0> + template = 0> static LineString *Create(Allocator &&allocator, size_t length, bool compressed); /** @@ -99,7 +99,7 @@ public: * @param canBeCompress Whether string can be compressed. * @return Pointer to created LineString instance. */ - template = 0> + template = 0> static LineString *CreateFromUtf16(Allocator &&allocator, const uint16_t *utf16Data, uint32_t utf16Len, bool canBeCompress); @@ -140,7 +140,7 @@ public: * @param index Index into the character buffer. * @return UTF-16 code unit at the given index. */ - template + template uint16_t Get(int32_t index) const; /** @@ -201,5 +201,5 @@ public: */ static bool CanBeCompressed(const LineString *string); }; -} // namespace panda +} // namespace panda #endif // COMMON_INTERFACES_OBJECTS_STRING_LINE_STRING_H \ No newline at end of file diff --git a/common_interfaces/objects/string/sliced_string-inl.h b/common_interfaces/objects/string/sliced_string-inl.h index c241bae9d310c5382c6fd00b4f6ee06232b917eb..6e2a8b0fca6a65c484ed61456dfa0894fe6bd6d7 100644 --- a/common_interfaces/objects/string/sliced_string-inl.h +++ b/common_interfaces/objects/string/sliced_string-inl.h @@ -1,5 +1,5 @@ /* -* Copyright (c) 2025 Huawei Device Co., Ltd. + * Copyright (c) 2025 Huawei Device Co., Ltd. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at @@ -20,14 +20,13 @@ #include "objects/string/sliced_string.h" namespace panda { -template , +template , objects_traits::enable_if_is_write_barrier> -SlicedString *SlicedString::Create(Allocator &&allocate, WriteBarrier &&writeBarrier, +SlicedString *SlicedString::Create(Allocator &&allocator, WriteBarrier &&writeBarrier, ReadOnlyHandle parent) { SlicedString *slicedString = SlicedString::Cast( - std::invoke(std::forward(allocate), SlicedString::SIZE, ObjectType::SLICED_STRING)); + std::invoke(std::forward(allocator), SlicedString::SIZE, ObjectType::SLICED_STRING)); slicedString->SetRawHashcode(0); slicedString->SetParent(std::forward(writeBarrier), parent.GetBaseObject()); return slicedString; @@ -61,11 +60,11 @@ inline void SlicedString::SetHasBackingStore(bool hasBackingStore) } // Minimum length for a sliced string -template +template uint16_t SlicedString::Get(ReadBarrier &&readBarrier, int32_t index) const { - int32_t length = static_cast(GetLength()); - if constexpr (verify) { + auto length = static_cast(GetLength()); + if constexpr (VERIFY) { if ((index < 0) || (index >= length)) { return 0; } @@ -73,11 +72,13 @@ uint16_t SlicedString::Get(ReadBarrier &&readBarrier, int32_t index) const LineString *parent = LineString::Cast(GetParent(std::forward(readBarrier))); ASSERT_COMMON(parent->IsLineString()); if (parent->IsUtf8()) { + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) common::Span sp(parent->GetDataUtf8() + GetStartIndex(), length); return sp[index]; } + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) common::Span sp(parent->GetDataUtf16() + GetStartIndex(), length); return sp[index]; } -} -#endif //COMMON_INTERFACES_OBJECTS_STRING_SLICED_STRING_INL_H \ No newline at end of file +} // namespace panda +#endif // COMMON_INTERFACES_OBJECTS_STRING_SLICED_STRING_INL_H \ No newline at end of file diff --git a/common_interfaces/objects/string/sliced_string.h 
b/common_interfaces/objects/string/sliced_string.h index d1f9ed809200e867a3d9a87dda15af871d9c4cf2..918cc065c28d54abf67f7a2a652869d227f232ee 100644 --- a/common_interfaces/objects/string/sliced_string.h +++ b/common_interfaces/objects/string/sliced_string.h @@ -1,5 +1,5 @@ /* -* Copyright (c) 2025 Huawei Device Co., Ltd. + * Copyright (c) 2025 Huawei Device Co., Ltd. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at @@ -53,7 +53,8 @@ namespace panda { * Used for substring operations, SlicedString holds a reference to a parent BaseString * and an offset indicating the starting index of the slice. */ -class SlicedString : public BaseString { +// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) +class SlicedString : public BaseString { // NOLINT(readability-inconsistent-declaration-parameter-name) public: BASE_CAST_CHECK(SlicedString, IsSlicedString); NO_MOVE_SEMANTIC_CC(SlicedString); @@ -61,11 +62,12 @@ public: static constexpr uint32_t MIN_SLICED_STRING_LENGTH = 13; static constexpr size_t PARENT_OFFSET = BaseString::SIZE; static constexpr uint32_t START_INDEX_BITS_NUM = 30U; - using HasBackingStoreBit = common::BitField; // 1 - using ReserveBit = HasBackingStoreBit::NextFlag; // 1 - using StartIndexBits = ReserveBit::NextField; // 30 + using HasBackingStoreBit = common::BitField; // 1 + using ReserveBit = HasBackingStoreBit::NextFlag; // 1 + using StartIndexBits = ReserveBit::NextField; // 30 static_assert(StartIndexBits::START_BIT + StartIndexBits::SIZE == sizeof(uint32_t) * common::BITS_PER_BYTE, "StartIndexBits does not match the field size"); + // NOLINTNEXTLINE(misc-redundant-expression) static_assert(StartIndexBits::SIZE == LengthBits::SIZE, "The size of startIndex should be same with Length"); POINTER_FIELD(Parent, PARENT_OFFSET, STARTINDEX_AND_FLAGS_OFFSET) @@ -81,8 +83,8 @@ public: * @return SlicedString pointer. 
*/ template = 0, - objects_traits::enable_if_is_write_barrier = 0> + objects_traits::enable_if_is_allocate = 0, + objects_traits::enable_if_is_write_barrier = 0> static SlicedString *Create(Allocator &&allocator, WriteBarrier &&writeBarrier, ReadOnlyHandle parent); /** * @brief Get the start index of the sliced region. @@ -119,8 +121,8 @@ public: * @param index Index into the sliced string (not the parent). * @return UTF-16 character code unit. */ - template + template uint16_t Get(ReadBarrier &&readBarrier, int32_t index) const; }; -} -#endif //COMMON_INTERFACES_OBJECTS_STRING_SLICED_STRING_H \ No newline at end of file +} // namespace panda +#endif // COMMON_INTERFACES_OBJECTS_STRING_SLICED_STRING_H \ No newline at end of file diff --git a/common_interfaces/objects/string/tree_string-inl.h b/common_interfaces/objects/string/tree_string-inl.h index a578ebb0bf7abc323d6ffb48413159a58d08d463..2cb11b865b5e46fca0638accf9d3b1c9b26b8f4f 100644 --- a/common_interfaces/objects/string/tree_string-inl.h +++ b/common_interfaces/objects/string/tree_string-inl.h @@ -1,5 +1,5 @@ /* -* Copyright (c) 2025 Huawei Device Co., Ltd. + * Copyright (c) 2025 Huawei Device Co., Ltd. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at @@ -20,14 +20,13 @@ #include "objects/string/tree_string.h" namespace panda { -template , +template , objects_traits::enable_if_is_write_barrier> TreeString *TreeString::Create(Allocator &&allocator, WriteBarrier &&writeBarrier, ReadOnlyHandle left, ReadOnlyHandle right, uint32_t length, bool compressed) { - auto string = TreeString::Cast( - std::invoke(std::forward(allocator), TreeString::SIZE, ObjectType::TREE_STRING)); + auto string = + TreeString::Cast(std::invoke(std::forward(allocator), TreeString::SIZE, ObjectType::TREE_STRING)); string->InitLengthAndFlags(length, compressed); string->SetRawHashcode(0); string->SetLeftSubString(std::forward(writeBarrier), left.GetBaseObject()); @@ -42,11 +41,11 @@ bool TreeString::IsFlat(ReadBarrier &&readBarrier) const return strRight->GetLength() == 0; } -template +template uint16_t TreeString::Get(ReadBarrier &&readBarrier, int32_t index) const { - int32_t length = static_cast(GetLength()); - if constexpr (verify) { + auto length = static_cast(GetLength()); + if constexpr (VERIFY) { if ((index < 0) || (index >= length)) { return 0; } @@ -54,7 +53,7 @@ uint16_t TreeString::Get(ReadBarrier &&readBarrier, int32_t index) const if (IsFlat(std::forward(readBarrier))) { BaseString *left = BaseString::Cast(GetLeftSubString(std::forward(readBarrier))); - return left->At(std::forward(readBarrier), index); + return left->At(std::forward(readBarrier), index); } const BaseString *string = this; while (true) { @@ -65,15 +64,14 @@ uint16_t TreeString::Get(ReadBarrier &&readBarrier, int32_t index) const string = left; } else { index -= static_cast(left->GetLength()); - string = BaseString::Cast( - TreeString::ConstCast(string)->GetRightSubString( - std::forward(readBarrier))); + string = BaseString::Cast(TreeString::ConstCast(string)->GetRightSubString( + std::forward(readBarrier))); } } else { - return string->At(std::forward(readBarrier), index); + return string->At(std::forward(readBarrier), 
index); } } UNREACHABLE_COMMON(); } -} -#endif //COMMON_INTERFACES_OBJECTS_STRING_TREE_STRING_INL_H \ No newline at end of file +} // namespace panda +#endif // COMMON_INTERFACES_OBJECTS_STRING_TREE_STRING_INL_H \ No newline at end of file diff --git a/common_interfaces/objects/string/tree_string.h b/common_interfaces/objects/string/tree_string.h index 0d9a6ea89f316e1ea16f86a5d471dbd19b8bd18c..4086c8c92622d24519feffc33caab858827d721e 100644 --- a/common_interfaces/objects/string/tree_string.h +++ b/common_interfaces/objects/string/tree_string.h @@ -42,6 +42,7 @@ namespace panda { * TreeString keeps references to both left-hand and right-hand BaseStrings and calculates * character data on demand. */ +// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) class TreeString : public BaseString { public: BASE_CAST_CHECK(TreeString, IsTreeString); @@ -68,8 +69,8 @@ public: * @return TreeString pointer. */ template = 0, - objects_traits::enable_if_is_write_barrier = 0> + objects_traits::enable_if_is_allocate = 0, + objects_traits::enable_if_is_write_barrier = 0> static TreeString *Create(Allocator &&allocator, WriteBarrier &&writeBarrier, ReadOnlyHandle left, ReadOnlyHandle right, uint32_t length, bool compressed); @@ -93,8 +94,8 @@ public: * @param index Index in the TreeString. * @return UTF-16 code unit at specified index. */ - template + template uint16_t Get(ReadBarrier &&readBarrier, int32_t index) const; }; -} // namespace panda +} // namespace panda #endif // COMMON_INTERFACES_OBJECTS_STRING_TREE_STRING_H \ No newline at end of file diff --git a/common_interfaces/objects/utils/objects_traits.h b/common_interfaces/objects/utils/objects_traits.h index df54c6a7a01de8605fc673f86b0b4f8e31e640bf..d812156ad4f492ef7472276122a0d0dac7772c72 100644 --- a/common_interfaces/objects/utils/objects_traits.h +++ b/common_interfaces/objects/utils/objects_traits.h @@ -1,5 +1,5 @@ /* -* Copyright (c) 2025 Huawei Device Co., Ltd. + * Copyright (c) 2025 Huawei Device Co., Ltd. 
* Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at @@ -21,26 +21,25 @@ namespace panda::objects_traits { - template +// NOLINTNEXTLINE(readability-identifier-naming) constexpr bool is_heap_object_v = std::is_base_of_v>; // WriteBarrier: void (void*, size_t, U) template -constexpr bool is_write_barrier_callable_v = - std::is_invocable_r_v; +// NOLINTNEXTLINE(readability-identifier-naming) +constexpr bool is_write_barrier_callable_v = std::is_invocable_r_v; // ReadBarrier: U (void*, size_t) template -constexpr bool is_read_barrier_callable_v = - is_heap_object_v && - std::is_invocable_v && - std::is_convertible_v, U>; +// NOLINTNEXTLINE(readability-identifier-naming) +constexpr bool is_read_barrier_callable_v = is_heap_object_v &&std::is_invocable_v + &&std::is_convertible_v, U>; // Allocator: U (size_t, CommonType) template -constexpr bool is_allocate_callable_v = - is_heap_object_v && std::is_invocable_r_v; +// NOLINTNEXTLINE(readability-identifier-naming) +constexpr bool is_allocate_callable_v = is_heap_object_v &&std::is_invocable_r_v; // ---- enable_if_is_* traits ---- template @@ -50,30 +49,30 @@ template using enable_if_is_read_barrier = std::enable_if_t, int>; template +// NOLINTNEXTLINE(readability-identifier-naming) using enable_if_is_allocate = std::enable_if_t, int>; template -struct is_std_vector_of : std::false_type {}; +struct is_std_vector_of : std::false_type { // NOLINT(readability-identifier-naming) +}; template -struct is_std_vector_of, T> : std::true_type {}; +struct is_std_vector_of, T> : std::true_type { +}; template -constexpr bool is_std_vector_of_v = is_std_vector_of::value; - +constexpr bool is_std_vector_of_v = is_std_vector_of::value; // NOLINT(readability-identifier-naming) template -using get_allocator_type_t = typename std::decay_t::allocator_type; +using get_allocator_type_t = typename 
std::decay_t::allocator_type; // NOLINT(readability-identifier-naming) template -using rebind_alloc_t = typename std::allocator_traits::template rebind_alloc; - +using rebind_alloc_t = // NOLINT(readability-identifier-naming) + typename std::allocator_traits::template rebind_alloc; template -using vector_with_same_alloc_t = +using vector_with_same_alloc_t = // NOLINT(readability-identifier-naming) std::vector, NewT>>; +} // namespace panda::objects_traits -} // namespace panda::objects_traits - - -#endif //COMMON_INTERFACES_OBJECTS_TRAITS_H +#endif // COMMON_INTERFACES_OBJECTS_TRAITS_H diff --git a/common_interfaces/objects/utils/span.h b/common_interfaces/objects/utils/span.h index cf196918f3fdc52ddc68a40ff86f0b4d5ae6991e..8a3a7d55d408b6aa381201126eee3e4d04571997 100644 --- a/common_interfaces/objects/utils/span.h +++ b/common_interfaces/objects/utils/span.h @@ -30,7 +30,7 @@ template class Span { public: using ElementType = T; - using value_type = std::remove_cv_t; + using value_type = std::remove_cv_t; // NOLINT(readability-identifier-naming) using ValueType = value_type; using Reference = T &; using ConstReference = const T &; @@ -47,7 +47,7 @@ public: // The following constructor is non-explicit to be aligned with std::span template - constexpr Span(U (&array)[N]) : Span(array, N) + explicit constexpr Span(U (&array)[N]) : Span(array, N) // NOLINT(modernize-avoid-c-arrays) { } @@ -258,6 +258,6 @@ Span AsWritableBytes(Span s) noexcept return {reinterpret_cast(s.Data()), s.SizeBytes()}; } -} // namespace panda +} // namespace panda::common #endif // COMMON_INTERFACES_OBJECTS_UTILS_SPAN_H diff --git a/common_interfaces/objects/utils/utf_utils.h b/common_interfaces/objects/utils/utf_utils.h index 6fa4c84175e95b01cf31364de383ef7faf6026dd..25065ee79d9578b7bd362b8119d46c8fc1840a5c 100644 --- a/common_interfaces/objects/utils/utf_utils.h +++ b/common_interfaces/objects/utils/utf_utils.h @@ -19,8 +19,9 @@ #include namespace panda { +// 
NOLINTNEXTLINE(readability-identifier-naming, modernize-avoid-c-arrays) static constexpr unsigned char firstByteMark[7] = {0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC}; -class utf_utils { +class utf_utils { // NOLINT(readability-identifier-naming) public: static constexpr uint8_t UTF8_1B_MAX = 0x7f; static constexpr uint16_t UTF8_2B_MAX = 0x7ff; @@ -37,8 +38,8 @@ public: static constexpr uint16_t DECODE_TRAIL_LOW = 0xDC00; static constexpr uint16_t DECODE_TRAIL_HIGH = 0xDFFF; static constexpr uint32_t DECODE_SECOND_FACTOR = 0x10000; - static constexpr uint8_t byteMask = 0xbf; - static constexpr uint8_t byteMark = 0x80; + static constexpr uint8_t byteMask = 0xbf; // NOLINT(readability-identifier-naming) + static constexpr uint8_t byteMark = 0x80; // NOLINT(readability-identifier-naming) static constexpr size_t HI_SURROGATE_MIN = 0xD800; static constexpr size_t HI_SURROGATE_MAX = 0xDBFF; static constexpr size_t LO_SURROGATE_MIN = 0xDC00; @@ -66,14 +67,17 @@ public: static size_t FixUtf8Len(const uint8_t *utf8, size_t utf8Len) { size_t trimSize = 0; + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic, readability-magic-numbers) if (utf8Len >= 1 && utf8[utf8Len - 1] >= 0xC0) { // The last one char claim there are more than 1 byte next to it, it's invalid, so drop the last one. trimSize = 1; } + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic, readability-magic-numbers) if (utf8Len >= CONST_2 && utf8[utf8Len - CONST_2] >= 0xE0) { // The second to last char claim there are more than 2 bytes next to it, it's invalid, so drop the last two. trimSize = CONST_2; } + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic, readability-magic-numbers) if (utf8Len >= CONST_3 && utf8[utf8Len - CONST_3] >= 0xF0) { // The third to last char claim there are more than 3 bytes next to it, it's invalid, // so drop the last three. 
@@ -82,13 +86,15 @@ public: return utf8Len - trimSize; } - static size_t Utf8ToUtf16Size(const uint8_t *utf8, size_t utf8Len) + static size_t Utf8ToUtf16Size(const uint8_t *utf8, size_t utf8Len) { + // NOLINTBEGIN(cppcoreguidelines-pro-bounds-pointer-arithmetic, readability-magic-numbers) size_t safeUtf8Len = FixUtf8Len(utf8, utf8Len); size_t inPos = 0; size_t res = 0; while (inPos < safeUtf8Len) { uint8_t src = utf8[inPos]; + // NOLINTNEXTLINE(hicpp-signed-bitwise) switch (src & 0xF0) { case 0xF0: { const uint8_t c2 = utf8[++inPos]; @@ -126,38 +132,36 @@ public: // The remain chars should be treated as single byte char. res += utf8Len - inPos; return res; + // NOLINTEND(cppcoreguidelines-pro-bounds-pointer-arithmetic, readability-magic-numbers) } static size_t Utf16ToUtf8Size(const uint16_t *utf16, uint32_t length, bool modify = true, bool isGetBufferSize = false, bool cesu8 = false) { - size_t res = 1; // zero byte + // NOLINTBEGIN(cppcoreguidelines-pro-bounds-pointer-arithmetic) + size_t res = 1; // zero byte // when utf16 data length is only 1 and code in 0xd800-0xdfff, // means that is a single code point, it needs to be represented by three UTF8 code. 
- if (length == 1 && utf16[0] >= HI_SURROGATE_MIN && // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - utf16[0] <= LO_SURROGATE_MAX) { // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) + if (length == 1 && utf16[0] >= HI_SURROGATE_MIN && utf16[0] <= LO_SURROGATE_MAX) { res += UtfLength::THREE; return res; } for (uint32_t i = 0; i < length; ++i) { - if (utf16[i] == 0) { // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) + if (utf16[i] == 0) { if (isGetBufferSize) { res += UtfLength::ONE; } else if (modify) { - res += UtfLength::TWO; // special case for U+0000 => C0 80 + res += UtfLength::TWO; // special case for U+0000 => C0 80 } - } else if (utf16[i] <= UTF8_1B_MAX) { // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) + } else if (utf16[i] <= UTF8_1B_MAX) { res += 1; - } else if (utf16[i] <= UTF8_2B_MAX) { // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) + } else if (utf16[i] <= UTF8_2B_MAX) { res += UtfLength::TWO; - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) } else if (utf16[i] < HI_SURROGATE_MIN || utf16[i] > HI_SURROGATE_MAX) { res += UtfLength::THREE; } else { - if (!cesu8 && i < length - 1 && - utf16[i + 1] >= LO_SURROGATE_MIN && // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - utf16[i + 1] <= LO_SURROGATE_MAX) { // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) + if (!cesu8 && i < length - 1 && utf16[i + 1] >= LO_SURROGATE_MIN && utf16[i + 1] <= LO_SURROGATE_MAX) { res += UtfLength::FOUR; ++i; } else { @@ -166,16 +170,19 @@ public: } } return res; + // NOLINTEND(cppcoreguidelines-pro-bounds-pointer-arithmetic) } // CC-OFFNXT(huge_depth, huge_method, G.FUN.01-CPP) solid logic static size_t ConvertRegionUtf8ToUtf16(const uint8_t *utf8In, uint16_t *utf16Out, size_t utf8Len, size_t utf16Len) { + // NOLINTBEGIN(cppcoreguidelines-pro-bounds-pointer-arithmetic, readability-magic-numbers) size_t safeUtf8Len = FixUtf8Len(utf8In, utf8Len); size_t inPos = 0; size_t outPos = 0; 
while (inPos < safeUtf8Len && outPos < utf16Len) { uint8_t src = utf8In[inPos]; + // NOLINTNEXTLINE(hicpp-signed-bitwise) switch (src & 0xF0) { case 0xF0: { const uint8_t c2 = utf8In[++inPos]; @@ -191,6 +198,7 @@ public: } codePoint -= SURROGATE_RAIR_START; utf16Out[outPos++] = static_cast((codePoint >> OFFSET_10POS) | H_SURROGATE_START); + // NOLINTNEXTLINE(hicpp-signed-bitwise) utf16Out[outPos++] = static_cast((codePoint & 0x3FF) | L_SURROGATE_START); } else { utf16Out[outPos++] = static_cast(codePoint); @@ -202,7 +210,7 @@ public: const uint8_t c2 = utf8In[++inPos]; const uint8_t c3 = utf8In[++inPos]; utf16Out[outPos++] = static_cast(((src & LOW_4BITS) << OFFSET_12POS) | - ((c2 & LOW_6BITS) << OFFSET_6POS) | (c3 & LOW_6BITS)); + ((c2 & LOW_6BITS) << OFFSET_6POS) | (c3 & LOW_6BITS)); inPos++; break; } @@ -225,13 +233,14 @@ public: utf16Out[outPos++] = static_cast(utf8In[inPos++]); } return outPos; + // NOLINTEND(cppcoreguidelines-pro-bounds-pointer-arithmetic, readability-magic-numbers) } static size_t ConvertRegionUtf16ToLatin1(const uint16_t *utf16In, uint8_t *latin1Out, size_t utf16Len, size_t latin1Len); - static size_t ConvertRegionUtf16ToUtf8(const uint16_t *utf16In, uint8_t *utf8Out, size_t utf16Len, - size_t utf8Len, size_t start, bool modify = true, bool isWriteBuffer = false, + static size_t ConvertRegionUtf16ToUtf8(const uint16_t *utf16In, uint8_t *utf8Out, size_t utf16Len, size_t utf8Len, + size_t start, bool modify = true, bool isWriteBuffer = false, bool cesu8 = false) { if (utf16In == nullptr || utf8Out == nullptr || utf8Len == 0) { @@ -243,13 +252,14 @@ public: uint32_t codepoint = DecodeUTF16(utf16In, end, &i, cesu8); if (codepoint == 0) { if (isWriteBuffer) { - utf8Out[utf8Pos++] = 0x00U; + // NOLINTNEXTLINE(readability-magic-numbers) + utf8Out[utf8Pos++] = 0x00U; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) continue; } if (modify) { // special case for \u0000 ==> C080 - 1100'0000 1000'0000 - utf8Out[utf8Pos++] = UTF8_2B_FIRST; - 
utf8Out[utf8Pos++] = UTF8_2B_SECOND; + utf8Out[utf8Pos++] = UTF8_2B_FIRST; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) + utf8Out[utf8Pos++] = UTF8_2B_SECOND; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) } continue; } @@ -264,15 +274,16 @@ public: // Methods for decode utf16 to unicode static uint32_t DecodeUTF16(uint16_t const *utf16, size_t len, size_t *index, bool cesu8 = false) { - uint16_t high = utf16[*index]; + uint16_t high = utf16[*index]; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) if ((high & SURROGATE_MASK) != DECODE_LEAD_LOW || !IsUTF16HighSurrogate(high) || *index == len - 1) { return high; } - uint16_t low = utf16[*index + 1]; + uint16_t low = utf16[*index + 1]; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) if (!IsUTF16LowSurrogate(low) || cesu8) { return high; } (*index)++; + // NOLINTNEXTLINE(hicpp-signed-bitwise) return ((high - DECODE_LEAD_LOW) << UTF16_OFFSET) + (low - DECODE_TRAIL_LOW) + DECODE_SECOND_FACTOR; } static size_t UTF8Length(uint32_t codepoint) @@ -292,10 +303,10 @@ public: { for (size_t j = size - 1; j > 0; j--) { uint8_t cont = ((codepoint | byteMark) & byteMask); - utf8[index + j] = cont; + utf8[index + j] = cont; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) codepoint >>= UTF8_OFFSET; } - utf8[index] = codepoint | firstByteMark[size]; + utf8[index] = codepoint | firstByteMark[size]; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) return size; } static bool IsUTF16HighSurrogate(uint16_t ch) @@ -306,8 +317,9 @@ public: { return DECODE_TRAIL_LOW <= ch && ch <= DECODE_TRAIL_HIGH; } + private: static uint32_t HandleAndDecodeInvalidUTF16(uint16_t const *utf16, size_t len, size_t *index); }; -} // namespace panda +} // namespace panda #endif // COMMON_INTERFACES_OBJECTS_UTILS_UTF_H \ No newline at end of file diff --git a/static_core/BUILD.gn b/static_core/BUILD.gn index 525977d2ecb828abf9d3759686e85d639a05483e..fdc35c98f8c3622a3bb0ddf9d3b37d83aae079fc 
100644 --- a/static_core/BUILD.gn +++ b/static_core/BUILD.gn @@ -330,6 +330,8 @@ config("ark_config") { if (ark_use_cmc_gc) { defines += [ "ARK_USE_CMC_GC" ] + } else { + defines += [ "BASE_CLASS_32BITS" ] } if (current_cpu == "arm") { diff --git a/static_core/CMakeLists.txt b/static_core/CMakeLists.txt index 7c99057f299d19cbafc4e36c64155d37ccc0871f..324f817bd88af57a2839164a80509754ef23ee82 100644 --- a/static_core/CMakeLists.txt +++ b/static_core/CMakeLists.txt @@ -46,6 +46,7 @@ set(PANDA_THIRD_PARTY_SOURCES_DIR ${PANDA_ROOT}/third_party) set(PANDA_THIRD_PARTY_CONFIG_DIR ${PANDA_ROOT}/cmake/third_party) add_definitions(-DBUILD_FOLDER="${CMAKE_CURRENT_BINARY_DIR}") +add_definitions(-DBASE_CLASS_32BITS) # List for accumulation of all core gtests binary paths. # It's used by CI to archive these binaries into a single artifact diff --git a/static_core/compiler/optimizer/code_generator/CMakeLists.txt b/static_core/compiler/optimizer/code_generator/CMakeLists.txt index d6a3415b7097294b97515bc5c210a67f049eb2e6..fb5e2e468ee8e780e9556a71a751d914648c8f81 100644 --- a/static_core/compiler/optimizer/code_generator/CMakeLists.txt +++ b/static_core/compiler/optimizer/code_generator/CMakeLists.txt @@ -18,6 +18,7 @@ include(${PANDA_ROOT}/compiler/cmake/target.cmake) include_directories( . 
${PANDA_ROOT} + ${PANDA_ROOT}/../common_interfaces ) set(GENERATED_DIR ${PANDA_BINARY_ROOT}/compiler/generated) diff --git a/static_core/compiler/optimizer/ir_builder/inst_builder-inl.h b/static_core/compiler/optimizer/ir_builder/inst_builder-inl.h index 30d66cb850adc16345c7b6b83af011eb9ba9c05e..24ddaff959cedae9d3b45405b1839e8c665a97e5 100644 --- a/static_core/compiler/optimizer/ir_builder/inst_builder-inl.h +++ b/static_core/compiler/optimizer/ir_builder/inst_builder-inl.h @@ -373,8 +373,9 @@ void InstBuilder::BuildStringLengthIntrinsic(const BytecodeInstruction *bcInst, Inst *stringLength; if (graph_->GetRuntime()->IsCompressedStringsEnabled()) { - auto constOneInst = graph_->FindOrCreateConstant(1); - stringLength = graph_->CreateInstShr(DataType::INT32, bcAddr, arrayLength, constOneInst); + uint64_t twoShift = 2; + auto constTwoInst = graph_->FindOrCreateConstant(twoShift); + stringLength = graph_->CreateInstShr(DataType::INT32, bcAddr, arrayLength, constTwoInst); AddInstruction(stringLength); } else { stringLength = arrayLength; @@ -1599,8 +1600,9 @@ bool InstBuilder::TryBuildStringCharAtIntrinsic(const BytecodeInstruction *bcIns Inst *stringLength = nullptr; if (compressionEnabled) { - auto constOneInst = graph_->FindOrCreateConstant(1); - stringLength = graph_->CreateInstShr(DataType::INT32, bcAddr, arrayLength, constOneInst); + uint64_t twoShift = 2; + auto constTwoInst = graph_->FindOrCreateConstant(twoShift); + stringLength = graph_->CreateInstShr(DataType::INT32, bcAddr, arrayLength, constTwoInst); AddInstruction(stringLength); } else { stringLength = arrayLength; diff --git a/static_core/compiler/optimizer/optimizations/reserve_string_builder_buffer.cpp b/static_core/compiler/optimizer/optimizations/reserve_string_builder_buffer.cpp index e712dacafd4fa9890824b2429a4c1da544eafeed..1a2daf612102b94b1490cc35f078ec3653533407 100644 --- a/static_core/compiler/optimizer/optimizations/reserve_string_builder_buffer.cpp +++ 
b/static_core/compiler/optimizer/optimizations/reserve_string_builder_buffer.cpp @@ -258,7 +258,8 @@ Inst *CreateStringBuilderConstructorArgumentLength(Graph *graph, Inst *arg, Inst auto argLength = graph->CreateInstShr(DataType::INT32, ctorCall->GetPc()); argLength->SetInput(ARG_IDX_0, lenArray); - argLength->SetInput(ARG_IDX_1, graph->FindOrCreateConstant(1)); + uint64_t twoShift = 2; + argLength->SetInput(ARG_IDX_1, graph->FindOrCreateConstant(twoShift)); InsertBeforeWithSaveState(argLength, ctorCall->GetSaveState()); return argLength; diff --git a/static_core/irtoc/scripts/common.irt b/static_core/irtoc/scripts/common.irt index 35060879fccadc3af8a6667463a120c058eed70c..151157b7a4597f82dcc888dd6072566ab1e77151 100644 --- a/static_core/irtoc/scripts/common.irt +++ b/static_core/irtoc/scripts/common.irt @@ -204,6 +204,7 @@ module Constants STRING_MUTF8_1B_MAX = "0x7f" STRING_MUTF8_1B_MIN = "0x01" STRING_LENGTH_OFFSET = "ark::coretypes::STRING_LENGTH_OFFSET" + STRING_LENGTH_SHIFT = "2" STRING_HASHCODE_OFFSET = "ark::coretypes::STRING_HASHCODE_OFFSET" STRING_DATA_OFFSET = "ark::coretypes::STRING_DATA_OFFSET" ALIGNMENT_MASK = "~(TLAB_ALIGNMENT - 1)" diff --git a/static_core/irtoc/scripts/string_builder.irt b/static_core/irtoc/scripts/string_builder.irt index 2d3dcb25381943b7084b2fb65428cd6ed67ea0dc..8ea69375bd534fb1017ec41eac503bc0ac394deb 100644 --- a/static_core/irtoc/scripts/string_builder.irt +++ b/static_core/irtoc/scripts/string_builder.irt @@ -140,7 +140,7 @@ def GenerateStringBuilderString(compression) if compression utf16 := AndI(length).Imm(1).u32 - length := ShrI(length).Imm(1).u32 + length := ShrI(length).Imm(Constants::STRING_LENGTH_SHIFT).u32 end # do not do compressed string unpacking diff --git a/static_core/irtoc/scripts/string_helpers.irt b/static_core/irtoc/scripts/string_helpers.irt index ae93a2ffef407672900ce5e87a7f6fa29782bd8a..403efaa162031e447d1d2e02ec85d1fb36049a07 100644 --- a/static_core/irtoc/scripts/string_helpers.irt +++ 
b/static_core/irtoc/scripts/string_helpers.irt @@ -30,7 +30,7 @@ end def GenerateStringEquals(lang, dynamic, compression) suffix = (compression ? "Compressed" : "") - length_shift = (dynamic ? 2 : 1) + length_shift = 2 mode = [:FastPath] mode.push(:DynamicMethod, :DynamicStub) if dynamic @@ -359,7 +359,7 @@ def GenerateCreateStringFromStringTlab(string_compression_enabled) klass := load_class(str) length := LoadI(str).Imm(Constants::STRING_LENGTH_OFFSET).u32 hashcode := LoadI(str).Imm(Constants::STRING_HASHCODE_OFFSET).u32 - data_size := unpack_length(Cast(length).u64, string_compression_enabled, 1) + data_size := unpack_length(Cast(length).u64, string_compression_enabled, Constants::STRING_LENGTH_SHIFT) new_str := allocate_string_tlab(klass, Cast(data_size).word) StoreI(new_str, length).Imm(Constants::STRING_LENGTH_OFFSET).u32 @@ -770,10 +770,10 @@ def GenerateCreateStringFromCharArrayTlab(string_compression_enabled) if string_compression_enabled If(compressable, 1).EQ.Likely.b { compress_u16_to_u8_chars(arr_data, str_data, Cast(char_count).u64) - StoreI(new_str, ShlI(char_count).Imm(1).u32).Imm(Constants::STRING_LENGTH_OFFSET).u32 + StoreI(new_str, ShlI(char_count).Imm(Constants::STRING_LENGTH_SHIFT).u32).Imm(Constants::STRING_LENGTH_OFFSET).u32 } Else { copy_u16_chars(arr_data, str_data, Cast(char_count).u64) - StoreI(new_str, OrI(ShlI(char_count).Imm(1).u32).Imm(1).u32).Imm(Constants::STRING_LENGTH_OFFSET).u32 + StoreI(new_str, OrI(ShlI(char_count).Imm(Constants::STRING_LENGTH_SHIFT).u32).Imm(1).u32).Imm(Constants::STRING_LENGTH_OFFSET).u32 } else copy_u16_chars(arr_data, str_data, Cast(char_count).u64) @@ -830,10 +830,10 @@ def GenerateCreateStringFromZeroBasedCharArrayTlab(string_compression_enabled) if string_compression_enabled If(compressable, 1).EQ.Likely.b { compress_u16_to_u8_chars(arr_data, str_data, Cast(char_count).u64) - StoreI(new_str, ShlI(char_count).Imm(1).u32).Imm(Constants::STRING_LENGTH_OFFSET).u32 + StoreI(new_str, 
ShlI(char_count).Imm(Constants::STRING_LENGTH_SHIFT).u32).Imm(Constants::STRING_LENGTH_OFFSET).u32 } Else { copy_u16_chars(arr_data, str_data, Cast(char_count).u64) - StoreI(new_str, OrI(ShlI(char_count).Imm(1).u32).Imm(1).u32).Imm(Constants::STRING_LENGTH_OFFSET).u32 + StoreI(new_str, OrI(ShlI(char_count).Imm(Constants::STRING_LENGTH_SHIFT).u32).Imm(1).u32).Imm(Constants::STRING_LENGTH_OFFSET).u32 } else copy_u16_chars(arr_data, str_data, Cast(char_count).u64) @@ -869,7 +869,7 @@ def GenerateSubstringFromStringTlab(string_compression_enabled) # Note, 'str' is checked against nullptr in the InstBuilder (see AddArgNullcheckIfNeeded) length_packed := LoadI(str).Imm(Constants::STRING_LENGTH_OFFSET).u32 if string_compression_enabled - length := ShrI(length_packed).Imm(1).u32 + length := ShrI(length_packed).Imm(Constants::STRING_LENGTH_SHIFT).u32 else length := length_packed end @@ -935,10 +935,10 @@ def GenerateSubstringFromStringTlab(string_compression_enabled) new_str_data := Add(new_str, Cast(Constants::STRING_DATA_OFFSET).u64).ptr If(compressable, 1).EQ.Likely.b { compress_u16_to_u8_chars(src_str_data, new_str_data, Cast(char_count).u64) - StoreI(new_str, ShlI(char_count).Imm(1).u32).Imm(Constants::STRING_LENGTH_OFFSET).u32 + StoreI(new_str, ShlI(char_count).Imm(Constants::STRING_LENGTH_SHIFT).u32).Imm(Constants::STRING_LENGTH_OFFSET).u32 } Else { copy_u16_chars(src_str_data, new_str_data, Cast(char_count).u64) - StoreI(new_str, OrI(ShlI(char_count).Imm(1).u32).Imm(1).u32).Imm(Constants::STRING_LENGTH_OFFSET).u32 + StoreI(new_str, OrI(ShlI(char_count).Imm(Constants::STRING_LENGTH_SHIFT).u32).Imm(1).u32).Imm(Constants::STRING_LENGTH_OFFSET).u32 } # String is supposed to be a constant object, so all its data should be visible by all threads Intrinsic(:DATA_MEMORY_BARRIER_FULL).void @@ -948,7 +948,7 @@ def GenerateSubstringFromStringTlab(string_compression_enabled) new_str := allocate_string_tlab(klass, Cast(char_count).word) new_str_data := Add(new_str, 
Cast(Constants::STRING_DATA_OFFSET).u64).ptr copy_u8_chars(src_str_data, new_str_data, Cast(char_count).u64) - StoreI(new_str, ShlI(char_count).Imm(1).u32).Imm(Constants::STRING_LENGTH_OFFSET).u32 + StoreI(new_str, ShlI(char_count).Imm(Constants::STRING_LENGTH_SHIFT).u32).Imm(Constants::STRING_LENGTH_OFFSET).u32 # String is supposed to be a constant object, so all its data should be visible by all threads Intrinsic(:DATA_MEMORY_BARRIER_FULL).void Return(new_str).ptr @@ -997,7 +997,7 @@ def GenerateStringGetCharsTlab(string_compression_enabled) # Note, 'str' is checked against nullptr in the InstBuilder (see AddArgNullcheckIfNeeded) length := LoadI(str).Imm(Constants::STRING_LENGTH_OFFSET).u32; if string_compression_enabled - If(Cast(end_index).u32, ShrI(length).Imm(1).u32).A.Unlikely.b { + If(Cast(end_index).u32, ShrI(length).Imm(Constants::STRING_LENGTH_SHIFT).u32).A.Unlikely.b { Goto(:SlowPathEntrypoint) # Out of range } not_compressed := AndI(length).Imm(1).u32 @@ -1102,7 +1102,7 @@ end str_data := Add(Cast(str).SrcType(Constants::COMPILER_REFERENCE).ptr, Cast(Constants::STRING_DATA_OFFSET).word).ptr length_packed := LoadI(str).Imm(Constants::STRING_LENGTH_OFFSET).u32 if string_compression_enabled - length := ShrI(length_packed).Imm(1).u32 + length := ShrI(length_packed).Imm(Constants::STRING_LENGTH_SHIFT).u32 not_compressed := AndI(length_packed).Imm(1).u32 If(not_compressed, 0).EQ.Likely.b { # String contains 8-bit chars diff --git a/static_core/irtoc/scripts/strings.irt b/static_core/irtoc/scripts/strings.irt index 269bfbf331ed63c79a77828dff70a1e6afd245bd..10670793183515ab5ae2471df2f43621c17c3772 100644 --- a/static_core/irtoc/scripts/strings.irt +++ b/static_core/irtoc/scripts/strings.irt @@ -50,12 +50,12 @@ function(:StringConcat2Tlab, # any of the strings is uncompressed (resulted string is uncompressed) has_uncompressed := AndI(Or(length1, length2).u32).Imm(1).u32 - count1 := ShrI(length1).Imm(1).u32 - count2 := ShrI(length2).Imm(1).u32 + count1 := 
ShrI(length1).Imm(Constants::STRING_LENGTH_SHIFT).u32 + count2 := ShrI(length2).Imm(Constants::STRING_LENGTH_SHIFT).u32 size := Add(count1, count2).u32 data_size := Shl(size, has_uncompressed).u32 - length := Or(ShlI(size).Imm(1).u32, has_uncompressed).u32 + length := Or(ShlI(size).Imm(Constants::STRING_LENGTH_SHIFT).u32, has_uncompressed).u32 new_str := allocate_string_tlab(klass, data_size) StoreI(new_str, length).Imm(Constants::STRING_LENGTH_OFFSET).u32 @@ -123,13 +123,13 @@ function(:StringConcat3Tlab, has_uncompressed := AndI(Or(length3, Or(length1, length2).u32).u32).Imm(1).u32 - count1 := ShrI(length1).Imm(1).u32 - count2 := ShrI(length2).Imm(1).u32 - count3 := ShrI(length3).Imm(1).u32 + count1 := ShrI(length1).Imm(Constants::STRING_LENGTH_SHIFT).u32 + count2 := ShrI(length2).Imm(Constants::STRING_LENGTH_SHIFT).u32 + count3 := ShrI(length3).Imm(Constants::STRING_LENGTH_SHIFT).u32 size := Add(count1, Add(count2, count3).u32).u32 data_size := Shl(size, has_uncompressed).u32 - length := Or(ShlI(size).Imm(1).u32, has_uncompressed).u32 + length := Or(ShlI(size).Imm(Constants::STRING_LENGTH_SHIFT).u32, has_uncompressed).u32 new_str := allocate_string_tlab(klass, data_size) StoreI(new_str, length).Imm(Constants::STRING_LENGTH_OFFSET).u32 @@ -215,14 +215,14 @@ function(:StringConcat4Tlab, has_uncompressed := Or(length3, Or(length1, length2).u32).u32 has_uncompressed := AndI(Or(length4, has_uncompressed).u32).Imm(1).u32 - count1 := ShrI(length1).Imm(1).u32 - count2 := ShrI(length2).Imm(1).u32 - count3 := ShrI(length3).Imm(1).u32 - count4 := ShrI(length4).Imm(1).u32 + count1 := ShrI(length1).Imm(Constants::STRING_LENGTH_SHIFT).u32 + count2 := ShrI(length2).Imm(Constants::STRING_LENGTH_SHIFT).u32 + count3 := ShrI(length3).Imm(Constants::STRING_LENGTH_SHIFT).u32 + count4 := ShrI(length4).Imm(Constants::STRING_LENGTH_SHIFT).u32 size := Add(count1, Add(count2, Add(count3, count4).u32).u32).u32 data_size := Shl(size, has_uncompressed).u32 - length := 
Or(ShlI(size).Imm(1).u32, has_uncompressed).u32 + length := Or(ShlI(size).Imm(Constants::STRING_LENGTH_SHIFT).u32, has_uncompressed).u32 new_str := allocate_string_tlab(klass, data_size) StoreI(new_str, length).Imm(Constants::STRING_LENGTH_OFFSET).u32 @@ -333,7 +333,7 @@ function(:StringCompareTo, buf2 := AddI(str2).Imm(Constants::STRING_DATA_OFFSET).ptr # get the least length in chars - length := ShrI(Cast(Min(length1, length2).i32).u64).Imm(1).u64 + length := ShrI(Cast(Min(length1, length2).i32).u64).Imm(Constants::STRING_LENGTH_SHIFT).u64 utf_1 := AndI(length1).Imm(1).u32 utf_2 := AndI(length2).Imm(1).u32 diff --git a/static_core/plugins/ets/compiler/CMakeLists.txt b/static_core/plugins/ets/compiler/CMakeLists.txt index 91ee93f8526b8e035fba60f68bc69534b323eb66..9847147bb0f4421a204083b47a1182d65b6680c6 100644 --- a/static_core/plugins/ets/compiler/CMakeLists.txt +++ b/static_core/plugins/ets/compiler/CMakeLists.txt @@ -55,6 +55,7 @@ panda_target_sources(arkcompiler PRIVATE ${COMPILER_SOURCES}) panda_target_include_directories(arkcompiler PUBLIC ${GENERATED_DIR} + ${PANDA_ROOT}/../common_interfaces ) add_inst_templates(${CMAKE_CURRENT_SOURCE_DIR}/optimizer/ir_builder/ets_inst_templates.yaml) diff --git a/static_core/plugins/ets/compiler/codegen_intrinsics_ets.cpp b/static_core/plugins/ets/compiler/codegen_intrinsics_ets.cpp index 198446c6eff2eb465c667d17410f8f89ffd740f4..ebda1a8cd24bcff2d5b45a9cef53a852bccfab3e 100644 --- a/static_core/plugins/ets/compiler/codegen_intrinsics_ets.cpp +++ b/static_core/plugins/ets/compiler/codegen_intrinsics_ets.cpp @@ -229,7 +229,8 @@ static void EncodeSbAppendString(Codegen *cg, IntrinsicInst *inst, const SbAppen enc->EncodeAdd(reg2, reg2, Imm(1)); enc->EncodeStr(reg2, args.SbIndexAddr()); // Unpack length of string - enc->EncodeShr(reg1, reg1, Imm(1)); + int64_t twoShift = 2; + enc->EncodeShr(reg1, reg1, Imm(twoShift)); // Add length of string to the current length of StringBuilder enc->EncodeLdr(reg2, false, 
args.SbLengthAddr()); enc->EncodeAdd(reg2, reg2, reg1); diff --git a/static_core/plugins/ets/compiler/ir_build_intrinsics_ets.cpp b/static_core/plugins/ets/compiler/ir_build_intrinsics_ets.cpp index 2e3ebe38c7de66010153ae9e799b5c3afc147622..96f8d20071e1682400bb35704d6663e9436147f9 100644 --- a/static_core/plugins/ets/compiler/ir_build_intrinsics_ets.cpp +++ b/static_core/plugins/ets/compiler/ir_build_intrinsics_ets.cpp @@ -449,9 +449,11 @@ void InstBuilder::BuildStringSizeInBytes(const BytecodeInstruction *bcInst, bool auto graph = GetGraph(); auto offset = FindOrCreateConstant(runtime->GetStringLengthOffset(graph->GetArch())); auto one = FindOrCreateConstant(1U); + uint64_t twoShift = 2; + auto two = FindOrCreateConstant(twoShift); auto len = graph->CreateInstLoadNative(DataType::INT32, bcAddr, str, offset); - auto size = graph->CreateInstShr(DataType::INT32, bcAddr, len, one); + auto size = graph->CreateInstShr(DataType::INT32, bcAddr, len, two); auto shift = graph->CreateInstAnd(DataType::INT32, bcAddr, len, one); auto add = graph->CreateInstAdd(DataType::INT32, bcAddr, size, shift); auto result = graph->CreateInstShl(DataType::INT32, bcAddr, add, shift); diff --git a/static_core/plugins/ets/compiler/optimizer/ets_intrinsics_peephole.cpp b/static_core/plugins/ets/compiler/optimizer/ets_intrinsics_peephole.cpp index 0600985f10e1670a724fe9c6f32e5d3f54dc46c6..0466f8b7445cf4a52943585870581fae5536d509 100644 --- a/static_core/plugins/ets/compiler/optimizer/ets_intrinsics_peephole.cpp +++ b/static_core/plugins/ets/compiler/optimizer/ets_intrinsics_peephole.cpp @@ -108,7 +108,8 @@ Inst *GetStringFromLength(Inst *inst) return nullptr; } auto input1 = inst->GetInput(1).GetInst(); - if (!input1->IsConst() || input1->CastToConstant()->GetRawValue() != 1) { + uint64_t twoShift = 2; + if (!input1->IsConst() || input1->CastToConstant()->GetRawValue() != twoShift) { return nullptr; } lenArray = inst->GetInput(0).GetInst(); diff --git 
a/static_core/plugins/ets/irtoc_scripts/string.irt b/static_core/plugins/ets/irtoc_scripts/string.irt index 664527fc90ebc9693b1a179db97245fb0af20c87..96bd72101223c85a22f69048e45675b1921e7997 100644 --- a/static_core/plugins/ets/irtoc_scripts/string.irt +++ b/static_core/plugins/ets/irtoc_scripts/string.irt @@ -69,18 +69,18 @@ Label(:_L1) _new_str_data := Add(_new_str1, Cast(Constants::STRING_DATA_OFFSET).u64).ptr If(_compressable, 1).EQ.Likely.b { compress_u16_to_u8_chars(_src_str_data, _new_str_data, Cast(_char_count).u64) - StoreI(_new_str1, ShlI(_char_count).Imm(1).u32).Imm(Constants::STRING_LENGTH_OFFSET).u32 + StoreI(_new_str1, ShlI(_char_count).Imm(Constants::STRING_LENGTH_SHIFT).u32).Imm(Constants::STRING_LENGTH_OFFSET).u32 Goto(:_Fast_Substring_Result) } copy_u16_chars(_src_str_data, _new_str_data, Cast(_char_count).u64) - StoreI(_new_str1, OrI(ShlI(_char_count).Imm(1).u32).Imm(1).u32).Imm(Constants::STRING_LENGTH_OFFSET).u32 + StoreI(_new_str1, OrI(ShlI(_char_count).Imm(Constants::STRING_LENGTH_SHIFT).u32).Imm(1).u32).Imm(Constants::STRING_LENGTH_OFFSET).u32 Goto(:_Fast_Substring_Result) } # Source string is already compressed _new_str2 := allocate_string_tlab(_klass, Cast(_char_count).word) _new_str_data2 := Add(_new_str2, Cast(Constants::STRING_DATA_OFFSET).u64).ptr copy_u8_chars(_src_str_data, _new_str_data2, Cast(_char_count).u64) - StoreI(_new_str2, ShlI(_char_count).Imm(1).u32).Imm(Constants::STRING_LENGTH_OFFSET).u32 + StoreI(_new_str2, ShlI(_char_count).Imm(Constants::STRING_LENGTH_SHIFT).u32).Imm(Constants::STRING_LENGTH_OFFSET).u32 Label(:_Fast_Substring_Result) # String is supposed to be a constant object, so all its data should be visible by all threads Intrinsic(:DATA_MEMORY_BARRIER_FULL).void @@ -273,7 +273,7 @@ function(:StringTrimLeftBase, length_packed := LoadI(str).Imm(Constants::STRING_LENGTH_OFFSET).u32 str_data := Add(Cast(str).SrcType(Constants::COMPILER_REFERENCE).ptr, Cast(Constants::STRING_DATA_OFFSET).word).ptr not_compressed := 
AndI(length_packed).Imm(1).i32 - length := ShrI(length_packed).Imm(1).i32 + length := ShrI(length_packed).Imm(Constants::STRING_LENGTH_SHIFT).i32 start_index := Cast(1).i32 If(not_compressed, 0).EQ.Likely.b { # String contains 8-bit chars @@ -339,7 +339,7 @@ Label(:L1) If(ws, 0).EQ.Likely.b { Return(Cast(str).SrcType(Constants::COMPILER_REFERENCE).ptr).ptr } - If(ShrI(length_packed).Imm(1).i32, 1).EQ.Unlikely.b { + If(ShrI(length_packed).Imm(Constants::STRING_LENGTH_SHIFT).i32, 1).EQ.Unlikely.b { LiveOut(str).DstReg(regmap[:arg0]).ref entrypoint1 = get_entrypoint_offset("STRING_EMPTY") Intrinsic(:TAIL_CALL).AddImm(entrypoint1).MethodAsImm("StringEmpty").Terminator.ptr @@ -365,7 +365,7 @@ function(:StringTrimRightBase, end length_packed := LoadI(str).Imm(Constants::STRING_LENGTH_OFFSET).u32 - length := ShrI(length_packed).Imm(1).i32 + length := ShrI(length_packed).Imm(Constants::STRING_LENGTH_SHIFT).i32 start_index := SubI(length).Imm(2).i32 str_data := Add(Cast(str).SrcType(Constants::COMPILER_REFERENCE).ptr, Cast(Constants::STRING_DATA_OFFSET).word).ptr not_compressed := AndI(length_packed).Imm(1).i32 @@ -424,7 +424,7 @@ function(:StringTrimRight, } str_data := Add(Cast(str).SrcType(Constants::COMPILER_REFERENCE).ptr, Cast(Constants::STRING_DATA_OFFSET).word).ptr not_compressed := AndI(length_packed).Imm(1).i32 - length := ShrI(length_packed).Imm(1).i32 + length := ShrI(length_packed).Imm(Constants::STRING_LENGTH_SHIFT).i32 last_char_index := SubI(length).Imm(1).i32 If(not_compressed, 0).EQ.Likely.b { ws1 := is_white_space_u8(Load(str_data, last_char_index).u8) @@ -462,7 +462,7 @@ function(:StringTrimBase, end length_packed := LoadI(str).Imm(Constants::STRING_LENGTH_OFFSET).u32 - length := ShrI(length_packed).Imm(1).i32 + length := ShrI(length_packed).Imm(Constants::STRING_LENGTH_SHIFT).i32 left := 0 right := SubI(length).Imm(2).i32 str_data := Add(Cast(str).SrcType(Constants::COMPILER_REFERENCE).ptr, Cast(Constants::STRING_DATA_OFFSET).word).ptr @@ -539,7 +539,7 
@@ function(:StringTrim, } str_data := Add(Cast(str).SrcType(Constants::COMPILER_REFERENCE).ptr, Cast(Constants::STRING_DATA_OFFSET).word).ptr not_compressed := AndI(length_packed).Imm(1).i32 - length := ShrI(length_packed).Imm(1).i32 + length := ShrI(length_packed).Imm(Constants::STRING_LENGTH_SHIFT).i32 # length == 1 If(length, 1).EQ.b { If(not_compressed, 0).EQ.Likely.b { @@ -624,7 +624,7 @@ function(:StringStartsWithBase, str_not_compressed := AndI(str_len_packed).Imm(1).i32 str_data := Add(Cast(str).SrcType(Constants::COMPILER_REFERENCE).ptr, Cast(Constants::STRING_DATA_OFFSET).word).ptr pfx_len_packed := LoadI(pfx).Imm(Constants::STRING_LENGTH_OFFSET).u32 - pfx_len := ShrI(pfx_len_packed).Imm(1).i32 + pfx_len := ShrI(pfx_len_packed).Imm(Constants::STRING_LENGTH_SHIFT).i32 pfx_not_compressed := AndI(pfx_len_packed).Imm(1).i32 pfx_data := Add(Cast(pfx).SrcType(Constants::COMPILER_REFERENCE).ptr, Cast(Constants::STRING_DATA_OFFSET).word).ptr @@ -680,8 +680,8 @@ function(:StringStartsWith, } from_index2 := Phi(from_index, from_index1).i32 - str_len := ShrI(str_len_packed).Imm(1).i32 - pfx_len := ShrI(pfx_len_packed).Imm(1).i32 + str_len := ShrI(str_len_packed).Imm(Constants::STRING_LENGTH_SHIFT).i32 + pfx_len := ShrI(pfx_len_packed).Imm(Constants::STRING_LENGTH_SHIFT).i32 If(from_index2, Sub(str_len, pfx_len).i32).GT.Unlikely.b { # Return 'false' in this case, as we know that 'pfx' is not empty @@ -715,7 +715,7 @@ function(:StringEndsWithBase, sfx_len_packed := LoadI(sfx).Imm(Constants::STRING_LENGTH_OFFSET).u32 sfx_not_compressed := AndI(sfx_len_packed).Imm(1).i32 sfx_data := Add(Cast(sfx).SrcType(Constants::COMPILER_REFERENCE).ptr, Cast(Constants::STRING_DATA_OFFSET).word).ptr - sfx_len := ShrI(sfx_len_packed).Imm(1).i32 + sfx_len := ShrI(sfx_len_packed).Imm(Constants::STRING_LENGTH_SHIFT).i32 from_index := Sub(end_index, sfx_len).i32; sfx_i1 := 0 @@ -769,14 +769,14 @@ function(:StringEndsWith, Return(0).b } - str_len := ShrI(str_len_packed).Imm(1).i32 + 
str_len := ShrI(str_len_packed).Imm(Constants::STRING_LENGTH_SHIFT).i32 # If 'end_index' is greater than length of 'str' make it equal to length of 'str'. If(end_index, str_len).GT.Unlikely.b { end_index1 := str_len } end_index2 := Phi(end_index, end_index1).i32 - sfx_len := ShrI(sfx_len_packed).Imm(1).i32 + sfx_len := ShrI(sfx_len_packed).Imm(Constants::STRING_LENGTH_SHIFT).i32 from_index := Sub(end_index2, sfx_len).i32; IfImm(Compare(from_index, 0).LT.b).Imm(0).NE.Unlikely.b { # Return 'false' in this case, as 'sfx' length is greater than 'end_index'. @@ -813,7 +813,7 @@ function(:StringGetBytesTlab, # Note, 'str' is checked against nullptr in the InstBuilder (see AddArgNullcheckIfNeeded) length := LoadI(str).Imm(Constants::STRING_LENGTH_OFFSET).u32; uncompressed := AndI(length).Imm(1).u32; - length := ShrI(length).Imm(1).u32; + length := ShrI(length).Imm(Constants::STRING_LENGTH_SHIFT).u32; If(Cast(end_index).u32, length).A.Unlikely.b { Goto(:SlowPathEntrypoint) # Out of range @@ -1042,10 +1042,10 @@ def GenerateCreateStringFromCharCodeTlab(string_compression_enabled) if string_compression_enabled If(compressible, 1).EQ.Likely { convert_char_codes_to_u8_chars(codes_data, str_data, Cast(codes_count).u64) - StoreI(new_str, ShlI(codes_count).Imm(Constants::LOG2_BYTES_PER_U16).u32).Imm(Constants::STRING_LENGTH_OFFSET).u32 + StoreI(new_str, ShlI(codes_count).Imm(Constants::STRING_LENGTH_SHIFT).u32).Imm(Constants::STRING_LENGTH_OFFSET).u32 } Else { convert_char_codes_to_u16_chars(codes_data, str_data, Cast(codes_count).u64) - StoreI(new_str, OrI(ShlI(codes_count).Imm(Constants::LOG2_BYTES_PER_U16).u32).Imm(1).u32).Imm(Constants::STRING_LENGTH_OFFSET).u32 + StoreI(new_str, OrI(ShlI(codes_count).Imm(Constants::STRING_LENGTH_SHIFT).u32).Imm(1).u32).Imm(Constants::STRING_LENGTH_OFFSET).u32 } else convert_char_codes_to_u16_chars(codes_data, str_data, Cast(codes_count).u64) @@ -1116,10 +1116,10 @@ def GenerateCreateStringFromCharCodeSingleTlab(string_compression_enabled) if 
string_compression_enabled If(compressible, 1).EQ.Likely { Store(str_data, Cast(0).u64, char).u8 - StoreI(new_str, Cast(2).u32).Imm(Constants::STRING_LENGTH_OFFSET).u32 + StoreI(new_str, Cast(4).u32).Imm(Constants::STRING_LENGTH_OFFSET).u32 } Else { Store(str_data, Cast(0).u64, char).u16 - StoreI(new_str, OrI(Cast(2).u32).Imm(1).u32).Imm(Constants::STRING_LENGTH_OFFSET).u32 + StoreI(new_str, OrI(Cast(4).u32).Imm(1).u32).Imm(Constants::STRING_LENGTH_OFFSET).u32 } else Store(str_data, Cast(0).u64, char).u16 @@ -1431,7 +1431,7 @@ function(:StringIndexOf, str_data := Add(Cast(str).SrcType(Constants::COMPILER_REFERENCE).ptr, Cast(Constants::STRING_DATA_OFFSET).word).ptr IfImm(Compare(AndI(str_len_packed).Imm(1).i32, 1).EQ.b).Imm(0).NE.Unlikely.b { - str_data_size_u16 := AndI(str_len_packed).Imm(0xFFFFFFFE).u32 + str_data_size_u16 := AndI(str_len_packed).Imm(0xFFFFFFFC).u32 # 0 < data size < 16 If(str_data_size_u16, 16).LT.Unlikely.b { LiveOut(str_data).DstReg(regmap[:arg0]).ptr @@ -1460,7 +1460,7 @@ function(:StringIndexOf, Return(-1).i32 } - str_data_size_u8 := ShrI(str_len_packed).Imm(1).u32 + str_data_size_u8 := ShrI(str_len_packed).Imm(Constants::STRING_LENGTH_SHIFT).u32 # 0 < data size < 8 If(str_data_size_u8, 8).LT.Unlikely.b { LiveOut(str_data).DstReg(regmap[:arg0]).ptr @@ -1518,7 +1518,7 @@ function(:StringIndexOfAfter, Return(-1).i32 } - str_len := ShrI(str_len_packed).Imm(1).i32 + str_len := ShrI(str_len_packed).Imm(Constants::STRING_LENGTH_SHIFT).i32 # Return '-1' if 'start_index' is out of range If(start_index, str_len).GE.Unlikely.b { @@ -1625,8 +1625,10 @@ function(:StringRepeatTlab, klass := load_class(str) old_buf := AddI(str).Imm(Constants::STRING_DATA_OFFSET).ptr uncompressed := AndI(length).Imm(1).u32 - length := Shl(ShrI(length).Imm(1).u32, uncompressed).u32 + codes_count := ShrI(length).Imm(Constants::STRING_LENGTH_SHIFT).u32 + length := Shl(ShrI(length).Imm(Constants::STRING_LENGTH_SHIFT).u32, uncompressed).u32 size := Mul(length, count).u32 + 
codes_count := Mul(codes_count, count).u32 new_str := allocate_string_tlab(klass, Cast(size).u64); new_buf := AddI(new_str).Imm(Constants::STRING_DATA_OFFSET).ptr @@ -1656,11 +1658,11 @@ Label(:outer) Goto(:outer) Label(:End) - # shift left if it was a compressed string - # mark the (unset) lowest bit with uncompressed - compress := Neg(SubI(uncompressed).Imm(1).u32).u32 - length := Or(Shl(size, compress).u32, uncompressed).u32 - StoreI(new_str, length).Imm(Constants::STRING_LENGTH_OFFSET).u32 + If(uncompressed, 0).EQ.Likely { + StoreI(new_str, ShlI(codes_count).Imm(Constants::STRING_LENGTH_SHIFT).u32).Imm(Constants::STRING_LENGTH_OFFSET).u32 + } Else { + StoreI(new_str, OrI(ShlI(codes_count).Imm(Constants::STRING_LENGTH_SHIFT).u32).Imm(1).u32).Imm(Constants::STRING_LENGTH_OFFSET).u32 + } Return(new_str).ptr Label(:SlowPathEntrypoint) @@ -1683,7 +1685,7 @@ function(:WriteStringToMem, buf := Bitcast(mem).ptr len := LoadI(str).Imm(Constants::STRING_LENGTH_OFFSET).u32 utf16 := AndI(len).Imm(1).u32 - len_0 := ShrI(len).Imm(1).u32 + len_0 := ShrI(len).Imm(Constants::STRING_LENGTH_SHIFT).u32 len := len_0 If(len, 0).EQ.Unlikely.b { @@ -1736,7 +1738,7 @@ function(:CreateStringFromMem, str := allocate_string_tlab_no_debug(klass, size) str_data := AddI(str).Imm(Constants::STRING_DATA_OFFSET).ptr copy_u8_chars(addr, str_data, size); - size_0 := ShlI(size).Imm(1).u32 + size_0 := ShlI(size).Imm(Constants::STRING_LENGTH_SHIFT).u32 If(mark, Cast(0xFEFF).u16).EQ.Unlikely.b { # UTF-16 string size_1 := OrI(ShrI(size_0).Imm(1).u32).Imm(1).u32 diff --git a/static_core/plugins/ets/irtoc_scripts/string_builder.irt b/static_core/plugins/ets/irtoc_scripts/string_builder.irt index 2c289c2c2ffb68a5564c7ef352d6a319c3296f2c..c3c250db50573fdc133d7930dca25e2c5c140693 100644 --- a/static_core/plugins/ets/irtoc_scripts/string_builder.irt +++ b/static_core/plugins/ets/irtoc_scripts/string_builder.irt @@ -414,19 +414,19 @@ def GenerateStringBuilderAppendStrings(num_args, sync_type) # 7. 
Update 'length' field length := LoadI(sb).Imm(Constants::ETS_SB_LENGTH_OFFSET).i32 if num_args > 0 - length0 := ShrI(length0).Imm(1).i32 + length0 := ShrI(length0).Imm(Constants::STRING_LENGTH_SHIFT).i32 length := Add(length, length0).i32 end if num_args > 1 - length1 := ShrI(length1).Imm(1).i32 + length1 := ShrI(length1).Imm(Constants::STRING_LENGTH_SHIFT).i32 length := Add(length, length1).i32 end if num_args > 2 - length2 := ShrI(length2).Imm(1).i32 + length2 := ShrI(length2).Imm(Constants::STRING_LENGTH_SHIFT).i32 length := Add(length, length2).i32 end if num_args > 3 - length3 := ShrI(length3).Imm(1).i32 + length3 := ShrI(length3).Imm(Constants::STRING_LENGTH_SHIFT).i32 length := Add(length, length3).i32 end @@ -486,20 +486,23 @@ function(:StringBuilderToString, # Compute data size and length of a new string. n_chars := LoadI(sb).Imm(Constants::ETS_SB_LENGTH_OFFSET).u32 - len_compressed := ShlI(n_chars).Imm(1).u32 # set 'uncompressed' bit to 0 sb_compress := LoadI(sb).Imm(Constants::ETS_SB_COMPRESS_OFFSET).u8 - If(sb_compress, 0).EQ.Unlikely.b { - size := len_compressed - len_uncompressed := OrI(len_compressed).Imm(1).u32 # set 'uncompressed' bit to 1 + If(sb_compress, 1).EQ.Likely.b { + data_size1 := Cast(n_chars).word + } Else { + data_size2 := Cast(ShlI(n_chars).Imm(1).u32).word } - data_size := Phi(n_chars, size).u32 - packed_length := Phi(len_compressed, len_uncompressed).u32 + data_size := Phi(data_size1, data_size2).u32 # Allocate a string new_str := allocate_string_tlab(string_klass, Cast(data_size).word) # Let the memory writes (TLAB) be visible to other threads Intrinsic(:DATA_MEMORY_BARRIER_FULL).void # Set new string's length - StoreI(new_str,packed_length).Imm(Constants::STRING_LENGTH_OFFSET).u32 + If(sb_compress, 1).EQ.Likely.b { + StoreI(new_str, ShlI(n_chars).Imm(Constants::STRING_LENGTH_SHIFT).u32).Imm(Constants::STRING_LENGTH_OFFSET).u32 + } Else { + StoreI(new_str, 
OrI(ShlI(n_chars).Imm(Constants::STRING_LENGTH_SHIFT).u32).Imm(1).u32).Imm(Constants::STRING_LENGTH_OFFSET).u32 + } # Set new string's hashcode to 0, so as not to spend time on its computation StoreI(new_str, Cast(0).u32).Imm(Constants::STRING_HASHCODE_OFFSET).u32 # If string is empty, then there is nothing to do anymore @@ -527,7 +530,7 @@ Label(:ForEachBufferSlot) # ------------------- str_len := LoadI(obj).Imm(Constants::STRING_LENGTH_OFFSET).i32 src_data := AddI(Cast(obj).SrcType(Constants::COMPILER_REFERENCE).ptr).Imm(Constants::STRING_DATA_OFFSET).ptr - src_len := ShrI(str_len).Imm(1).i32 + src_len := ShrI(str_len).Imm(Constants::STRING_LENGTH_SHIFT).i32 If(sb_compress, 0).EQ.Unlikely.b { Goto(:DoNotCompressString) } diff --git a/static_core/plugins/ets/libllvmbackend/ets_llvm_ir_constructor_gen.inl b/static_core/plugins/ets/libllvmbackend/ets_llvm_ir_constructor_gen.inl index 248049e3fcc858e4b5519541694dbe98384fb195..217d13a85e7908dfa526dc414405eab7f0e652d6 100644 --- a/static_core/plugins/ets/libllvmbackend/ets_llvm_ir_constructor_gen.inl +++ b/static_core/plugins/ets/libllvmbackend/ets_llvm_ir_constructor_gen.inl @@ -274,7 +274,7 @@ void LLVMIrConstructor::StringBuilderAppendStringMain(Inst *inst, llvm::Value *s auto strLengthOffset = builder_.CreateConstInBoundsGEP1_32(builder_.getInt8Ty(), str, runtime->GetStringLengthOffset(arch)); auto strLength = builder_.CreateLoad(builder_.getInt32Ty(), strLengthOffset); - auto strLengthShr = builder_.CreateLShr(strLength, builder_.getInt32(1)); + auto strLengthShr = builder_.CreateLShr(strLength, builder_.getInt32(2)); auto strLengthZero = builder_.CreateICmpEQ(strLengthShr, builder_.getInt32(0)); builder_.CreateCondBr(strLengthZero, contBb, mainBb); diff --git a/static_core/plugins/ets/runtime/ets_class_linker_extension.cpp b/static_core/plugins/ets/runtime/ets_class_linker_extension.cpp index 8fb4c03e946c80d39f567bf19651aa112d9f3e0e..06a0f5853192c068b2cec463716951577b461cf3 100644 --- 
a/static_core/plugins/ets/runtime/ets_class_linker_extension.cpp +++ b/static_core/plugins/ets/runtime/ets_class_linker_extension.cpp @@ -129,10 +129,113 @@ void EtsClassLinkerExtension::InitializeClassRoots() InitializeArrayClassRoot(ClassRoot::ARRAY_F32, ClassRoot::F32, "[F"); InitializeArrayClassRoot(ClassRoot::ARRAY_F64, ClassRoot::F64, "[D"); InitializeArrayClassRoot(ClassRoot::ARRAY_TAGGED, ClassRoot::TAGGED, "[A"); - InitializeArrayClassRoot(ClassRoot::ARRAY_STRING, ClassRoot::STRING, + InitializeArrayClassRoot(ClassRoot::ARRAY_STRING, ClassRoot::BASE_STRING, utf::Mutf8AsCString(langCtx_.GetStringArrayClassDescriptor())); } +Class *EtsClassLinkerExtension::CreateStringSubClass(const uint8_t *descriptor, Class *stringClass, ClassRoot type) +{ + ClassLinker *classLinker = Runtime::GetCurrent()->GetClassLinker(); + ClassLinkerContext *context = stringClass->GetLoadContext(); + + uint32_t accessFlags = stringClass->GetAccessFlags() | ACC_FINAL; + + Span fields {}; + Span methodsSpan {}; + Span interfacesSpan {}; + Class *subClass = classLinker->BuildClass(descriptor, true, accessFlags, methodsSpan, fields, stringClass, + interfacesSpan, context, false); + + subClass->SetState(Class::State::INITIALIZING); + subClass->SetStringClass(); + switch (type) { + case ClassRoot::STRING: + subClass->SetLineStringClass(); + break; + case ClassRoot::SLICED_STRING: + subClass->SetSlicedStringClass(); + break; + case ClassRoot::TREE_STRING: + subClass->SetTreeStringClass(); + break; + default: + subClass->SetLineStringClass(); + break; + } + + subClass->SetState(Class::State::INITIALIZED); + return subClass; +} + +const uint8_t *EtsClassLinkerExtension::GetStringClassDescriptor(ClassRoot strCls) +{ + const char *data = nullptr; + switch (strCls) { + case ClassRoot::STRING: { + data = panda_file_items::class_descriptors::LINE_STRING.data(); + break; + } + + case ClassRoot::SLICED_STRING: { + data = panda_file_items::class_descriptors::SLICED_STRING.data(); + break; + } + + case 
ClassRoot::TREE_STRING: { + data = panda_file_items::class_descriptors::TREE_STRING.data(); + break; + } + + default: { + data = panda_file_items::class_descriptors::LINE_STRING.data(); + break; + } + } + return utf::CStringAsMutf8(data); +} + +bool EtsClassLinkerExtension::InitializeStringClass([[maybe_unused]] Class *classClass) +{ + auto *strCls = GetClassRoot(ClassRoot::BASE_STRING); + + // 1. set LineString Class and make String pointes to it in ClassRoot , so AllocStringObject() can create LineString + // directly + auto *lineStrCls = CreateStringSubClass(GetStringClassDescriptor(ClassRoot::STRING), strCls, ClassRoot::STRING); + if (lineStrCls == nullptr) { + LOG(ERROR, CLASS_LINKER) << "Cannot create LineString class '" << GetStringClassDescriptor(ClassRoot::STRING) + << "'"; + return false; + } + lineStrCls->SetFinal(); + EtsClass::FromRuntimeClass(lineStrCls)->AsObject()->GetCoreType()->SetClass(classClass); + SetClassRoot(ClassRoot::STRING, lineStrCls); + + // 2. set SlicedString Class + auto *slicedStrCls = + CreateStringSubClass(GetStringClassDescriptor(ClassRoot::SLICED_STRING), strCls, ClassRoot::SLICED_STRING); + if (slicedStrCls == nullptr) { + LOG(ERROR, CLASS_LINKER) << "Cannot create SlicedString class '" + << GetStringClassDescriptor(ClassRoot::SLICED_STRING) << "'"; + return false; + } + slicedStrCls->SetFinal(); + EtsClass::FromRuntimeClass(slicedStrCls)->AsObject()->GetCoreType()->SetClass(classClass); + SetClassRoot(ClassRoot::SLICED_STRING, slicedStrCls); + + // 3. 
set TreeString Class + auto *treeStrCls = + CreateStringSubClass(GetStringClassDescriptor(ClassRoot::TREE_STRING), strCls, ClassRoot::TREE_STRING); + if (treeStrCls == nullptr) { + LOG(ERROR, CLASS_LINKER) << "Cannot create TreeString class '" + << GetStringClassDescriptor(ClassRoot::TREE_STRING) << "'"; + return false; + } + treeStrCls->SetFinal(); + EtsClass::FromRuntimeClass(treeStrCls)->AsObject()->GetCoreType()->SetClass(classClass); + SetClassRoot(ClassRoot::TREE_STRING, treeStrCls); + return true; +} + bool EtsClassLinkerExtension::InitializeImpl(bool compressedStringEnabled) { // NOLINTNEXTLINE(google-build-using-namespace) @@ -144,14 +247,14 @@ bool EtsClassLinkerExtension::InitializeImpl(bool compressedStringEnabled) auto *objectClass = GetClassLinker()->GetClass(langCtx_.GetObjectClassDescriptor(), false, GetBootContext()); if (objectClass == nullptr) { - LOG(ERROR, CLASS_LINKER) << "Cannot create class '" << langCtx_.GetObjectClassDescriptor() << "'"; + LOG(ERROR, CLASS_LINKER) << "Cannot create object class '" << langCtx_.GetObjectClassDescriptor() << "'"; return false; } SetClassRoot(ClassRoot::OBJECT, objectClass); auto *classClass = GetClassLinker()->GetClass(langCtx_.GetClassClassDescriptor(), false, GetBootContext()); if (classClass == nullptr) { - LOG(ERROR, CLASS_LINKER) << "Cannot create class '" << langCtx_.GetClassClassDescriptor() << "'"; + LOG(ERROR, CLASS_LINKER) << "Cannot create class class '" << langCtx_.GetClassClassDescriptor() << "'"; return false; } SetClassRoot(ClassRoot::CLASS, classClass); @@ -163,10 +266,18 @@ bool EtsClassLinkerExtension::InitializeImpl(bool compressedStringEnabled) auto *stringClass = GetClassLinker()->GetClass(langCtx_.GetStringClassDescriptor(), false, GetBootContext()); if (stringClass == nullptr) { - LOG(ERROR, CLASS_LINKER) << "Cannot create class '" << langCtx_.GetStringClassDescriptor() << "'"; + LOG(ERROR, CLASS_LINKER) << "Cannot create string class '" << langCtx_.GetStringClassDescriptor() << "'"; + 
return false; + } + stringClass->SetUnFinal(); + EtsClass::FromRuntimeClass(stringClass)->AsObject()->GetCoreType()->SetClass(classClass); + SetClassRoot(ClassRoot::BASE_STRING, stringClass); + + // Set String Classes + if (!InitializeStringClass(classClass)) { + LOG(ERROR, CLASS_LINKER) << "Cannot create String classes"; return false; } - SetClassRoot(ClassRoot::STRING, stringClass); stringClass->SetStringClass(); auto *jsValueClass = GetClassLinker()->GetClass(utf::CStringAsMutf8(JS_VALUE.data()), false, GetBootContext()); @@ -175,9 +286,7 @@ bool EtsClassLinkerExtension::InitializeImpl(bool compressedStringEnabled) return false; } jsValueClass->SetXRefClass(); - InitializeClassRoots(); - return true; } @@ -275,7 +384,11 @@ size_t EtsClassLinkerExtension::GetClassVTableSize(ClassRoot root) case ClassRoot::ARRAY_STRING: return GetArrayClassVTableSize(); case ClassRoot::OBJECT: + case ClassRoot::BASE_STRING: case ClassRoot::STRING: + // case ClassRoot::LINE_STRING: + case ClassRoot::SLICED_STRING: + case ClassRoot::TREE_STRING: return GetClassRoot(root)->GetVTableSize(); case ClassRoot::CLASS: return 0; @@ -324,7 +437,11 @@ size_t EtsClassLinkerExtension::GetClassIMTSize(ClassRoot root) return GetArrayClassIMTSize(); case ClassRoot::OBJECT: case ClassRoot::CLASS: + case ClassRoot::BASE_STRING: case ClassRoot::STRING: + // case ClassRoot::LINE_STRING: + case ClassRoot::SLICED_STRING: + case ClassRoot::TREE_STRING: return 0; default: { break; @@ -371,7 +488,11 @@ size_t EtsClassLinkerExtension::GetClassSize(ClassRoot root) return GetArrayClassSize(); case ClassRoot::OBJECT: case ClassRoot::CLASS: + case ClassRoot::BASE_STRING: case ClassRoot::STRING: + // case ClassRoot::LINE_STRING: + case ClassRoot::SLICED_STRING: + case ClassRoot::TREE_STRING: return Class::ComputeClassSize(GetClassVTableSize(root), GetClassIMTSize(root), 0, 0, 0, 0, 0, 0); default: { break; @@ -589,6 +710,9 @@ void EtsClassLinkerExtension::InitializeBuiltinSpecialClasses() using namespace 
panda_file_items::class_descriptors; CacheClass(STRING, [](auto *c) { c->SetValueTyped(); }); + CacheClass(LINE_STRING, [](auto *c) { c->SetValueTyped(); }); + CacheClass(SLICED_STRING, [](auto *c) { c->SetValueTyped(); }); + CacheClass(TREE_STRING, [](auto *c) { c->SetValueTyped(); }); CacheClass(NULL_VALUE, [](auto *c) { c->SetNullValue(); c->SetValueTyped(); diff --git a/static_core/plugins/ets/runtime/ets_class_linker_extension.h b/static_core/plugins/ets/runtime/ets_class_linker_extension.h index a63e0281b7adbc92c594b9521aada86164938644..ac541be1818240e13a8d6d3831e8ec3a5f4590c1 100644 --- a/static_core/plugins/ets/runtime/ets_class_linker_extension.h +++ b/static_core/plugins/ets/runtime/ets_class_linker_extension.h @@ -69,6 +69,12 @@ public: void InitializeClassRoots(); + Class *CreateStringSubClass(const uint8_t *descriptor, Class *stringClass, ClassRoot type); + + const uint8_t *GetStringClassDescriptor(ClassRoot strCls); + + bool InitializeStringClass(Class *classClass); + bool InitializeClass(Class *klass) override; bool InitializeClass(Class *klass, ClassLinkerErrorHandler *handler) override; diff --git a/static_core/plugins/ets/runtime/ets_class_root.cpp b/static_core/plugins/ets/runtime/ets_class_root.cpp index 4d9e1956be2f818e142e9537db4e790e01b73bbd..0ea45f70563b8686090f392de939df11cc0f572e 100644 --- a/static_core/plugins/ets/runtime/ets_class_root.cpp +++ b/static_core/plugins/ets/runtime/ets_class_root.cpp @@ -66,7 +66,7 @@ EtsClassRoot ToEtsClassRoot(ClassRoot classRoot) return EtsClassRoot::OBJECT; // Other types - case ClassRoot::STRING: + case ClassRoot::BASE_STRING: return EtsClassRoot::STRING; case ClassRoot::ARRAY_CLASS: return EtsClassRoot::STRING_ARRAY; diff --git a/static_core/plugins/ets/runtime/ets_class_root.h b/static_core/plugins/ets/runtime/ets_class_root.h index b2ceec182eb39014ad74a4d49eb0cdc07b7aab11..71ea78e91c60483e7997bc1592a27a2094edf644 100644 --- a/static_core/plugins/ets/runtime/ets_class_root.h +++ 
b/static_core/plugins/ets/runtime/ets_class_root.h @@ -45,7 +45,7 @@ enum class EtsClassRoot { CLASS = helpers::ToUnderlying(ClassRoot::CLASS), OBJECT = helpers::ToUnderlying(ClassRoot::OBJECT), - STRING = helpers::ToUnderlying(ClassRoot::STRING), + STRING = helpers::ToUnderlying(ClassRoot::BASE_STRING), STRING_ARRAY = helpers::ToUnderlying(ClassRoot::ARRAY_STRING), }; diff --git a/static_core/plugins/ets/runtime/ets_entrypoints.cpp b/static_core/plugins/ets/runtime/ets_entrypoints.cpp index 34a3fa3b2ea3050357484b5792a24a12ea50b8a4..4b691301f02d5e9bbde1e084507b10f8ca9e2498 100644 --- a/static_core/plugins/ets/runtime/ets_entrypoints.cpp +++ b/static_core/plugins/ets/runtime/ets_entrypoints.cpp @@ -15,6 +15,7 @@ #include "plugins/ets/runtime/ets_entrypoints.h" +#include "include/coretypes/string.h" #include "include/object_header.h" #include "libpandafile/shorty_iterator.h" #include "plugins/ets/runtime/ets_coroutine.h" @@ -480,12 +481,12 @@ extern "C" uintptr_t NO_ADDRESS_SANITIZE ResolveCallByNameEntrypoint(const Metho extern "C" coretypes::String *CreateStringFromCharCodeEntrypoint(ObjectHeader *array) { auto *charCodes = EtsBoxedDoubleArray::FromEtsObject(EtsObject::FromCoreType(array)); - return EtsString::CreateNewStringFromCharCode(charCodes->GetData())->GetCoreType(); + return EtsString::CreateNewStringFromCharCode(charCodes->GetData())->GetCoreString(); } extern "C" coretypes::String *CreateStringFromCharCodeSingleEntrypoint(uint64_t charCode) { - return EtsString::CreateNewStringFromCharCode(bit_cast(charCode))->GetCoreType(); + return EtsString::CreateNewStringFromCharCode(bit_cast(charCode))->GetCoreString(); } } // namespace ark::ets diff --git a/static_core/plugins/ets/runtime/ets_panda_file_items.h b/static_core/plugins/ets/runtime/ets_panda_file_items.h index 4a645d3f0f07d87813adacda80272bd6e1105554..a07ce8451a78c96c3a19975c514a663a3647d490 100644 --- a/static_core/plugins/ets/runtime/ets_panda_file_items.h +++ 
b/static_core/plugins/ets/runtime/ets_panda_file_items.h @@ -40,6 +40,9 @@ static constexpr std::string_view COND_VAR = "Lstd/c static constexpr std::string_view QUEUE_SPINLOCK = "Lstd/core/QueueSpinlock;"; static constexpr std::string_view NULL_VALUE = "Lstd/core/__NullValue;"; static constexpr std::string_view STRING = "Lstd/core/String;"; +static constexpr std::string_view LINE_STRING = "Lstd/core/LineString;"; +static constexpr std::string_view SLICED_STRING = "Lstd/core/SlicedString;"; +static constexpr std::string_view TREE_STRING = "Lstd/core/TreeString;"; static constexpr std::string_view WEAK_REF = "Lstd/core/WeakRef;"; static constexpr std::string_view FINALIZABLE_WEAK_REF = "Lstd/core/FinalizableWeakRef;"; static constexpr std::string_view FINALIZATION_REGISTRY = "Lstd/core/FinalizationRegistry;"; diff --git a/static_core/plugins/ets/runtime/ets_stubs.cpp b/static_core/plugins/ets/runtime/ets_stubs.cpp index 7d8e16dcfd3c4473535fd9766d9a428cda0b7b4e..038e9753d6e4cdc6349032205288adee0a0b4d3d 100644 --- a/static_core/plugins/ets/runtime/ets_stubs.cpp +++ b/static_core/plugins/ets/runtime/ets_stubs.cpp @@ -14,6 +14,7 @@ * limitations under the License. 
*/ +#include "include/coretypes/base_string.h" #include "plugins/ets/runtime/ets_class_linker_extension.h" #include "plugins/ets/runtime/ets_stubs-inl.h" #include "plugins/ets/runtime/ets_utils.h" @@ -124,10 +125,10 @@ bool EtsValueTypedEquals(EtsCoroutine *coro, EtsObject *obj1, EtsObject *obj2) auto ptypes = PlatformTypes(coro); ASSERT(ptypes != nullptr); - + LanguageContext ctx = Runtime::GetCurrent()->GetLanguageContext(panda_file::SourceLang::ETS); if (cls1->IsStringClass()) { - return cls2->IsStringClass() && - coretypes::String::Cast(obj1->GetCoreType())->Compare(coretypes::String::Cast(obj2->GetCoreType())) == 0; + return cls2->IsStringClass() && coretypes::BaseString::Cast(obj1->GetCoreType()) + ->Compare(coretypes::BaseString::Cast(obj2->GetCoreType()), ctx) == 0; } if (cls1 == ptypes->coreBoolean) { return cls2 == ptypes->coreBoolean && CompareBoxedPrimitive(obj1, obj2); diff --git a/static_core/plugins/ets/runtime/ets_vm.cpp b/static_core/plugins/ets/runtime/ets_vm.cpp index 39aa414deed0c4eafc04bbe86226338569b48d46..335888dee95302d6b7cc90a5be2e39379702b0eb 100644 --- a/static_core/plugins/ets/runtime/ets_vm.cpp +++ b/static_core/plugins/ets/runtime/ets_vm.cpp @@ -513,7 +513,7 @@ coretypes::String *PandaEtsVM::CreateString(Method *ctor, ObjectHeader *obj) } else { LOG(FATAL, ETS) << "Must be 1 or 2 ctor args"; } - return str->GetCoreType(); + return reinterpret_cast(str->GetCoreType()); } Expected PandaEtsVM::InvokeEntrypointImpl(Method *entrypoint, const std::vector &args) diff --git a/static_core/plugins/ets/runtime/interop_js/call/arg_convertors.h b/static_core/plugins/ets/runtime/interop_js/call/arg_convertors.h index af169a45ac6f75a9125ddc2a0b9e18c93ac886c5..30b689e2da8734c57d8823719c45cc6670614ebf 100644 --- a/static_core/plugins/ets/runtime/interop_js/call/arg_convertors.h +++ b/static_core/plugins/ets/runtime/interop_js/call/arg_convertors.h @@ -54,7 +54,7 @@ template if (klass == ctx->GetJSValueClass()) { return UnwrapVal(ctx, env, jsVal, 
storeRes); } - if (klass == ctx->GetStringClass()) { + if (klass->IsStringClass()) { return UnwrapVal(ctx, env, jsVal, storeRes); } // start slowpath @@ -264,7 +264,7 @@ template if (klass == ctx->GetJSValueClass()) { return wrapRef(helpers::TypeIdentity(), ref); } - if (klass == ctx->GetStringClass()) { + if (klass->IsStringClass()) { return wrapRef(helpers::TypeIdentity(), ref); } // start slowpath diff --git a/static_core/plugins/ets/runtime/interop_js/call/call_js.cpp b/static_core/plugins/ets/runtime/interop_js/call/call_js.cpp index 52258fc38dd31a84e4e341272be5a81bdcca951d..f24ba9e068c09c26ce54222f03c48e04867358b1 100644 --- a/static_core/plugins/ets/runtime/interop_js/call/call_js.cpp +++ b/static_core/plugins/ets/runtime/interop_js/call/call_js.cpp @@ -510,7 +510,7 @@ static void *SelectCallJSEntrypoint(InteropCtx *ctx, Method *method) } return reinterpret_cast(JSRuntimeCallJSBridge); } - if (protoReader.GetClass() == ctx->GetStringClass()) { + if (protoReader.GetClass()->IsStringClass()) { return reinterpret_cast(JSRuntimeCallJSQNameBridge); } if (protoReader.GetClass() == ctx->GetJSValueClass()) { diff --git a/static_core/plugins/ets/runtime/interop_js/intrinsics_api_impl.cpp b/static_core/plugins/ets/runtime/interop_js/intrinsics_api_impl.cpp index 4f468e9c5ad67be7b506a1f2d86354353f811f81..e9b8113d2eb2041b5680ee177ec97a49056ade8c 100644 --- a/static_core/plugins/ets/runtime/interop_js/intrinsics_api_impl.cpp +++ b/static_core/plugins/ets/runtime/interop_js/intrinsics_api_impl.cpp @@ -1167,7 +1167,7 @@ void *CompilerConvertRefTypeToLocal(EtsObject *etsValue) } return res; } - if (klass == ctx->GetStringClass()) { + if (klass->IsStringClass()) { auto value = EtsString::FromEtsObject(EtsObject::FromCoreType(ref)); napi_value res = JSConvertString::Wrap(env, value); if (UNLIKELY(res == nullptr)) { diff --git a/static_core/plugins/ets/runtime/intrinsics/escompat_Array.cpp b/static_core/plugins/ets/runtime/intrinsics/escompat_Array.cpp index 
dec26961be2d0e820d397232d302d6c78b325327..0feee417e1563bab09214d76bea84af5d572df04 100644 --- a/static_core/plugins/ets/runtime/intrinsics/escompat_Array.cpp +++ b/static_core/plugins/ets/runtime/intrinsics/escompat_Array.cpp @@ -17,6 +17,7 @@ #include #include "cross_values.h" #include +#include "include/coretypes/base_string.h" #include "intrinsics.h" #include "libpandabase/utils/utf.h" #include "libpandabase/utils/utils.h" @@ -87,11 +88,12 @@ EtsInt NormalizeArrayIndex(EtsInt index, EtsInt actualLength) EtsDouble EtsEscompatArrayIndexOfString(EtsObjectArray *buffer, EtsObject *value, EtsInt fromIndex, EtsInt actualLength) { - auto valueString = coretypes::String::Cast(value->GetCoreType()); + auto valueString = coretypes::BaseString::Cast(value->GetCoreType()); + LanguageContext ctx = Runtime::GetCurrent()->GetLanguageContext(panda_file::SourceLang::ETS); for (EtsInt index = fromIndex; index < actualLength; index++) { auto element = buffer->Get(index); if (element != nullptr && element->IsStringClass() && - valueString->Compare(coretypes::String::Cast(element->GetCoreType())) == 0) { + valueString->Compare(coretypes::BaseString::Cast(element->GetCoreType()), ctx) == 0) { return index; } } @@ -100,11 +102,12 @@ EtsDouble EtsEscompatArrayIndexOfString(EtsObjectArray *buffer, EtsObject *value EtsDouble EtsEscompatArrayLastIndexOfString(EtsObjectArray *buffer, EtsObject *value, EtsInt fromIndex) { - auto valueString = coretypes::String::Cast(value->GetCoreType()); + auto valueString = coretypes::BaseString::Cast(value->GetCoreType()); + LanguageContext ctx = Runtime::GetCurrent()->GetLanguageContext(panda_file::SourceLang::ETS); for (EtsInt index = fromIndex; index >= 0; index--) { auto element = buffer->Get(index); if (element != nullptr && element->IsStringClass() && - valueString->Compare(coretypes::String::Cast(element->GetCoreType())) == 0) { + valueString->Compare(coretypes::BaseString::Cast(element->GetCoreType()), ctx) == 0) { return index; } } @@ -540,7 
+543,7 @@ void ComputeElementCharSize(ElementComputeResult &res, EtsObject *element, const auto elementCls = element->GetClass(); if (elementCls->IsStringClass()) { - auto strElement = coretypes::String::Cast(element->GetCoreType()); + auto strElement = coretypes::BaseString::Cast(element->GetCoreType()); if (strElement->IsUtf16()) { auto utf16StrSize = strElement->GetUtf16Length(); res.utf16Size += utf16StrSize; @@ -661,7 +664,7 @@ ark::ets::EtsString *EtsEscompatArrayJoinUtf8String(EtsObjectArray *buffer, EtsI for (EtsInt i = 0; i < actualLength - 1; i++) { EtsObject *str = buffer->Get(i); - coretypes::String *srcString = coretypes::String::Cast(str->GetCoreType()); + coretypes::BaseString *srcString = coretypes::BaseString::Cast(str->GetCoreType()); uint32_t n = srcString->CopyDataRegionMUtf8(dstData, 0, srcString->GetLength(), utf8Size); dstData += n; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) utf8Size -= n; @@ -674,7 +677,7 @@ ark::ets::EtsString *EtsEscompatArrayJoinUtf8String(EtsObjectArray *buffer, EtsI } EtsObject *lastStr = buffer->Get(actualLength - 1); - coretypes::String *lastString = coretypes::String::Cast(lastStr->GetCoreType()); + coretypes::BaseString *lastString = coretypes::BaseString::Cast(lastStr->GetCoreType()); lastString->CopyDataRegionMUtf8(dstData, 0, lastString->GetLength(), utf8Size); return s; } @@ -696,7 +699,7 @@ void ProcessUtf8Element(EtsObject *element, PandaVector &buf, const EtsPla } if (elementCls->IsStringClass()) { - auto strElement = coretypes::String::Cast(element->GetCoreType()); + auto strElement = coretypes::BaseString::Cast(element->GetCoreType()); auto str = strElement->GetDataUtf8(); auto elementSize = strElement->GetUtf8Length(); if (elementSize > 0) { @@ -766,7 +769,7 @@ void ProcessUtf16Element(EtsObject *element, PandaVector &buf, const Et } if (elementCls->IsStringClass()) { - auto strElement = coretypes::String::Cast(element->GetCoreType()); + auto strElement = 
coretypes::BaseString::Cast(element->GetCoreType()); auto strSize = strElement->IsUtf16() ? strElement->GetUtf16Length() : strElement->GetUtf8Length(); // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) strElement->CopyDataUtf16(buf.data() + pos, strSize); diff --git a/static_core/plugins/ets/runtime/intrinsics/std_core_Console.cpp b/static_core/plugins/ets/runtime/intrinsics/std_core_Console.cpp index b30896a42dfcd1bef9eb11d1ffdc0ed50315f4a5..6bb236215540c28f315da30d8db85bd81eb3f0ab 100644 --- a/static_core/plugins/ets/runtime/intrinsics/std_core_Console.cpp +++ b/static_core/plugins/ets/runtime/intrinsics/std_core_Console.cpp @@ -98,7 +98,7 @@ void StdConsolePrintString(ObjectHeader *header [[maybe_unused]], EtsString *dat auto thread = ManagedThread::GetCurrent(); [[maybe_unused]] HandleScope scope(thread); - VMHandle dH(thread, data->GetCoreType()); + VMHandle dH(thread, data->GetCoreType()); auto res = PandaString(data->GetUtf8()); #ifdef PANDA_TARGET_OHOS diff --git a/static_core/plugins/ets/runtime/intrinsics/std_core_String.cpp b/static_core/plugins/ets/runtime/intrinsics/std_core_String.cpp index b0d72375fc18768a537570c9137c7453bd7747a7..f904049674841b2266181e03b005449053491a1f 100644 --- a/static_core/plugins/ets/runtime/intrinsics/std_core_String.cpp +++ b/static_core/plugins/ets/runtime/intrinsics/std_core_String.cpp @@ -16,7 +16,9 @@ #include #include #include +#include "include/coretypes/base_string.h" #include "include/mem/panda_string.h" +#include "include/object_header.h" #include "intrinsics.h" #include "libpandabase/utils/logger.h" #include "macros.h" @@ -44,26 +46,34 @@ constexpr const uint32_t CHAR0X1FFC00 = 0x1ffc00; constexpr const uint16_t CHAR0XD800 = 0xd800; constexpr const uint16_t CHAR0XDC00 = 0xdc00; -static ObjectHeader *StdCoreStringGetDataAsArray(EtsString *s, ets_int begin, ets_int end, bool isUtf16) +static bool CheckStringIndex(EtsString *s, ets_int begin, ets_int end) { ASSERT(s != nullptr); ets_int length = 
s->GetLength(); if (UNLIKELY(begin > end)) { ark::ThrowStringIndexOutOfBoundsException(begin, length); - return nullptr; + return false; } if (UNLIKELY(begin > length || begin < 0)) { ark::ThrowStringIndexOutOfBoundsException(begin, length); - return nullptr; + return false; } if (UNLIKELY(end > length)) { ark::ThrowStringIndexOutOfBoundsException(end, length); - return nullptr; + return false; } + return true; +} +static ObjectHeader *StdCoreStringGetDataAsArray(EtsString *s, ets_int begin, ets_int end, bool isUtf16) +{ + if (!CheckStringIndex(s, begin, end)) { + return nullptr; + } + ets_int length = s->GetLength(); auto thread = ManagedThread::GetCurrent(); [[maybe_unused]] HandleScope scope(thread); - VMHandle sHandle(thread, s->GetCoreType()); + VMHandle sHandle(thread, reinterpret_cast(s)); ets_int n = end - begin; void *array = nullptr; if (isUtf16) { @@ -77,20 +87,22 @@ static ObjectHeader *StdCoreStringGetDataAsArray(EtsString *s, ets_int begin, et if (isUtf16) { auto charArray = reinterpret_cast(array); Span out(charArray->GetData(), charArray->GetLength()); - sHandle.GetPtr()->CopyDataRegionUtf16(&out[0], begin, charArray->GetLength(), sHandle.GetPtr()->GetLength()); + sHandle->CopyDataRegionUtf16(&(out[0]), begin, n, n); } else { auto byteArray = reinterpret_cast(array); Span out(byteArray->GetData(), byteArray->GetLength()); /* as we need only one LSB no sophisticated conversion is needed */ - if (sHandle.GetPtr()->IsUtf16()) { - auto in = sHandle.GetPtr()->GetDataUtf16(); + if (sHandle->IsUtf16()) { + PandaVector in(length); + sHandle->CopyDataUtf16(in.data(), length); for (int i = 0; i < n; ++i) { // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) out[i] = in[i + begin]; } } else { - auto in = sHandle.GetPtr()->GetDataMUtf8(); + PandaVector in(length); + sHandle->CopyDataRegionUtf8(in.data(), 0, length, length); for (int i = 0; i < n; ++i) { // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) out[i] = in[i + begin]; @@ 
-110,10 +122,37 @@ ObjectHeader *StdCoreStringGetBytes(EtsString *s, ets_int begin, ets_int end) return StdCoreStringGetDataAsArray(s, begin, end, false); } +static std::pair NormalizeSubStringIndexes(int32_t beginIndex, int32_t endIndex, EtsString *str) +{ + auto strLen = str->GetLength(); + std::pair normIndexes = {beginIndex, endIndex}; + + // If begin_index < 0, then it is assumed to be equal to zero. + if (normIndexes.first < 0) { + normIndexes.first = 0; + } else if (static_cast(normIndexes.first) > strLen) { + // If begin_index > str_len, then it is assumed to be equal to str_len. + normIndexes.first = static_cast(strLen); + } + // If end_index < 0, then it is assumed to be equal to zero. + if (normIndexes.second < 0) { + normIndexes.second = 0; + } else if (static_cast(normIndexes.second) > strLen) { + // If end_index > str_len, then it is assumed to be equal to str_len. + normIndexes.second = static_cast(strLen); + } + // If begin_index > end_index, then these are swapped. + if (normIndexes.first > normIndexes.second) { + std::swap(normIndexes.first, normIndexes.second); + } + ASSERT((normIndexes.second - normIndexes.first) >= 0); + return normIndexes; +} + EtsString *StdCoreStringSubstring(EtsString *str, ets_int begin, ets_int end) { ASSERT(str != nullptr); - auto indexes = coretypes::String::NormalizeSubStringIndexes(begin, end, str->GetCoreType()); + auto indexes = NormalizeSubStringIndexes(begin, end, str); if (UNLIKELY(indexes.first == 0 && indexes.second == str->GetLength())) { return str; } @@ -130,14 +169,7 @@ uint16_t StdCoreStringCharAt(EtsString *s, int32_t index) ark::ThrowStringIndexOutOfBoundsException(index, length); return 0; } - - if (s->IsUtf16()) { - Span sp(s->GetDataUtf16(), length); - return sp[index]; - } - - Span sp(s->GetDataMUtf8(), length); - return sp[index]; + return s->At(index); } int32_t StdCoreStringGetLength(EtsString *s) @@ -172,14 +204,19 @@ uint8_t StdCoreStringEquals(EtsString *owner, EtsObject *s) EtsString 
*StringNormalize(EtsString *str, const Normalizer2 *normalizer) { auto coroutine = EtsCoroutine::GetCurrent(); - [[maybe_unused]] HandleScope scope(coroutine); + [[maybe_unused]] EtsHandleScope scope(coroutine); + auto thread = ManagedThread::GetCurrent(); + VMHandle strHandle(thread, reinterpret_cast(str)); + LanguageContext ctx = Runtime::GetCurrent()->GetLanguageContext(panda_file::SourceLang::ETS); + auto flatStringInfo = coretypes::FlatStringInfo::FlattenAllString(strHandle, ctx); icu::UnicodeString utf16Str; - if (str->IsUtf16()) { - utf16Str = icu::UnicodeString {str->GetDataUtf16(), static_cast(str->GetUtf16Length())}; - } else { + if (flatStringInfo.IsUtf16()) { utf16Str = - icu::UnicodeString {utf::Mutf8AsCString(str->GetDataMUtf8()), static_cast(str->GetLength())}; + icu::UnicodeString {flatStringInfo.GetDataUtf16(), static_cast(strHandle->GetUtf16Length())}; + } else { + utf16Str = icu::UnicodeString {utf::Mutf8AsCString(flatStringInfo.GetDataUtf8()), + static_cast(flatStringInfo.GetLength())}; } UErrorCode errorCode = U_ZERO_ERROR; @@ -249,16 +286,15 @@ uint8_t StdCoreStringIsWellFormed(EtsString *thisStr) return UINT8_C(1); } auto length = thisStr->GetUtf16Length(); - auto codeUnits = Span(thisStr->GetDataUtf16(), length); for (size_t i = 0; i < length; ++i) { - uint16_t codeUnit = codeUnits[i]; + uint16_t codeUnit = thisStr->At(i); if ((codeUnit & CHAR0X1FFC00) == CHAR0XD800) { // Code unit is a leading surrogate if (i == length - 1) { return UINT8_C(0); } // Is not trail surrogate - if ((codeUnits[i + 1] & CHAR0X1FFC00) != CHAR0XDC00) { + if ((thisStr->At(i + 1) & CHAR0X1FFC00) != CHAR0XDC00) { return UINT8_C(0); } // Skip the paired trailing surrogate @@ -274,14 +310,19 @@ uint8_t StdCoreStringIsWellFormed(EtsString *thisStr) EtsString *ToLowerCase(EtsString *thisStr, const icu::Locale &locale) { auto coroutine = EtsCoroutine::GetCurrent(); - [[maybe_unused]] HandleScope scope(coroutine); + [[maybe_unused]] EtsHandleScope scope(coroutine); + auto 
thread = ManagedThread::GetCurrent(); + VMHandle strHandle(thread, reinterpret_cast(thisStr)); + LanguageContext ctx = Runtime::GetCurrent()->GetLanguageContext(panda_file::SourceLang::ETS); + auto flatStringInfo = coretypes::FlatStringInfo::FlattenAllString(strHandle, ctx); icu::UnicodeString utf16Str; - if (thisStr->IsUtf16()) { - utf16Str = icu::UnicodeString {thisStr->GetDataUtf16(), static_cast(thisStr->GetUtf16Length())}; + if (flatStringInfo.IsUtf16()) { + utf16Str = + icu::UnicodeString {flatStringInfo.GetDataUtf16(), static_cast(strHandle->GetUtf16Length())}; } else { - utf16Str = icu::UnicodeString {utf::Mutf8AsCString(thisStr->GetDataMUtf8()), - static_cast(thisStr->GetLength())}; + utf16Str = icu::UnicodeString {utf::Mutf8AsCString(flatStringInfo.GetDataUtf8()), + static_cast(flatStringInfo.GetLength())}; } auto res = utf16Str.toLower(locale); return EtsString::CreateFromUtf16(reinterpret_cast(res.getTerminatedBuffer()), res.length()); @@ -290,14 +331,19 @@ EtsString *ToLowerCase(EtsString *thisStr, const icu::Locale &locale) EtsString *ToUpperCase(EtsString *thisStr, const icu::Locale &locale) { auto coroutine = EtsCoroutine::GetCurrent(); - [[maybe_unused]] HandleScope scope(coroutine); + [[maybe_unused]] EtsHandleScope scope(coroutine); + auto thread = ManagedThread::GetCurrent(); + VMHandle strHandle(thread, reinterpret_cast(thisStr)); + LanguageContext ctx = Runtime::GetCurrent()->GetLanguageContext(panda_file::SourceLang::ETS); + auto flatStringInfo = coretypes::FlatStringInfo::FlattenAllString(strHandle, ctx); icu::UnicodeString utf16Str; - if (thisStr->IsUtf16()) { - utf16Str = icu::UnicodeString {thisStr->GetDataUtf16(), static_cast(thisStr->GetUtf16Length())}; + if (flatStringInfo.IsUtf16()) { + utf16Str = + icu::UnicodeString {flatStringInfo.GetDataUtf16(), static_cast(strHandle->GetUtf16Length())}; } else { - utf16Str = icu::UnicodeString {utf::Mutf8AsCString(thisStr->GetDataMUtf8()), - static_cast(thisStr->GetLength())}; + utf16Str = 
icu::UnicodeString {utf::Mutf8AsCString(flatStringInfo.GetDataUtf8()), + static_cast(flatStringInfo.GetLength())}; } auto res = utf16Str.toUpper(locale); return EtsString::CreateFromUtf16(reinterpret_cast(res.getTerminatedBuffer()), res.length()); @@ -309,9 +355,16 @@ UErrorCode ParseSingleBCP47LanguageTag(EtsString *langTag, icu::Locale &locale) locale = icu::Locale::getDefault(); return U_ZERO_ERROR; } + auto coroutine = EtsCoroutine::GetCurrent(); + [[maybe_unused]] EtsHandleScope scope(coroutine); + + auto thread = ManagedThread::GetCurrent(); + VMHandle langTagHandle(thread, reinterpret_cast(langTag)); + LanguageContext ctx = Runtime::GetCurrent()->GetLanguageContext(panda_file::SourceLang::ETS); + auto flatStringInfo = coretypes::FlatStringInfo::FlattenAllString(langTagHandle, ctx); PandaVector buf; - std::string_view locTag = langTag->ConvertToStringView(&buf); + std::string_view locTag = EtsString::FromCoreType(flatStringInfo.GetString())->ConvertToStringView(&buf); icu::StringPiece sp {locTag.data(), static_cast(locTag.size())}; UErrorCode status = U_ZERO_ERROR; locale = icu::Locale::forLanguageTag(sp, status); @@ -335,7 +388,8 @@ EtsString *StdCoreStringToLocaleUpperCase(EtsString *thisStr, EtsString *langTag icu::Locale locale; auto localeParseStatus = ParseSingleBCP47LanguageTag(langTag, locale); if (UNLIKELY(U_FAILURE(localeParseStatus))) { - auto message = "Language tag '" + ConvertToString(langTag->GetCoreType()) + "' is invalid or not supported"; + auto message = "Language tag '" + ConvertToString(coretypes::BaseString::Cast(langTag->GetCoreType())) + + "' is invalid or not supported"; ThrowEtsException(EtsCoroutine::GetCurrent(), panda_file_items::class_descriptors::RANGE_ERROR, message); return nullptr; } @@ -349,7 +403,8 @@ EtsString *StdCoreStringToLocaleLowerCase(EtsString *thisStr, EtsString *langTag icu::Locale locale; auto localeParseStatus = ParseSingleBCP47LanguageTag(langTag, locale); if (UNLIKELY(U_FAILURE(localeParseStatus))) { - auto 
message = "Language tag '" + ConvertToString(langTag->GetCoreType()) + "' is invalid or not supported"; + auto message = "Language tag '" + ConvertToString(coretypes::BaseString::Cast(langTag->GetCoreType())) + + "' is invalid or not supported"; ThrowEtsException(EtsCoroutine::GetCurrent(), panda_file_items::class_descriptors::RANGE_ERROR, message); return nullptr; } @@ -358,7 +413,8 @@ EtsString *StdCoreStringToLocaleLowerCase(EtsString *thisStr, EtsString *langTag ets_int StdCoreStringIndexOfAfter(EtsString *s, uint16_t ch, ets_int fromIndex) { - return ark::intrinsics::StringIndexOfU16(s, ch, fromIndex); + LanguageContext ctx = Runtime::GetCurrent()->GetLanguageContext(panda_file::SourceLang::ETS); + return ark::intrinsics::BaseStringIndexOfU16(s, ch, fromIndex, ctx); } ets_int StdCoreStringIndexOf(EtsString *s, uint16_t ch) @@ -369,14 +425,16 @@ ets_int StdCoreStringIndexOf(EtsString *s, uint16_t ch) ets_int StdCoreStringIndexOfString(EtsString *thisStr, EtsString *patternStr, ets_int fromIndex) { ASSERT(thisStr != nullptr && patternStr != nullptr); - return thisStr->GetCoreType()->IndexOf(patternStr->GetCoreType(), fromIndex); + LanguageContext ctx = Runtime::GetCurrent()->GetLanguageContext(panda_file::SourceLang::ETS); + return thisStr->GetCoreType()->IndexOf(patternStr->GetCoreType(), ctx, fromIndex); } ets_int StdCoreStringLastIndexOfString(EtsString *thisStr, EtsString *patternStr, ets_int fromIndex) { ASSERT(thisStr != nullptr && patternStr != nullptr); // "abc".lastIndexOf("ab", -10) will return 0 - return thisStr->GetCoreType()->LastIndexOf(patternStr->GetCoreType(), std::max(fromIndex, 0)); + LanguageContext ctx = Runtime::GetCurrent()->GetLanguageContext(panda_file::SourceLang::ETS); + return thisStr->GetCoreType()->LastIndexOf(patternStr->GetCoreType(), ctx, std::max(fromIndex, 0)); } ets_int StdCoreStringCodePointToChar(ets_int codePoint) @@ -395,37 +453,86 @@ ets_int StdCoreStringCodePointToChar(ets_int codePoint) int32_t 
StdCoreStringHashCode(EtsString *thisStr) { ASSERT(thisStr != nullptr); - return thisStr->GetCoreType()->GetHashcode(); + return thisStr->GetHashcode(); } EtsBoolean StdCoreStringIsCompressed(EtsString *thisStr) { ASSERT(thisStr != nullptr); - return ToEtsBoolean(thisStr->GetCoreType()->IsMUtf8()); + return ToEtsBoolean(thisStr->IsMUtf8()); +} + +static coretypes::BaseString *BaseStringConcat2(coretypes::BaseString *str1, coretypes::BaseString *str2) +{ + auto *vm = Runtime::GetCurrent()->GetPandaVM(); + LanguageContext ctx = Runtime::GetCurrent()->GetLanguageContext(panda_file::SourceLang::ETS); + return coretypes::BaseString::Concat(str1, str2, ctx, vm); +} + +static coretypes::BaseString *BaseStringConcat3(coretypes::BaseString *str1, coretypes::BaseString *str2, + coretypes::BaseString *str3) +{ + auto *vm = Runtime::GetCurrent()->GetPandaVM(); + LanguageContext ctx = Runtime::GetCurrent()->GetLanguageContext(panda_file::SourceLang::ETS); + auto thread = ManagedThread::GetCurrent(); + [[maybe_unused]] HandleScope scope(thread); + VMHandle str3Handle(thread, str3); + auto str = coretypes::BaseString::Concat(str1, str2, ctx, vm); + if (UNLIKELY(str == nullptr)) { + HandlePendingException(); + UNREACHABLE(); + } + str = coretypes::BaseString::Concat(str, str3Handle.GetPtr(), ctx, vm); + return str; +} + +static coretypes::BaseString *BaseStringConcat4(coretypes::BaseString *str1, coretypes::BaseString *str2, + coretypes::BaseString *str3, coretypes::BaseString *str4) +{ + auto *vm = Runtime::GetCurrent()->GetPandaVM(); + LanguageContext ctx = Runtime::GetCurrent()->GetLanguageContext(panda_file::SourceLang::ETS); + auto thread = ManagedThread::GetCurrent(); + [[maybe_unused]] HandleScope scope(thread); + VMHandle str3Handle(thread, str3); + VMHandle str4Handle(thread, str4); + auto str = coretypes::BaseString::Concat(str1, str2, ctx, vm); + if (UNLIKELY(str == nullptr)) { + HandlePendingException(); + UNREACHABLE(); + } + str3 = str3Handle.GetPtr(); + str = 
coretypes::BaseString::Concat(str, str3, ctx, vm); + if (UNLIKELY(str == nullptr)) { + HandlePendingException(); + UNREACHABLE(); + } + str4 = str4Handle.GetPtr(); + str = coretypes::BaseString::Concat(str, str4, ctx, vm); + return str; } EtsString *StdCoreStringConcat2(EtsString *str1, EtsString *str2) { - auto s1 = reinterpret_cast(str1); - auto s2 = reinterpret_cast(str2); - return reinterpret_cast(CoreStringConcat2(s1, s2)); + auto s1 = str1->GetCoreType(); + auto s2 = str2->GetCoreType(); + return reinterpret_cast(BaseStringConcat2(s1, s2)); } EtsString *StdCoreStringConcat3(EtsString *str1, EtsString *str2, EtsString *str3) { - auto s1 = reinterpret_cast(str1); - auto s2 = reinterpret_cast(str2); - auto s3 = reinterpret_cast(str3); - return reinterpret_cast(CoreStringConcat3(s1, s2, s3)); + auto s1 = str1->GetCoreType(); + auto s2 = str2->GetCoreType(); + auto s3 = str3->GetCoreType(); + return reinterpret_cast(BaseStringConcat3(s1, s2, s3)); } EtsString *StdCoreStringConcat4(EtsString *str1, EtsString *str2, EtsString *str3, EtsString *str4) { - auto s1 = reinterpret_cast(str1); - auto s2 = reinterpret_cast(str2); - auto s3 = reinterpret_cast(str3); - auto s4 = reinterpret_cast(str4); - return reinterpret_cast(CoreStringConcat4(s1, s2, s3, s4)); + auto s1 = str1->GetCoreType(); + auto s2 = str2->GetCoreType(); + auto s3 = str3->GetCoreType(); + auto s4 = str4->GetCoreType(); + return reinterpret_cast(BaseStringConcat4(s1, s2, s3, s4)); } ets_int StdCoreStringCompareTo(EtsString *str1, EtsString *str2) @@ -439,7 +546,7 @@ ets_int StdCoreStringCompareTo(EtsString *str1, EtsString *str2) } /* use the default implementation otherwise */ - return str1->GetCoreType()->Compare(str2->GetCoreType()); + return str1->Compare(str2); } EtsString *StdCoreStringTrimLeft(EtsString *thisStr) @@ -485,29 +592,7 @@ EtsString *StdCoreStringFromCharCodeSingle(EtsDouble charCode) /* the allocation routine to create an unitialized string of the given size */ extern "C" EtsString 
*AllocateStringObject(size_t length, bool compressed) { - auto ctx = Runtime::GetCurrent()->GetLanguageContext(panda_file::SourceLang::ETS); - auto vm = Runtime::GetCurrent()->GetPandaVM(); - ASSERT(vm != nullptr); - auto *stringClass = Runtime::GetCurrent()->GetClassLinker()->GetExtension(ctx)->GetClassRoot(ClassRoot::STRING); - size_t size = - compressed ? coretypes::String::ComputeSizeMUtf8(length) : coretypes::String::ComputeSizeUtf16(length); - auto string = reinterpret_cast(vm->GetHeapManager()->AllocateObject( - stringClass, size, DEFAULT_ALIGNMENT, nullptr, mem::ObjectAllocatorBase::ObjMemInitPolicy::NO_INIT)); - if (string != nullptr) { - // After setting length we should have a full barrier, so this write should happens-before barrier - TSAN_ANNOTATE_IGNORE_WRITES_BEGIN(); - auto len = ToNativePtr(ToUintPtr(string) + coretypes::String::GetLengthOffset()); - auto hashcode = ToNativePtr(ToUintPtr(string) + coretypes::String::GetHashcodeOffset()); - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - len[0] = compressed ? 
(length << 1U) : (length << 1U) | 1U; - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - hashcode[0] = 0U; - TSAN_ANNOTATE_IGNORE_WRITES_END(); - // Witout full memory barrier it is possible that architectures with - // weak memory order can try fetching string legth before it's set - arch::FullMemoryBarrier(); - } - return string; + return EtsString::AllocateNonInitializedString(length, compressed); } EtsString *StdCoreStringRepeat(EtsString *str, EtsInt count) @@ -522,39 +607,27 @@ EtsString *StdCoreStringRepeat(EtsString *str, EtsInt count) } if (length == 0 || count == 0) { - return EtsString::CreateFromUtf8(nullptr, 0); + return EtsString::CreateNewEmptyString(); } auto thread = ManagedThread::GetCurrent(); [[maybe_unused]] HandleScope scope(thread); - VMHandle sHandle(thread, str->GetCoreType()); + VMHandle sHandle(thread, reinterpret_cast(str)); int size = length * count; - auto compressed = str->GetCoreType()->IsMUtf8(); - auto rep = AllocateStringObject(size, compressed); - if (UNLIKELY(rep == nullptr)) { + auto compressed = str->IsMUtf8(); + auto result = AllocateStringObject(size, compressed); + if (UNLIKELY(result == nullptr)) { PandaString message = "repeat: memory allocation failed"; auto coroutine = EtsCoroutine::GetCurrent(); ThrowEtsException(coroutine, panda_file_items::class_descriptors::OUT_OF_MEMORY_ERROR, message); return nullptr; } - if (compressed) { - auto strData = sHandle.GetPtr()->GetDataMUtf8(); - auto repData = rep->GetDataMUtf8(); - for (int i = 0; i < count; ++i) { - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - std::copy_n(strData, length, repData + i * length); - } - } else { - auto strData = sHandle.GetPtr()->GetDataUtf16(); - auto repData = rep->GetDataUtf16(); - for (int i = 0; i < count; ++i) { - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - std::copy_n(strData, length, repData + i * length); - } + for (uint32_t i = 0; i < static_cast(count); i++) { + 
EtsString::ReadData(result, sHandle.GetPtr(), i * length, (count - i) * length, length); } - return rep; + return result; } uint16_t StdCoreStringGet(EtsString *str, EtsInt index) diff --git a/static_core/plugins/ets/runtime/intrinsics/std_core_Type.cpp b/static_core/plugins/ets/runtime/intrinsics/std_core_Type.cpp index 71b2f7c3dc1788c9b6b08a128ee1b2fc39cf3ced..472b7e477ab15666632909f0ac12e08abe160ba2 100644 --- a/static_core/plugins/ets/runtime/intrinsics/std_core_Type.cpp +++ b/static_core/plugins/ets/runtime/intrinsics/std_core_Type.cpp @@ -195,7 +195,7 @@ EtsString *TypeAPIGetUndefinedTypeDescriptor() EtsInt TypeAPIGetClassAttributes(EtsClass *cls) { uint32_t attrs = 0; - attrs |= (cls->IsFinal()) ? static_cast(EtsTypeAPIAttributes::FINAL) : 0U; + attrs |= (cls->IsExtensible()) ? 0U : static_cast(EtsTypeAPIAttributes::FINAL); return static_cast(attrs); } diff --git a/static_core/plugins/ets/runtime/types/ets_class.h b/static_core/plugins/ets/runtime/types/ets_class.h index 0c4b44f89c1aa1e38b05e754388126eaf73b7214..f9b596f488247aee4f7e39e1fc4cf6c0b1d195e8 100644 --- a/static_core/plugins/ets/runtime/types/ets_class.h +++ b/static_core/plugins/ets/runtime/types/ets_class.h @@ -196,6 +196,11 @@ public: return GetRuntimeClass()->IsFinal(); } + bool IsExtensible() const + { + return GetRuntimeClass()->IsExtensible(); + } + bool IsAnnotation() const { return GetRuntimeClass()->IsAnnotation(); diff --git a/static_core/plugins/ets/runtime/types/ets_string.h b/static_core/plugins/ets/runtime/types/ets_string.h index a9b8b8952df422f10a4804e21f1ec493a0b4abc7..23389b638146a73968157febf96dcfe67440a233 100644 --- a/static_core/plugins/ets/runtime/types/ets_string.h +++ b/static_core/plugins/ets/runtime/types/ets_string.h @@ -18,13 +18,17 @@ #include +#include "objects/string/base_string-inl.h" #include "libpandabase/utils/utf.h" + #include "plugins/ets/runtime/types/ets_array.h" #include "plugins/ets/runtime/types/ets_box_primitive.h" #include 
"plugins/ets/runtime/types/ets_object.h" #include "plugins/ets/runtime/napi/ets_napi.h" + #include "runtime/include/runtime.h" #include "runtime/include/coretypes/string-inl.h" +#include "runtime/include/coretypes/base_string.h" namespace ark::ets { @@ -205,9 +209,9 @@ public: ThrowNullPointerException(ctx, ManagedThread::GetCurrent()); return nullptr; } - coretypes::String *string = etsString->GetCoreType(); + coretypes::BaseString *string = etsString->GetCoreType(); return reinterpret_cast( - coretypes::String::CreateFromString(string, ctx, Runtime::GetCurrent()->GetPandaVM())); + coretypes::BaseString::CreateFromString(string, ctx, Runtime::GetCurrent()->GetPandaVM())); } static EtsString *CreateNewEmptyString() @@ -229,8 +233,8 @@ public: { ASSERT_HAVE_ACCESS_TO_MANAGED_OBJECTS(); LanguageContext ctx = Runtime::GetCurrent()->GetLanguageContext(panda_file::SourceLang::ETS); - coretypes::String *string3 = coretypes::String::Concat(etsString1->GetCoreType(), etsString2->GetCoreType(), - ctx, Runtime::GetCurrent()->GetPandaVM()); + coretypes::BaseString *string3 = coretypes::BaseString::Concat( + etsString1->GetCoreType(), etsString2->GetCoreType(), ctx, Runtime::GetCurrent()->GetPandaVM()); return reinterpret_cast(string3); } @@ -344,7 +348,8 @@ public: int32_t Compare(EtsString *rhs) { - return GetCoreType()->Compare(rhs->GetCoreType()); + LanguageContext ctx = Runtime::GetCurrent()->GetLanguageContext(panda_file::SourceLang::ETS); + return GetCoreType()->Compare(rhs->GetCoreType(), ctx); } uint16_t At(int32_t index) @@ -359,17 +364,17 @@ public: return reinterpret_cast( coretypes::String::CreateEmptyString(ctx, Runtime::GetCurrent()->GetPandaVM())); } - coretypes::String *result = coretypes::String::DoReplace(reinterpret_cast(src), oldC, newC, - ctx, Runtime::GetCurrent()->GetPandaVM()); + coretypes::BaseString *result = coretypes::BaseString::DoReplace( + reinterpret_cast(src), oldC, newC, ctx, Runtime::GetCurrent()->GetPandaVM()); return 
reinterpret_cast(result); } static EtsString *FastSubString(EtsString *src, uint32_t start, uint32_t length) { LanguageContext ctx = Runtime::GetCurrent()->GetLanguageContext(panda_file::SourceLang::ETS); - coretypes::String *string1 = src->GetCoreType(); - coretypes::String *string2 = - coretypes::String::FastSubString(string1, start, length, ctx, Runtime::GetCurrent()->GetPandaVM()); + coretypes::BaseString *string1 = src->GetCoreType(); + coretypes::BaseString *string2 = + coretypes::BaseString::FastSubString(string1, start, length, ctx, Runtime::GetCurrent()->GetPandaVM()); return reinterpret_cast(string2); } @@ -447,7 +452,7 @@ public: bool IsEqual(const char *str) { auto *mutf8Str = utf::CStringAsMutf8(str); - return coretypes::String::StringsAreEqualMUtf8(GetCoreType(), mutf8Str, utf::MUtf8ToUtf16Size(mutf8Str)); + return coretypes::BaseString::StringsAreEqualMUtf8(GetCoreType(), mutf8Str, utf::MUtf8ToUtf16Size(mutf8Str)); } PandaString GetMutf8() @@ -492,17 +497,79 @@ public: return GetCoreType()->IsEmpty(); } + uint32_t GetHashcode() + { + return GetCoreType()->GetHashcode(); + } + bool IsUtf16() const + { + return coretypes::String::GetCompressedStringsEnabled() ? 
GetCoreType()->IsUtf16() : true; + } + + bool IsUtf8() const + { + return IsMUtf8(); + } + + bool IsMUtf8() const + { + return !IsUtf16(); + } + + /** + * @brief read data from src to dest + * @param [in] dest : dest string + * @param [in] src : src string + * @param [in] start : write to dest positioned at start offset + * @param [in] destSize : dest max size + * @param [in] length : how many chars to copy + */ + static void ReadData(EtsString *dest, EtsString *src, uint32_t start, uint32_t destSize, uint32_t length) + { + dest->WriteData(src, start, destSize, length); + } + + /** + * @brief copy data from src to dest , dest is specified by this string + * @param [in] src : original data + * @param [in] start : write to dest positioned at start offset + * @param [in] destSize : dest max size + * @param [in] length : how many chars to copy + */ + void WriteData(EtsString *src, uint32_t start, uint32_t destSize, uint32_t length) + { + GetCoreType()->WriteData(src->GetCoreType(), start, destSize, length); + } + bool StringsAreEqual(EtsObject *obj) { - return coretypes::String::StringsAreEqual(GetCoreType(), FromEtsObject(obj)->GetCoreType()); + return coretypes::BaseString::StringsAreEqual(GetCoreType(), FromEtsObject(obj)->GetCoreType()); + } + + coretypes::BaseString *GetCoreType() + { + ASSERT_HAVE_ACCESS_TO_MANAGED_OBJECTS(); + return reinterpret_cast(this); + } + + const coretypes::BaseString *GetCoreType() const + { + ASSERT_HAVE_ACCESS_TO_MANAGED_OBJECTS(); + return reinterpret_cast(this); } - coretypes::String *GetCoreType() + coretypes::String *GetCoreString() { ASSERT_HAVE_ACCESS_TO_MANAGED_OBJECTS(); return reinterpret_cast(this); } + const coretypes::String *GetCoreString() const + { + ASSERT_HAVE_ACCESS_TO_MANAGED_OBJECTS(); + return reinterpret_cast(this); + } + ObjectHeader *AsObjectHeader() { return reinterpret_cast(this); @@ -518,6 +585,11 @@ public: return reinterpret_cast(this); } + static EtsString *FromCoreType(coretypes::BaseString *str) + { + 
return reinterpret_cast(str); + } + static EtsString *FromCoreType(coretypes::String *str) { return reinterpret_cast(str); @@ -525,10 +597,24 @@ public: static EtsString *FromEtsObject(EtsObject *obj) { - ASSERT(obj->GetClass()->GetRuntimeClass() == Runtime::GetCurrent() - ->GetClassLinker() - ->GetExtension(panda_file::SourceLang::ETS) - ->GetClassRoot(ClassRoot::STRING)); + [[maybe_unused]] Class *cls = obj->GetClass()->GetRuntimeClass(); + [[maybe_unused]] Class *strCls = Runtime::GetCurrent() + ->GetClassLinker() + ->GetExtension(panda_file::SourceLang::ETS) + ->GetClassRoot(ClassRoot::BASE_STRING); + [[maybe_unused]] Class *lineStrCls = Runtime::GetCurrent() + ->GetClassLinker() + ->GetExtension(panda_file::SourceLang::ETS) + ->GetClassRoot(ClassRoot::STRING); + [[maybe_unused]] Class *slicedStrCls = Runtime::GetCurrent() + ->GetClassLinker() + ->GetExtension(panda_file::SourceLang::ETS) + ->GetClassRoot(ClassRoot::SLICED_STRING); + [[maybe_unused]] Class *treeStrCls = Runtime::GetCurrent() + ->GetClassLinker() + ->GetExtension(panda_file::SourceLang::ETS) + ->GetClassRoot(ClassRoot::TREE_STRING); + ASSERT(cls == strCls || cls == lineStrCls || cls == slicedStrCls || cls == treeStrCls); return reinterpret_cast(obj); } diff --git a/static_core/plugins/ets/runtime/types/ets_string_builder.cpp b/static_core/plugins/ets/runtime/types/ets_string_builder.cpp index 80537b283a0388d1f10f293b5aeab28c67a07adf..167898403ffc01901c9308b924182c5d372ef944 100644 --- a/static_core/plugins/ets/runtime/types/ets_string_builder.cpp +++ b/static_core/plugins/ets/runtime/types/ets_string_builder.cpp @@ -13,6 +13,7 @@ * limitations under the License. 
*/ +#include "include/coretypes/base_string.h" #include "libpandabase/utils/utils.h" #include "libpandabase/utils/utf.h" #include "runtime/arch/memory_helpers.h" @@ -108,7 +109,7 @@ ObjectHeader *AppendCharArrayToBuffer(VMHandle &sbHandle, EtsCharArra // Set the compress field to false if the array contains not compressable chars auto n = arr->GetLength(); for (uint32_t i = 0; i < n; ++i) { - if (!ark::coretypes::String::IsASCIICharacter(arr->Get(i))) { + if (!coretypes::BaseString::IsASCIICharacter(arr->Get(i))) { sb->SetFieldPrimitive(SB_COMPRESS_OFFSET, false); break; } @@ -117,15 +118,16 @@ ObjectHeader *AppendCharArrayToBuffer(VMHandle &sbHandle, EtsCharArra return sb->GetCoreType(); } -static void ReconstructStringAsMUtf8(EtsString *dstString, EtsObjectArray *buffer, uint32_t index, uint32_t length, - EtsClass *stringKlass) +static void ReconstructStringAsMUtf8(EtsString *dstString, EtsObjectArray *buffer, uint32_t index, uint32_t length) { // All strings in the buf are MUtf8 uint8_t *dstData = dstString->GetDataMUtf8(); for (uint32_t i = 0; i < index; ++i) { EtsObject *obj = buffer->Get(i); - if (obj->IsInstanceOf(stringKlass)) { - coretypes::String *srcString = EtsString::FromEtsObject(obj)->GetCoreType(); + // NOLINTNEXTLINE(modernize-use-auto) + panda::BaseObject *baseObj = reinterpret_cast(obj->GetCoreType()); + if (baseObj->IsString()) { + coretypes::BaseString *srcString = EtsString::FromEtsObject(obj)->GetCoreType(); uint32_t n = srcString->CopyDataRegionMUtf8(dstData, 0, srcString->GetLength(), length); dstData += n; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) length -= n; @@ -143,15 +145,16 @@ static void ReconstructStringAsMUtf8(EtsString *dstString, EtsObjectArray *buffe } } -static void ReconstructStringAsUtf16(EtsString *dstString, EtsObjectArray *buffer, uint32_t index, uint32_t length, - EtsClass *stringKlass) +static void ReconstructStringAsUtf16(EtsString *dstString, EtsObjectArray *buffer, uint32_t index, uint32_t length) { 
// Some strings in the buf are Utf16 uint16_t *dstData = dstString->GetDataUtf16(); for (uint32_t i = 0; i < index; ++i) { EtsObject *obj = buffer->Get(i); - if (obj->IsInstanceOf(stringKlass)) { - coretypes::String *srcString = EtsString::FromEtsObject(obj)->GetCoreType(); + // NOLINTNEXTLINE(modernize-use-auto) + panda::BaseObject *baseObj = reinterpret_cast(obj->GetCoreType()); + if (baseObj->IsString()) { + coretypes::BaseString *srcString = EtsString::FromEtsObject(obj)->GetCoreType(); uint32_t n = srcString->CopyDataRegionUtf16(dstData, 0, srcString->GetLength(), length); dstData += n; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) length -= n; @@ -647,7 +650,7 @@ static inline EtsCharArray *FloatingPointToCharArray(FpType number) auto *arr = EtsCharArray::Create(str.length()); Span data(arr->GetData(), str.length()); for (size_t i = 0; i < str.length(); ++i) { - ASSERT(ark::coretypes::String::IsASCIICharacter(str[i])); + ASSERT(coretypes::BaseString::IsASCIICharacter(str[i])); data[i] = static_cast(str[i]); } return arr; @@ -704,14 +707,12 @@ EtsString *StringBuilderToString(ObjectHeader *sb) auto index = sbHandle->GetFieldPrimitive(SB_INDEX_OFFSET); auto compress = sbHandle->GetFieldPrimitive(SB_COMPRESS_OFFSET); EtsString *s = EtsString::AllocateNonInitializedString(length, compress); - EtsClass *sKlass = EtsClass::FromRuntimeClass(s->GetCoreType()->ClassAddr()); auto *buf = EtsObjectArray::FromCoreType(sbHandle->GetFieldObject(SB_BUFFER_OFFSET)->GetCoreType()); if (compress) { - ReconstructStringAsMUtf8(s, buf, index, length, sKlass); + ReconstructStringAsMUtf8(s, buf, index, length); } else { - ReconstructStringAsUtf16(s, buf, index, length, sKlass); + ReconstructStringAsUtf16(s, buf, index, length); } return s; } - } // namespace ark::ets diff --git a/static_core/plugins/ets/tests/checked/jitinterface/compile_method.cpp b/static_core/plugins/ets/tests/checked/jitinterface/compile_method.cpp index 
485dfe77089a4aec742c4f4421f6ca0bd6d51b45..1ec99e1cb5e6bdc9b43cc1d74868b72eeaf20a60 100644 --- a/static_core/plugins/ets/tests/checked/jitinterface/compile_method.cpp +++ b/static_core/plugins/ets/tests/checked/jitinterface/compile_method.cpp @@ -38,7 +38,7 @@ ani_int CompileMethod(ani_env *env, ani_string name) } else { ctx = PandaEtsVM::GetCurrent()->GetEtsClassLinkerExtension()->GetBootContext(); } - return ark::CompileMethodImpl(str->GetCoreType(), ctx); + return ark::CompileMethodImpl(str->GetCoreString(), ctx); } } // namespace ark::ets::ani diff --git a/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_charat.ets b/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_charat.ets new file mode 100644 index 0000000000000000000000000000000000000000..2c88f6647359ac8738a0764fad96f3647b1377f8 --- /dev/null +++ b/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_charat.ets @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2024-2025 Huawei Device Co., Ltd. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +let EscChars: String = new String('\f\n\r\t\v'); +const RusChars: String = new String('АаБбВвГгДдЕеЁёЖжЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЪъЫыЬьЭэЮюЯя'); // 16-bit +const EngChars: String = new String('AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz'); // 8-bit +const Symbols: String = '`~!@#$%^&*()_-+={}[]:;\"\'\\|/<>,.'; +const RUS_LEN: int = RusChars.getLength(); +const ENG_LEN: int = EngChars.getLength(); +const SYM_LEN: int = Symbols.getLength(); +function main(): int { + /** + * charAt(int): char => StdCoreStringCharAt + */ + arktest.assertEQ(EscChars.charAt(0), c'\f'); + arktest.assertEQ(EscChars.charAt(1), c'\n'); + arktest.assertEQ(EscChars.charAt(2), c'\r'); + arktest.assertEQ(EscChars.charAt(3), c'\t'); + arktest.assertEQ(EscChars.charAt(4), c'\v'); + + arktest.assertEQ(RusChars.charAt(0), c'А'); + arktest.assertEQ(RusChars.charAt(1), c'а'); + arktest.assertEQ(RusChars.charAt(2), c'Б'); + arktest.assertEQ(RusChars.charAt(RUS_LEN - 1), c'я'); + arktest.assertEQ(RusChars.charAt(RUS_LEN - 2), c'Я'); + + arktest.assertEQ(EngChars.charAt(0), c'A'); + arktest.assertEQ(EngChars.charAt(1), c'a'); + arktest.assertEQ(EngChars.charAt(2), c'B'); + arktest.assertEQ(EngChars.charAt(ENG_LEN - 1), c'z'); + + arktest.assertEQ(Symbols.charAt(0), c'`'); + arktest.assertEQ(Symbols.charAt(1), c'~'); + arktest.assertEQ(Symbols.charAt(SYM_LEN - 1), c'.'); + arktest.assertEQ(Symbols.charAt(SYM_LEN - 2), c','); + + let TreeUtf8Chars = EscChars.concat(EngChars); + arktest.assertEQ(TreeUtf8Chars.charAt(0), c'\f'); + arktest.assertEQ(TreeUtf8Chars.charAt(1), c'\n'); + arktest.assertEQ(TreeUtf8Chars.charAt(5), c'A'); + arktest.assertEQ(TreeUtf8Chars.charAt(6), c'a'); + arktest.assertEQ(TreeUtf8Chars.charAt(TreeUtf8Chars.getLength() - 1), c'z'); + arktest.assertEQ(TreeUtf8Chars.charAt(TreeUtf8Chars.getLength() - 2), c'Z'); + + let rus_0 = new String('АаБбВвГгДдЕеЁёЖжЗзИиЙйКкЛлМм'); + let rus_1 = new String('НнОоПпРрСсТтУуФфХхЦцЧчШшЩщЪъЫыЬьЭэЮюЯя'); + let 
TreeUtf16Chars = rus_0.concat(rus_1); + arktest.assertEQ(TreeUtf16Chars.charAt(0), c'А'); + arktest.assertEQ(TreeUtf16Chars.charAt(1), c'а'); + arktest.assertEQ(TreeUtf16Chars.charAt(2), c'Б'); + arktest.assertEQ(TreeUtf16Chars.charAt(3), c'б'); + arktest.assertEQ(TreeUtf16Chars.charAt(4), c'В'); + arktest.assertEQ(TreeUtf16Chars.charAt(5), c'в'); + arktest.assertEQ(TreeUtf16Chars.charAt(rus_0.getLength()), c'Н'); + arktest.assertEQ(TreeUtf16Chars.charAt(rus_0.getLength() - 1), c'м'); + arktest.assertEQ(TreeUtf16Chars.charAt(TreeUtf16Chars.getLength() - 1), c'я'); + + let slicedUtf8Chars = TreeUtf8Chars.substring(1, 10); + arktest.assertEQ(slicedUtf8Chars.charAt(0), c'\n'); + arktest.assertEQ(slicedUtf8Chars.charAt(1), c'\r'); + arktest.assertEQ(slicedUtf8Chars.charAt(4), c'A'); + arktest.assertEQ(slicedUtf8Chars.charAt(5), c'a'); + arktest.assertEQ(slicedUtf8Chars.charAt(slicedUtf8Chars.getLength() - 1), c'C'); + + let slicedUtf16Chars = TreeUtf16Chars.substring(rus_0.getLength(), TreeUtf16Chars.getLength()); + arktest.assertEQ(slicedUtf16Chars.charAt(0), c'Н'); + arktest.assertEQ(slicedUtf16Chars.charAt(1), c'н'); + arktest.assertEQ(slicedUtf16Chars.charAt(slicedUtf16Chars.getLength() - 1), c'я'); + arktest.assertEQ(slicedUtf16Chars.charAt(slicedUtf16Chars.getLength() - 2), c'Я'); + arktest.assertTrue(slicedUtf16Chars === rus_1); + arktest.assertTrue(rus_1 === slicedUtf16Chars); + return 0; +} diff --git a/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_compare_to.ets b/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_compare_to.ets index a00b42ee94ae25041b170e78a662859ef636f2e8..44b2799b3119c6992f6865257e86150a92ddbfd9 100644 --- a/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_compare_to.ets +++ b/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_compare_to.ets @@ -103,9 +103,28 @@ function testUtf16() { } } +function test_tree() { + let basestring: String = "12345678901234" + let basestring2 = 
"abcdefghijklmn" + let treestring: String = basestring.concat(basestring2) + + let re = treestring.compareTo("12345678901234abcdefghijklmn") + arktest.assertEQ(re, 0); +} + +function test_sliced() { + let basestring: String = "12345678901234" + let slicedstring: String = basestring.substring(3, 6) // 456 + let re = slicedstring.compareTo("456") + arktest.assertEQ(re, 0); +} + function main(): int { testLatin(); testMixed(); testUtf16(); + test_tree(); + test_sliced(); return 0; } + diff --git a/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_concat.ets b/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_concat.ets index c8747a432f7dadac45de29cef4dabc8c4180d2af..6d866811b0bb11ac1e643e81b59d696887e86ab2 100644 --- a/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_concat.ets +++ b/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_concat.ets @@ -100,6 +100,29 @@ function testConcat4(): int { return 0; } +function concat_test() { + let basestring: String = "1234567" + let basestring2: String = "8901234" + let basestring3: String = "abcdefg" + let basestring4: String = "hijklmn" + + let treeconcat2 = basestring.concat(basestring2) + let treeconcat3 = basestring.concat(basestring2, basestring3) + let treeconcat4 = basestring.concat(basestring2, basestring3, basestring4) + arktest.assertEQ(treeconcat2, "12345678901234") + arktest.assertEQ(treeconcat3, "12345678901234abcdefg") + arktest.assertEQ(treeconcat4, "12345678901234abcdefghijklmn") + + const slicedstring: String = String("12345678901234").substring(1, 6) // 23456 + let slicedconcat2 = slicedstring.concat(basestring2) + let slicedconcat3 = slicedstring.concat(basestring2, basestring3) + let slicedconcat4 = slicedstring.concat(basestring2, basestring3, basestring4) + arktest.assertEQ(slicedconcat2, "234568901234") + arktest.assertEQ(slicedconcat3, "234568901234abcdefg") + arktest.assertEQ(slicedconcat4, "234568901234abcdefghijklmn") +} + function main(): 
int { + concat_test() return testConcat2() + testConcat3() + testConcat4(); } diff --git a/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_endswith.ets b/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_endswith.ets index d67daafb829204c75b00f4bc7f175e46711a794b..3948f32e648eeceb6b9978bd89b07758bd0f96c1 100644 --- a/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_endswith.ets +++ b/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_endswith.ets @@ -16,8 +16,21 @@ const EMPTY: String = ''; const HELLO: String = 'Hello'; const NATURE: String = '自然界'; +const TreeString: String = "12345678901234".concat("abcdefghijklmn"); +const SlicedString: String = String("12345678901234").substring(1, 6); // 23456 function main(): int { + + arktest.assertTrue(TreeString.endsWith('lmn')); + arktest.assertTrue(TreeString.endsWith("lmn", 28)); + arktest.assertTrue(TreeString.endsWith("lmn", 100)); + arktest.assertFalse(TreeString.endsWith("m")); + + arktest.assertTrue(SlicedString.endsWith('6')); + arktest.assertTrue(SlicedString.endsWith("56", 5)); + arktest.assertTrue(SlicedString.endsWith("56", 100)); + arktest.assertFalse(SlicedString.endsWith("5")); + arktest.assertTrue(EMPTY.endsWith('')); arktest.assertTrue(EMPTY.endsWith('', -1)); arktest.assertTrue(EMPTY.endsWith('', 0)); diff --git a/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_equals.ets b/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_equals.ets index b6eea764bc9b59e2de0dd3830f649f61bf70887d..e90423056de1ad6e4b1bd28baf53ea457b57c447 100644 --- a/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_equals.ets +++ b/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_equals.ets @@ -19,5 +19,17 @@ function main(): int { arktest.assertNE(str, NULL_STR, 'String must not be equal to null'); arktest.assertNE(NULL_STR, str, 'Null string must not be equal to non-null one'); arktest.assertEQ(NULL_STR, 
NULL_STR, 'Null values of type String must be equal'); + arktest.assertEQ(str, str, "String must be equal to itself"); + arktest.assertEQ(str, "abc", "String must be equal to 'abc'"); + let treeStr = str.concat("defghijklmnopqrstuvwxyz"); + arktest.assertNE(treeStr, NULL_STR, "TreeString must not be equal to null"); + arktest.assertNE(NULL_STR, treeStr, "Null string must not be equal to non-null one"); + arktest.assertEQ(treeStr, treeStr, "TreeString must be equal to itself"); + arktest.assertEQ(treeStr, "abcdefghijklmnopqrstuvwxyz", "TreeString must be equal to 'abcdefghijklmnopqrstuvwxyz'"); + let slicedStr = treeStr.substring(1, 5); + arktest.assertNE(slicedStr, NULL_STR, "SlicedString must not be equal to null"); + arktest.assertNE(NULL_STR, slicedStr, "Null string must not be equal to non-null one"); + arktest.assertEQ(slicedStr, slicedStr, "SlicedString must be equal to itself"); + arktest.assertEQ(slicedStr, "bcde", "SlicedString must be equal to 'bcde'"); return 0; } diff --git a/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_fromcodepoint.ets b/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_fromcodepoint.ets new file mode 100644 index 0000000000000000000000000000000000000000..9547d96c82fb7a1f03407e92531da5ad135fb9ac --- /dev/null +++ b/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_fromcodepoint.ets @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2025 Huawei Device Co., Ltd. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + + + +function main(): void { + arktest.assertTrue(String.fromCodePoint(42) === "*"); // "*" + arktest.assertTrue(String.fromCodePoint(65, 90) === "AZ"); // "AZ" + arktest.assertTrue(String.fromCodePoint(0x404) === "Є"); // "\u0404" === "Є" + arktest.assertTrue(String.fromCodePoint(0x2f804) === "\uD87E\uDC04"); // "\uD87E\uDC04" + arktest.assertTrue(String.fromCodePoint(194564) === "\uD87E\uDC04"); // "\uD87E\uDC04" + arktest.assertTrue(String.fromCodePoint(0x1d306, 0x61, 0x1d307) === "\uD834\uDF06a\uD834\uDF07"); // "\uD834\uDF06a\uD834\uDF07" + arktest.assertTrue(String.fromCodePoint(65) === 'A'); + arktest.assertTrue(String.fromCodePoint(0x1F600) === '😀'); + arktest.assertTrue(String.fromCodePoint(0) === '\u0000'); // min value + arktest.assertTrue(String.fromCodePoint(0x10FFFF) === '\u{10FFFF}'); // max value + arktest.assertTrue(String.fromCodePoint(65, 66) === 'AB'); + arktest.assertTrue(String.fromCodePoint(0x1F600, 0x1F601) === '😀😁'); + + // negative number + arktest.expectError(() => { String.fromCodePoint(-1) }); + // Bigger than 0x10FFFF + arktest.expectError(() => { String.fromCodePoint(0x10FFFF + 1) }); + // Not Integer + arktest.expectError(() => { String.fromCodePoint(3.14) }); + // special values + arktest.expectError(() => { String.fromCodePoint(Infinity) }); + arktest.expectError(() => { String.fromCodePoint(NaN) }); +} diff --git a/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_get.ets b/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_get.ets new file mode 100644 index 0000000000000000000000000000000000000000..d1403b18bd3a51afdd6d5f806ef625ea82251d2d --- /dev/null +++ b/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_get.ets @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2025 Huawei Device Co., Ltd. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +function main() { + + let suite = new ArkTestsuite("string get"); + suite.addTest("test", test); + return suite.run(); +} + +function test() { + let basestring: String = "12345678901234"; + let basestring2: String = "abcdefghijklmn"; + let treestring: String = basestring.concat(basestring2); // "12345678901234abcdefghijklmn" + let slicedstring: String = basestring.substring(3, 13); // "4567890123" + + arktest.assertEQ(treestring.charAt(5), c'6'); + arktest.assertEQ(slicedstring.charAt(5), c'9'); + arktest.assertEQ(treestring.substring(5, 6), '6'); +} \ No newline at end of file diff --git a/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_getbytes.ets b/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_getbytes.ets index 6e121680d8d057781d5c722f4ed140ca062391d5..269cea582c691ed0ef955da1544cfe40c368d2de 100644 --- a/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_getbytes.ets +++ b/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_getbytes.ets @@ -15,6 +15,8 @@ let str: String = new String('abcΣΨΩ0123456789!'); let bytes: byte[] = [97, 98, 99, -93, -88, -87, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 33] +let strTree : String = new String("abcΣΨΩ0").concat("123456789!"); +let strSliced: String = new String("sub_abcΣΨΩ0123456789!_sub").substring(4, 21); function equals(val: byte[], exp: byte[], offset: int): boolean { if (val.length != (exp.length - 
offset)) { @@ -48,5 +50,29 @@ function main(): int { return 1; } } + + arktest.assertEQ(strTree, str, "strTree != str"); + arktest.assertEQ(strSliced, str, "strSliced != str"); + + for (let i: int = 0; i < len; ++i) { + let arr: byte[] = strTree.getBytes(i, len); + if (!equals(arr, bytes, i)) { + console.println("at: " + i); + printArray(arr, 0, "actual ") + printArray(bytes, i, "expected") + return 1; + } + } + + for (let i: int = 0; i < len; ++i) { + let arr: byte[] = strSliced.getBytes(i, len); + if (!equals(arr, bytes, i)) { + console.println("at: " + i); + printArray(arr, 0, "actual ") + printArray(bytes, i, "expected") + return 1; + } + } + return 0; } diff --git a/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_getchars.ets b/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_getchars.ets index 239edd59e1d7a37f6c34f719d2eb1a37633d12c2..85811e4f8ab29d831551c358217986444d37a249 100644 --- a/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_getchars.ets +++ b/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_getchars.ets @@ -17,6 +17,8 @@ const DUMP: boolean = false; const HITS_EXPECTED: int = 21; let golden: String = new String("abcΣΨΩ0123456789!"); let hits: int = 0; +let silver: String = new String("abcΣΨΩ").concat("0123456789!"); +let copper: String = new String("sub_abcΣΨΩ0123456789!_sub").substring(4, 21); let table1: char[][] = [ [c'a', c'b', c'c', c'Σ', c'Ψ', c'Ω', c'0', c'1', c'2', c'3', c'4', c'5', c'6', c'7', c'8', c'9', c'!'], @@ -107,7 +109,101 @@ function main(): int { } } - if (hits != HITS_EXPECTED) { + arktest.assertTrue(golden=== silver); + arktest.assertTrue(golden=== copper); + // test for TreeString + for (let i: int = 0; i <= silver.getLength(); ++i) { + let arr: char[] = silver.getChars(i, silver.getLength()); + if (equals(arr, table1[i])) { + ++hits; + } + if (DUMP) { + printArray(table1[i], "expected"); + printArray(arr, "getChars"); + console.println(); + } + } + + try { + // 
end > length + let arr: char[] = silver.getChars(1, silver.getLength() + 1); + } catch (e: StringIndexOutOfBoundsError) { + ++hits; + if (DUMP) { + console.println("end > length"); + console.println(e); + } + } + + try { + // begin > end + let arr: char[] = silver.getChars(2, 1); + } catch (e: StringIndexOutOfBoundsError) { + ++hits; + if (DUMP) { + console.println("begin > end"); + console.println(e); + } + } + + try { + // begin < 0 + let arr: char[] = silver.getChars(-1, 1); + } catch (e: StringIndexOutOfBoundsError) { + ++hits; + if (DUMP) { + console.println("begin < 0"); + console.println(e); + } + } + // test for SlicedString + for (let i: int = 0; i <= copper.getLength(); ++i) { + let arr: char[] = copper.getChars(i, copper.getLength()); + if (equals(arr, table1[i])) { + ++hits; + } + if (DUMP) { + printArray(table1[i], "expected"); + printArray(arr, "getChars"); + console.println(); + } + } + + try { + // end > length + let arr: char[] = copper.getChars(1, copper.getLength() + 1); + } catch (e: StringIndexOutOfBoundsError) { + ++hits; + if (DUMP) { + console.println("end > length"); + console.println(e); + } + } + + try { + // begin > end + let arr: char[] = copper.getChars(2, 1); + } catch (e: StringIndexOutOfBoundsError) { + ++hits; + if (DUMP) { + console.println("begin > end"); + console.println(e); + } + } + + try { + // begin < 0 + let arr: char[] = copper.getChars(-1, 1); + } catch (e: StringIndexOutOfBoundsError) { + ++hits; + if (DUMP) { + console.println("begin < 0"); + console.println(e); + } + } + + + if (hits != 3*HITS_EXPECTED) { if (DUMP) { console.println("hits: " + hits + " (expected " + HITS_EXPECTED + ")"); } diff --git a/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_getlength.ets b/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_getlength.ets new file mode 100644 index 0000000000000000000000000000000000000000..bbb9fb5d58c9cede18051cbf3f200834361beeca --- /dev/null +++ 
b/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_getlength.ets @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2025 Huawei Device Co., Ltd. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +function newString(): String { + return new String("abcde"); +} + +function main(): int { + let s = newString(); + arktest.assertEQ(s.getLength(), 5, "s.length != 5"); + let treeStr = s.concat("fghijklmnopqrstuvwxyz") + arktest.assertEQ(treeStr.getLength(), 26, "treeStr.length != 26"); + let slicedStr = treeStr.substring(1, 5) + arktest.assertEQ(slicedStr.getLength(), 4, "slicedStr.length != 4"); + return 0; +} + diff --git a/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_hashcode.ets b/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_hashcode.ets index bb7a06367dc35b0aa1e44dbbdedbcda3dfadb8da..ace33d2d7dee4ad783347ec6875d4c51c5712456 100644 --- a/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_hashcode.ets +++ b/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_hashcode.ets @@ -131,6 +131,30 @@ function main(): int { } } + // Test for TreeString + let TreeCompressableStr_0: String = new String("0123456789abcdef"); + let TreeCompressableStr_1: String = new String("ghijk"); + let TreeCompressableStr = TreeCompressableStr_0.concat(TreeCompressableStr_1); + arktest.assertTrue(TreeCompressableStr === compressableStringsTable[compressableStringsTable.length - 1]); + 
arktest.assertEQ(TreeCompressableStr.$_hashCode(), compressable_hashcode_table[compressableStringsTable.length - 1]); + + let TreeUncompressableStr_0: String = new String("\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7"); + let TreeUncompressableStr_1: String = new String("\u00b8\u00b9\u00ba\u00bb\u00bc\u00bd\u00be\u00bf"); + let TreeUncompressableStr = TreeUncompressableStr_0.concat(TreeUncompressableStr_1); + arktest.assertTrue(TreeUncompressableStr === uncompressableStringsTable[uncompressableStringsTable.length - 1]); + arktest.assertEQ(TreeUncompressableStr.$_hashCode(), uncompressable_hashcode_table[uncompressable_hashcode_table.length - 1]); + + // Test for SliceString + let ToSliceCompressableStr: String = new String("123_0123456789abcdefghijk_456"); + let SliceCompressableStr: String = ToSliceCompressableStr.substring(4, ToSliceCompressableStr.getLength() - 4); + arktest.assertTrue(SliceCompressableStr === compressableStringsTable[compressableStringsTable.length - 1]); + arktest.assertEQ(SliceCompressableStr.$_hashCode(), compressable_hashcode_table[compressableStringsTable.length - 1]); + + let ToSliceUncompressableStr: String = new String("\u00b0\u00b0\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00b8\u00b9\u00ba\u00bb\u00bc\u00bd\u00be\u00bf\u00b0\u00b0"); + let SliceUncompressableStr: String = ToSliceUncompressableStr.substring(2, ToSliceUncompressableStr.getLength() - 2); + arktest.assertTrue(SliceUncompressableStr === uncompressableStringsTable[uncompressableStringsTable.length - 1]); + arktest.assertEQ(SliceUncompressableStr.$_hashCode(), uncompressable_hashcode_table[uncompressable_hashcode_table.length - 1]); + // Test hash collisions let hcA: String = new String(hashCollisionA); let hcB: String = new String(hashCollisionB); diff --git a/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_indexofstring.ets b/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_indexofstring.ets new file mode 100644 index 
0000000000000000000000000000000000000000..88770c8419bfe265d76e2eb922247138adf3fedb --- /dev/null +++ b/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_indexofstring.ets @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2024-2025 Huawei Device Co., Ltd. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +let Empty: String = new String(''); +let EscChars: String = new String('\f\n\r\t\v'); +let RusChars: String = new String('АаБбВвГгДдЕеЁёЖжЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЪъЫыЬьЭэЮюЯя'); // 16-bit +let EngChars: String = new String('AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz'); // 8-bit +let DupChars: String = new String('AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'); // 8-bit +const Digits: String = '0123456789'; +const Symbols: String = '`~!@#$%^&*()_-+={}[]:;\"\'\\|/<>,.'; +const CharNotFound: int = -1; + +function main(): int { + + /** + * indexOf(string): int => StdCoreStringIndexOfString + */ + + arktest.assertEQ(Empty.indexOf('', 0), 0); + arktest.assertEQ(Empty.indexOf('x', 0), CharNotFound); + arktest.assertEQ(EscChars.indexOf('\r\t\v', 1), 2); + arktest.assertEQ(EscChars.indexOf(EscChars), 0); + for (let i = 0; i < EscChars.getLength(); ++i) { + arktest.assertEQ(EscChars.indexOf(EscChars.substring(i, EscChars.length), i - 1), i); + } + for (let i = 0; i < EscChars.getLength(); ++i) { + arktest.assertEQ(EscChars.indexOf(EscChars.substring(0, i), 0), 0); + } + let mixedTreeChars = EscChars.concat(EngChars); + for (let i 
= 0; i < mixedTreeChars.getLength(); ++i) { + arktest.assertEQ(mixedTreeChars.indexOf(mixedTreeChars.substring(i, mixedTreeChars.length), i - 1), i); + } + for (let i = 0; i < mixedTreeChars.getLength(); ++i) { + arktest.assertEQ(mixedTreeChars.indexOf(mixedTreeChars.substring(0, i), 0), 0); + } + let escSlicedChars = EscChars.substring(1, 4); + for (let i = 0; i < escSlicedChars.getLength(); ++i) { + arktest.assertEQ(escSlicedChars.indexOf(escSlicedChars.substring(i, escSlicedChars.length), i - 1), i); + } + for (let i = 0; i < escSlicedChars.getLength(); ++i) { + arktest.assertEQ(escSlicedChars.indexOf(escSlicedChars.substring(0, i), 0), 0); + } + + for (let i = 0; i < RusChars.getLength(); ++i) { + arktest.assertEQ(RusChars.indexOf(RusChars.substring(i, RusChars.length), i - 1), i); + } + for (let i = 0; i < RusChars.getLength(); ++i) { + arktest.assertEQ(RusChars.indexOf(RusChars.substring(0, i), 0), 0); + } + arktest.assertEQ(RusChars.indexOf('Ѫ'), CharNotFound); + arktest.assertEQ(RusChars.indexOf(RusChars), 0); + + let rusChars_1 = new String('АаБбВвГгДдЕеЁёЖжЗзИиЙйКкЛлМмНн'); + let rusChars_2 = new String('ОоПпРрСсТтУуФфХхЦцЧчШшЩщЪъЫыЬьЭэЮюЯя'); + let rusTreeChars = rusChars_1.concat(rusChars_2); + arktest.assertTrue(rusTreeChars === RusChars); + arktest.assertTrue(RusChars === rusTreeChars); + arktest.assertEQ(RusChars.indexOf(rusChars_1), 0); + arktest.assertEQ(RusChars.indexOf(rusChars_2), rusChars_1.length); + arktest.assertEQ(rusTreeChars.indexOf(rusChars_1), 0); + arktest.assertEQ(rusTreeChars.indexOf(rusChars_2), rusChars_1.length); + for (let i = 0; i < rusTreeChars.getLength(); ++i) { + arktest.assertEQ(rusTreeChars.indexOf(rusTreeChars.substring(i, rusTreeChars.length), i - 1), i); + } + for (let i = 0; i < rusTreeChars.getLength(); ++i) { + arktest.assertEQ(rusTreeChars.indexOf(rusTreeChars.substring(0, i), 0), 0); + } + let rusSlicedChars = rusTreeChars.substring(3, 30); + arktest.assertEQ(RusChars.indexOf(rusSlicedChars), 3); + for (let i = 0; 
i < rusSlicedChars.getLength(); ++i) { + arktest.assertEQ(rusSlicedChars.indexOf(rusSlicedChars.substring(i, rusSlicedChars.length), i - 1), i); + } + for (let i = 0; i < rusSlicedChars.getLength(); ++i) { + arktest.assertEQ(rusSlicedChars.indexOf(rusSlicedChars.substring(0, i), 0), 0); + } + + for (let i = 0; i < EngChars.getLength(); ++i) { + arktest.assertEQ(EngChars.indexOf(EngChars.substring(i, EngChars.length), i - 1), i); + } + for (let i = 0; i < EngChars.getLength(); ++i) { + arktest.assertEQ(EngChars.indexOf(EngChars.substring(0, i), 0), 0); + } + arktest.assertEQ(EngChars.indexOf(c'æ'), CharNotFound); + arktest.assertEQ(EngChars.indexOf(EngChars), 0); + + let engChars_1 = new String('AaBbCcDdEeFfGgHhIiJjKkLlMmNn'); + let engChars_2 = new String('OoPpQqRrSsTtUuVvWwXxYyZz'); + let engTreeChars = engChars_1.concat(engChars_2); + arktest.assertEQ(EngChars.indexOf(engChars_1), 0); + arktest.assertEQ(EngChars.indexOf(engChars_2), engChars_1.length); + arktest.assertEQ(engTreeChars.indexOf(engChars_1), 0); + arktest.assertEQ(engTreeChars.indexOf(engChars_2), engChars_1.length); + arktest.assertTrue(engTreeChars === EngChars); + arktest.assertTrue(EngChars === engTreeChars); + for (let i = 0; i < engTreeChars.getLength(); ++i) { + arktest.assertEQ(engTreeChars.indexOf(engTreeChars.substring(i, engTreeChars.length), i - 1), i); + } + for (let i = 0; i < engTreeChars.getLength(); ++i) { + arktest.assertEQ(engTreeChars.indexOf(engTreeChars.substring(0, i), 0), 0); + } + let engSlicedChars = engTreeChars.substring(3, 30); + for (let i = 0; i < engSlicedChars.getLength(); ++i) { + arktest.assertEQ(engSlicedChars.indexOf(engSlicedChars.substring(i, engSlicedChars.length), i - 1), i); + } + for (let i = 0; i < engSlicedChars.getLength(); ++i) { + arktest.assertEQ(engSlicedChars.indexOf(engSlicedChars.substring(0, i), 0), 0); + } + + for (let i = 1; i < DupChars.getLength(); ++i) { + arktest.assertEQ(DupChars.indexOf(DupChars.substring(i, DupChars.length), i - 1), i - 
1); + } + for (let i = 0; i < DupChars.getLength(); ++i) { + arktest.assertEQ(DupChars.indexOf(DupChars.substring(0, i), 0), 0); + } + for (let i = 0; i < Digits.getLength(); ++i) { + arktest.assertEQ(Digits.indexOf(Digits.substring(i, Digits.length), i - 1), i); + } + for (let i = 0; i < Digits.getLength(); ++i) { + arktest.assertEQ(Digits.indexOf(Digits.substring(0, i), 0), 0); + } + for (let i = 0; i < Symbols.getLength(); ++i) { + arktest.assertEQ(Symbols.indexOf(Symbols.substring(i, Symbols.length), i - 1), i); + } + for (let i = 0; i < Symbols.getLength(); ++i) { + arktest.assertEQ(Symbols.indexOf(Symbols.substring(0, i), 0), 0); + } + let symbols_1 = new String('`~!@#$%^&*()_-+={}[]'); + let symbols_2 = new String(':;\"\'\\|/<>,.'); + let symbolsTreeChars = symbols_1.concat(symbols_2); + for (let i = 0; i < symbolsTreeChars.getLength(); ++i) { + arktest.assertEQ(symbolsTreeChars.indexOf(symbolsTreeChars.substring(i, symbolsTreeChars.length), i - 1), i); + } + let symbolsSlicedChars = symbolsTreeChars.substring(1, 12); + for (let i = 0; i < symbolsSlicedChars.getLength(); ++i) { + arktest.assertEQ(symbolsSlicedChars.indexOf(symbolsSlicedChars.substring(i, symbolsSlicedChars.length), i - 1), i); + } + return 0; +} diff --git a/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_iscompressed.ets b/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_iscompressed.ets new file mode 100644 index 0000000000000000000000000000000000000000..69f560abc31c3a857d001b397aecc080decb9bcf --- /dev/null +++ b/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_iscompressed.ets @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2025 Huawei Device Co., Ltd. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +const RusChars: String = new String('АаБбВвГгДдЕеЁёЖжЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЪъЫыЬьЭэЮюЯя'); // 16-bit +const EngChars: String = new String('AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz'); // 8-bit + +function main() { // isCompressed() must be false for the 16-bit fixture and true for the 8-bit one, also after concat()/substring() + arktest.assertEQ(RusChars.isCompressed(), false); + arktest.assertEQ(EngChars.isCompressed(), true); + + let utf16Temp = new String('𐍈𐌰𐌱𐌲𐌳𐌴𐌵𐌶𐌷𐌸𐌹𐌺𐌻𐌼𐌽𐌾𐌿'); // Gothic letters: every character lies outside the BMP + let treeUtf16Chars = RusChars.concat(utf16Temp); + arktest.assertEQ(treeUtf16Chars.isCompressed(), false); + treeUtf16Chars = treeUtf16Chars.concat(EngChars); // appending 8-bit text must not flip the result to compressed + arktest.assertEQ(treeUtf16Chars.isCompressed(), false); + let slicedUtf16Chars = treeUtf16Chars.substring(2, 10); + arktest.assertEQ(slicedUtf16Chars.isCompressed(), false); + slicedUtf16Chars = RusChars.substring(2, 10); + arktest.assertEQ(slicedUtf16Chars.isCompressed(), false); + + let utf8Temp = new String('1234567890'); + let treeUtf8Chars = EngChars.concat(utf8Temp); // both operands are 8-bit, so the result must stay compressed + arktest.assertEQ(treeUtf8Chars.isCompressed(), true); + let slicedUtf8Chars = treeUtf8Chars.substring(2, 10); + arktest.assertEQ(slicedUtf8Chars.isCompressed(), true); + slicedUtf8Chars = EngChars.substring(2, 10); + arktest.assertEQ(slicedUtf8Chars.isCompressed(), true); +} diff --git a/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_isempty.ets b/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_isempty.ets index 30252a4122d8a330c201739cfefa2af033c01870..a4aee6c0021ab991f40200069c95490924c6f0c3 100644 --- a/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_isempty.ets +++ 
b/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_isempty.ets @@ -13,14 +13,34 @@ * limitations under the License. */ -function newString(): String { - return new String('abcde'); -} function main(): int { - let s = newString(); - if (s.isEmpty()) { - return 1; - } - return 0; + let suite = new ArkTestsuite("StringIsEmpty"); + suite.addTest("test LineString", isEmptyLineTest); + suite.addTest("test SlicedString", isEmptySlicedTest); + suite.addTest("test TreeString", isEmptyTreeTest); + return suite.run(); +} + +function isEmptyLineTest() { // a default-constructed String is empty, "abcde" is not + let str: String = new String(); + let result: boolean = str.isEmpty(); + arktest.assertEQ(result, true); + str = "abcde"; + result = str.isEmpty(); + arktest.assertEQ(result, false); +} + +function isEmptySlicedTest() { + let str: String = "abcdefgh"; + let slicedStr = str.substring(1, 6); // slicedStr is SlicedString + arktest.assertEQ(slicedStr.length, 5); + arktest.assertEQ(slicedStr.isEmpty(), false); +} + +function isEmptyTreeTest() { + let str1: String = "abcdefghijklmn"; + let str2: String = "opqrstuvwxyz"; + let treeStr = str1.concat(str2); // treeStr is TreeString + arktest.assertEQ(treeStr.isEmpty(), false); } diff --git a/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_iswellformed.ets b/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_iswellformed.ets new file mode 100644 index 0000000000000000000000000000000000000000..25a7d449a862281fd6fde689072d1b3bfa868a25 --- /dev/null +++ b/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_iswellformed.ets @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2025 Huawei Device Co., Ltd. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +const validUTF16String: String = new String('Hello 你好 नमस्ते 😊👩💻\uD83D\uDC68\u200D\uD83D\uDCBBАаБбВвГгДдЕеЁё'); +const loneHighSurrogateString: String = new String('Normal start\uD83DMid content\uD83D\uDC68end'); +const loneLowSurrogateString: String = new String('abc\uDC68Invalid startMid content\uD83D\uDCBBEnd\uDCBBabc'); +const reversedSurrogateString: String = new String('Normal part\uDCBB\uD83DOther content'); + +const createValidUTF16TreeString = (): String => { + return new String('Hello 你好 नमस्ते 😊👩💻') // Multilingual + Emoji + .concat('\uD83D\uDC68\u200D\uD83D\uDCBB', 'АаБбВвГгДдЕеЁё'); //+ 👨💻 (valid surrogate pair) and rus chars +} + +const createLoneHighSurrogateTreeString = (): String => { + return new String('Normal start\uD83D') // Isolated high surrogate + .concat('Mid content\uD83D\uDC68') // Valid pair + .concat('\uD83Dend'); // Isolated high surrogate +} + +const createLoneLowSurrogateTreeString = (): String => { + return new String('\uDC68Invalid start') // Isolated low surrogate + .concat('Mid content\uD83D\uDCBB') // Valid pair + .concat('End\uDCBB'); // Isolated low surrogate +} + +const createReversedSurrogateTreeString = (): String => { + return new String('Normal part\uDCBB\uD83D') // Reversed pair (low-high) + .concat('Other content'); // Normal content +} + +function main(): void { // isWellFormed() on literal, concat-built and substring-derived strings + // Tests for LineString + arktest.assertTrue(validUTF16String.isWellFormed(), 'Valid UTF-16'); // true + arktest.assertFalse(loneHighSurrogateString.isWellFormed(), 'Lone high surrogate'); // false + 
arktest.assertFalse(loneLowSurrogateString.isWellFormed(), 'Lone low surrogate'); // false + arktest.assertFalse(reversedSurrogateString.isWellFormed(), 'Reversed surrogates'); // false + + // Tests for TreeString + arktest.assertTrue(createValidUTF16TreeString().isWellFormed(), 'Valid UTF-16 TreeString'); // true + arktest.assertFalse(createLoneHighSurrogateTreeString().isWellFormed(), 'Lone high surrogate TreeString'); // false + arktest.assertFalse(createLoneLowSurrogateTreeString().isWellFormed(), 'Lone low surrogate TreeString'); // false + arktest.assertFalse(createReversedSurrogateTreeString().isWellFormed(), 'Reversed surrogates TreeString'); // false + + // Tests for SlicedString: each slice is bounded by the length of the string being sliced + arktest.assertEQ(validUTF16String.substring(4, validUTF16String.length - 4).isWellFormed(), true, 'Valid UTF-16 SlicedString'); // true + arktest.assertEQ(loneHighSurrogateString.substring(2, loneHighSurrogateString.length - 2).isWellFormed(), false, 'Lone high surrogate SlicedString'); // false + arktest.assertEQ(loneLowSurrogateString.substring(1, loneLowSurrogateString.length - 1).isWellFormed(), false, 'Lone low surrogate SlicedString'); // false + arktest.assertEQ(reversedSurrogateString.substring(3, reversedSurrogateString.length - 3).isWellFormed(), false, 'Reversed surrogates SlicedString'); // false + + // Utf8 Tests + let utf8Str: String = new String('Hello1234567890abcdefg'); + arktest.assertEQ(utf8Str.isWellFormed(), true, 'Valid UTF-8'); // true + arktest.assertEQ(utf8Str.concat('1234567890abcdefg').isWellFormed(), true, 'Valid UTF-8 TreeString'); // true + arktest.assertEQ(utf8Str.substring(0, 5).isWellFormed(), true, 'Valid UTF-8 SlicedString'); // true +} diff --git a/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_lastindexof.ets b/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_lastindexof.ets new file mode 100644 index 0000000000000000000000000000000000000000..aa77b00406167dab39af8bce388e7fcc26376821 --- /dev/null +++ 
b/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_lastindexof.ets @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2025 Huawei Device Co., Ltd. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +let Empty: String = new String(''); +let EscChars: String = new String('\f\n\r\t\v'); +let RusChars: String = new String('АаБбВвГгДдЕеЁёЖжЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЪъЫыЬьЭэЮюЯя'); // 16-bit +let EngChars: String = new String('AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz'); // 8-bit +let DupChars: String = new String('AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'); // 8-bit +const Digits: String = '0123456789'; +const Symbols: String = '`~!@#$%^&*()_-+={}[]:;\"\'\\|/<>,.'; +const CharNotFound: int = -1; + +function main(): int { + + /** + * lastIndexOf(string): int => StdCoreStringlastIndexOfString + */ + + arktest.assertEQ(Empty.lastIndexOf(''), 0); + arktest.assertEQ(Empty.lastIndexOf('x'), CharNotFound); + arktest.assertEQ(EscChars.lastIndexOf('\r\t\v'), 2); + arktest.assertEQ(EscChars.lastIndexOf(EscChars), 0); + for (let i = 0; i < EscChars.getLength(); ++i) { // every suffix must be reported at its own start offset + arktest.assertEQ(EscChars.lastIndexOf(EscChars.substring(i, EscChars.length)), i); + } + for (let i = 1; i < EscChars.getLength(); ++i) { // every non-empty prefix, searched backwards from i + 1, must be reported at 0 + arktest.assertEQ(EscChars.lastIndexOf(EscChars.substring(0, i), i + 1), 0); + } + let mixedTreeChars = EscChars.concat(EngChars); + for (let i = 0; i < mixedTreeChars.getLength(); ++i) { + 
arktest.assertEQ(mixedTreeChars.lastIndexOf(mixedTreeChars.substring(i, mixedTreeChars.length)), i); + } + for (let i = 1; i < mixedTreeChars.getLength(); ++i) { + arktest.assertEQ(mixedTreeChars.lastIndexOf(mixedTreeChars.substring(0, i), i + 1), 0); + } + let escSlicedChars = EscChars.substring(1, 4); + for (let i = 0; i < escSlicedChars.getLength(); ++i) { + arktest.assertEQ(escSlicedChars.lastIndexOf(escSlicedChars.substring(i, escSlicedChars.length)), i); + } + for (let i = 1; i < escSlicedChars.getLength(); ++i) { + arktest.assertEQ(escSlicedChars.lastIndexOf(escSlicedChars.substring(0, i), i + 1), 0); + } + + for (let i = 0; i < RusChars.getLength(); ++i) { + arktest.assertEQ(RusChars.lastIndexOf(RusChars.substring(i, RusChars.length)), i); + } + for (let i = 1; i < RusChars.getLength(); ++i) { + arktest.assertEQ(RusChars.lastIndexOf(RusChars.substring(0, i), i + 1), 0); + } + arktest.assertEQ(RusChars.lastIndexOf('Ѫ'), CharNotFound); + arktest.assertEQ(RusChars.lastIndexOf(RusChars), 0); + + let rusChars_1 = new String('АаБбВвГгДдЕеЁёЖжЗзИиЙйКкЛлМмНн'); + let rusChars_2 = new String('ОоПпРрСсТтУуФфХхЦцЧчШшЩщЪъЫыЬьЭэЮюЯя'); + let rusTreeChars = rusChars_1.concat(rusChars_2); // same text as RusChars, rebuilt from two halves (asserted on the next two lines) + arktest.assertTrue(rusTreeChars === RusChars); + arktest.assertTrue(RusChars === rusTreeChars); + arktest.assertEQ(RusChars.lastIndexOf(rusChars_1), 0); + arktest.assertEQ(RusChars.lastIndexOf(rusChars_2), rusChars_1.length); + arktest.assertEQ(rusTreeChars.lastIndexOf(rusChars_1), 0); + arktest.assertEQ(rusTreeChars.lastIndexOf(rusChars_2), rusChars_1.length); + for (let i = 0; i < rusTreeChars.getLength(); ++i) { + arktest.assertEQ(rusTreeChars.lastIndexOf(rusTreeChars.substring(i, rusTreeChars.length)), i); + } + for (let i = 1; i < rusTreeChars.getLength(); ++i) { + arktest.assertEQ(rusTreeChars.lastIndexOf(rusTreeChars.substring(0, i), i + 1), 0); + } + let rusSlicedChars = rusTreeChars.substring(3, 30); + arktest.assertEQ(RusChars.lastIndexOf(rusSlicedChars), 3); + for (let i = 
0; i < rusSlicedChars.getLength(); ++i) { + arktest.assertEQ(rusSlicedChars.lastIndexOf(rusSlicedChars.substring(i, rusSlicedChars.length)), i); + } + for (let i = 1; i < rusSlicedChars.getLength(); ++i) { + arktest.assertEQ(rusSlicedChars.lastIndexOf(rusSlicedChars.substring(0, i), i + 1), 0); + } + + for (let i = 0; i < EngChars.getLength(); ++i) { + arktest.assertEQ(EngChars.lastIndexOf(EngChars.substring(i, EngChars.length)), i); + } + for (let i = 1; i < EngChars.getLength(); ++i) { + arktest.assertEQ(EngChars.lastIndexOf(EngChars.substring(0, i), i + 1), 0); + } + arktest.assertEQ(EngChars.lastIndexOf(c'æ'), CharNotFound); + arktest.assertEQ(EngChars.lastIndexOf(EngChars), 0); + + let engChars_1 = new String('AaBbCcDdEeFfGgHhIiJjKkLlMmNn'); + let engChars_2 = new String('OoPpQqRrSsTtUuVvWwXxYyZz'); + let engTreeChars = engChars_1.concat(engChars_2); // same text as EngChars, rebuilt from two halves (asserted below) + arktest.assertEQ(EngChars.lastIndexOf(engChars_1), 0); + arktest.assertEQ(EngChars.lastIndexOf(engChars_2), engChars_1.length); + arktest.assertEQ(engTreeChars.lastIndexOf(engChars_1), 0); + arktest.assertEQ(engTreeChars.lastIndexOf(engChars_2), engChars_1.length); + arktest.assertTrue(engTreeChars === EngChars); + arktest.assertTrue(EngChars === engTreeChars); + for (let i = 0; i < engTreeChars.getLength(); ++i) { + arktest.assertEQ(engTreeChars.lastIndexOf(engTreeChars.substring(i, engTreeChars.length)), i); + } + for (let i = 1; i < engTreeChars.getLength(); ++i) { + arktest.assertEQ(engTreeChars.lastIndexOf(engTreeChars.substring(0, i), i + 1), 0); + } + let engSlicedChars = engTreeChars.substring(3, 30); + for (let i = 0; i < engSlicedChars.getLength(); ++i) { + arktest.assertEQ(engSlicedChars.lastIndexOf(engSlicedChars.substring(i, engSlicedChars.length)), i); + } + for (let i = 1; i < engSlicedChars.getLength(); ++i) { + arktest.assertEQ(engSlicedChars.lastIndexOf(engSlicedChars.substring(0, i), i + 1), 0); + } + + for (let i = 0; i < DupChars.getLength(); ++i) { + 
arktest.assertEQ(DupChars.lastIndexOf(DupChars.substring(i, DupChars.length)), i); // all-'A' text: a run of (length - i) chars cannot start later than offset i + } + for (let i = 1; i < DupChars.getLength(); ++i) { + arktest.assertEQ(DupChars.lastIndexOf(DupChars.substring(0, i)), DupChars.length - i); // an i-char run of 'A' last fits at offset length - i + } + for (let i = 0; i < Digits.getLength(); ++i) { + arktest.assertEQ(Digits.lastIndexOf(Digits.substring(i, Digits.length)), i); + } + for (let i = 1; i < Digits.getLength(); ++i) { + arktest.assertEQ(Digits.lastIndexOf(Digits.substring(0, i), i + 1), 0); + } + for (let i = 0; i < Symbols.getLength(); ++i) { + arktest.assertEQ(Symbols.lastIndexOf(Symbols.substring(i, Symbols.length)), i); + } + for (let i = 1; i < Symbols.getLength(); ++i) { + arktest.assertEQ(Symbols.lastIndexOf(Symbols.substring(0, i), i + 1), 0); + } + let symbols_1 = new String('`~!@#$%^&*()_-+={}[]'); + let symbols_2 = new String(':;\"\'\\|/<>,.'); + let symbolsTreeChars = symbols_1.concat(symbols_2); + for (let i = 0; i < symbolsTreeChars.getLength(); ++i) { + arktest.assertEQ(symbolsTreeChars.lastIndexOf(symbolsTreeChars.substring(i, symbolsTreeChars.length)), i); + } + let symbolsSlicedChars = symbolsTreeChars.substring(1, 12); + for (let i = 1; i < symbolsSlicedChars.getLength(); ++i) { + arktest.assertEQ(symbolsSlicedChars.lastIndexOf(symbolsSlicedChars.substring(0, i), i + 1), 0); + } + return 0; +} diff --git a/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_length.ets b/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_length.ets index 42e4735ee065f6d732eeb77fd44490c366aa4210..a8520d7729ac9a2b0964888f4b9bbb88fcecd9a9 100644 --- a/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_length.ets +++ b/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_length.ets @@ -19,8 +19,10 @@ function newString(): String { function main(): int { let s = newString(); - if (s.length != 5) { - return 1; - } + arktest.assertEQ(s.length, 5, "s.length != 5"); + let treeStr = s.concat("fghijklmnopqrstuvwxyz") + 
arktest.assertEQ(treeStr.length, 26, "treeStr.length != 26"); // 5 chars of s plus the 21 appended ones + let slicedStr = treeStr.substring(1, 5) + arktest.assertEQ(slicedStr.length, 4, "slicedStr.length != 4"); // substring(1, 5) keeps 4 chars return 0; } diff --git a/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_normalize.ets b/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_normalize.ets new file mode 100644 index 0000000000000000000000000000000000000000..eb5d5bf1ffc3f13fb963a783d8197984f208b1da --- /dev/null +++ b/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_normalize.ets @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2024-2025 Huawei Device Co., Ltd. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +function testNormalization( + input: string, + expectedNFC: string, + expectedNFD: string, + expectedNFKC: string, + expectedNFKD: string +) { // asserts the result of input.normalize() for each of the four forms against the matching expected string + // test NFC + arktest.assertTrue(input.normalize("NFC") === expectedNFC, "NFC"); + // test NFD + arktest.assertTrue(input.normalize("NFD") === expectedNFD, "NFD"); + // test NFKC + arktest.assertTrue(input.normalize("NFKC") === expectedNFKC, "NFKC"); + // test NFKD + arktest.assertTrue(input.normalize("NFKD") === expectedNFKD, "NFKD"); +} + +function main() { + const inputString = "dghe\u0301a\u0301あ𝒜𝑨汉字𠜎abc𠁀xyzf?"; + + const expectedNFC = "dgh\u00E9\u00E1あ𝒜𝑨\u6C49\u5B57𠜎abc𠁀xyzf?"; + const expectedNFD = "dghe\u0301a\u0301あ𝒜𝑨\u6C49\u5B57𠜎abc𠁀xyzf?"; + + const expectedNFKC = "dgh\u00E9\u00E1あA\u0041\u6C49\u5B57𠜎abc𠁀xyzf?"; // compatibility forms fold the mathematical letters to plain "A" + const expectedNFKD = "dghe\u0301a\u0301あA\u0041\u6C49\u5B57𠜎abc𠁀xyzf?"; + + // test LineString + testNormalization(inputString, expectedNFC, expectedNFD, expectedNFKC, expectedNFKD); + + // test TreeString + const inputStr_0 = "dghe\u0301a\u0301あ𝒜"; + const inputStr_1 = "𝑨汉字𠜎abc𠁀xyzf?"; + const treeStr = inputStr_0.concat(inputStr_1); + arktest.assertTrue(treeStr === inputString); + testNormalization(treeStr, expectedNFC, expectedNFD, expectedNFKC, expectedNFKD); + + // test SlicedString + const inputStr_2 = "-+&dghe\u0301a\u0301あ𝒜𝑨汉字𠜎abc𠁀xyzf?op]"; // inputString padded with 3 extra chars on each side + const slicedStr = inputStr_2.substring(3, inputStr_2.length - 3); + arktest.assertTrue(slicedStr === inputString); + testNormalization(slicedStr, expectedNFC, expectedNFD, expectedNFKC, expectedNFKD); +} \ No newline at end of file diff --git a/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_repeat.ets b/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_repeat.ets index 6d012719a628da41399a314bb41d4c206986b3a9..3730e5580bc453958837e09ae0180e1ab32ec74f 100644 --- a/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_repeat.ets +++ 
b/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_repeat.ets @@ -13,32 +13,49 @@ * limitations under the License. */ -const gEmpty: String = ''; -const gHello: String = 'Hello'; -const gHelloDup: String = 'HelloHelloHelloHelloHelloHelloHelloHello'; -const gNature: String = '自然界'; -const gNatureDup: String = '自然界自然界自然界自然界自然界自然界自然界自然界'; - +const Empty: String = ''; +const Hello: String = 'Hello'; +const HelloDup: String = 'HelloHelloHelloHelloHelloHelloHelloHello'; +const Nature: String = '自然界'; +const NatureDup: String = '自然界自然界自然界自然界自然界自然界自然界自然界'; +const TreeString: String = "123456789".concat("abcdefghijklmn"); +const SlicedString: String = String("自然界12345678901234").substring(1, 6); +const TreeStringDup: String = TreeString.repeat(8); +const SlicedStringDup: String = SlicedString.repeat(8); function main(): int { - arktest.assertEQ(gEmpty.repeat(0), '') - arktest.assertEQ(gEmpty.repeat(1), '') - arktest.assertEQ(gNature.repeat(0), '') - arktest.assertEQ(gHello.repeat(0), '') - for (let i: int = 1; i < 8; ++i) { - let hello: String = gHello.repeat(i); - let nature: String = gNature.repeat(i); - if (hello != gHelloDup.substring(0, 5 * i)) { - return 10 + i; + arktest.assertEQ(Empty.repeat(0), ""); + arktest.assertEQ(Empty.repeat(1), ""); + arktest.assertEQ(Nature.repeat(0), ""); + arktest.assertEQ(Hello.repeat(0), ""); + arktest.assertEQ(TreeString.repeat(0), ""); + arktest.assertEQ(SlicedString.repeat(0), ""); + arktest.assertEQ(TreeString.repeat(2), "123456789abcdefghijklmn123456789abcdefghijklmn"); + arktest.assertEQ(SlicedString.repeat(2), "然界123然界123"); + arktest.assertEQ(TreeString.length * 8, TreeStringDup.length); + arktest.assertEQ(SlicedString.length * 8, SlicedStringDup.length); + for(let i = 1; i < 8; ++i) { // exit code = fixture family (10/20/30/40) + failing repeat count + let hello: String = Hello.repeat(i); + let nature: String = Nature.repeat(i); + let treeString: String = TreeString.repeat(i); + let slicedString: String = SlicedString.repeat(i); + if(hello != HelloDup.substring(0, 5 * i)) { + 
return 10+i; + } + if(nature != NatureDup.substring(0, 3 * i)) { + return 20+i; + } + if(treeString != TreeStringDup.substring(0, TreeString.length * i)) { + return 30+i; } - if (nature != gNatureDup.substring(0, 3 * i)) { - return 20 + i; + if(slicedString != SlicedStringDup.substring(0, SlicedString.length * i)) { + return 40+i; // own code range so a SlicedString mismatch is not mistaken for the TreeString one } } // try repeat() with a negative index - let errors: int = 0; + let errors = 0; try { - let z = gEmpty.repeat(-1); + let z = Empty.repeat(-1); } catch (e) { errors = 1; } diff --git a/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_startswith.ets b/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_startswith.ets index ab6214da59443937c7b67a5dcb1215f0cfd6e009..b8961632496323342b63667733a463ff5aa4de1d 100644 --- a/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_startswith.ets +++ b/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_startswith.ets @@ -13,52 +13,68 @@ * limitations under the License. 
*/ -const gEmpty: String = ''; -const gEsc: String = '\t\n\v'; -const gHello: String = 'Hello'; -const gNature: String = '自然界'; +const Empty: String = ''; +const Esc: String = '\t\n\v'; +const Hello: String = 'Hello'; +const Nature: String = '自然界'; +const TreeString: String = "12345678901234".concat("abcdefghijklmn"); // 14 + 14 chars joined by concat +const SlicedString: String = String("12345678901234").substring(1, 6); // "23456" function main(): int { - arktest.assertTrue(gEmpty.startsWith('')); - arktest.assertTrue(gEmpty.startsWith('', -1)); - arktest.assertTrue(gEmpty.startsWith('', 0)); - arktest.assertTrue(gEmpty.startsWith('', 1)); - arktest.assertFalse(gEmpty.startsWith('*')) - arktest.assertFalse(gEmpty.startsWith('*', -1)) - arktest.assertFalse(gEmpty.startsWith('*', 0)) - arktest.assertFalse(gEmpty.startsWith('*', 1)) + arktest.assertTrue(TreeString.startsWith('1')); + arktest.assertTrue(TreeString.startsWith("123")); + arktest.assertTrue(TreeString.startsWith("123", 0)); + arktest.assertTrue(TreeString.startsWith("123", -1)); // a negative position is expected to behave like 0 + arktest.assertTrue(TreeString.startsWith("234", 1)); + arktest.assertTrue(TreeString.startsWith("e", 18)); // offset 18 lies in the second concat operand, which starts at 14 - arktest.assertTrue(gEsc.startsWith('\t')) - arktest.assertTrue(gEsc.startsWith('\t', -1)) - arktest.assertTrue(gEsc.startsWith('\t', 0)) - arktest.assertTrue(gEsc.startsWith('\n', 1)) - arktest.assertTrue(gEsc.startsWith('\v', 2)) + arktest.assertTrue(SlicedString.startsWith('2')); + arktest.assertTrue(SlicedString.startsWith("234")); + arktest.assertTrue(SlicedString.startsWith("234", 0)); + arktest.assertTrue(SlicedString.startsWith("234", -1)); + arktest.assertTrue(SlicedString.startsWith("34", 1)); + arktest.assertFalse(SlicedString.startsWith("1", 0)); // the leading "1" of the sliced text is outside the slice - arktest.assertTrue(gHello.startsWith('')) - arktest.assertTrue(gHello.startsWith('', -1)) - arktest.assertTrue(gHello.startsWith('', 0)) - arktest.assertTrue(gHello.startsWith('', 1)) - arktest.assertTrue(gHello.startsWith('', 2)) - arktest.assertTrue(gHello.startsWith('', 3)) - 
arktest.assertTrue(gHello.startsWith('', 4)) - arktest.assertTrue(gHello.startsWith('', 5)) + arktest.assertTrue(Empty.startsWith('')); + arktest.assertTrue(Empty.startsWith('', -1)); + arktest.assertTrue(Empty.startsWith('', 0)); + arktest.assertTrue(Empty.startsWith('', 1)); + arktest.assertFalse(Empty.startsWith('*')); + arktest.assertFalse(Empty.startsWith('*', -1)); + arktest.assertFalse(Empty.startsWith('*', 0)); + arktest.assertFalse(Empty.startsWith('*', 1)); - arktest.assertTrue(gHello.startsWith('H')) - arktest.assertTrue(gHello.startsWith('Hel')) - arktest.assertTrue(gHello.startsWith('Hel', 0)) - arktest.assertTrue(gHello.startsWith('Hel', -1)) - arktest.assertTrue(gHello.startsWith('ello', 1)) - arktest.assertTrue(gHello.startsWith('o', 4)) + arktest.assertTrue(Esc.startsWith('\t')); + arktest.assertTrue(Esc.startsWith('\t', -1)); + arktest.assertTrue(Esc.startsWith('\t', 0)); + arktest.assertTrue(Esc.startsWith('\n', 1)); + arktest.assertTrue(Esc.startsWith('\v', 2)); - arktest.assertTrue(gNature.startsWith('自然界')) - arktest.assertTrue(gNature.startsWith('自然')) - arktest.assertTrue(gNature.startsWith('自')) - arktest.assertTrue(gNature.startsWith('自', -1)) - arktest.assertFalse(gNature.startsWith('然', 0)) - arktest.assertTrue(gNature.startsWith('然界', 1)) - arktest.assertTrue(gNature.startsWith('界', 2)) - arktest.assertFalse(gNature.startsWith('界', 3)) + arktest.assertTrue(Hello.startsWith('')); // the empty prefix matches at every position 0..length + arktest.assertTrue(Hello.startsWith('',-1)); + arktest.assertTrue(Hello.startsWith('', 0)); + arktest.assertTrue(Hello.startsWith('', 1)); + arktest.assertTrue(Hello.startsWith('', 2)); + arktest.assertTrue(Hello.startsWith('', 3)); + arktest.assertTrue(Hello.startsWith('', 4)); + arktest.assertTrue(Hello.startsWith('', 5)); + + arktest.assertTrue(Hello.startsWith('H')); + arktest.assertTrue(Hello.startsWith("Hel")); + arktest.assertTrue(Hello.startsWith("Hel", 0)); + arktest.assertTrue(Hello.startsWith("Hel", -1)); + 
arktest.assertTrue(Hello.startsWith("ello", 1)); + arktest.assertTrue(Hello.startsWith("o", 4)); + + arktest.assertTrue(Nature.startsWith('自然界')); + arktest.assertTrue(Nature.startsWith('自然')); + arktest.assertTrue(Nature.startsWith("自")); + arktest.assertTrue(Nature.startsWith("自", -1)); + arktest.assertFalse(Nature.startsWith("然", 0)); + arktest.assertTrue(Nature.startsWith("然界", 1)); + arktest.assertTrue(Nature.startsWith("界", 2)); + arktest.assertFalse(Nature.startsWith("界", 3)); // position equal to length: nothing non-empty can match return 0; } diff --git a/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_substring.ets b/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_substring.ets index 391efab391fa24ba9e5f31817addc9d543388c4c..fefb48c93d3816e8255339b2e5726774c27da44e 100644 --- a/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_substring.ets +++ b/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_substring.ets @@ -15,9 +15,9 @@ const DUMP: boolean = false; const HITS_EXPECTED: int = 40; -let golden: String = new String('abc\u03a3\u03a8\u03a90123456789!'); -let strlenPlus1: int = golden.getLength() + 1; -let hits: int = 0; +let Golden: String = new String("abc\u03a3\u03a8\u03a90123456789!"); +let strlenPlus1: int = Golden.getLength() + 1; + let stringTable1: String[] = [ 'abcΣΨΩ0123456789!', @@ -61,9 +61,29 @@ let stringTable2: String[] = [ 'abcΣΨΩ0123456789!' 
]; +let s_front: String = new String("abc\u03a3\u03a8\u03a9"); +let s_end: String = new String("0123456789!"); +let Silver: String = s_front.concat(s_end); // Golden's text rebuilt from two halves + +let MoreString: String = new String("dabcΣΨΩ0123456789!?"); +let Copper: String = MoreString.substring(1, MoreString.length - 1); // Golden's text cut out of a one-char-padded string function main(): int { + // for LineString + arktest.assertEQ(testString(Golden), 0); + + // for TreeString + arktest.assertEQ(testString(Silver), 0); + + // for SlicedString + arktest.assertEQ(testString(Copper), 0); + + return 0; +} + +function testString(golden: String): int { + let hits: int = 0; for (let i: int = 0; i <= golden.getLength(); ++i) { let subs: String = golden.substring(i, golden.getLength()); arktest.assertLT(i, stringTable1.length); diff --git a/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_tolocalelowercase.ets b/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_tolocalelowercase.ets new file mode 100644 index 0000000000000000000000000000000000000000..07edeeec508b94a9897119dbb418399990259d55 --- /dev/null +++ b/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_tolocalelowercase.ets @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2025 Huawei Device Co., Ltd. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +function main() { + testTurkishLocaleSpecific(); + testEnglishLocaleSpecific(); +} + +// Test Case 1: Turkish-specific rules with mixed characters +// Input: 'İIĞŞÇÖÜ12345!@#ÂΣẞ' (length: 18) +// Locale: 'tr-TR' +// Expected Output: 'iığşçöü12345!@#âσß' +function testTurkishLocaleSpecific() { + const input = 'İIĞŞÇÖÜ12345!@#ÂΣẞ'; + const expected = 'iığşçöü12345!@#âσß'; + const result = input.toLocaleLowerCase('tr-TR'); + arktest.assertTrue(result === expected, + `TR Locale Failed: ${result} vs ${expected}`); + + const input_0 = new String('İIĞŞÇÖÜ1'); + const input_1 = new String('2345!@#ÂΣẞ'); + const inputTree = input_0.concat(input_1); + arktest.assertTrue(inputTree.toLocaleLowerCase('tr-TR') === expected, + `TR Locale Failed: ${inputTree.toLocaleLowerCase('tr-TR')} vs ${expected}`); + + const inputSliced = new String('abcİIĞŞÇÖÜ12345!@#ÂΣẞdef').substring(3, 21); + arktest.assertTrue(inputSliced.toLocaleLowerCase('tr-TR') === expected, + `TR Locale Failed: ${inputSliced.toLocaleLowerCase('tr-TR')} vs ${expected}`); +} + +// Test Case 2: English locale ('İ' lowercases to 'i' followed by combining dot above, U+0307) +// Input: 'IİĞŞÇÖÜ12345!@#ÂΣẞ' (length: 18) +// Locale: 'en-US' +// Expected Output: 'i\u0069\u0307ğşçöü12345!@#âσß' +function testEnglishLocaleSpecific() { + const input = 'IİĞŞÇÖÜ12345!@#ÂΣẞ'; + const expected = 'ii̇ğşçöü12345!@#âσß'; + const result = input.toLocaleLowerCase('en-US'); + arktest.assertTrue(result === expected, + `en-US Locale Failed: ${result} vs ${expected}`); + + const input_0 = new String('IİĞŞÇÖÜ1'); + const input_1 = new String('2345!@#ÂΣẞ'); + const inputTree = input_0.concat(input_1); + arktest.assertTrue(result === inputTree.toLocaleLowerCase('en-US')); + arktest.assertTrue(inputTree.toLocaleLowerCase('en-US') === expected, + `en-US Locale Failed: ${inputTree.toLocaleLowerCase('en-US')} vs ${expected}`); + + const inputSliced = new String('abcIİĞŞÇÖÜ12345!@#ÂΣẞdef').substring(3, 21); + arktest.assertTrue(inputSliced.toLocaleLowerCase('en-US') === 
expected, + `en-US Locale Failed: ${inputSliced.toLocaleLowerCase('en-US')} vs ${expected}`); +} diff --git a/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_tolocaleuppercase.ets b/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_tolocaleuppercase.ets new file mode 100644 index 0000000000000000000000000000000000000000..ed24477724b4001ecbe5febdad59f4bc3b5ac127 --- /dev/null +++ b/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_tolocaleuppercase.ets @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2025 Huawei Device Co., Ltd. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +function main() { + testTurkishLocaleSpecific(); + testEnglishLocaleSpecific(); +} + +/** + * Tests Turkish locale-specific uppercase conversion rules with multiple string operations. + * + * Key Test Points: + * 1. Verifies the Turkish-specific mappings: + * - 'i' → 'İ' (dotted uppercase I) + * - 'ı' → 'I' (dotless uppercase I) + * - 'ğşçöü' → 'ĞŞÇÖÜ' + * 2. Validates non-Turkish characters: + * - 'σ' (Greek sigma) → 'Σ' + * - 'ß' (German sharp S) → 'SS' + * 3. 
Tests string manipulation combinations: + * - Concatenation (String.prototype.concat) + * - Substring extraction (String.prototype.substring) + */ +function testTurkishLocaleSpecific() { + // Base string test + const input = 'iığşçöü12345!@#âσß'; + const expected = 'İIĞŞÇÖÜ12345!@#ÂΣSS'; + const result = input.toLocaleUpperCase('tr-TR'); + arktest.assertTrue(result === expected, + `TR Locale Failed: ${result} vs ${expected}`); + + // TreeString test + const input_0 = new String('iığşçöü12'); + const input_1 = new String('345!@#âσß'); + const inputTree = input_0.concat(input_1); + const resultTree = inputTree.toLocaleUpperCase('tr-TR'); + arktest.assertTrue(resultTree === expected, + `TR Locale Failed: ${resultTree} vs ${expected}`); + + // SlicedString test + const inputSliced = new String('abciığşçöü12345!@#âσßdef').substring(3, 21); + const resultSliced = inputSliced.toLocaleUpperCase('tr-TR'); + arktest.assertTrue(resultSliced === expected, + `TR Locale Failed: ${resultSliced} vs ${expected}`); +} + +/** + * Tests English (US) locale uppercase conversion with Unicode normalization. + * + * Key Test Points: + * 1. Verifies English-specific behavior: + * - 'i' → 'I' (normal uppercase I) + * - 'i̇' (i with combining dot) → 'İ' + * 2. Handles Unicode normalization: + * - Normalizes results to NFC form for reliable comparison + * 3. Tests multilingual characters: + * - 'ğşçöü' (Turkish) → 'ĞŞÇÖÜ' + * - 'σ' (Greek) → 'Σ' + * - 'ß' → 'SS' + * 4. 
Validates string operations: + * - Concatenation and substring handling + */ +function testEnglishLocaleSpecific() { + // Base string with combining characters + const input = 'ii̇ğşçöü12345!@#âσß'; + const expected = 'IİĞŞÇÖÜ12345!@#ÂΣSS'.normalize('NFC'); + const result = input.toLocaleUpperCase('en-US').normalize('NFC'); + arktest.assertTrue(result === expected, + `en-US Locale Failed: ${result} vs ${expected}`); + + // TreeString test with normalization + const input_0 = new String('ii̇ğşçöü1'); + const input_1 = new String('2345!@#âσß'); + const inputTree = input_0.concat(input_1); + const resultTree = inputTree.toLocaleUpperCase('en-US').normalize('NFC'); + arktest.assertTrue(resultTree === expected, + `en-us Locale Failed: ${resultTree} vs ${expected}`); + + // SlicedString test with normalization + const inputSliced = new String('abcii̇ğşçöü12345!@#âσßdef').substring(3, 22); + const resultSliced = inputSliced.toLocaleUpperCase('en-US').normalize('NFC'); + arktest.assertTrue(resultSliced === expected, + `en-us Locale Failed: ${resultSliced} vs ${expected}`); +} diff --git a/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_tolowercase.ets b/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_tolowercase.ets new file mode 100644 index 0000000000000000000000000000000000000000..1b57f0812d150f020d51eb167c0bbf8238a3fbb2 --- /dev/null +++ b/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_tolowercase.ets @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2024-2025 Huawei Device Co., Ltd. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +function main() { + // Test cases for LineString + const valiedUTF16String: String = new String('Hello 你好 नमस्ते 😊👩💻\uD83D\uDC68\u200D\uD83D\uDCBBАаБбВвГгДдЕеЁё'); + arktest.assertTrue(valiedUTF16String.toLowerCase() === "hello 你好 नमस्ते 😊👩💻👨‍💻ааббввггддееёё", "valid Utf16Chars"); + const validEngChars: String = new String('ALPHABETalphabetABCabc'); + arktest.assertTrue(validEngChars.toLowerCase() === "alphabetalphabetabcabc", "valid EngChars"); + const invalidUTF16String: String = new String('Normal start\uD83DMid content\uD83D\uDC68end'); + arktest.assertTrue(invalidUTF16String.toLowerCase() === "normal start\uD83Dmid content\uD83D\uDC68end", "invalid UTF16Chars"); + + // Test cases for TreeString + const validUTF16TreeString: String = new String('Hello 你好 नमस्ते 😊👩💻').concat('😊👩💻ABCabc') // Multilingual + Emoji + arktest.assertTrue(validUTF16TreeString.toLowerCase() === "hello 你好 नमस्ते 😊👩💻😊👩💻abcabc", "valid UTF-16 TreeString"); + const validEngTreeString: String = new String('ALPHABETalphabetABCabc').concat('ABCabc'); + arktest.assertTrue(validEngTreeString.toLowerCase() === "alphabetalphabetabcabcabcabc", "valid EngChars TreeString"); + const invalidUTF16TreeString: String = new String('Normal start\uD83DMid content\uD83D\uDC68end').concat('ABCabc'); + arktest.assertTrue(invalidUTF16TreeString.toLowerCase() === "normal start\uD83Dmid content\uD83D\uDC68endabcabc", "invalid UTF16Chars TreeString"); + + // Test cases for SlicedString + const slicedValidUTF16String: String = valiedUTF16String.substring(4, valiedUTF16String.length - 4); 
+ arktest.assertTrue(slicedValidUTF16String.toLowerCase() === "o 你好 नमस्ते 😊👩💻👨‍💻ааббввггдд", "valid UTF-16 SlicedString"); + const slicedValidEngChars: String = validEngChars.substring(4, validEngChars.length - 4); + arktest.assertTrue(slicedValidEngChars.toLowerCase() === "abetalphabetab", "valid EngChars SlicedString"); + const slicedInvalidUTF16String: String = invalidUTF16String.substring(2, invalidUTF16String.length - 2); + arktest.assertTrue(slicedInvalidUTF16String.toLowerCase() === "rmal start\uD83Dmid content\uD83D\uDC68e", "invalid UTF16Chars SlicedString"); + +} diff --git a/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_touppercase.ets b/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_touppercase.ets new file mode 100644 index 0000000000000000000000000000000000000000..beb4fccc50ff1fd4df15c86330e6351c70c5df4e --- /dev/null +++ b/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_touppercase.ets @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2024-2025 Huawei Device Co., Ltd. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +function main() { + // Test cases for LineString + const valiedUTF16String: String = new String('Hello 你好 नमस्ते 😊👩💻\uD83D\uDC68\u200D\uD83D\uDCBBАаБбВвГгДдЕеЁё'); + arktest.assertTrue(valiedUTF16String.toUpperCase() === "HELLO 你好 नमस्ते 😊👩💻👨‍💻ААББВВГГДДЕЕЁЁ", "valid Utf16Chars"); + const validEngChars: String = new String('ALPHABETalphabetABCabc'); + arktest.assertTrue(validEngChars.toUpperCase() === "ALPHABETALPHABETABCABC", "valid EngChars"); + const invalidUTF16String: String = new String('Normal start\uD83DMid content\uD83D\uDC68end'); + arktest.assertTrue(invalidUTF16String.toUpperCase() === "NORMAL START\uD83DMID CONTENT👨END", "invalid UTF16Chars"); + + // Test cases for TreeString + const validUTF16TreeString: String = new String('Hello 你好 नमस्ते 😊👩💻').concat('😊👩💻ABCabc') // Multilingual + Emoji + arktest.assertTrue(validUTF16TreeString.toUpperCase() === "HELLO 你好 नमस्ते 😊👩💻😊👩💻ABCABC", "valid UTF-16 TreeString"); + const validEngTreeString: String = new String('ALPHABETalphabetABCabc').concat('ABCabc'); + arktest.assertTrue(validEngTreeString.toUpperCase() === "ALPHABETALPHABETABCABCABCABC", "valid EngChars TreeString"); + const invalidUTF16TreeString: String = new String('Normal start\uD83DMid content\uD83D\uDC68end').concat('ABCabc'); + arktest.assertTrue(invalidUTF16TreeString.toUpperCase() === "NORMAL START\uD83DMID CONTENT\uD83D\uDC68ENDABCABC", "invalid UTF16Chars TreeString"); + + // Test cases for SlicedString + const slicedValidUTF16String: String = valiedUTF16String.substring(4, valiedUTF16String.length - 4); + arktest.assertTrue(slicedValidUTF16String.toUpperCase() === "O 你好 नमस्ते 😊👩💻👨‍💻ААББВВГГДД", "valid UTF-16 SlicedString"); + const slicedValidEngChars: String = validEngChars.substring(4, validEngChars.length - 4); + arktest.assertTrue(slicedValidEngChars.toUpperCase() === "ABETALPHABETAB", "valid EngChars SlicedString"); + const slicedInvalidUTF16String: String = invalidUTF16String.substring(2, invalidUTF16String.length - 2); + 
arktest.assertTrue(slicedInvalidUTF16String.toUpperCase() === "RMAL START\uD83DMID CONTENT\uD83D\uDC68E", "invalid UTF16Chars SlicedString"); + +} diff --git a/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_trim.ets b/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_trim.ets index 2ad89081ed877374180efdbd67c38fa767b0af24..c45680b9c8a813465921e3d277159efc0122fac0 100644 --- a/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_trim.ets +++ b/static_core/plugins/ets/tests/ets-common-tests/intrinsics/string_trim.ets @@ -40,36 +40,47 @@ 0xFEFF -- byte order mark 0x3000 -- ideographic space */ -const gWss: String = +const Wss: String = '\u0020\u0009\u000A\u000B\u000C\u000D\u00A0\u1680\ \u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\ \u2008\u2009\u200A\u2028\u2029\u202F\u205F\uFEFF\u3000'; -const gEmpty: String = ''; -const gWs: String = ' '; -const gX: String = 'x'; -const gXWs: String = gX + gWs; -const gWsX: String = gWs + gX; -const gWsXWs: String = gWs + gX + gWs; -const gXy: String = 'gXy'; -const gWsXy: String = gWs + 'gXy'; -const gXyWs: String = 'gXy' + gWs; -const gWsXyWs: String = gWs + 'gXy' + gWs; -const gZ: String = '\u01B5'; // 'Ƶ' -const gZWs: String = gZ + gWs; -const gWsZ: String = gWs + gZ; -const gWsZWs: String = gWs + gZ + gWs; -const gAbc: String = 'abc'; -const gAwsBwsC: String = 'a b c'; -const gWsAbc: String = ' ' + gAbc; -const gAbcWs: String = gAbc + ' '; -const gWsAbcWs: String = ' ' + gAbc + ' '; -const gWssAbc: String = gWss + gAbc; -const gAbcWss: String = gAbc + gWss; -const gWssAbcWss: String = gWss + gAbc + gWss; -const gAbcWssAbc: String = gAbc + gWss + gAbc; -const gEsc: String = ' \t\n\v'; -const gRusChars: String = 'Русские буквы'; +const Empty: String = ''; +const Ws: String = ' '; +const X: String = 'x'; +const XWs: String = X + Ws; +const WsX: String = Ws + X; +const WsXWs: String = Ws + X + Ws; +const Xy: String = 'Xy'; +const WsXy: String = Ws + 'Xy'; +const XyWs: String = 'Xy' + 
Ws; +const WsXyWs: String = Ws + 'Xy' + Ws; +const Z: String = '\u01B5'; // 'Ƶ' +const ZWs: String = Z + Ws; +const WsZ: String = Ws + Z; +const WsZWs: String = Ws + Z + Ws; +const Abc: String = 'abc'; +const AwsBwsC: String = 'a b c'; +const WsAbc: String = ' ' + Abc; +const AbcWs: String = Abc + ' '; +const WsAbcWs: String = ' ' + Abc + ' '; +const WssAbc: String = Wss + Abc; +const AbcWss: String = Abc + Wss; +const WssAbcWss: String = Wss + Abc + Wss; +const AbcWssAbc: String = Abc + Wss + Abc; +const Esc: String = ' \t\n\v'; +const RusChars: String = 'Русские буквы'; + +const treestring: String = StringBuilder.concatStrings("12345678901234", "abcdefghijklmn"); +const WstreestringWs: String = Ws + treestring + Ws; +const treestringWs: String = treestring + Ws; +const Wstreestring: String = Ws + treestring; + +const slicedstring: String = RusChars.substring(3, 5); +const WsslicedstringWs: String = Ws + slicedstring + Ws; +const slicedstringWs: String = slicedstring + Ws; +const Wsslicedstring: String = Ws + slicedstring; + function main(): int { @@ -78,64 +89,70 @@ function main(): int { let c = i.toChar(); if (Char.isWhiteSpace(c)) { ++wssCount; - arktest.assertNE(gWss.indexOf(c), -1, 'Bad char code is encountered: ' + i); + arktest.assertNE(Wss.indexOf(c), -1, 'Bad char code is encountered: ' + i); } } - arktest.assertEQ(wssCount, gWss.length, 'Wrong number of u16 whitespaces: ' + wssCount); + arktest.assertEQ(wssCount, Wss.length, 'Wrong number of u16 whitespaces: ' + wssCount); // trim - arktest.assertEQ(gEmpty.trim(), '') - arktest.assertEQ(gWs.trim(), gEmpty) - arktest.assertEQ(gWss.trim(), gEmpty) - arktest.assertEQ(gXy.trim(), gXy) - arktest.assertEQ(gWsXy.trim(), gXy) - arktest.assertEQ(gXyWs.trim(), gXy) - arktest.assertEQ(gWsXyWs.trim(), gXy) - arktest.assertEQ(gAbc.trim(), gAbc) - arktest.assertEQ(gWsAbc.trim(), gAbc) - arktest.assertEQ(gAbcWs.trim(), gAbc) - arktest.assertEQ(gWsAbcWs.trim(), gAbc) - arktest.assertEQ(gAwsBwsC.trim(), gAwsBwsC) - 
arktest.assertEQ(gWssAbc.trim(), gAbc) - arktest.assertEQ(gAbcWss.trim(), gAbc) - arktest.assertEQ(gWssAbcWss.trim(), gAbc) - arktest.assertEQ(gAbcWssAbc.trim(), gAbcWssAbc) - arktest.assertEQ(gX.trim(), gX) - arktest.assertEQ(gWsX.trim(), gX) - arktest.assertEQ(gXWs.trim(), gX) - arktest.assertEQ(gZ.trim(), gZ) - arktest.assertEQ(gWsZ.trim(), gZ) - arktest.assertEQ(gZWs.trim(), gZ) - arktest.assertEQ(gEsc.trim(), gEmpty) - arktest.assertEQ(gRusChars.trim(), gRusChars) + arktest.assertEQ(Empty.trim(), ""); + arktest.assertEQ(Ws.trim(), Empty); + arktest.assertEQ(Wss.trim(), Empty); + arktest.assertEQ(Xy.trim(), Xy); + arktest.assertEQ(WsXy.trim(), Xy); + arktest.assertEQ(XyWs.trim(), Xy); + arktest.assertEQ(WsXyWs.trim(), Xy); + arktest.assertEQ(Abc.trim(), Abc); + arktest.assertEQ(WsAbc.trim(), Abc); + arktest.assertEQ(AbcWs.trim(), Abc); + arktest.assertEQ(WsAbcWs.trim(), Abc); + arktest.assertEQ(AwsBwsC.trim(), AwsBwsC); + arktest.assertEQ(WssAbc.trim(), Abc); + arktest.assertEQ(AbcWss.trim(), Abc); + arktest.assertEQ(WssAbcWss.trim(), Abc); + arktest.assertEQ(AbcWssAbc.trim(), AbcWssAbc); + arktest.assertEQ(X.trim(), X); + arktest.assertEQ(WsX.trim(), X); + arktest.assertEQ(XWs.trim(), X); + arktest.assertEQ(Z.trim(), Z); + arktest.assertEQ(WsZ.trim(), Z); + arktest.assertEQ(ZWs.trim(), Z); + arktest.assertEQ(Esc.trim(), Empty); + arktest.assertEQ(RusChars.trim(), RusChars); + arktest.assertEQ(WstreestringWs.trim(), treestring); + arktest.assertEQ(WsslicedstringWs.trim(), slicedstring); // trimLeft - arktest.assertEQ(gWs.trimLeft(), gEmpty) - arktest.assertEQ(gWss.trimLeft(), gEmpty) - arktest.assertEQ(gWsAbcWs.trimLeft(), gAbcWs) - arktest.assertEQ(gAwsBwsC.trimLeft(), gAwsBwsC) - arktest.assertEQ(gWssAbcWss.trimLeft(), gAbcWss) - arktest.assertEQ(gX.trimLeft(), gX) - arktest.assertEQ(gWsX.trimLeft(), gX) - arktest.assertEQ(gXWs.trimLeft(), gXWs) - arktest.assertEQ(gZ.trimLeft(), gZ) - arktest.assertEQ(gWsZ.trimLeft(), gZ) - arktest.assertEQ(gZWs.trimLeft(), 
gZWs) - arktest.assertEQ(gEsc.trimLeft(), gEmpty) - arktest.assertEQ(gRusChars.trimLeft(), gRusChars) + arktest.assertEQ(Ws.trimLeft(), Empty); + arktest.assertEQ(Wss.trimLeft(), Empty); + arktest.assertEQ(WsAbcWs.trimLeft(), AbcWs); + arktest.assertEQ(AwsBwsC.trimLeft(), AwsBwsC); + arktest.assertEQ(WssAbcWss.trimLeft(), AbcWss); + arktest.assertEQ(X.trimLeft(), X); + arktest.assertEQ(WsX.trimLeft(), X); + arktest.assertEQ(XWs.trimLeft(), XWs); + arktest.assertEQ(Z.trimLeft(), Z); + arktest.assertEQ(WsZ.trimLeft(), Z); + arktest.assertEQ(ZWs.trimLeft(), ZWs); + arktest.assertEQ(Esc.trimLeft(), Empty); + arktest.assertEQ(RusChars.trimLeft(), RusChars); + arktest.assertEQ(WstreestringWs.trimLeft(), treestringWs); + arktest.assertEQ(WsslicedstringWs.trimLeft(), slicedstringWs); // trimRight - arktest.assertEQ(gWs.trimRight(), gEmpty) - arktest.assertEQ(gWss.trimRight(), gEmpty) - arktest.assertEQ(gWsAbcWs.trimRight(), gWsAbc) - arktest.assertEQ(gAwsBwsC.trimRight(), gAwsBwsC) - arktest.assertEQ(gWssAbcWss.trimRight(), gWssAbc) - arktest.assertEQ(gX.trimRight(), gX) - arktest.assertEQ(gWsX.trimRight(), gWsX) - arktest.assertEQ(gXWs.trimRight(), gX) - arktest.assertEQ(gZ.trimRight(), gZ) - arktest.assertEQ(gWsZ.trimRight(), gWsZ) - arktest.assertEQ(gZWs.trimRight(), gZ) - arktest.assertEQ(gEsc.trimRight(), gEmpty) - arktest.assertEQ(gRusChars.trimRight(), gRusChars) + arktest.assertEQ(Ws.trimRight(), Empty); + arktest.assertEQ(Wss.trimRight(), Empty); + arktest.assertEQ(WsAbcWs.trimRight(), WsAbc); + arktest.assertEQ(AwsBwsC.trimRight(), AwsBwsC); + arktest.assertEQ(WssAbcWss.trimRight(), WssAbc); + arktest.assertEQ(X.trimRight(), X); + arktest.assertEQ(WsX.trimRight(), WsX); + arktest.assertEQ(XWs.trimRight(), X); + arktest.assertEQ(Z.trimRight(), Z); + arktest.assertEQ(WsZ.trimRight(), WsZ); + arktest.assertEQ(ZWs.trimRight(), Z); + arktest.assertEQ(Esc.trimRight(), Empty); + arktest.assertEQ(RusChars.trimRight(), RusChars); + 
arktest.assertEQ(WstreestringWs.trimRight(), Wstreestring); + arktest.assertEQ(WsslicedstringWs.trimRight(), Wsslicedstring); return 0; } diff --git a/static_core/plugins/ets/tests/ets_test_suite/intrinsics/CMakeLists.txt b/static_core/plugins/ets/tests/ets_test_suite/intrinsics/CMakeLists.txt index 2e5cdecf537dcf4530771af27ae153fdbe904986..2595d6716e2725ffd71e4a64f54b09fca4533de4 100644 --- a/static_core/plugins/ets/tests/ets_test_suite/intrinsics/CMakeLists.txt +++ b/static_core/plugins/ets/tests/ets_test_suite/intrinsics/CMakeLists.txt @@ -22,23 +22,6 @@ set(intrinsics_tests isFinite isInteger isSafeInteger - string_equals - string_length - string_isempty - string_from_string - string_from_char_code - string_from_chars - string_substring - string_getchars - string_getbytes - string_hashcode - string_concat - string_trim - string_startswith - string_endswith - string_indexof - string_repeat - string_compare_to typed_arrays_reverse char_isuppercase abs @@ -48,8 +31,6 @@ set(intrinsics_tests floor ceil trunc - stringbuilder_oom - stringbuilder uint8array_sort uint8clampedarray_sort uint16array_sort @@ -75,6 +56,58 @@ foreach(test ${intrinsics_tests}) set(target ets_test_suite_intrinsics_${test}) set(extra_options "") + run_int_jit_aot_ets_code(${test_in} ${test_out_dir} ${target} ${extra_options}) + add_dependencies(ets_test_suite_intrinsics ${target}) +endforeach() + +set(intrinsics_string_tests + string_charat + string_compare_to + string_concat + string_endswith + string_equals + string_from_char_code_escape + string_from_char_code + string_from_chars + string_from_string + string_fromcodepoint + string_get + string_getbytes + string_getchars + string_getlength + string_hashcode + string_indexof + string_indexofstring + string_iscompressed + string_isempty + string_iswellformed + string_lastindexof + string_length + string_normalize + string_repeat + string_startswith + string_substring + string_tolocalelowercase + string_tolocaleuppercase + string_tolowercase + 
string_touppercase + string_trim + stringbuilder_oom + stringbuilder +) + +set(intrinsics_string_tests_in_dir "${CMAKE_CURRENT_SOURCE_DIR}") +set(intrinsics_string_tests_out_dir "${CMAKE_CURRENT_BINARY_DIR}") + +add_custom_target(ets_test_suite_intrinsics_string) + +foreach(test ${intrinsics_string_tests}) + set(test_out_dir "${intrinsics_string_tests_out_dir}/${test}") + + set(test_in "${intrinsics_string_tests_in_dir}/${test}.ets") + set(target ets_test_suite_intrinsics_string_${test}) + set(extra_options "") + # disable OSR for the tests throwing exceptions if ("${test}" STREQUAL "stringbuilder_oom") set(extra_options RUNTIME_EXTRA_OPTIONS "--compiler-enable-osr=false") @@ -88,9 +121,21 @@ foreach(test ${intrinsics_tests}) if ("${test}" STREQUAL "stringbuilder" OR "${test}" STREQUAL "string_compare_to" OR "${test}" STREQUAL "string_from_char_code") - run_int_jit_aot_ets_code_foreach_gc(ets_test_suite_intrinsics ${test_in} ${test_out_dir} ${target}) + run_int_jit_aot_ets_code_foreach_gc(ets_test_suite_intrinsics_string ${test_in} ${test_out_dir} ${target}) else() run_int_jit_aot_ets_code(${test_in} ${test_out_dir} ${target} ${extra_options}) - add_dependencies(ets_test_suite_intrinsics ${target}) + add_dependencies(ets_test_suite_intrinsics_string ${target}) + if(extra_options) + set(extra_enable_string_options ${extra_options} "--use-all-strings=true") + else() + set(extra_enable_string_options RUNTIME_EXTRA_OPTIONS "--use-all-strings=true") + endif() + run_int_ets_code( + ${target}-all-strings + ${test_out_dir} + SOURCES "${test_in}" + ${extra_enable_string_options} + ) + add_dependencies(ets_test_suite_intrinsics_string ${target}-all-strings) endif() -endforeach() +endforeach() \ No newline at end of file diff --git a/static_core/plugins/ets/tests/ets_test_suite/strings/CMakeLists.txt b/static_core/plugins/ets/tests/ets_test_suite/strings/CMakeLists.txt index b217da8d204dead6acb9689d44e182f99f1cfa00..d365ad76ca0279fec5ae3a96eb6f159aa3642e12 100644 --- 
a/static_core/plugins/ets/tests/ets_test_suite/strings/CMakeLists.txt +++ b/static_core/plugins/ets/tests/ets_test_suite/strings/CMakeLists.txt @@ -26,8 +26,8 @@ foreach(test ${strings_tests}) set(test_in "${strings_tests_in_dir}/${test}.ets") set(target ets_test_suite_strings_${test}) - - if (PANDA_TARGET_ARM32) + set(extra_options "") + if(PANDA_TARGET_ARM32) # failure in regalloc, see #13413 set(extra_options RUNTIME_EXTRA_OPTIONS "--compiler-inlining-blacklist=std.core.ETSGLOBAL::copyTo,std.core.String::codePointAt" "--compiler-regex='(?!std.core.ETSGLOBAL::copyTo)&(?!std.core.String::codePointAt).*'" "--compiler-ignore-failures=true") endif() @@ -36,7 +36,27 @@ foreach(test ${strings_tests}) add_dependencies(ets_test_suite_strings ${target}-ets-jit ${target}-ets-int) - if (NOT CMAKE_CROSSCOMPILING) + + if(NOT CMAKE_CROSSCOMPILING) add_dependencies(ets_test_suite_strings ${target}-ets-aot) endif() + + if(extra_options) + set(extra_enable_string_options ${extra_options}) + list(APPEND extra_enable_string_options + RUNTIME_EXTRA_OPTIONS "--use-all-strings=true" + ) + else() + set(extra_enable_string_options + RUNTIME_EXTRA_OPTIONS "--use-all-strings=true" + ) + endif() + + run_int_ets_code( + ${target}-all-strings + ${test_out_dir} + SOURCES "${test_in}" + ${extra_enable_string_options} + ) + add_dependencies(ets_test_suite_strings ${target}-all-strings) endforeach() diff --git a/static_core/plugins/ets/tests/runtime/types/ets_string_from_char_code_test.cpp b/static_core/plugins/ets/tests/runtime/types/ets_string_from_char_code_test.cpp index 0dbb080ab8cffb6c51ff6cbee339251077d5a532..b37a8b437672361bae93907d698b564b0d54c128 100644 --- a/static_core/plugins/ets/tests/runtime/types/ets_string_from_char_code_test.cpp +++ b/static_core/plugins/ets/tests/runtime/types/ets_string_from_char_code_test.cpp @@ -101,8 +101,8 @@ TEST_F(EtsStringFromCharCodeTest, CreateNewCompressedStringFromCharCodes) EtsString *stringFromCompressedCharCodes = 
CreateNewStringFromCharCodes({0x48, 0x65, 0x6C, 0x6C, 0x6F, 4294901862, 0xffff0066, 10.316}); ASSERT_TRUE(stringFromCompressedCharCodes->GetCoreType()->IsMUtf8()); - ASSERT_TRUE(coretypes::String::StringsAreEqual(expectedCompressedString->GetCoreType(), - stringFromCompressedCharCodes->GetCoreType())); + ASSERT_TRUE(coretypes::BaseString::StringsAreEqual(expectedCompressedString->GetCoreType(), + stringFromCompressedCharCodes->GetCoreType())); } TEST_F(EtsStringFromCharCodeTest, CreateNewCompressedStringFromCharCode) @@ -110,13 +110,13 @@ TEST_F(EtsStringFromCharCodeTest, CreateNewCompressedStringFromCharCode) EtsString *expectedCompressedString = EtsString::CreateFromMUtf8("A"); EtsString *stringFromCompressedCharCodes = CreateNewStringFromCharCodes({0x41}); ASSERT_TRUE(stringFromCompressedCharCodes->GetCoreType()->IsMUtf8()); - ASSERT_TRUE(coretypes::String::StringsAreEqual(expectedCompressedString->GetCoreType(), - stringFromCompressedCharCodes->GetCoreType())); + ASSERT_TRUE(coretypes::BaseString::StringsAreEqual(expectedCompressedString->GetCoreType(), + stringFromCompressedCharCodes->GetCoreType())); EtsString *stringFromCompressedCharCode = CreateNewStringFromCharCode(0x41); ASSERT_TRUE(stringFromCompressedCharCode->GetCoreType()->IsMUtf8()); - ASSERT_TRUE(coretypes::String::StringsAreEqual(expectedCompressedString->GetCoreType(), - stringFromCompressedCharCode->GetCoreType())); + ASSERT_TRUE(coretypes::BaseString::StringsAreEqual(expectedCompressedString->GetCoreType(), + stringFromCompressedCharCode->GetCoreType())); } TEST_F(EtsStringFromCharCodeTest, CreateNewUncompressedStringFromCharCode) @@ -125,13 +125,13 @@ TEST_F(EtsStringFromCharCodeTest, CreateNewUncompressedStringFromCharCode) EtsString *expectedUncompressedString = EtsString::CreateFromUtf16(data.data(), static_cast(data.size())); EtsString *stringFromUncompressedCharCodes = CreateNewStringFromCharCodes({0x3B2}); ASSERT_TRUE(stringFromUncompressedCharCodes->GetCoreType()->IsUtf16()); - 
ASSERT_TRUE(coretypes::String::StringsAreEqual(expectedUncompressedString->GetCoreType(), - stringFromUncompressedCharCodes->GetCoreType())); + ASSERT_TRUE(coretypes::BaseString::StringsAreEqual(expectedUncompressedString->GetCoreType(), + stringFromUncompressedCharCodes->GetCoreType())); EtsString *stringFromUncompressedCharCode = CreateNewStringFromCharCode(0x3B2); ASSERT_TRUE(stringFromUncompressedCharCode->GetCoreType()->IsUtf16()); - ASSERT_TRUE(coretypes::String::StringsAreEqual(expectedUncompressedString->GetCoreType(), - stringFromUncompressedCharCode->GetCoreType())); + ASSERT_TRUE(coretypes::BaseString::StringsAreEqual(expectedUncompressedString->GetCoreType(), + stringFromUncompressedCharCode->GetCoreType())); } TEST_F(EtsStringFromCharCodeTest, CreateNewUncompressedStringFromCharCodes) @@ -154,8 +154,8 @@ TEST_F(EtsStringFromCharCodeTest, CreateNewUncompressedStringFromCharCodes) 0}; EtsString *stringFromUncompressedCharCodes = CreateNewStringFromCharCodes(charCodes); ASSERT_TRUE(stringFromUncompressedCharCodes->GetCoreType()->IsUtf16()); - ASSERT_TRUE(coretypes::String::StringsAreEqual(expectedUncompressedString->GetCoreType(), - stringFromUncompressedCharCodes->GetCoreType())); + ASSERT_TRUE(coretypes::BaseString::StringsAreEqual(expectedUncompressedString->GetCoreType(), + stringFromUncompressedCharCodes->GetCoreType())); } TEST_F(EtsStringFromCharCodeTest, CreateNewEmptyStringFromCharCode) @@ -163,7 +163,7 @@ TEST_F(EtsStringFromCharCodeTest, CreateNewEmptyStringFromCharCode) EtsString *emptyString = EtsString::CreateNewEmptyString(); EtsString *stringFromCharCodes = CreateNewStringFromCharCodes({}); ASSERT_TRUE(stringFromCharCodes->GetCoreType()->IsMUtf8()); - ASSERT_TRUE(coretypes::String::StringsAreEqual(emptyString->GetCoreType(), stringFromCharCodes->GetCoreType())); + ASSERT_TRUE(coretypes::BaseString::StringsAreEqual(emptyString->GetCoreType(), stringFromCharCodes->GetCoreType())); } TEST_F(EtsStringFromCharCodeTest, 
CreateNewStringFromNaNCharCode) @@ -173,13 +173,13 @@ TEST_F(EtsStringFromCharCodeTest, CreateNewStringFromNaNCharCode) EtsString *stringFromUncompressedCharCodes = CreateNewStringFromCharCodes({std::numeric_limits::quiet_NaN()}); ASSERT_TRUE(stringFromUncompressedCharCodes->GetCoreType()->IsUtf16()); - ASSERT_TRUE(coretypes::String::StringsAreEqual(expectedUncompressedString->GetCoreType(), - stringFromUncompressedCharCodes->GetCoreType())); + ASSERT_TRUE(coretypes::BaseString::StringsAreEqual(expectedUncompressedString->GetCoreType(), + stringFromUncompressedCharCodes->GetCoreType())); EtsString *stringFromUncompressedCharCode = CreateNewStringFromCharCode(std::numeric_limits::quiet_NaN()); ASSERT_TRUE(stringFromUncompressedCharCode->GetCoreType()->IsUtf16()); - ASSERT_TRUE(coretypes::String::StringsAreEqual(expectedUncompressedString->GetCoreType(), - stringFromUncompressedCharCode->GetCoreType())); + ASSERT_TRUE(coretypes::BaseString::StringsAreEqual(expectedUncompressedString->GetCoreType(), + stringFromUncompressedCharCode->GetCoreType())); } TEST_F(EtsStringFromCharCodeTest, CreateNewStringFromInfinityCharCode) @@ -189,13 +189,13 @@ TEST_F(EtsStringFromCharCodeTest, CreateNewStringFromInfinityCharCode) EtsString *stringFromUncompressedCharCodes = CreateNewStringFromCharCodes({std::numeric_limits::infinity()}); ASSERT_TRUE(stringFromUncompressedCharCodes->GetCoreType()->IsUtf16()); - ASSERT_TRUE(coretypes::String::StringsAreEqual(expectedUncompressedString->GetCoreType(), - stringFromUncompressedCharCodes->GetCoreType())); + ASSERT_TRUE(coretypes::BaseString::StringsAreEqual(expectedUncompressedString->GetCoreType(), + stringFromUncompressedCharCodes->GetCoreType())); EtsString *stringFromUncompressedCharCode = CreateNewStringFromCharCode(std::numeric_limits::infinity()); ASSERT_TRUE(stringFromUncompressedCharCode->GetCoreType()->IsUtf16()); - ASSERT_TRUE(coretypes::String::StringsAreEqual(expectedUncompressedString->GetCoreType(), - 
stringFromUncompressedCharCode->GetCoreType())); + ASSERT_TRUE(coretypes::BaseString::StringsAreEqual(expectedUncompressedString->GetCoreType(), + stringFromUncompressedCharCode->GetCoreType())); } TEST_F(EtsStringFromCharCodeTest, CreateNewStringFromNaNAndInfinityCharCodes) @@ -206,8 +206,8 @@ TEST_F(EtsStringFromCharCodeTest, CreateNewStringFromNaNAndInfinityCharCodes) CreateNewStringFromCharCodes({std::numeric_limits::quiet_NaN(), std::numeric_limits::infinity(), -std::numeric_limits::infinity()}); ASSERT_TRUE(stringFromUncompressedCharCodes->GetCoreType()->IsUtf16()); - ASSERT_TRUE(coretypes::String::StringsAreEqual(expectedUncompressedString->GetCoreType(), - stringFromUncompressedCharCodes->GetCoreType())); + ASSERT_TRUE(coretypes::BaseString::StringsAreEqual(expectedUncompressedString->GetCoreType(), + stringFromUncompressedCharCodes->GetCoreType())); } TEST_F(EtsStringFromCharCodeTest, CreateNewStringFromMaxAvailableCharCode) @@ -216,13 +216,13 @@ TEST_F(EtsStringFromCharCodeTest, CreateNewStringFromMaxAvailableCharCode) EtsString *expectedUncompressedString = EtsString::CreateFromUtf16(data.data(), static_cast(data.size())); EtsString *stringFromMaxCharCodes1 = CreateNewStringFromCharCodes({9007199254740991.0}); ASSERT_TRUE(stringFromMaxCharCodes1->GetCoreType()->IsUtf16()); - ASSERT_TRUE(coretypes::String::StringsAreEqual(expectedUncompressedString->GetCoreType(), - stringFromMaxCharCodes1->GetCoreType())); + ASSERT_TRUE(coretypes::BaseString::StringsAreEqual(expectedUncompressedString->GetCoreType(), + stringFromMaxCharCodes1->GetCoreType())); EtsString *stringFromMaxCharCode1 = CreateNewStringFromCharCode(9007199254740991.0); ASSERT_TRUE(stringFromMaxCharCode1->GetCoreType()->IsUtf16()); - ASSERT_TRUE(coretypes::String::StringsAreEqual(expectedUncompressedString->GetCoreType(), - stringFromMaxCharCode1->GetCoreType())); + ASSERT_TRUE(coretypes::BaseString::StringsAreEqual(expectedUncompressedString->GetCoreType(), + 
stringFromMaxCharCode1->GetCoreType())); } TEST_F(EtsStringFromCharCodeTest, CreateNewStringFromMinAvailableCharCode) @@ -230,13 +230,13 @@ TEST_F(EtsStringFromCharCodeTest, CreateNewStringFromMinAvailableCharCode) EtsString *expectedCompressedString = EtsString::CreateFromMUtf8("\x01"); EtsString *stringFromMaxCharCodes1 = CreateNewStringFromCharCodes({-9007199254740991.0}); ASSERT_TRUE(stringFromMaxCharCodes1->GetCoreType()->IsMUtf8()); - ASSERT_TRUE(coretypes::String::StringsAreEqual(expectedCompressedString->GetCoreType(), - stringFromMaxCharCodes1->GetCoreType())); + ASSERT_TRUE(coretypes::BaseString::StringsAreEqual(expectedCompressedString->GetCoreType(), + stringFromMaxCharCodes1->GetCoreType())); EtsString *stringFromMaxCharCode1 = CreateNewStringFromCharCode(-9007199254740991.0); ASSERT_TRUE(stringFromMaxCharCode1->GetCoreType()->IsMUtf8()); - ASSERT_TRUE(coretypes::String::StringsAreEqual(expectedCompressedString->GetCoreType(), - stringFromMaxCharCode1->GetCoreType())); + ASSERT_TRUE(coretypes::BaseString::StringsAreEqual(expectedCompressedString->GetCoreType(), + stringFromMaxCharCode1->GetCoreType())); } TEST_F(EtsStringFromCharCodeTest, CreateNewStringFromHugeCharCode) @@ -245,33 +245,33 @@ TEST_F(EtsStringFromCharCodeTest, CreateNewStringFromHugeCharCode) EtsString *expectedUncompressedString = EtsString::CreateFromUtf16(data.data(), static_cast(data.size())); EtsString *stringFromHugeCharCodes1 = CreateNewStringFromCharCodes({18446744073709551616.0}); ASSERT_TRUE(stringFromHugeCharCodes1->GetCoreType()->IsUtf16()); - ASSERT_TRUE(coretypes::String::StringsAreEqual(expectedUncompressedString->GetCoreType(), - stringFromHugeCharCodes1->GetCoreType())); + ASSERT_TRUE(coretypes::BaseString::StringsAreEqual(expectedUncompressedString->GetCoreType(), + stringFromHugeCharCodes1->GetCoreType())); EtsString *stringFromHugeCharCode1 = CreateNewStringFromCharCode(18446744073709551616.0); ASSERT_TRUE(stringFromHugeCharCode1->GetCoreType()->IsUtf16()); - 
ASSERT_TRUE(coretypes::String::StringsAreEqual(expectedUncompressedString->GetCoreType(), - stringFromHugeCharCode1->GetCoreType())); + ASSERT_TRUE(coretypes::BaseString::StringsAreEqual(expectedUncompressedString->GetCoreType(), + stringFromHugeCharCode1->GetCoreType())); EtsString *stringFromHugeCharCodes2 = CreateNewStringFromCharCodes({18446744073709551617.0}); ASSERT_TRUE(stringFromHugeCharCodes2->GetCoreType()->IsUtf16()); - ASSERT_TRUE(coretypes::String::StringsAreEqual(expectedUncompressedString->GetCoreType(), - stringFromHugeCharCodes2->GetCoreType())); + ASSERT_TRUE(coretypes::BaseString::StringsAreEqual(expectedUncompressedString->GetCoreType(), + stringFromHugeCharCodes2->GetCoreType())); EtsString *stringFromHugeCharCode2 = CreateNewStringFromCharCode(18446744073709551617.0); ASSERT_TRUE(stringFromHugeCharCode2->GetCoreType()->IsUtf16()); - ASSERT_TRUE(coretypes::String::StringsAreEqual(expectedUncompressedString->GetCoreType(), - stringFromHugeCharCode2->GetCoreType())); + ASSERT_TRUE(coretypes::BaseString::StringsAreEqual(expectedUncompressedString->GetCoreType(), + stringFromHugeCharCode2->GetCoreType())); EtsString *stringFromHugeCharCodes3 = CreateNewStringFromCharCodes({9007199254740992.0}); ASSERT_TRUE(stringFromHugeCharCodes3->GetCoreType()->IsUtf16()); - ASSERT_TRUE(coretypes::String::StringsAreEqual(expectedUncompressedString->GetCoreType(), - stringFromHugeCharCodes3->GetCoreType())); + ASSERT_TRUE(coretypes::BaseString::StringsAreEqual(expectedUncompressedString->GetCoreType(), + stringFromHugeCharCodes3->GetCoreType())); EtsString *stringFromHugeCharCode3 = CreateNewStringFromCharCode(9007199254740992.0); ASSERT_TRUE(stringFromHugeCharCode3->GetCoreType()->IsUtf16()); - ASSERT_TRUE(coretypes::String::StringsAreEqual(expectedUncompressedString->GetCoreType(), - stringFromHugeCharCode3->GetCoreType())); + ASSERT_TRUE(coretypes::BaseString::StringsAreEqual(expectedUncompressedString->GetCoreType(), + stringFromHugeCharCode3->GetCoreType())); 
} TEST_F(EtsStringFromCharCodeTest, CreateNewStringFromHugeNegativeCharCode) @@ -280,33 +280,33 @@ TEST_F(EtsStringFromCharCodeTest, CreateNewStringFromHugeNegativeCharCode) EtsString *expectedUncompressedString = EtsString::CreateFromUtf16(data.data(), static_cast(data.size())); EtsString *stringFromHugeCharCodes1 = CreateNewStringFromCharCodes({-18446744073709551616.0}); ASSERT_TRUE(stringFromHugeCharCodes1->GetCoreType()->IsUtf16()); - ASSERT_TRUE(coretypes::String::StringsAreEqual(expectedUncompressedString->GetCoreType(), - stringFromHugeCharCodes1->GetCoreType())); + ASSERT_TRUE(coretypes::BaseString::StringsAreEqual(expectedUncompressedString->GetCoreType(), + stringFromHugeCharCodes1->GetCoreType())); EtsString *stringFromHugeCharCode1 = CreateNewStringFromCharCode(-18446744073709551616.0); ASSERT_TRUE(stringFromHugeCharCode1->GetCoreType()->IsUtf16()); - ASSERT_TRUE(coretypes::String::StringsAreEqual(expectedUncompressedString->GetCoreType(), - stringFromHugeCharCode1->GetCoreType())); + ASSERT_TRUE(coretypes::BaseString::StringsAreEqual(expectedUncompressedString->GetCoreType(), + stringFromHugeCharCode1->GetCoreType())); EtsString *stringFromHugeCharCodes2 = CreateNewStringFromCharCodes({-18446744073709551617.0}); ASSERT_TRUE(stringFromHugeCharCodes2->GetCoreType()->IsUtf16()); - ASSERT_TRUE(coretypes::String::StringsAreEqual(expectedUncompressedString->GetCoreType(), - stringFromHugeCharCodes2->GetCoreType())); + ASSERT_TRUE(coretypes::BaseString::StringsAreEqual(expectedUncompressedString->GetCoreType(), + stringFromHugeCharCodes2->GetCoreType())); EtsString *stringFromHugeCharCode2 = CreateNewStringFromCharCode(-18446744073709551617.0); ASSERT_TRUE(stringFromHugeCharCode2->GetCoreType()->IsUtf16()); - ASSERT_TRUE(coretypes::String::StringsAreEqual(expectedUncompressedString->GetCoreType(), - stringFromHugeCharCode2->GetCoreType())); + ASSERT_TRUE(coretypes::BaseString::StringsAreEqual(expectedUncompressedString->GetCoreType(), + 
stringFromHugeCharCode2->GetCoreType())); EtsString *stringFromHugeCharCodes3 = CreateNewStringFromCharCodes({-9007199254740992.0}); ASSERT_TRUE(stringFromHugeCharCodes3->GetCoreType()->IsUtf16()); - ASSERT_TRUE(coretypes::String::StringsAreEqual(expectedUncompressedString->GetCoreType(), - stringFromHugeCharCodes3->GetCoreType())); + ASSERT_TRUE(coretypes::BaseString::StringsAreEqual(expectedUncompressedString->GetCoreType(), + stringFromHugeCharCodes3->GetCoreType())); EtsString *stringFromHugeCharCode3 = CreateNewStringFromCharCode(-9007199254740992.0); ASSERT_TRUE(stringFromHugeCharCode3->GetCoreType()->IsUtf16()); - ASSERT_TRUE(coretypes::String::StringsAreEqual(expectedUncompressedString->GetCoreType(), - stringFromHugeCharCode3->GetCoreType())); + ASSERT_TRUE(coretypes::BaseString::StringsAreEqual(expectedUncompressedString->GetCoreType(), + stringFromHugeCharCode3->GetCoreType())); } TEST_F(EtsStringFromCharCodeTest, CreateNewStringFromHugeCharCodes) @@ -318,8 +318,8 @@ TEST_F(EtsStringFromCharCodeTest, CreateNewStringFromHugeCharCodes) 9007199254740991.0, -9007199254740991.0}; EtsString *stringFromHugeCharCodes = CreateNewStringFromCharCodes(charCodes); ASSERT_TRUE(stringFromHugeCharCodes->GetCoreType()->IsUtf16()); - ASSERT_TRUE(coretypes::String::StringsAreEqual(expectedUncompressedString->GetCoreType(), - stringFromHugeCharCodes->GetCoreType())); + ASSERT_TRUE(coretypes::BaseString::StringsAreEqual(expectedUncompressedString->GetCoreType(), + stringFromHugeCharCodes->GetCoreType())); } } // namespace ark::ets::test diff --git a/static_core/plugins/ets/tests/runtime/types/ets_string_test.cpp b/static_core/plugins/ets/tests/runtime/types/ets_string_test.cpp index e88141d9ffa55e57af7cddfb37ec17ee6d72e87a..abd7a9596f7024a463ec7c28a86d2fc5974d41aa 100644 --- a/static_core/plugins/ets/tests/runtime/types/ets_string_test.cpp +++ b/static_core/plugins/ets/tests/runtime/types/ets_string_test.cpp @@ -74,10 +74,10 @@ TEST_F(EtsStringTest, CreateFromUtf16) 
EtsString *firstEtsString = EtsString::CreateFromUtf16(data.data(), data.size()); - auto *firstString = reinterpret_cast(firstEtsString); + auto *firstString = reinterpret_cast(firstEtsString); auto *secondString = reinterpret_cast(data.data()); - ASSERT_TRUE(coretypes::String::StringsAreEqualUtf16(firstString, secondString, data.size())); + ASSERT_TRUE(coretypes::BaseString::StringsAreEqualUtf16(firstString, secondString, data.size())); } TEST_F(EtsStringTest, CreateFromMUtf8) @@ -87,10 +87,10 @@ TEST_F(EtsStringTest, CreateFromMUtf8) EtsString *firstEtsString = EtsString::CreateFromMUtf8(mutf8Data); - auto *firstString = reinterpret_cast(firstEtsString); + auto *firstString = reinterpret_cast(firstEtsString); auto *secondString = reinterpret_cast(data.data()); - ASSERT_TRUE(coretypes::String::StringsAreEqualMUtf8( + ASSERT_TRUE(coretypes::BaseString::StringsAreEqualMUtf8( firstString, secondString, data.size() - 1)); // need to subtract 1 'cause of 0 in the end of Mutf8 string } @@ -102,10 +102,10 @@ TEST_F(EtsStringTest, CreateFromMUtf8WithLenArg) EtsString *firstEtsString = EtsString::CreateFromMUtf8( mutf8Data, data.size() - 1); // need to subtract 1 'cause of 0 in the end of Mutf8 string - auto *firstString = reinterpret_cast(firstEtsString); + auto *firstString = reinterpret_cast(firstEtsString); auto *secondString = reinterpret_cast(data.data()); - ASSERT_TRUE(coretypes::String::StringsAreEqualMUtf8( + ASSERT_TRUE(coretypes::BaseString::StringsAreEqualMUtf8( firstString, secondString, data.size() - 1)); // need to subtract 1 'cause of 0 in the end of Mutf8 string } @@ -145,7 +145,7 @@ TEST_F(EtsStringTest, CreateFromUtf8) size_t thirdStringLength = firstStringLength / 2; EtsString *thirdEtsString = EtsString::CreateFromUtf8(utf8Data, thirdStringLength); - auto *thirdString = reinterpret_cast(thirdEtsString); + auto *thirdString = reinterpret_cast(thirdEtsString); // Utf8 format, no need to have \0 at the end, so check half string 
ASSERT_TRUE(utf::IsEqual({firstEtsString->GetDataMUtf8(), thirdStringLength}, @@ -171,8 +171,8 @@ TEST_F(EtsStringTest, CreateNewStringFromChars) EtsString *stringFromCharArray = EtsString::CreateNewStringFromChars(beginOffset, length, charArray); - ASSERT_TRUE(coretypes::String::StringsAreEqual(reinterpret_cast(expectedString), - reinterpret_cast(stringFromCharArray))); + ASSERT_TRUE(coretypes::BaseString::StringsAreEqual(reinterpret_cast(expectedString), + reinterpret_cast(stringFromCharArray))); } TEST_F(EtsStringTest, CreateNewStringFromString) @@ -183,11 +183,49 @@ TEST_F(EtsStringTest, CreateNewStringFromString) EtsString *string2 = EtsString::CreateFromUtf16(data.data(), data.size() - 1); EtsString *createdString = EtsString::CreateNewStringFromString(string1); - ASSERT_TRUE(coretypes::String::StringsAreEqual(reinterpret_cast(string1), - reinterpret_cast(createdString))); + ASSERT_TRUE(coretypes::BaseString::StringsAreEqual(reinterpret_cast(string1), + reinterpret_cast(createdString))); - ASSERT_FALSE(coretypes::String::StringsAreEqual(reinterpret_cast(string2), - reinterpret_cast(createdString))); + ASSERT_FALSE(coretypes::BaseString::StringsAreEqual(reinterpret_cast(string2), + reinterpret_cast(createdString))); +} + +TEST_F(EtsStringTest, CreateNewStringFromConcatString) +{ + EtsString *str1 = EtsString::CreateFromMUtf8("canjie_language"); + EtsString *str2 = EtsString::CreateFromMUtf8("arkts_language"); + EtsString *concat = EtsString::Concat(str1, str2); + EtsString *createdString = EtsString::CreateNewStringFromString(concat); + EtsString *equal = EtsString::CreateFromMUtf8("canjie_languagearkts_language"); + + ASSERT_TRUE(coretypes::BaseString::StringsAreEqual(reinterpret_cast(concat), + reinterpret_cast(createdString))); + + ASSERT_TRUE(coretypes::BaseString::StringsAreEqual(reinterpret_cast(equal), + reinterpret_cast(createdString))); + + ASSERT_FALSE(coretypes::BaseString::StringsAreEqual(reinterpret_cast(str1), + reinterpret_cast(createdString))); 
+ + ASSERT_FALSE(coretypes::BaseString::StringsAreEqual(reinterpret_cast(str2), + reinterpret_cast(createdString))); +} + +TEST_F(EtsStringTest, CreateNewStringFromSlicedString) +{ + EtsString *string1 = EtsString::CreateFromMUtf8("canjie_languagearkts_language"); + EtsString *slicedStr = EtsString::FastSubString(string1, 0, 17); + EtsString *createdString = EtsString::CreateNewStringFromString(slicedStr); + EtsString *equal = EtsString::CreateFromMUtf8("canjie_languagear"); + + ASSERT_TRUE(coretypes::BaseString::StringsAreEqual(reinterpret_cast(slicedStr), + reinterpret_cast(createdString))); + + ASSERT_TRUE(coretypes::BaseString::StringsAreEqual(reinterpret_cast(equal), + reinterpret_cast(createdString))); + + ASSERT_FALSE(coretypes::BaseString::StringsAreEqual(reinterpret_cast(string1), + reinterpret_cast(createdString))); } TEST_F(EtsStringTest, CreateNewEmptyString) @@ -197,10 +235,10 @@ TEST_F(EtsStringTest, CreateNewEmptyString) EtsString *str2 = EtsString::CreateFromUtf16(&data, 1); EtsString *str3 = EtsString::CreateNewEmptyString(); - ASSERT_TRUE(coretypes::String::StringsAreEqual(reinterpret_cast(str1), - reinterpret_cast(str3))); - ASSERT_FALSE(coretypes::String::StringsAreEqual(reinterpret_cast(str2), - reinterpret_cast(str3))); + ASSERT_TRUE(coretypes::BaseString::StringsAreEqual(reinterpret_cast(str1), + reinterpret_cast(str3))); + ASSERT_FALSE(coretypes::BaseString::StringsAreEqual(reinterpret_cast(str2), + reinterpret_cast(str3))); } TEST_F(EtsStringTest, Compare) @@ -538,8 +576,8 @@ TEST_F(EtsStringTest, GetCoreType) EtsString *str = EtsString::CreateFromMUtf8(data.data()); EtsString *emptyStr = EtsString::CreateNewEmptyString(); - ASSERT_EQ(reinterpret_cast(str), str->GetCoreType()); - ASSERT_EQ(reinterpret_cast(emptyStr), emptyStr->GetCoreType()); + ASSERT_EQ(reinterpret_cast(str), str->GetCoreType()); + ASSERT_EQ(reinterpret_cast(emptyStr), emptyStr->GetCoreType()); } TEST_F(EtsStringTest, FromEtsObject) diff --git 
a/static_core/runtime/BUILD.gn b/static_core/runtime/BUILD.gn index 82129186368eed3c2b9256fe26ecfbe85fc76290..1ad7ce89d349611e3a0241ebb2b8f1a322e66402 100644 --- a/static_core/runtime/BUILD.gn +++ b/static_core/runtime/BUILD.gn @@ -42,6 +42,7 @@ config("arkruntime_public_config") { ] include_dirs += platform_include_dirs + include_dirs += [ "$ark_root/../common_interfaces" ] if (ark_use_cmc_gc) { include_dirs += [ "$ark_root/../common_interfaces" ] @@ -153,6 +154,7 @@ ohos_source_set("libarkruntime_set_static") { "compiler_thread_pool_worker.cpp", "coretypes/array.cpp", "coretypes/string.cpp", + "coretypes/base_string.cpp", "coroutines/coroutine.cpp", "coroutines/coroutine_events.cpp", "coroutines/coroutine_manager.cpp", diff --git a/static_core/runtime/CMakeLists.txt b/static_core/runtime/CMakeLists.txt index 000666b029191064e3de4457cce0ce30c375d2c3..c79e6429d964429bab5db29d41fbe325f5604721 100644 --- a/static_core/runtime/CMakeLists.txt +++ b/static_core/runtime/CMakeLists.txt @@ -49,6 +49,7 @@ set(SOURCES interpreter/runtime_interface.cpp intrinsics.cpp coretypes/string.cpp + coretypes/base_string.cpp coretypes/array.cpp class.cpp class_helper.cpp @@ -360,6 +361,7 @@ panda_target_include_directories(arkruntime_interpreter_impl PUBLIC ${PANDA_BINARY_ROOT}/libpandabase/generated PUBLIC ${VERIFIER_INCLUDE_DIR} PUBLIC ${PANDA_BINARY_ROOT} + PUBLIC ${PANDA_ROOT}/../common_interfaces ) panda_target_include_directories(arkruntime_interpreter_impl @@ -602,6 +604,7 @@ panda_target_include_directories(arkruntime_obj PUBLIC ${PANDA_BINARY_ROOT}/runtime/asm_defines PUBLIC ${GEN_INCLUDE_DIR} PUBLIC ${VERIFIER_INCLUDE_DIR} + PUBLIC ${PANDA_ROOT}/../common_interfaces ) panda_target_link_libraries(arkruntime_obj arkbase arkfile arkcompiler dprof arkaotmanager arktarget_options) @@ -676,6 +679,7 @@ if(PANDA_WITH_TESTS) PUBLIC ${PANDA_BINARY_ROOT} PUBLIC ${PANDA_BINARY_ROOT}/libpandabase/generated PUBLIC ${VERIFIER_INCLUDE_DIR} + PUBLIC ${PANDA_ROOT}/../common_interfaces ) 
panda_target_include_directories(arkruntime_test_interpreter_impl diff --git a/static_core/runtime/asm_defines/CMakeLists.txt b/static_core/runtime/asm_defines/CMakeLists.txt index 417ada7219541942529564720fb466ae75135713..77b8e51b3665d47c707174d8575d273f5f536dd6 100644 --- a/static_core/runtime/asm_defines/CMakeLists.txt +++ b/static_core/runtime/asm_defines/CMakeLists.txt @@ -54,6 +54,7 @@ panda_target_include_directories(asm_defines PUBLIC ${PANDA_ROOT}/libpandafile PUBLIC ${PANDA_BINARY_ROOT}/libpandafile/include PUBLIC ${PANDA_BINARY_ROOT}/libpandabase/generated + PUBLIC ${PANDA_ROOT}/../common_interfaces ) panda_target_include_directories(asm_defines SYSTEM PUBLIC diff --git a/static_core/runtime/class_initializer.cpp b/static_core/runtime/class_initializer.cpp index d911874ee715cb6ed7b0f09687338ecec96a988c..a796affa7460b49d6999b5dbcac94f5acaf10035 100644 --- a/static_core/runtime/class_initializer.cpp +++ b/static_core/runtime/class_initializer.cpp @@ -151,7 +151,7 @@ static bool IsBadSuperClass(const Class *base, ManagedThread *thread, const Clas return true; } - if (base->IsFinal()) { + if (!base->IsExtensible() && !klass->IsStringClass()) { ThrowVerifyError(thread, klass); return true; } diff --git a/static_core/runtime/core/core_class_linker_extension.cpp b/static_core/runtime/core/core_class_linker_extension.cpp index 0a2588a17fb168747b27d40a144ecfee5704fd32..1cec89888ad81185ff711b4c34751273532e4747 100644 --- a/static_core/runtime/core/core_class_linker_extension.cpp +++ b/static_core/runtime/core/core_class_linker_extension.cpp @@ -15,6 +15,7 @@ #include "runtime/core/core_class_linker_extension.h" +#include "include/class_root.h" #include "runtime/include/coretypes/class.h" #include "runtime/include/exceptions.h" #include "runtime/include/panda_vm.h" @@ -112,15 +113,31 @@ bool CoreClassLinkerExtension::InitializeImpl(bool compressedStringEnabled) } classClass->SetBase(objClass); + // auto *stringClass = CreateClass(ctx.GetStringClassDescriptor(), + 
// GetClassVTableSize(ClassRoot::BASE_STRING), + // GetClassIMTSize(ClassRoot::BASE_STRING), GetClassSize(ClassRoot::BASE_STRING)); + auto *stringClass = CreateClass(ctx.GetStringClassDescriptor(), GetClassVTableSize(ClassRoot::STRING), GetClassIMTSize(ClassRoot::STRING), GetClassSize(ClassRoot::STRING)); + + // auto *lineStringClass = CreateClass(utf::CStringAsMutf8("Lpanda/LineString;"), + // GetClassVTableSize(ClassRoot::STRING), + // GetClassIMTSize(ClassRoot::STRING), GetClassSize(ClassRoot::STRING)); stringClass->SetBase(objClass); stringClass->SetStringClass(); + stringClass->SetLineStringClass(); coretypes::String::SetCompressedStringsEnabled(compressedStringEnabled); stringClass->SetState(Class::State::LOADED); stringClass->SetLoadContext(GetBootContext()); GetClassLinker()->AddClassRoot(ClassRoot::STRING, stringClass); + // lineStringClass->SetLoadContext(GetBootContext()); + // lineStringClass->SetBase(objClass); + // lineStringClass->SetLineStringClass(); + // lineStringClass->SetStringClass(); + // lineStringClass->SetState(Class::State::LOADED); + // GetClassLinker()->AddClassRoot(ClassRoot::STRING, lineStringClass); + InitializeArrayClassRoot(ClassRoot::ARRAY_CLASS, ClassRoot::CLASS, utf::Mutf8AsCString(ctx.GetClassArrayClassDescriptor())); @@ -186,6 +203,7 @@ size_t CoreClassLinkerExtension::GetClassVTableSize(ClassRoot root) return GetArrayClassVTableSize(); case ClassRoot::OBJECT: case ClassRoot::CLASS: + // case ClassRoot::BASE_STRING: case ClassRoot::STRING: return 0; default: { @@ -231,6 +249,7 @@ size_t CoreClassLinkerExtension::GetClassIMTSize(ClassRoot root) return GetArrayClassIMTSize(); case ClassRoot::OBJECT: case ClassRoot::CLASS: + // case ClassRoot::BASE_STRING: case ClassRoot::STRING: return 0; default: { @@ -276,6 +295,7 @@ size_t CoreClassLinkerExtension::GetClassSize(ClassRoot root) return GetArrayClassSize(); case ClassRoot::OBJECT: case ClassRoot::CLASS: + // case ClassRoot::BASE_STRING: case ClassRoot::STRING: return 
Class::ComputeClassSize(GetClassVTableSize(root), GetClassIMTSize(root), 0, 0, 0, 0, 0, 0); default: { diff --git a/static_core/runtime/coretypes/base_string.cpp b/static_core/runtime/coretypes/base_string.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f5672dbd6413edf4303dc8129521dfe09d689fbb --- /dev/null +++ b/static_core/runtime/coretypes/base_string.cpp @@ -0,0 +1,852 @@ +/** + * Copyright (c) 2021-2025 Huawei Device Co., Ltd. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +#include "include/object_header.h" +#include "objects/base_object.h" +#include "objects/string/base_string-inl.h" + +#include "include/coretypes/string.h" +#include "libpandabase/utils/utf.h" +#include "libpandabase/utils/hash.h" +#include "libpandabase/utils/span.h" +#include "objects/string/base_string.h" +#include "runtime/arch/memory_helpers.h" +#include "runtime/include/coretypes/array.h" +#include "runtime/include/runtime.h" +#include "runtime/handle_base-inl.h" +#include "runtime/include/panda_vm.h" +#include "runtime/include/coretypes/base_string.h" +#include "runtime/include/coretypes/string.h" + +namespace ark::coretypes { + +/* static */ +BaseString *BaseString::CreateFromString(BaseString *str, const LanguageContext &ctx, PandaVM *vm) +{ + ASSERT(str != nullptr); + // allocator may trig gc and move str, need to hold it + auto thread = ManagedThread::GetCurrent(); + [[maybe_unused]] HandleScope scope(thread); + VMHandle strHandle(thread, str); + auto string = AllocLineStringObject(strHandle->GetLength(), !strHandle->IsUtf16(), ctx, vm); + if (string == nullptr) { + return nullptr; + } + + // retrive str after gc + str = strHandle.GetPtr(); + + // After memcpy we should have a full barrier, so this writes should happen-before barrier + TSAN_ANNOTATE_IGNORE_WRITES_BEGIN(); + string->WriteData(str, 0, string->GetLength(), str->GetLength()); + TSAN_ANNOTATE_IGNORE_WRITES_END(); + // String is supposed to be a constant object, so all its data should be visible by all threads + arch::FullMemoryBarrier(); + return string; +} + +Array *BaseString::GetChars(BaseString *src, uint32_t start, uint32_t utf16Length, const LanguageContext &ctx) +{ + // allocator may trig gc and move 'src', need to hold it + auto thread = ManagedThread::GetCurrent(); + [[maybe_unused]] HandleScope scope(thread); + VMHandle strHandle(thread, src); + auto *klass = 
Runtime::GetCurrent()->GetClassLinker()->GetExtension(ctx)->GetClassRoot(ClassRoot::ARRAY_U16); + Array *array = Array::Create(klass, utf16Length); + if (array == nullptr) { + return nullptr; + } + + for (uint32_t i = 0; i < utf16Length; i++) { + array->Set(i, strHandle->At(start + i)); + } + return array; +} + +/* static */ +BaseString *BaseString::AllocLineStringObject(size_t length, bool compressed, const LanguageContext &ctx, PandaVM *vm, + bool movable, bool pinned) +{ + ASSERT(vm != nullptr); + auto *thread = ManagedThread::GetCurrent(); + auto *stringClass = Runtime::GetCurrent()->GetClassLinker()->GetExtension(ctx)->GetClassRoot(ClassRoot::STRING); + size_t size = compressed ? panda::LineString::ComputeSizeUtf8(length) : panda::LineString::ComputeSizeUtf16(length); + panda::BaseString *string = + movable + ? reinterpret_cast( + vm->GetHeapManager()->AllocateObject(stringClass, size, DEFAULT_ALIGNMENT, thread, + mem::ObjectAllocatorBase::ObjMemInitPolicy::NO_INIT, pinned)) + : reinterpret_cast(vm->GetHeapManager()->AllocateNonMovableObject( + // CC-OFFNXT(G.FMT.06) project code style + stringClass, size, DEFAULT_ALIGNMENT, thread, mem::ObjectAllocatorBase::ObjMemInitPolicy::NO_INIT)); + if (string != nullptr) { + // After setting length we should have a full barrier, so this write should happens-before barrier + TSAN_ANNOTATE_IGNORE_WRITES_BEGIN(); + string->InitLengthAndFlags(length, compressed); + string->SetRawHashcode(0); + TSAN_ANNOTATE_IGNORE_WRITES_END(); + // Witout full memory barrier it is possible that architectures with weak memory order can try fetching string + // legth before it's set + arch::FullMemoryBarrier(); + } + return BaseString::Cast(string); +} + +/* static */ +BaseString *FlatStringInfo::SlowFlatten(VMHandle &str, const LanguageContext &ctx) +{ + ASSERT(str->IsSlicedString() || str->IsTreeString()); + PandaVM *vm = Runtime::GetCurrent()->GetPandaVM(); + + uint32_t length = str->GetLength(); + bool compressed = str->IsUtf8(); + + 
BaseString *result = BaseString::AllocLineStringObject(length, compressed, ctx, vm); + if (result == nullptr) { + return nullptr; + } + + auto thread = ManagedThread::GetCurrent(); + VMHandle resultHandle(thread, result); + auto readBarrier = [](void *obj, size_t offset) -> panda::BaseString * { + return reinterpret_cast(ObjectAccessor::GetObject(const_cast(obj), offset)); + }; + + if (compressed) { + panda::BaseString::WriteToFlat(std::move(readBarrier), str->ToBaseString(), resultHandle->GetDataUtf8Writable(), + length); + } else { + panda::BaseString::WriteToFlat(std::move(readBarrier), str->ToBaseString(), + resultHandle->GetDataUtf16Writable(), length); + } + return resultHandle.GetPtr(); +} + +/* static */ +BaseString *BaseString::FastSubUtf8String(BaseString *src, uint32_t start, uint32_t length, const LanguageContext &ctx) +{ + PandaVM *vm = Runtime::GetCurrent()->GetPandaVM(); + VMHandle srcHandle(ManagedThread::GetCurrent(), src); + + // 1. alloc dest line string + auto lineStr = AllocLineStringObject(length, true, ctx, vm); + if (lineStr == nullptr) { + return nullptr; + } + VMHandle lineStrHandle(ManagedThread::GetCurrent(), lineStr); + + // 2. flatten src string + FlatStringInfo srcFlat = FlatStringInfo::FlattenAllString(srcHandle, ctx); + + // 3. copy it + panda::common::Span dest(lineStrHandle->GetDataUtf8Writable(), length); + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + panda::common::Span source(srcFlat.GetDataUtf8() + start, length); + panda::BaseString::MemCopyChars(dest, length, source, length); + return lineStrHandle.GetPtr(); +} + +/* static */ +BaseString *BaseString::FastSubUtf16String(BaseString *src, uint32_t start, uint32_t length, const LanguageContext &ctx) +{ + // 1. 
judge can compressed or not + VMHandle srcHandle(ManagedThread::GetCurrent(), src); + FlatStringInfo srcFlat = FlatStringInfo::FlattenAllString(srcHandle, ctx); + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + bool canBeCompressed = panda::BaseString::CanBeCompressed(srcFlat.GetDataUtf16() + start, length); + + // 2. alloc dest line string + PandaVM *vm = Runtime::GetCurrent()->GetPandaVM(); + auto lineStr = AllocLineStringObject(length, canBeCompressed, ctx, vm); + if (lineStr == nullptr) { + return nullptr; + } + VMHandle lineStrHandle(ManagedThread::GetCurrent(), lineStr); + + // maybe happen GC,so get srcFlat again + srcFlat = FlatStringInfo::FlattenAllString(srcHandle, ctx); + + if (canBeCompressed) { + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + panda::BaseString::CopyChars(lineStrHandle->GetDataUtf8Writable(), srcFlat.GetDataUtf16() + start, length); + } else { + uint32_t len = length * (sizeof(uint16_t) / sizeof(uint8_t)); + panda::common::Span dest(lineStrHandle->GetDataUtf16Writable(), length); + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + panda::common::Span source(srcFlat.GetDataUtf16() + start, length); + panda::BaseString::MemCopyChars(dest, len, source, len); + } + return lineStrHandle.GetPtr(); +} + +/* static */ +BaseString *BaseString::FastSubString(BaseString *src, uint32_t start, uint32_t length, const LanguageContext &ctx, + PandaVM *vm) +{ + if (!(Runtime::GetOptions().IsUseAllStrings())) { + return BaseString::Cast(String::FastSubString(BaseString::Cast(src), start, length, ctx, vm)); + } + + ASSERT(src != nullptr); + ASSERT((start + length) <= src->GetLength()); + [[maybe_unused]] HandleScope scope(ManagedThread::GetCurrent()); + + if (length == 0) { + return reinterpret_cast(String::CreateEmptyString(ctx, vm)); + } + + if (start == 0 && length == src->GetLength()) { + return src; + } + + // no need to make sliced string if too short + if (length < 
panda::SlicedString::MIN_SLICED_STRING_LENGTH) { + if (src->IsUtf8()) { + return FastSubUtf8String(src, start, length, ctx); + } + return FastSubUtf16String(src, start, length, ctx); + } + + VMHandle srcHandle(ManagedThread::GetCurrent(), src); + // src is utf16 , substr if all ASCII chars , no need to slice it + if (src->IsUtf16()) { + FlatStringInfo srcFlat = FlatStringInfo::FlattenAllString(srcHandle, ctx); + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + bool canBeCompressed = panda::BaseString::CanBeCompressed(srcFlat.GetDataUtf16() + start, length); + if (canBeCompressed) { + auto lineStr = AllocLineStringObject(length, canBeCompressed, ctx, vm); + if (lineStr == nullptr) { + return nullptr; + } + VMHandle lineStrHandle(ManagedThread::GetCurrent(), lineStr); + + // maybe happen gc , get srcFlat again + srcFlat = FlatStringInfo::FlattenAllString(srcHandle, ctx); + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + panda::BaseString::CopyChars(lineStrHandle->GetDataUtf8Writable(), srcFlat.GetDataUtf16() + start, length); + return lineStrHandle.GetPtr(); + } + } + return GetSlicedString(srcHandle.GetPtr(), start, length, ctx, vm); +} + +/* static */ +BaseString *BaseString::Concat(BaseString *str1, BaseString *str2, const LanguageContext &ctx, PandaVM *vm) +{ + if (!(Runtime::GetOptions().IsUseAllStrings())) { + return BaseString::Cast(String::Concat(BaseString::Cast(str1), BaseString::Cast(str2), ctx, vm)); + } + + ASSERT(str1 != nullptr); + ASSERT(str2 != nullptr); + // allocator may trig gc and move src, need to hold it + auto thread = ManagedThread::GetCurrent(); + [[maybe_unused]] HandleScope scope(thread); + VMHandle str1Handle(thread, str1); + VMHandle str2Handle(thread, str2); + + uint32_t length1 = str1Handle->GetLength(); + uint32_t length2 = str2Handle->GetLength(); + uint32_t newLength = length1 + length2; + if (newLength == 0) { + return reinterpret_cast(String::CreateEmptyString(ctx, vm)); + } + if (length1 
== 0) { + return str2Handle.GetPtr(); + } + if (length2 == 0) { + return str1Handle.GetPtr(); + } + + // concat with tree string if too long + bool compressed = String::GetCompressedStringsEnabled() && (str1Handle->IsUtf8()) && (str2Handle->IsUtf8()); + if (newLength >= panda::TreeString::MIN_TREE_STRING_LENGTH) { + return CreateTreeString(str1Handle.GetPtr(), str2Handle.GetPtr(), newLength, compressed, ctx, vm); + } + + // concat with line string if short + ASSERT(str1Handle->IsLineString()); + ASSERT(str2Handle->IsLineString()); + return ConcatLineString(str1Handle, str2Handle, ctx, vm); +} + +/* static */ +BaseString *BaseString::ConcatLineString(VMHandle &str1Handle, VMHandle &str2Handle, + const LanguageContext &ctx, PandaVM *vm) +{ + uint32_t length1 = str1Handle->GetLength(); + uint32_t length2 = str2Handle->GetLength(); + uint32_t newLength = length1 + length2; + bool compressed = String::GetCompressedStringsEnabled() && (str1Handle->IsUtf8()) && (str2Handle->IsUtf8()); + + auto newString = AllocLineStringObject(newLength, compressed, ctx, vm); + if (UNLIKELY(newString == nullptr)) { + return nullptr; + } + VMHandle newStringHandle(ManagedThread::GetCurrent(), newString); + // After copying we should have a full barrier, so this writes should happen-before barrier + TSAN_ANNOTATE_IGNORE_WRITES_BEGIN(); + if (compressed) { + // copy left part + panda::common::Span sp(newStringHandle->GetDataUtf8Writable(), newLength); + panda::common::Span src1(str1Handle->GetDataUtf8(), length1); + panda::BaseString::MemCopyChars(sp, newLength, src1, length1); + // copy right part + sp = sp.SubSpan(length1); + panda::common::Span src2(str2Handle->GetDataUtf8(), length2); + panda::BaseString::MemCopyChars(sp, length2, src2, length2); + } else { + // copy left part + panda::common::Span sp(newStringHandle->GetDataUtf16Writable(), newLength); + if (str1Handle->IsUtf8()) { + panda::BaseString::CopyChars(sp.data(), str1Handle->GetDataUtf8(), length1); + } else { + 
panda::common::Span src1(str1Handle->GetDataUtf16(), length1); + panda::BaseString::MemCopyChars(sp, newLength << 1U, src1, length1 << 1U); + } + // copy right part + sp = sp.SubSpan(length1); + if (str2Handle->IsUtf8()) { + panda::BaseString::CopyChars(sp.data(), str2Handle->GetDataUtf8(), length2); + } else { + panda::common::Span src2(str2Handle->GetDataUtf16(), length2); + panda::BaseString::MemCopyChars(sp, length2 << 1U, src2, length2 << 1U); + } + } + TSAN_ANNOTATE_IGNORE_WRITES_END(); + // String is supposed to be a constant object, so all its data should be visible by all threads + arch::FullMemoryBarrier(); + return newStringHandle.GetPtr(); +} + +/* static */ +BaseString *BaseString::GetSlicedString(BaseString *src, uint32_t start, uint32_t length, const LanguageContext &ctx, + PandaVM *vm, bool movable, bool pinned) +{ + ASSERT((start + length) <= src->GetLength()); + VMHandle srcHandle(ManagedThread::GetCurrent(), src); + + auto baseStr = AllocSlicedStringObject(ctx, vm, movable, pinned); + if (baseStr == nullptr) { + return nullptr; + } + VMHandle baseStrHandle(ManagedThread::GetCurrent(), baseStr); + + FlatStringInfo srcFlat = FlatStringInfo::FlattenAllString(srcHandle, ctx); + auto slicedStr = panda::SlicedString::Cast(baseStrHandle->ToBaseString()); + slicedStr->InitLengthAndFlags(length, srcFlat.GetString()->IsMUtf8()); + slicedStr->SetRawHashcode(0); + auto writeBarrier = [](void *obj, size_t offset, panda::BaseObject *str) { + ObjectAccessor::SetObject(obj, offset, reinterpret_cast(str)); + }; + + slicedStr->SetParent(std::move(writeBarrier), srcFlat.GetString()->ToBaseString()); + slicedStr->SetStartIndex(start + srcFlat.GetStartIndex()); + return BaseString::Cast(slicedStr); +} + +/** + * @brief Alloc a SlicedString + * @param [in]movable : allocate with AllocateObject when true, with AllocateNonMovableObject otherwise + * @param [in]pinned : forwarded to AllocateObject, not used on the non-movable path + * @return The SlicedString created with its raw hashcode reset to 0, or nullptr if the allocation returned nullptr + */ +BaseString *BaseString::AllocSlicedStringObject(const LanguageContext &ctx, PandaVM *vm, bool movable, bool pinned) +{ + ASSERT(vm != nullptr); + auto *thread = ManagedThread::GetCurrent(); + auto *slicedStrCls = +
Runtime::GetCurrent()->GetClassLinker()->GetExtension(ctx)->GetClassRoot(ClassRoot::SLICED_STRING); + size_t size = panda::SlicedString::SIZE; + panda::BaseString *slicedStr = nullptr; + if (movable) { + slicedStr = reinterpret_cast( + vm->GetHeapManager()->AllocateObject(slicedStrCls, size, DEFAULT_ALIGNMENT, thread, + mem::ObjectAllocatorBase::ObjMemInitPolicy::REQUIRE_INIT, pinned)); + } else { + slicedStr = reinterpret_cast(vm->GetHeapManager()->AllocateNonMovableObject( + slicedStrCls, size, DEFAULT_ALIGNMENT, thread, mem::ObjectAllocatorBase::ObjMemInitPolicy::REQUIRE_INIT)); + } + + // GetSlicedString checks this result for nullptr, so a failed allocation must not be dereferenced here + if (UNLIKELY(slicedStr == nullptr)) { + return nullptr; + } + slicedStr->SetRawHashcode(0); + return BaseString::Cast(slicedStr); +} + +/** + * @brief Alloc a TreeString + * @param [in]movable : allocate with AllocateObject when true, with AllocateNonMovableObject otherwise + * @param [in]pinned : forwarded to AllocateObject, not used on the non-movable path + * @return The TreeString created with its raw hashcode reset to 0, or nullptr if the allocation returned nullptr + */ +BaseString *BaseString::AllocTreeStringObject(const LanguageContext &ctx, PandaVM *vm, bool movable, bool pinned) +{ + ASSERT(vm != nullptr); + auto *thread = ManagedThread::GetCurrent(); + auto *treeStrCls = Runtime::GetCurrent()->GetClassLinker()->GetExtension(ctx)->GetClassRoot(ClassRoot::TREE_STRING); + size_t size = panda::TreeString::SIZE; + panda::BaseString *treeStr = nullptr; + if (movable) { + treeStr = reinterpret_cast( + vm->GetHeapManager()->AllocateObject(treeStrCls, size, DEFAULT_ALIGNMENT, thread, + mem::ObjectAllocatorBase::ObjMemInitPolicy::REQUIRE_INIT, pinned)); + } else { + treeStr = reinterpret_cast(vm->GetHeapManager()->AllocateNonMovableObject( + treeStrCls, size, DEFAULT_ALIGNMENT, thread, mem::ObjectAllocatorBase::ObjMemInitPolicy::REQUIRE_INIT)); + } + // CreateTreeString checks this result for nullptr, so a failed allocation must not be dereferenced here + if (UNLIKELY(treeStr == nullptr)) { + return nullptr; + } + treeStr->SetRawHashcode(0); + return BaseString::Cast(treeStr); +} + +BaseString *BaseString::CreateTreeString(BaseString *left, BaseString *right, uint32_t length, bool compressed, + const LanguageContext &ctx, PandaVM *vm, bool movable, bool pinned) +{ + auto thread = ManagedThread::GetCurrent(); + VMHandle leftHandle(thread, left); + VMHandle rightHandle(thread, right); + + auto baseStr = AllocTreeStringObject(ctx, vm, movable, pinned); + if (baseStr == 
nullptr) { + return nullptr; + } + VMHandle baseStrHandle(thread, baseStr); + auto treeStr = panda::TreeString::Cast(baseStrHandle->ToBaseString()); + + // After copying we should have a full barrier, so this writes should happen-before barrier + TSAN_ANNOTATE_IGNORE_WRITES_BEGIN(); + + treeStr->InitLengthAndFlags(length, compressed); + treeStr->SetRawHashcode(0); + + auto writeBarrierLeft = [](void *obj, size_t offset, panda::BaseObject *str) { + ObjectAccessor::SetObject(obj, offset, reinterpret_cast(str)); + }; + treeStr->SetLeftSubString(std::move(writeBarrierLeft), leftHandle->ToBaseString()); + auto writeBarrierRight = [](void *obj, size_t offset, panda::BaseObject *str) { + ObjectAccessor::SetObject(obj, offset, reinterpret_cast(str)); + }; + treeStr->SetRightSubString(std::move(writeBarrierRight), rightHandle->ToBaseString()); + + TSAN_ANNOTATE_IGNORE_WRITES_END(); + // String is supposed to be a constant object, so all its data should be visible by all threads + arch::FullMemoryBarrier(); + return BaseString::Cast(treeStr); +} + +int32_t BaseString::IndexOf(BaseString *rhs, const LanguageContext &ctx, int32_t pos) +{ + auto thread = ManagedThread::GetCurrent(); + [[maybe_unused]] HandleScope scope(thread); + VMHandle receiver(thread, this); + VMHandle search(thread, rhs); + return BaseString::IndexOf(receiver, search, ctx, pos); +} + +int32_t BaseString::LastIndexOf(BaseString *rhs, const LanguageContext &ctx, int32_t pos) +{ + auto thread = ManagedThread::GetCurrent(); + [[maybe_unused]] HandleScope scope(thread); + VMHandle receiver(thread, this); + VMHandle search(thread, rhs); + return BaseString::LastIndexOf(receiver, search, ctx, pos); +} + +/* static */ +int32_t BaseString::IndexOf(VMHandle &receiver, VMHandle &search, const LanguageContext &ctx, + int pos) +{ + auto *lhstring = receiver.GetPtr(); + auto *rhstring = search.GetPtr(); + if (lhstring == nullptr || rhstring == nullptr) { + return -1; + } + int32_t lhsCount = 
static_cast(receiver.GetPtr()->GetLength()); // NOLINT(modernize-use-auto) + int32_t rhsCount = static_cast(search.GetPtr()->GetLength()); // NOLINT(modernize-use-auto) + + if (pos < 0) { + pos = 0; + } + + if (rhsCount == 0) { + return std::min(lhsCount, pos); + } + + int32_t max = lhsCount - rhsCount; + auto thread = ManagedThread::GetCurrent(); + [[maybe_unused]] HandleScope scope(thread); + FlatStringInfo lhs = FlatStringInfo::FlattenAllString(receiver, ctx); + VMHandle string(thread, lhs.GetString()); + FlatStringInfo rhs = FlatStringInfo::FlattenAllString(search, ctx); + lhs.SetString(string.GetPtr()); + + if (rhs.IsUtf8() && lhs.IsUtf8()) { + panda::common::Span lhsSp(lhs.GetDataUtf8(), lhsCount); + panda::common::Span rhsSp(rhs.GetDataUtf8(), rhsCount); + return panda::BaseString::IndexOf(lhsSp, rhsSp, pos, max); + } else if (rhs.IsUtf16() && lhs.IsUtf16()) { // NOLINT(readability-else-after-return) + panda::common::Span lhsSp(lhs.GetDataUtf16(), lhsCount); + panda::common::Span rhsSp(rhs.GetDataUtf16(), rhsCount); + return panda::BaseString::IndexOf(lhsSp, rhsSp, pos, max); + } else if (rhs.IsUtf16()) { + panda::common::Span lhsSp(lhs.GetDataUtf8(), lhsCount); + panda::common::Span rhsSp(rhs.GetDataUtf16(), rhsCount); + return panda::BaseString::IndexOf(lhsSp, rhsSp, pos, max); + } else { // NOLINT(readability-else-after-return) + panda::common::Span lhsSp(lhs.GetDataUtf16(), lhsCount); + panda::common::Span rhsSp(rhs.GetDataUtf8(), rhsCount); + return panda::BaseString::IndexOf(lhsSp, rhsSp, pos, max); + } +} + +/* static */ +int32_t BaseString::LastIndexOf(VMHandle &receiver, VMHandle &search, + const LanguageContext &ctx, int pos) +{ + auto *lhstring = receiver.GetPtr(); + auto *rhstring = search.GetPtr(); + if (lhstring == nullptr || rhstring == nullptr) { + return -1; + } + int32_t lhsCount = static_cast(receiver.GetPtr()->GetLength()); // NOLINT(modernize-use-auto) + int32_t rhsCount = static_cast(search.GetPtr()->GetLength()); // 
NOLINT(modernize-use-auto) + + int32_t max = lhsCount - rhsCount; + if (pos > max) { + pos = max; + } + + if (pos < 0) { + return -1; + } + + if (rhsCount == 0) { + return pos; + } + + auto thread = ManagedThread::GetCurrent(); + [[maybe_unused]] HandleScope scope(thread); + FlatStringInfo lhs = FlatStringInfo::FlattenAllString(receiver, ctx); + VMHandle string(thread, lhs.GetString()); + FlatStringInfo rhs = FlatStringInfo::FlattenAllString(search, ctx); + lhs.SetString(string.GetPtr()); + + if (rhs.IsUtf8() && lhs.IsUtf8()) { + panda::common::Span lhsSp(lhs.GetDataUtf8(), lhsCount); + panda::common::Span rhsSp(rhs.GetDataUtf8(), rhsCount); + return panda::BaseString::LastIndexOf(lhsSp, rhsSp, pos); + } else if (rhs.IsUtf16() && lhs.IsUtf16()) { // NOLINT(readability-else-after-return) + panda::common::Span lhsSp(lhs.GetDataUtf16(), lhsCount); + panda::common::Span rhsSp(rhs.GetDataUtf16(), rhsCount); + return panda::BaseString::LastIndexOf(lhsSp, rhsSp, pos); + } else if (rhs.IsUtf16()) { + panda::common::Span lhsSp(lhs.GetDataUtf8(), lhsCount); + panda::common::Span rhsSp(rhs.GetDataUtf16(), rhsCount); + return panda::BaseString::LastIndexOf(lhsSp, rhsSp, pos); + } else { // NOLINT(readability-else-after-return) + panda::common::Span lhsSp(lhs.GetDataUtf16(), lhsCount); + panda::common::Span rhsSp(rhs.GetDataUtf8(), rhsCount); + return panda::BaseString::LastIndexOf(lhsSp, rhsSp, pos); + } +} + +/* static */ +int32_t BaseString::Compare(VMHandle &left, VMHandle &right, const LanguageContext &ctx) +{ + if (left.GetPtr() == right.GetPtr()) { + return 0; + } + + auto thread = ManagedThread::GetCurrent(); + FlatStringInfo lflat = FlatStringInfo::FlattenAllString(left, ctx); + VMHandle string(thread, lflat.GetString()); + FlatStringInfo rflat = FlatStringInfo::FlattenAllString(right, ctx); + lflat.SetString(string.GetPtr()); + + int32_t lCount = static_cast(lflat.GetLength()); // NOLINT(modernize-use-auto) + int32_t rCount = static_cast(rflat.GetLength()); // 
NOLINT(modernize-use-auto) + + int32_t countDiff = lCount - rCount; + int32_t minCount = (countDiff < 0) ? lCount : rCount; + + if (lflat.IsUtf8()) { + // left utf8 , right utf8 + if (right.GetPtr()->IsMUtf8()) { + panda::common::Span lspan(lflat.GetDataUtf8(), lCount); + panda::common::Span rspan(rflat.GetDataUtf8(), rCount); + int32_t charDiff = panda::CompareStringSpan(lspan, rspan, minCount); + if (charDiff != 0) { + return charDiff; + } + + // left utf8 , right utf16 + } else { + panda::common::Span lspan(lflat.GetDataUtf8(), lCount); + panda::common::Span rspan(rflat.GetDataUtf16(), rCount); + int32_t charDiff = panda::CompareStringSpan(lspan, rspan, minCount); + if (charDiff != 0) { + return charDiff; + } + } + } else { + // left utf16 , right utf16 + if (right.GetPtr()->IsUtf16()) { + panda::common::Span lspan(lflat.GetDataUtf16(), lCount); + panda::common::Span rspan(rflat.GetDataUtf16(), rCount); + int32_t charDiff = panda::CompareStringSpan(lspan, rspan, minCount); + if (charDiff != 0) { + return charDiff; + } + // left utf16 , right utf8 + } else { + panda::common::Span lspan(lflat.GetDataUtf16(), lCount); + panda::common::Span rspan(rflat.GetDataUtf8(), rCount); + int32_t charDiff = panda::CompareStringSpan(lspan, rspan, minCount); + if (charDiff != 0) { + return charDiff; + } + } + } + return countDiff; +} + +int32_t BaseString::Compare(BaseString *rstr, const LanguageContext &ctx) +{ + if (this == rstr) { + return 0; + } + + auto thread = ManagedThread::GetCurrent(); + [[maybe_unused]] HandleScope scope(thread); + VMHandle leftHandle(thread, this); + VMHandle rightHandle(thread, rstr); + + return Compare(leftHandle, rightHandle, ctx); +} + +/* static */ +bool BaseString::CanBeCompressedUtf16(const uint16_t *utf16Data, uint32_t utf16Length, uint16_t non) +{ + bool isCompressed = true; + Span data(utf16Data, utf16Length); + for (uint32_t i = 0; i < utf16Length; i++) { + if (!panda::BaseString::IsASCIICharacter(data[i]) && data[i] != non) { + 
isCompressed = false; + break; + } + } + return isCompressed; +} + +/* static */ +bool BaseString::CanBeCompressedMUtf8(const uint8_t *mutf8Data, uint32_t mutf8Length, uint16_t non) +{ + bool isCompressed = true; + Span data(mutf8Data, mutf8Length); + for (uint32_t i = 0; i < mutf8Length; i++) { + if (!panda::BaseString::IsASCIICharacter(data[i]) && data[i] != non) { + isCompressed = false; + break; + } + } + return isCompressed; +} + +BaseString *BaseString::DoReplace(BaseString *src, uint16_t oldC, uint16_t newC, const LanguageContext &ctx, + PandaVM *vm) +{ + ASSERT(src != nullptr); + auto length = static_cast(src->GetLength()); + bool canBeCompressed = panda::BaseString::IsASCIICharacter(newC); + + // allocator may trig gc and move src, need to hold it + auto thread = ManagedThread::GetCurrent(); + [[maybe_unused]] HandleScope scope(thread); + VMHandle srcHandle(thread, src); + FlatStringInfo srcFlat = FlatStringInfo::FlattenAllString(srcHandle, ctx); + if (srcFlat.IsUtf16()) { + canBeCompressed = canBeCompressed && CanBeCompressedUtf16(srcFlat.GetDataUtf16(), length, oldC); + } else { + canBeCompressed = canBeCompressed && CanBeCompressedMUtf8(srcFlat.GetDataUtf8(), length, oldC); + } + + auto string = AllocLineStringObject(length, canBeCompressed, ctx, vm); + if (string == nullptr) { + return nullptr; + } + + ASSERT(string->GetHashcode() == 0); + + // After replacing we should have a full barrier, so this writes should happen-before barrier + TSAN_ANNOTATE_IGNORE_WRITES_BEGIN(); + if (srcFlat.IsUtf16()) { + if (canBeCompressed) { + auto replace = [oldC, newC](uint16_t c) { return static_cast((oldC != c) ? c : newC); }; + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + std::transform(srcFlat.GetDataUtf16(), srcFlat.GetDataUtf16() + length, string->GetDataUtf8Writable(), + replace); + } else { + auto replace = [oldC, newC](uint16_t c) { return (oldC != c) ? 
c : newC; }; + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + std::transform(srcFlat.GetDataUtf16(), srcFlat.GetDataUtf16() + length, string->GetDataUtf16Writable(), + replace); + } + } else { + if (canBeCompressed) { + auto replace = [oldC, newC](uint16_t c) { return static_cast((oldC != c) ? c : newC); }; + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + std::transform(srcFlat.GetDataUtf8(), srcFlat.GetDataUtf8() + length, string->GetDataUtf8Writable(), + replace); + } else { + auto replace = [oldC, newC](uint16_t c) { return (oldC != c) ? c : newC; }; + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + std::transform(srcFlat.GetDataUtf8(), srcFlat.GetDataUtf8() + length, string->GetDataUtf16Writable(), + replace); + } + } + TSAN_ANNOTATE_IGNORE_WRITES_END(); + // String is supposed to be a constant object, so all its data should be visible by all threads + arch::FullMemoryBarrier(); + return string; +} + +// static +bool BaseString::CanBeCompressedMUtf8(const uint8_t *mutf8Data) +{ + return utf::IsMUtf8OnlySingleBytes(mutf8Data); +} + +bool BaseString::StringsAreEqualMUtf8(BaseString *str1, const uint8_t *mutf8Data, uint32_t utf16Length) +{ + ASSERT(utf16Length == utf::MUtf8ToUtf16Size(mutf8Data)); + if (str1->GetLength() != utf16Length) { + return false; + } + bool canBeCompressed = CanBeCompressedMUtf8(mutf8Data); + return StringsAreEqualMUtf8(str1, mutf8Data, utf16Length, canBeCompressed); +} + +/* static */ +bool BaseString::IsMutf8EqualsUtf16(const uint8_t *utf8Data, const uint16_t *utf16Data, uint32_t utf16DataLength) +{ + auto allocator = Runtime::GetCurrent()->GetInternalAllocator(); + auto tmpBuffer = allocator->AllocArray(utf16DataLength); + utf::ConvertMUtf8ToUtf16(utf8Data, utf::Mutf8Size(utf8Data), tmpBuffer); + + panda::common::Span data1(tmpBuffer, utf16DataLength); + panda::common::Span data2(utf16Data, utf16DataLength); + bool result = panda::BaseString::StringsAreEquals(data1, 
data2); + allocator->Delete(tmpBuffer); + return result; +} + +/* static */ +bool BaseString::StringsAreEqualMUtf8(BaseString *str1, const uint8_t *mutf8Data, uint32_t utf16Length, + bool canBeCompressed) +{ + bool result = true; + if (str1->GetLength() != utf16Length) { + result = false; + } else { + bool str1CanBeCompressed = !str1->IsUtf16(); + bool data2CanBeCompressed = canBeCompressed; + if (str1CanBeCompressed != data2CanBeCompressed) { + return false; + } + + ASSERT(str1CanBeCompressed == data2CanBeCompressed); + auto readBarrier = [](void *obj, size_t offset) { + return reinterpret_cast( + ObjectAccessor::GetObject(const_cast(obj), offset)); + }; + if (str1CanBeCompressed) { + std::vector bufStr1Utf8; + int32_t str1Count = static_cast(str1->GetLength()); // NOLINT(modernize-use-auto) + const uint8_t *str1Utf8DataFlat = + panda::BaseString::GetUtf8DataFlat(readBarrier, str1->ToBaseString(), bufStr1Utf8); + panda::common::Span data1(str1Utf8DataFlat, str1Count); + panda::common::Span data2(mutf8Data, utf16Length); + result = panda::BaseString::StringsAreEquals(data1, data2); + } else { + std::vector bufStr1Utf16; + const uint16_t *str1Utf16DataFlat = + panda::BaseString::GetUtf16DataFlat(readBarrier, str1->ToBaseString(), bufStr1Utf16); + result = IsMutf8EqualsUtf16(mutf8Data, str1Utf16DataFlat, str1->GetLength()); + } + } + return result; +} + +/* static */ +bool BaseString::StringsAreEqual(BaseString *str1, BaseString *str2) +{ + auto readBarrier = [](void *obj, size_t offset) { + return reinterpret_cast(ObjectAccessor::GetObject(const_cast(obj), offset)); + }; + return panda::BaseString::StringsAreEqual(std::move(readBarrier), str1->ToBaseString(), str2->ToBaseString()); +} + +/* static */ +FlatStringInfo FlatStringInfo::FlattenTreeString(VMHandle &treeStr, const LanguageContext &ctx) +{ + panda::TreeString *treeString = treeStr->ToTreeString(); + auto readBarrier = [](void *obj, size_t offset) -> panda::BaseString * { + return 
reinterpret_cast(ObjectAccessor::GetObject(const_cast(obj), offset)); + }; + + if (treeString->IsFlat(std::move(readBarrier))) { + auto readBarrierLeft = [](void *obj, size_t offset) -> panda::BaseString * { + return reinterpret_cast( + ObjectAccessor::GetObject(const_cast(obj), offset)); + }; + // NOLINTNEXTLINE(modernize-use-auto) + panda::BaseString *first = treeString->GetLeftSubString(std::move(readBarrierLeft)); + return FlatStringInfo(BaseString::Cast(first), 0, treeString->GetLength()); + } + + BaseString *s = SlowFlatten(treeStr, ctx); + return FlatStringInfo(s, 0, treeString->GetLength()); +} + +/* static */ +FlatStringInfo FlatStringInfo::FlattenSlicedString(VMHandle &slicedStr) +{ + const panda::SlicedString *slicedString = slicedStr->ToSlicedString(); + auto readBarrier = [](void *obj, size_t offset) -> panda::BaseString * { + return reinterpret_cast(ObjectAccessor::GetObject(const_cast(obj), offset)); + }; + // NOLINTNEXTLINE(modernize-use-auto) + panda::BaseString *parent = slicedString->GetParent(std::move(readBarrier)); + return FlatStringInfo(BaseString::Cast(parent), slicedString->GetStartIndex(), slicedString->GetLength()); +} + +/* static */ +FlatStringInfo FlatStringInfo::FlattenAllString(VMHandle &str, const LanguageContext &ctx) +{ + BaseString *string = str.GetPtr(); + // 1. LineString return directly + if (string->IsLineString()) { + return FlatStringInfo(string, 0, string->GetLength()); + } + + // 2. SlicedString + if (string->IsSlicedString()) { + return FlattenSlicedString(str); + } + + // 3. 
TreeString + if (string->IsTreeString()) { + return FlattenTreeString(str, ctx); + } + + UNREACHABLE(); + return FlatStringInfo(string, 0, string->GetLength()); +} + +} // namespace ark::coretypes \ No newline at end of file diff --git a/static_core/runtime/entrypoints/entrypoints.cpp b/static_core/runtime/entrypoints/entrypoints.cpp index e1645344c577cc7a7977bd9e6a660e2d5d7ada0e..7f028b2abfe01bd53c38a3c6269ab17e344646b8 100644 --- a/static_core/runtime/entrypoints/entrypoints.cpp +++ b/static_core/runtime/entrypoints/entrypoints.cpp @@ -42,6 +42,7 @@ #include "utils/cframe_layout.h" #include "intrinsics.h" #include "runtime/interpreter/vregister_iterator.h" +#include "runtime/include/coretypes/base_string.h" #ifdef ARK_USE_CMC_GC #include "base_runtime.h" @@ -220,12 +221,13 @@ extern "C" coretypes::String *CreateStringFromStringEntrypoint(ObjectHeader *obj { BEGIN_ENTRYPOINT(); auto vm = ManagedThread::GetCurrent()->GetVM(); - auto str = coretypes::String::CreateFromString(static_cast(obj), vm->GetLanguageContext(), vm); + auto str = coretypes::BaseString::CreateFromString(static_cast(obj), + vm->GetLanguageContext(), vm); if (UNLIKELY(str == nullptr)) { HandlePendingException(); UNREACHABLE(); } - return str; + return coretypes::BaseString::Cast(str); } extern "C" coretypes::String *CreateStringFromCharsEntrypoint(ObjectHeader *obj) @@ -273,15 +275,16 @@ extern "C" coretypes::String *SubStringFromStringEntrypoint(ObjectHeader *obj, i BEGIN_ENTRYPOINT(); auto vm = ManagedThread::GetCurrent()->GetVM(); - auto indexes = coretypes::String::NormalizeSubStringIndexes(begin, end, static_cast(obj)); + auto indexes = + coretypes::BaseString::NormalizeSubStringIndexes(begin, end, static_cast(obj)); auto substrLength = indexes.second - indexes.first; - auto substr = coretypes::String::FastSubString(static_cast(obj), indexes.first, substrLength, - vm->GetLanguageContext(), vm); + auto substr = coretypes::BaseString::FastSubString(static_cast(obj), indexes.first, + 
substrLength, vm->GetLanguageContext(), vm); if (UNLIKELY(substr == nullptr)) { HandlePendingException(); UNREACHABLE(); } - return substr; + return coretypes::BaseString::Cast(substr); } extern "C" coretypes::Array *StringGetCharsEntrypoint(ObjectHeader *obj, int32_t begin, int32_t end) @@ -310,8 +313,8 @@ extern "C" coretypes::Array *StringGetCharsEntrypoint(ObjectHeader *obj, int32_t } auto vm = ManagedThread::GetCurrent()->GetVM(); auto arrayLength = end - begin; - auto array = coretypes::String::GetChars(static_cast(obj), begin, arrayLength, - vm->GetLanguageContext()); + auto array = coretypes::BaseString::GetChars(static_cast(obj), begin, arrayLength, + vm->GetLanguageContext()); if (UNLIKELY(array == nullptr)) { HandlePendingException(); UNREACHABLE(); diff --git a/static_core/runtime/entrypoints/string_index_of.h b/static_core/runtime/entrypoints/string_index_of.h index c7d37887210ab2f91442e5dfd2e2104d2bc34467..87fe22322a40b0eb3755424b53050c9bd297b148 100644 --- a/static_core/runtime/entrypoints/string_index_of.h +++ b/static_core/runtime/entrypoints/string_index_of.h @@ -19,6 +19,9 @@ #include #include "utils/bit_utils.h" #include "runtime/include/coretypes/string.h" +#include "runtime/include/coretypes/base_string.h" +#include "runtime/handle_scope-inl.h" +#include "runtime/mem/vm_handle.h" namespace ark::intrinsics { @@ -210,6 +213,35 @@ inline int32_t StringIndexOfU16(void *str, uint16_t character, int32_t offset) return impl::Utf16StringIndexOfChar(string->GetDataUtf16(), offset, length, character); } +// CC-OFFNXT(G.FUD.06) perf critical +inline int32_t BaseStringIndexOfU16(void *str, uint16_t character, int32_t offset, const LanguageContext &ctx) +{ + auto string = reinterpret_cast(str); + ASSERT(string != nullptr); + bool isUtf8 = string->IsMUtf8(); + auto length = string->GetLength(); + if (offset < 0) { + offset = 0; + } + + if (static_cast(offset) >= length) { + return -1; + } + auto thread = ark::ManagedThread::GetCurrent(); + [[maybe_unused]] 
HandleScope scope(thread); + VMHandle stringHandle(thread, string); + ark::coretypes::FlatStringInfo flatStr = ark::coretypes::FlatStringInfo::FlattenAllString(stringHandle, ctx); + + if (isUtf8) { + if (character > std::numeric_limits::max()) { + return -1; + } + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + return impl::Utf8StringIndexOfChar(flatStr.GetDataUtf8Writable(), offset, length, character); + } + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + return impl::Utf16StringIndexOfChar(flatStr.GetDataUtf16Writable(), offset, length, character); +} } // namespace ark::intrinsics #endif diff --git a/static_core/runtime/include/class.h b/static_core/runtime/include/class.h index feecabf93754eb0ad1354db2671fd715c21f4157..c4434b59c925f9bbe6c059148b5cf4c315b04110 100644 --- a/static_core/runtime/include/class.h +++ b/static_core/runtime/include/class.h @@ -34,6 +34,13 @@ class ClassLinkerContext; class ManagedThread; class ObjectHeader; +enum class StringType : uint64_t { + LINE_STRING_CLASS = 1, + SLICED_STRING_CLASS, + TREE_STRING_CLASS, + LAST_STRING_CLASS = TREE_STRING_CLASS, +}; + // NOTE (Artem Udovichenko): move BaseClass to another file but still have Class.h class BaseClass { public: @@ -41,6 +48,7 @@ public: using HeaderType = uint64_t; public: + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init) explicit BaseClass(panda_file::SourceLang lang) : lang_(lang) {} ~BaseClass() = default; @@ -48,6 +56,16 @@ public: DEFAULT_COPY_SEMANTIC(BaseClass); DEFAULT_MOVE_SEMANTIC(BaseClass); + uint64_t GetHClass() const + { + return hclass_; + } + + StringType GetBitField() const + { + return bitField_; + } + uint32_t GetFlags() const { return flags_; @@ -102,6 +120,16 @@ public: } protected: + void SetHClass(uint64_t hclass) + { + hclass_ = hclass; + } + + void SetBitField(StringType bitField) + { + bitField_ = bitField; + } + void SetFlags(uint32_t flags) { flags_ = flags; @@ -109,7 +137,7 @@ protected: private: 
FIELD_UNUSED HeaderType hclass_ {0}; // store ptr - FIELD_UNUSED uint64_t bitField_ {0}; // store StringType + FIELD_UNUSED StringType bitField_ {0}; // store StringType uint32_t flags_ {0}; // Size of the object of this class. In case of static classes it is 0 // for abstract classes, interfaces and classes whose objects @@ -147,6 +175,11 @@ public: base_ = base; } + void SetJSHClass(uint64_t hclass) + { + SetHClass(hclass); + } + panda_file::File::EntityId GetFileId() const { return fileId_; @@ -172,6 +205,11 @@ public: return descriptor_; } + void SetDescriptor(const uint8_t *descriptor) + { + descriptor_ = descriptor; + } + void SetMethods(Span methods, uint32_t numVmethods, uint32_t numSmethods) { methods_ = methods.data(); @@ -274,6 +312,16 @@ public: accessFlags_ = accessFlags; } + void SetFinal() + { + accessFlags_ |= ACC_FINAL; + } + + void SetUnFinal() + { + accessFlags_ &= (~ACC_FINAL); + } + bool IsPublic() const { return (accessFlags_ & ACC_PUBLIC) != 0; @@ -294,6 +342,11 @@ public: return (accessFlags_ & ACC_FINAL) != 0; } + bool IsExtensible() const + { + return !IsFinal() && !IsStringClass(); + } + bool IsAnnotation() const { return (accessFlags_ & ACC_ANNOTATION) != 0; @@ -321,7 +374,7 @@ public: uint32_t GetObjectSize() const { - ASSERT(!IsVariableSize()); + // ASSERT(!IsVariableSize()); return BaseClass::GetObjectSize(); } @@ -379,6 +432,26 @@ public: SetFlags(GetFlags() | XREF_CLASS); } + void SetLineStringClass() + { + SetBitField(StringType::LINE_STRING_CLASS); + } + + void SetSlicedStringClass() + { + SetBitField(StringType::SLICED_STRING_CLASS); + } + + void SetTreeStringClass() + { + SetBitField(StringType::TREE_STRING_CLASS); + } + + inline StringType GetStringType() const + { + return GetBitField(); + } + bool IsVariableSize() const { return IsArrayClass() || IsStringClass(); diff --git a/static_core/runtime/include/class_root.h b/static_core/runtime/include/class_root.h index 
69d841ab4ede4fc3ccb507311d75eab2b7aa6e48..6fce6b239deaa623768167860af7ab6fb8c7c2cc 100644 --- a/static_core/runtime/include/class_root.h +++ b/static_core/runtime/include/class_root.h @@ -45,7 +45,10 @@ enum class ClassRoot { ARRAY_TAGGED, CLASS, OBJECT, - STRING, + BASE_STRING, // -> STRING_CLASS (ets) + STRING, // -> LINE_STRING_CLASS + SLICED_STRING, // -> SLICED_STRING_CLASS + TREE_STRING, // -> TREE_STRING_CLASS ARRAY_CLASS, ARRAY_STRING, LAST_CLASS_ROOT_ENTRY = ARRAY_STRING // Must be the last in this enum diff --git a/static_core/runtime/include/coretypes/base_string.h b/static_core/runtime/include/coretypes/base_string.h new file mode 100644 index 0000000000000000000000000000000000000000..7f7b65784c09fd9bfa025f90736320a8250cd14d --- /dev/null +++ b/static_core/runtime/include/coretypes/base_string.h @@ -0,0 +1,662 @@ +/** + * Copyright (c) 2021-2025 Huawei Device Co., Ltd. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
 + */ + +#ifndef PANDA_RUNTIME_CORETYPES_COMMON_STRING_H_ +#define PANDA_RUNTIME_CORETYPES_COMMON_STRING_H_ + +#include +#include "libpandabase/utils/utf.h" +#include "runtime/include/language_context.h" +#include "runtime/include/exceptions.h" +#include "runtime/include/object_accessor.h" +#include "runtime/include/coretypes/string.h" +#include "runtime/include/exceptions.h" +#include "runtime/include/object_accessor.h" + +#include "objects/string/base_string-inl.h" +#include "objects/string/line_string-inl.h" +#include "string-inl.h" + +namespace ark::coretypes { +class BaseString : public ObjectHeader { +public: + /** + * @brief create string from another string + * @param [in] str : original string + * @return The string created + */ + PANDA_PUBLIC_API static BaseString *CreateFromString(BaseString *str, const LanguageContext &ctx, PandaVM *vm); + + /** + * @brief extract a char array of utf16Length chars from the src string, beginning at the start char of the src + * string + * @param [in] src : original string + * @param [in] start : the start index in original string + * @param [in] utf16Length : number of chars to extract + * @return the char array extracted + */ + PANDA_PUBLIC_API static Array *GetChars(BaseString *src, uint32_t start, uint32_t utf16Length, + const LanguageContext &ctx); + + /** + * @brief Alloc a LineString with specified chars + * @param [in]length : number of chars stored in LineString + * @param [in]compressed : true if compressedStringsEnabled_ and all ASCII chars stored , indicates that can be + * stored one byte one char in mutf8 format + * @return The LineString created + */ + static BaseString *AllocLineStringObject(size_t length, bool compressed, const LanguageContext &ctx, + PandaVM *vm = nullptr, bool movable = true, bool pinned = false); + + /** + * @brief make slice of src string + * @param [in]src : parent string , will flatten it + * @param [in]start : startIndex to the parent string + * @param [in]length : number 
of chars to get + * @return The SlicedString created + */ + static BaseString *GetSlicedString(BaseString *src, uint32_t start, uint32_t length, const LanguageContext &ctx, + PandaVM *vm, bool movable = true, bool pinned = false); + + /** + * @brief Create TreeString from given child Strings + * @param [in]left : left child string + * @param [in]right : right child string + * @param [in]length : equals left.length + right.length , indicates the whole number of chars in tree + * @param [in]compressed : true if all chars stored in tree are ASCII + * @return The TreeString created + */ + static BaseString *CreateTreeString(BaseString *left, BaseString *right, uint32_t length, bool compressed, + const LanguageContext &ctx, PandaVM *vm, bool movable = true, + bool pinned = false); + + /** + * @brief Concat two Strings + * @param [in]str1Handle : the first string to be concatenated + * @param [in]str2Handle : the second string to be concatenated + * @return The concatenated string + */ + PANDA_PUBLIC_API static BaseString *ConcatLineString(VMHandle &str1Handle, + VMHandle &str2Handle, const LanguageContext &ctx, + PandaVM *vm); + + static BaseString *Cast(ObjectHeader *object) + { + return static_cast(object); + } + + static BaseString *Cast(panda::BaseString *str) + { + return reinterpret_cast(str); + } + + static BaseString *Cast(String *str) + { + return reinterpret_cast(str); + } + + static String *Cast(BaseString *str) + { + return reinterpret_cast(str); + } + + static BaseString *Concat(BaseString *str1, BaseString *str2, const LanguageContext &ctx, PandaVM *vm); + + static BaseString *FastSubString(BaseString *src, uint32_t start, uint32_t utf16Length, const LanguageContext &ctx, + PandaVM *vm = nullptr); + static BaseString *FastSubUtf16String(BaseString *src, uint32_t start, uint32_t length, const LanguageContext &ctx); + static BaseString *FastSubUtf8String(BaseString *src, uint32_t start, uint32_t length, const LanguageContext &ctx); + + static BaseString 
*DoReplace(BaseString *src, uint16_t oldC, uint16_t newC, const LanguageContext &ctx, + PandaVM *vm); + + static bool CanBeCompressedUtf16(const uint16_t *utf16Data, uint32_t utf16Length, uint16_t non); + static bool CanBeCompressedMUtf8(const uint8_t *mutf8Data, uint32_t mutf8Length, uint16_t non); + static bool CanBeCompressedMUtf8(const uint8_t *mutf8Data); + + int32_t IndexOf(BaseString *rhs, const LanguageContext &ctx, int pos = 0); + int32_t LastIndexOf(BaseString *rhs, const LanguageContext &ctx, int pos = INT32_MAX); + + static int32_t IndexOf(VMHandle &receiver, VMHandle &search, const LanguageContext &ctx, + int pos = 0); + + static int32_t LastIndexOf(VMHandle &receiver, VMHandle &search, const LanguageContext &ctx, + int pos); + + static int32_t Compare(VMHandle &left, VMHandle &right, const LanguageContext &ctx); + + /** + * @brief compare this string to another string specified by rstr + * @param [in] rstr : another string + * @return negative if this string less than rstr string according to dictionary order + * zero if this string equals to rstr string + * positive if this string greater than rstr string + */ + PANDA_PUBLIC_API int32_t Compare(BaseString *rstr, const LanguageContext &ctx); + + static bool StringsAreEqual(BaseString *str1, BaseString *str2); + PANDA_PUBLIC_API static bool StringsAreEqualMUtf8(BaseString *str1, const uint8_t *mutf8Data, uint32_t utf16Length); + static bool StringsAreEqualMUtf8(BaseString *str1, const uint8_t *mutf8Data, uint32_t utf16Length, + bool canBeCompressed); + + PANDA_PUBLIC_API static bool StringsAreEqualUtf16(BaseString *str1, const uint16_t *utf16Data, + uint32_t utf16DataLength) + { + auto readBarrier = [](void *obj, size_t offset) { + return reinterpret_cast( + ObjectAccessor::GetObject(const_cast(obj), offset)); + }; + return panda::BaseString::StringsAreEqualUtf16(readBarrier, str1->ToBaseString(), utf16Data, utf16DataLength); + } + + static bool IsMutf8EqualsUtf16(const uint8_t *utf8Data, const 
uint16_t *utf16Data, uint32_t utf16DataLength); + + static uint32_t ComputeHashcodeMutf8(const uint8_t *mutf8Data, [[maybe_unused]] uint32_t utf16Length, + bool canBeCompressed) + { + return panda::BaseString::ComputeHashcodeUtf8(mutf8Data, utf::Mutf8Size(mutf8Data), canBeCompressed); + } + + static uint32_t ComputeHashcodeMutf8(const uint8_t *mutf8Data, uint32_t utf16Length) + { + bool canBeCompressed = CanBeCompressedMUtf8(mutf8Data); + return ComputeHashcodeMutf8(mutf8Data, utf16Length, canBeCompressed); + } + + static uint32_t ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length) + { + return panda::BaseString::ComputeHashcodeUtf16(utf16Data, length); + } + + static std::pair NormalizeSubStringIndexes(int32_t beginIndex, int32_t endIndex, + const coretypes::BaseString *str) + { + auto strLen = str->GetLength(); + std::pair normIndexes = {beginIndex, endIndex}; + + // If begin_index < 0, then it is assumed to be equal to zero. + if (normIndexes.first < 0) { + normIndexes.first = 0; + } else if (static_cast(normIndexes.first) > strLen) { + // If begin_index > str_len, then it is assumed to be equal to str_len. + normIndexes.first = static_cast(strLen); + } + // If end_index < 0, then it is assumed to be equal to zero. + if (normIndexes.second < 0) { + normIndexes.second = 0; + } else if (static_cast(normIndexes.second) > strLen) { + // If end_index > str_len, then it is assumed to be equal to str_len. + normIndexes.second = static_cast(strLen); + } + // If begin_index > end_index, then these are swapped. 
+ if (normIndexes.first > normIndexes.second) { + std::swap(normIndexes.first, normIndexes.second); + } + ASSERT((normIndexes.second - normIndexes.first) >= 0); + return normIndexes; + } + + Array *ToCharArray(const LanguageContext &ctx) + { + return GetChars(this, 0, GetLength(), ctx); + } + + inline size_t CopyDataMUtf8(uint8_t *buf, size_t maxLength, bool isCString) + { + // May need alternative implementation like copydatautf8 in 1.0 + if (isCString) { + ASSERT(maxLength != 0); + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + buf[maxLength - 1] = '\0'; + return CopyDataRegionMUtf8(buf, 0, GetLength(), maxLength) + 1; // add place for zero at the end + } + + return CopyDataRegionMUtf8(buf, 0, GetLength(), maxLength); + } + + size_t CopyDataRegionMUtf8(uint8_t *buf, size_t start, size_t length, size_t maxLength) + { + auto readBarrier = [](void *obj, size_t offset) { + return reinterpret_cast( + ObjectAccessor::GetObject(const_cast(obj), offset)); + }; + // check the difference between CopyDataRegionUtf8 and CopyDataRegionMUtf8 + return ToBaseString()->CopyDataRegionUtf8(std::move(readBarrier), buf, start, length, maxLength); + } + + uint32_t CopyDataUtf16(uint16_t *buf, uint32_t maxLength) + { + auto readBarrier = [](void *obj, size_t offset) { + return reinterpret_cast( + ObjectAccessor::GetObject(const_cast(obj), offset)); + }; + return ToBaseString()->CopyDataUtf16(std::move(readBarrier), buf, maxLength); + } + + /** + * @brief copy data from this string to buf + * @param [in] buf : dest buf + * @param [in] start : start char offset in this string + * @param [in] length : how many chars to copy + * @param [in] maxLength : max chars of dest buf + * @return how many bytes to copy + */ + uint32_t CopyDataRegionUtf16(uint16_t *buf, uint32_t start, uint32_t length, uint32_t maxLength) + { + if (length > maxLength) { + return 0; + } + uint32_t len = GetLength(); + if (start + length > len) { + return 0; + } + auto readBarrier = [](void *obj, 
size_t offset) { + return reinterpret_cast( + ObjectAccessor::GetObject(const_cast(obj), offset)); + }; + if (IsUtf16()) { + std::vector tmpBuf; + const uint16_t *data = ToBaseString()->GetUtf16DataFlat(std::move(readBarrier), ToBaseString(), tmpBuf); + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + if (memcpy_s(buf, sizeof(uint16_t) * maxLength, data + start, ComputeDataSizeUtf16(length)) != EOK) { + LOG(FATAL, RUNTIME) << __func__ << " length is higher than buf size"; + } + } else { + std::vector tmpBuf; + const uint8_t *data = ToBaseString()->GetUtf8DataFlat(std::move(readBarrier), ToBaseString(), tmpBuf); + const uint8_t *src8 = data + start; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) + for (uint32_t i = 0; i < length; ++i) { + buf[i] = src8[i]; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) + } + } + return length; + } + + static size_t ComputeDataSizeUtf16(uint32_t length) + { + return length * sizeof(uint16_t); + } + + bool IsEmpty() const + { + // do not shift right length because it is always zero for empty string + return GetLength() == 0; + } + + uint32_t GetHashcode() + { + auto readBarrier = [](void *obj, size_t offset) { + return reinterpret_cast( + ObjectAccessor::GetObject(const_cast(obj), offset)); + }; + + return ToBaseString()->GetHashcode(std::move(readBarrier)); + } + + panda::BaseString *ToBaseString() + { + return panda::BaseString::Cast(reinterpret_cast(this)); + } + + const panda::BaseString *ToBaseStringConst() const + { + return panda::BaseString::ConstCast(reinterpret_cast(this)); + } + + uint32_t GetLength() const + { + return ToBaseStringConst()->GetLength(); + } + + size_t GetUtf16Length() + { + return GetLength(); + } + + size_t GetMUtf8Length() + { + if (!IsUtf16()) { + return GetLength() + 1; + } + auto readBarrier = [](void *obj, size_t offset) -> panda::BaseString * { + return reinterpret_cast( + ObjectAccessor::GetObject(const_cast(obj), offset)); + }; + return 
ToBaseString()->GetUtf8Length(std::move(readBarrier), true, false); + } + + size_t GetUtf8Length() + { + if (!IsUtf16()) { + return GetLength(); + } + auto readBarrier = [](void *obj, size_t offset) -> panda::BaseString * { + return reinterpret_cast( + ObjectAccessor::GetObject(const_cast(obj), offset)); + }; + return ToBaseString()->GetUtf8Length(std::move(readBarrier), false, true) - 1; + } + + /** + * @brief Check whether a UTF-16 code unit is an ASCII character. + * + * Determines whether the given 16-bit character is within the standard ASCII range (0x01–0x7F). + * + * @param data The UTF-16 code unit to check. + * @return true if the character is ASCII; false otherwise. + */ + static bool IsASCIICharacter(uint16_t data) + { + return panda::BaseString::IsASCIICharacter(data); + } + + bool IsUtf16() const + { + return ToBaseStringConst()->IsUtf16(); + } + + bool IsUtf8() const + { + return IsMUtf8(); + } + + bool IsMUtf8() const + { + return !IsUtf16(); + } + + bool IsLineString() const + { + return ToBaseStringConst()->IsLineString(); + } + + bool IsTreeString() const + { + return ToBaseStringConst()->IsTreeString(); + } + + bool IsSlicedString() const + { + return ToBaseStringConst()->IsSlicedString(); + } + + panda::LineString *ToLineString() + { + ASSERT(IsLineString()); + return panda::LineString::Cast(ToBaseString()); + } + + const panda::SlicedString *ToSlicedString() const + { + ASSERT(IsSlicedString()); + return panda::SlicedString::ConstCast(ToBaseStringConst()); + } + + panda::TreeString *ToTreeString() + { + ASSERT(IsTreeString()); + return panda::TreeString::Cast(ToBaseString()); + } + + uint16_t *GetDataUtf16() + { + ASSERT_PRINT(IsUtf16(), "String: Read data as utf16 for mutf8 string"); + return ToLineString()->GetDataUtf16Writable(); + } + + uint8_t *GetDataUtf8() + { + ASSERT_PRINT(IsUtf8(), "String: Read data as utf8 for utf16 string"); + return ToLineString()->GetDataUtf8Writable(); + } + + uint8_t *GetDataMUtf8() + { + ASSERT_PRINT(IsUtf8(), 
"String: Read data as mutf8 for utf16 string"); + return GetDataUtf8(); + } + + inline uint8_t *GetDataUtf8Writable() + { + ASSERT_PRINT(IsUtf8(), "String: Read data as utf8 for utf16 string"); + return ToLineString()->GetDataUtf8Writable(); + } + + inline uint16_t *GetDataUtf16Writable() + { + ASSERT_PRINT(IsUtf16(), "String: Read data as utf16 for mutf8 string"); + return ToLineString()->GetDataUtf16Writable(); + } + + size_t ObjectSize() const + { + if (IsLineString()) { + return panda::LineString::ObjectSize(ToBaseStringConst()); + } + if (IsSlicedString()) { + return panda::SlicedString::SIZE; + } + if (IsTreeString()) { + return panda::TreeString::SIZE; + } + UNREACHABLE(); + } + + /** + * @brief extract char from this string positioned at index + * @param [in] index : index to positioned + * @return the char + */ + template + uint16_t At(int32_t index) + { + auto length = GetLength(); + if (VERIFY) { + if ((index < 0) || (index >= static_cast(length))) { + ark::ThrowStringIndexOutOfBoundsException(index, length); + return 0; + } + } + + auto readBarrier = [](void *obj, size_t offset) -> panda::BaseString * { + return reinterpret_cast(ObjectAccessor::GetObject(obj, offset)); + }; + return ToBaseString()->At(std::move(readBarrier), index); + } + + /** + * @brief copy data from src to dest , dest is specified by this line string + * @param [in] src : original data + * @param [in] start : write to dest positioned at start offset + * @param [in] destSize : dest max size + * @param [in] length : how many chars to copy + */ + void WriteData(BaseString *src, uint32_t start, uint32_t destSize, uint32_t length) + { + ASSERT(IsLineString()); + auto readBarrier = [](void *obj, size_t offset) -> panda::BaseString * { + return reinterpret_cast( + ObjectAccessor::GetObject(const_cast(obj), offset)); + }; + ToLineString()->WriteData(std::move(readBarrier), src->ToBaseString(), start, destSize, length); + } + + /** + * @brief copy data from this string to buf + * @param [in] 
buf : dest buf + * @param [in] start : start offset in this string + * @param [in] length : number of bytes to convert and copy + * @param [in] maxLength : max length of dest buf + * @note if this string is utf16, then it will convert the whole string to buffer + * @return how many bytes to copy + */ + size_t CopyDataRegionUtf8(uint8_t *buf, size_t start, size_t length, size_t maxLength) + { + if (length > maxLength) { + return 0; + } + uint32_t len = GetUtf8Length(); + if (start + length > len) { + return 0; + } + auto readBarrier = [](void *obj, size_t offset) -> panda::BaseString * { + return reinterpret_cast( + ObjectAccessor::GetObject(const_cast(obj), offset)); + }; + if (!IsUtf16()) { + return ToBaseString()->CopyDataRegionUtf8(std::move(readBarrier), buf, start, length, maxLength); + } + length = this->GetUtf16Length(); + std::vector tmpBuf; + const uint16_t *data = ToBaseString()->GetUtf16DataFlat(std::move(readBarrier), ToBaseString(), tmpBuf); + return ark::utf::ConvertRegionUtf16ToUtf8(data, buf, length, maxLength, start, false); + } + +protected: + /** + * @brief Alloc a SlicedString + * @return The SlicedString created + */ + static BaseString *AllocSlicedStringObject(const LanguageContext &ctx, PandaVM *vm = nullptr, bool movable = true, + bool pinned = false); + + /** + * @brief Alloc a TreeString + * @return The TreeString created + */ + static BaseString *AllocTreeStringObject(const LanguageContext &ctx, PandaVM *vm = nullptr, bool movable = true, + bool pinned = false); +}; + +class FlatStringInfo { +public: + /** + * @brief flow flatten string to line string + * @param [in] str : string to be flatten + * @return the flattened string + */ + static BaseString *SlowFlatten(VMHandle &str, const LanguageContext &ctx); + + /** + * @brief flatten tree string to line string + * @param [in] treeStr : tree string to be flatten + * @return the flattened string + */ + static FlatStringInfo FlattenTreeString(VMHandle &treeStr, const LanguageContext &ctx); + 
+ /** + * @brief flatten sliced string to line string + * @param [in] slicedStr : sliced string to be flatten + * @return the flattened string + */ + static FlatStringInfo FlattenSlicedString(VMHandle &slicedStr); + + /** + * @brief flatten str to line string , in order to use it comfortably + * LineString --> LineString + * SlicedString --> LineString + * TreeString --> flatten every node in the tree to a LineString + * @param [in] str : str to be flatten + * @return the flattened string + */ + static FlatStringInfo FlattenAllString(VMHandle &str, const LanguageContext &ctx); + + FlatStringInfo(BaseString *string, uint32_t startIndex, uint32_t length) + : string_(string), startIndex_(startIndex), length_(length) + { + } + + inline bool IsUtf8() const + { + return string_->IsMUtf8(); + } + + inline bool IsUtf16() const + { + return string_->IsUtf16(); + } + + inline BaseString *GetString() const + { + return string_; + } + + inline void SetString(BaseString *string) + { + string_ = string; + } + + inline uint32_t GetStartIndex() const + { + return startIndex_; + } + + inline void SetStartIndex(uint32_t index) + { + startIndex_ = index; + } + + inline uint32_t GetLength() const + { + return length_; + } + + inline const uint8_t *GetDataUtf8() const + { + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + return string_->GetDataUtf8() + startIndex_; + } + + inline const uint16_t *GetDataUtf16() const + { + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + return string_->GetDataUtf16() + startIndex_; + } + + inline uint8_t *GetDataUtf8Writable() const + { + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + return string_->GetDataUtf8Writable() + startIndex_; + } + + inline uint16_t *GetDataUtf16Writable() const + { + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + return string_->GetDataUtf16Writable() + startIndex_; + } + + inline std::u16string ToU16String(uint32_t len = 0) + { + uint32_t 
length = len > 0 ? len : GetLength(); + std::u16string result; + if (IsUtf16()) { + const uint16_t *data = this->GetDataUtf16(); + result = panda::Utf16ToU16String(data, length); + } else { + const uint8_t *data = this->GetDataUtf8(); + result = panda::Utf8ToU16String(data, length); + } + return result; + } + +private: + BaseString *string_ {nullptr}; + uint32_t startIndex_ {0}; + uint32_t length_ {0}; +}; + +} // namespace ark::coretypes + +#endif // PANDA_RUNTIME_CORETYPES_COMMON_STRING_H_ diff --git a/static_core/runtime/include/coretypes/string.h b/static_core/runtime/include/coretypes/string.h index a6106834ed956bffe46cc31e9f58636548d94307..e08ba71a66bf56246198acb297cf87ec4d6e12e1 100644 --- a/static_core/runtime/include/coretypes/string.h +++ b/static_core/runtime/include/coretypes/string.h @@ -246,7 +246,7 @@ public: { uint32_t length; if (compressedStringsEnabled_) { - length = length_ >> 1U; + length = length_ >> STRING_LENGTH_TWO_SHIFT; } else { length = length_; } @@ -363,9 +363,11 @@ protected: void SetLength(uint32_t length, bool compressed = false) { if (compressedStringsEnabled_) { - ASSERT(length < 0x80000000U); + ASSERT(length < MAX_STRING_LENGTH); // Use 0u for compressed/utf8 expression - length_ = (length << 1U) | (compressed ? STRING_COMPRESSED : STRING_UNCOMPRESSED); + // NOLINTNEXTLINE(hicpp-signed-bitwise) + length_ = (length << STRING_LENGTH_TWO_SHIFT) | (static_cast(STRING_UNINTERNED) << 1U) | + (compressed ? 
STRING_COMPRESSED : STRING_UNCOMPRESSED); } else { length_ = length; } @@ -385,11 +387,18 @@ protected: private: PANDA_PUBLIC_API static bool compressedStringsEnabled_; static constexpr uint32_t STRING_COMPRESSED_BIT = 0x1; + static constexpr uint32_t STRING_LENGTH_TWO_SHIFT = 2U; + static constexpr uint32_t MAX_STRING_LENGTH = 1 << 31; enum CompressedStatus { STRING_COMPRESSED, STRING_UNCOMPRESSED, }; + enum InternStatus { + STRING_UNINTERNED, + STRING_INTERNED, + }; + static bool CanBeCompressedMUtf8(const uint8_t *mutf8Data, uint32_t mutf8Length); static bool CanBeCompressedUtf16(const uint16_t *utf16Data, uint32_t utf16Length, uint16_t non); static bool CanBeCompressedMUtf8(const uint8_t *mutf8Data, uint32_t mutf8Length, uint16_t non); @@ -409,6 +418,7 @@ private: static bool StringsAreEquals(Span &str1, Span &str2); // In last bit of length_ we store if this string is compressed or not. + // In last second bit of length_ we store if this string is intern or not. uint32_t length_; uint32_t hashcode_; // A pointer to the string data stored after the string header. diff --git a/static_core/runtime/intrinsics.cpp b/static_core/runtime/intrinsics.cpp index a90c807dcc24fe8b89dee326fe325277a5e3803e..f8563e1b6e4e3822b2121071eecdbd730f6df030 100644 --- a/static_core/runtime/intrinsics.cpp +++ b/static_core/runtime/intrinsics.cpp @@ -29,6 +29,7 @@ #include "runtime/include/compiler_interface.h" #include "runtime/include/coretypes/array.h" #include "runtime/include/coretypes/string.h" +#include "runtime/include/coretypes/base_string.h" #include "runtime/include/panda_vm.h" #include "runtime/include/runtime.h" #include "runtime/include/thread.h" @@ -164,18 +165,11 @@ template void PrintStringInternal(coretypes::String *v) { static auto &outstream = IS_ERR ? 
std::cerr : std::cout; - if (v->IsUtf16()) { - uint16_t *vdataPtr = v->GetDataUtf16(); - uint32_t vlength = v->GetLength(); - size_t mutf8Len = utf::Utf16ToMUtf8Size(vdataPtr, vlength); - - PandaVector out(mutf8Len); - utf::ConvertRegionUtf16ToMUtf8(vdataPtr, out.data(), vlength, mutf8Len, 0); - - outstream << reinterpret_cast(out.data()); - } else { - outstream << std::string_view(reinterpret_cast(v->GetDataMUtf8()), v->GetLength()); - } + coretypes::BaseString *bs = coretypes::BaseString::Cast(v); + size_t len = bs->GetUtf8Length(); + PandaVector out(len); + bs->CopyDataRegionUtf8(out.data(), 0, len, len); + outstream << std::string_view(reinterpret_cast(out.data()), out.size()); } void PrintString(coretypes::String *v) diff --git a/static_core/runtime/mem/object_helpers.cpp b/static_core/runtime/mem/object_helpers.cpp index 0b07a34047a52a27c18e477fabca46a6064a2509..8aafd1761e3b4828fa7cdf030a2b2ceb3ed8ce6b 100644 --- a/static_core/runtime/mem/object_helpers.cpp +++ b/static_core/runtime/mem/object_helpers.cpp @@ -22,6 +22,7 @@ #include "libpandabase/utils/utf.h" #include "runtime/include/thread.h" #include "runtime/include/panda_vm.h" +#include "runtime/include/coretypes/base_string.h" #include "runtime/mem/free_object.h" #include "runtime/mem/gc/dynamic/gc_dynamic_data.h" @@ -68,13 +69,16 @@ static void DumpArrayClassObject(ObjectHeader *objectHeader, std::basic_ostream< static void DumpStringClass(ObjectHeader *objectHeader, std::basic_ostream> *oStream) { - auto *strObject = static_cast(objectHeader); - if (strObject->GetLength() > 0 && !strObject->IsUtf16()) { + auto *strObject = static_cast(objectHeader); + if (strObject->GetLength() > 0 && strObject->IsUtf8()) { *oStream << "length = " << std::dec << strObject->GetLength() << std::endl; constexpr size_t BUFF_SIZE = 256; std::array buff {0}; - auto strRes = strncpy_s(&buff[0], BUFF_SIZE, reinterpret_cast(strObject->GetDataMUtf8()), - std::min(BUFF_SIZE - 1, static_cast(strObject->GetLength()))); + 
PandaVector srcVector(strObject->GetUtf8Length()); + strObject->CopyDataRegionUtf8(srcVector.data(), 0, srcVector.size(), srcVector.size()); + + auto strRes = strncpy_s(&buff[0], BUFF_SIZE, reinterpret_cast(srcVector.data()), + std::min(BUFF_SIZE - 1, static_cast(srcVector.size()))); if (UNLIKELY(strRes != EOK)) { LOG(ERROR, RUNTIME) << "Couldn't copy string by strncpy_s, error code: " << strRes; } diff --git a/static_core/runtime/mem/panda_string.cpp b/static_core/runtime/mem/panda_string.cpp index 226e7626c82b2ad302b6758256fc304cee1f65de..c954ee7dfdabfc00ac885e958c688ffc6c052382 100644 --- a/static_core/runtime/mem/panda_string.cpp +++ b/static_core/runtime/mem/panda_string.cpp @@ -20,6 +20,7 @@ #include "libpandabase/macros.h" #include "runtime/include/coretypes/string.h" +#include "runtime/include/coretypes/base_string.h" namespace ark { @@ -85,16 +86,12 @@ PandaString ConvertToString(const std::string &str) PandaString ConvertToString(coretypes::String *s) { ASSERT(s != nullptr); - if (s->IsUtf16()) { - // Should convert utf-16 to utf-8, because uint16_t likely great than MAX_CHAR, will convert fail - size_t len = utf::Utf16ToMUtf8Size(s->GetDataUtf16(), s->GetUtf16Length()) - 1; - PandaVector buf(len); - len = utf::ConvertRegionUtf16ToMUtf8(s->GetDataUtf16(), buf.data(), s->GetUtf16Length(), len, 0); - Span sp(buf.data(), len); - return ConvertToString(sp); - } + coretypes::BaseString *bs = coretypes::BaseString::Cast(s); + size_t len = bs->GetUtf8Length(); + PandaVector buf(len); + bs->CopyDataRegionUtf8(buf.data(), 0, len, len); - Span sp(s->GetDataMUtf8(), s->GetLength()); + Span sp(buf.data(), len); return ConvertToString(sp); } diff --git a/static_core/runtime/object_header.cpp b/static_core/runtime/object_header.cpp index 2a9c960f063e4abecd9a5bc73daeca39a2817b4b..8b6871a633db3d059c112981e382a4cd2c240c0a 100644 --- a/static_core/runtime/object_header.cpp +++ b/static_core/runtime/object_header.cpp @@ -19,6 +19,7 @@ #include "runtime/include/class.h" 
#include "runtime/include/coretypes/array.h" #include "runtime/include/coretypes/class.h" +#include "runtime/include/coretypes/base_string.h" #include "runtime/include/hclass.h" #include "runtime/include/runtime.h" #include "runtime/include/thread.h" @@ -282,7 +283,7 @@ size_t ObjectHeader::ObjectSizeStatic(BaseClass *baseKlass) const } if (klass->IsStringClass()) { - return static_cast(this)->ObjectSize(); + return static_cast(this)->ObjectSize(); } if (klass->IsClassClass()) { diff --git a/static_core/runtime/options.yaml b/static_core/runtime/options.yaml index 19f754f7e9bb0278920d8202ece3c6972b27a1dd..da7841167b36a551cc7cdb272e4e412d1d179d91 100644 --- a/static_core/runtime/options.yaml +++ b/static_core/runtime/options.yaml @@ -942,3 +942,8 @@ options: type: bool default: true description: Whether to create cache for strings created from integer/floating point numbers. + +- name: use-all-strings + type: bool + default: false + description: Whether to create strings with LineString/SlicedString/TreeString or only LineString. diff --git a/static_core/runtime/runtime.cpp b/static_core/runtime/runtime.cpp index 21cc62078feddeb77799ee5c4365b3457fda8d7f..fdd24e984ced58a4faa6a040e33404660c19f68b 100644 --- a/static_core/runtime/runtime.cpp +++ b/static_core/runtime/runtime.cpp @@ -1208,7 +1208,7 @@ Expected Runtime::ExecutePandaFile(std::string_view filenam ++quickenedFiles; } pandaFiles++; - return true; + return static_cast(true); }); if (quickenedFiles != 0 && quickenedFiles != pandaFiles) { LOG(ERROR, RUNTIME) << "All input files should be either quickened or not. 
Got " << quickenedFiles @@ -1219,9 +1219,9 @@ Expected Runtime::ExecutePandaFile(std::string_view filenam } else { LOG(ERROR, RUNTIME) << "File " << pf.GetFilename() << " is not quickened"; } - return true; + return static_cast(true); }); - return false; + return static_cast(false); } return Execute(entryPoint, args); diff --git a/static_core/runtime/string_table.cpp b/static_core/runtime/string_table.cpp index 841a4871853cb97985375dfbb2eb4de608c579a1..bbee4cfda4c2e0175bda263987f3bcc331d46eb1 100644 --- a/static_core/runtime/string_table.cpp +++ b/static_core/runtime/string_table.cpp @@ -17,13 +17,14 @@ #include "runtime/include/runtime.h" #include "runtime/mem/object_helpers.h" +#include "runtime/include/coretypes/base_string.h" namespace ark { coretypes::String *StringTable::GetOrInternString(const uint8_t *mutf8Data, uint32_t utf16Length, const LanguageContext &ctx) { - bool canBeCompressed = coretypes::String::CanBeCompressedMUtf8(mutf8Data); + bool canBeCompressed = coretypes::BaseString::CanBeCompressedMUtf8(mutf8Data); auto *str = internalTable_.GetString(mutf8Data, utf16Length, canBeCompressed, ctx); if (str == nullptr) { str = table_.GetOrInternString(mutf8Data, utf16Length, canBeCompressed, ctx); @@ -89,11 +90,12 @@ void StringTable::Table::VisitStrings(const StringVisitor &visitor) coretypes::String *StringTable::Table::GetString(const uint8_t *utf8Data, uint32_t utf16Length, bool canBeCompressed, [[maybe_unused]] const LanguageContext &ctx) { - uint32_t hashCode = coretypes::String::ComputeHashcodeMutf8(utf8Data, utf16Length, canBeCompressed); + uint32_t hashCode = coretypes::BaseString::ComputeHashcodeMutf8(utf8Data, utf16Length, canBeCompressed); os::memory::ReadLockHolder holder(tableLock_); for (auto it = table_.find(hashCode); it != table_.end(); it++) { auto foundString = it->second; - if (coretypes::String::StringsAreEqualMUtf8(foundString, utf8Data, utf16Length, canBeCompressed)) { + if 
(coretypes::BaseString::StringsAreEqualMUtf8(coretypes::BaseString::Cast((foundString)), utf8Data, + utf16Length, canBeCompressed)) { return foundString; } } @@ -103,11 +105,12 @@ coretypes::String *StringTable::Table::GetString(const uint8_t *utf8Data, uint32 coretypes::String *StringTable::Table::GetString(const uint16_t *utf16Data, uint32_t utf16Length, [[maybe_unused]] const LanguageContext &ctx) { - uint32_t hashCode = coretypes::String::ComputeHashcodeUtf16(const_cast(utf16Data), utf16Length); + uint32_t hashCode = coretypes::BaseString::ComputeHashcodeUtf16(utf16Data, utf16Length); os::memory::ReadLockHolder holder(tableLock_); for (auto it = table_.find(hashCode); it != table_.end(); it++) { auto foundString = it->second; - if (coretypes::String::StringsAreEqualUtf16(foundString, utf16Data, utf16Length)) { + if (coretypes::BaseString::StringsAreEqualUtf16(coretypes::BaseString::Cast((foundString)), utf16Data, + utf16Length)) { return foundString; } } @@ -118,10 +121,11 @@ coretypes::String *StringTable::Table::GetString(coretypes::String *string, [[ma { ASSERT(string != nullptr); os::memory::ReadLockHolder holder(tableLock_); - auto hash = string->GetHashcode(); + auto hash = coretypes::BaseString::Cast(string)->GetHashcode(); for (auto it = table_.find(hash); it != table_.end(); it++) { auto foundString = it->second; - if (coretypes::String::StringsAreEqual(foundString, string)) { + if (coretypes::BaseString::StringsAreEqual(coretypes::BaseString::Cast((foundString)), + coretypes::BaseString::Cast((string)))) { return foundString; } } @@ -131,19 +135,20 @@ coretypes::String *StringTable::Table::GetString(coretypes::String *string, [[ma void StringTable::Table::ForceInternString(coretypes::String *string, [[maybe_unused]] const LanguageContext &ctx) { os::memory::WriteLockHolder holder(tableLock_); - table_.insert(std::pair(string->GetHashcode(), string)); + table_.insert(std::pair(coretypes::BaseString::Cast(string)->GetHashcode(), string)); } 
coretypes::String *StringTable::Table::InternString(coretypes::String *string, [[maybe_unused]] const LanguageContext &ctx) { ASSERT(string != nullptr); - uint32_t hashCode = string->GetHashcode(); + uint32_t hashCode = coretypes::BaseString::Cast(string)->GetHashcode(); os::memory::WriteLockHolder holder(tableLock_); // Check string is not present before actually creating and inserting for (auto it = table_.find(hashCode); it != table_.end(); it++) { auto foundString = it->second; - if (coretypes::String::StringsAreEqual(foundString, string)) { + if (coretypes::BaseString::StringsAreEqual(coretypes::BaseString::Cast((foundString)), + coretypes::BaseString::Cast((string)))) { return foundString; } } diff --git a/static_core/runtime/tests/mem_stats_additional_info_test.cpp b/static_core/runtime/tests/mem_stats_additional_info_test.cpp index 8a8b9ff5939ea4a649e98cbd0f399ef5db3baac8..d6cfbd4ae2ae82471b9234d3485deb5c19d141f3 100644 --- a/static_core/runtime/tests/mem_stats_additional_info_test.cpp +++ b/static_core/runtime/tests/mem_stats_additional_info_test.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include "gtest/gtest.h" diff --git a/static_core/runtime/tooling/pt_default_lang_extension.cpp b/static_core/runtime/tooling/pt_default_lang_extension.cpp index a616fd42c4de2505b1145f93fd4dc11d1d9e79d2..b261cd79a80d153111ec02043c18346c570ba05a 100644 --- a/static_core/runtime/tooling/pt_default_lang_extension.cpp +++ b/static_core/runtime/tooling/pt_default_lang_extension.cpp @@ -21,6 +21,7 @@ #include "include/object_header.h" #include "include/hclass.h" #include "runtime/mem/object_helpers-inl.h" +#include "runtime/include/coretypes/base_string.h" namespace ark::tooling { @@ -114,7 +115,7 @@ std::optional PtStaticDefaultExtension::GetAsString(const ObjectHea std::string value; - auto string = coretypes::String::Cast(const_cast(object)); + auto string = coretypes::BaseString::Cast(const_cast(object)); for (auto index = 0U; index < 
string->GetLength(); ++index) { value.push_back(string->At(index)); }