diff --git a/src/node_buffer.cc b/src/node_buffer.cc index e40a21288ee79d..c681ecb675c982 100644 --- a/src/node_buffer.cc +++ b/src/node_buffer.cc @@ -761,9 +761,27 @@ void StringWrite(const FunctionCallbackInfo& args) { void SlowByteLengthUtf8(const FunctionCallbackInfo& args) { CHECK(args[0]->IsString()); - // Fast case: avoid StringBytes on UTF8 string. Jump to v8. - size_t result = args[0].As()->Utf8LengthV2(args.GetIsolate()); - args.GetReturnValue().Set(static_cast(result)); + Isolate* isolate = args.GetIsolate(); + Local source = args[0].As(); + + static constexpr int kSmallStringThreshold = 128; + int length = source->Length(); + + if (length <= kSmallStringThreshold || source->IsOneByte()) { + args.GetReturnValue().Set( + static_cast(source->Utf8LengthV2(isolate))); + return; + } + + String::ValueView view(isolate, source); + auto data = reinterpret_cast(view.data16()); + if (simdutf::validate_utf16(data, length)) { + args.GetReturnValue().Set( + static_cast(simdutf::utf8_length_from_utf16(data, length))); + return; + } + args.GetReturnValue().Set( + static_cast(source->Utf8LengthV2(isolate))); } uint32_t FastByteLengthUtf8( @@ -776,49 +794,19 @@ uint32_t FastByteLengthUtf8( CHECK(sourceValue->IsString()); Local sourceStr = sourceValue.As(); - if (!sourceStr->IsExternalOneByte()) { + static constexpr int kSmallStringThreshold = 128; + int length = sourceStr->Length(); + + if (length <= kSmallStringThreshold || sourceStr->IsOneByte()) { return sourceStr->Utf8LengthV2(isolate); } - auto source = sourceStr->GetExternalOneByteStringResource(); - // For short inputs, the function call overhead to simdutf is maybe - // not worth it, reserve simdutf for long strings. - if (source->length() > 128) { - return simdutf::utf8_length_from_latin1(source->data(), source->length()); - } - - uint32_t length = source->length(); - const auto input = reinterpret_cast(source->data()); - - uint32_t answer = length; - uint32_t i = 0; - - auto pop = [](uint64_t v) { - return static_cast(((v >> 7) & UINT64_C(0x0101010101010101)) * - UINT64_C(0x0101010101010101) >> - 56); - }; - for (; i + 32 <= length; i += 32) { - uint64_t v; - memcpy(&v, input + i, 8); - answer += pop(v); - memcpy(&v, input + i + 8, 8); - answer += pop(v); - memcpy(&v, input + i + 16, 8); - answer += pop(v); - memcpy(&v, input + i + 24, 8); - answer += pop(v); + String::ValueView view(isolate, sourceStr); + auto data = reinterpret_cast(view.data16()); + if (simdutf::validate_utf16(data, length)) { + return simdutf::utf8_length_from_utf16(data, length); } - for (; i + 8 <= length; i += 8) { - uint64_t v; - memcpy(&v, input + i, 8); - answer += pop(v); - } - for (; i + 1 <= length; i += 1) { - answer += input[i] >> 7; - } - - return answer; + return sourceStr->Utf8LengthV2(isolate); } static CFunction fast_byte_length_utf8(CFunction::Make(FastByteLengthUtf8)); @@ -1252,8 +1240,7 @@ static void IsAscii(const FunctionCallbackInfo& args) { env, "Cannot validate on a detached buffer"); } - args.GetReturnValue().Set( - !simdutf::validate_ascii_with_errors(abv.data(), abv.length()).error); + args.GetReturnValue().Set(simdutf::validate_ascii(abv.data(), abv.length())); } void SetBufferPrototype(const FunctionCallbackInfo& args) {