Skip to content

Commit 3da34b6

Browse files
committed
src: optimize utf-8 byte length calculation using simdutf
1 parent a9817fc commit 3da34b6

1 file changed

Lines changed: 19 additions & 46 deletions

File tree

src/node_buffer.cc

Lines changed: 19 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -764,40 +764,30 @@ void SlowByteLengthUtf8(const FunctionCallbackInfo<Value>& args) {
764764
Isolate* isolate = args.GetIsolate();
765765
Local<String> source = args[0].As<String>();
766766

767-
static constexpr int kSmallStringThreshold = 128;
768-
if (source->Length() <= kSmallStringThreshold) {
769-
size_t result = source->Utf8LengthV2(isolate);
770-
args.GetReturnValue().Set(static_cast<uint64_t>(result));
767+
int length = source->Length();
768+
769+
if (source->IsOneByte()) {
770+
args.GetReturnValue().Set(
771+
static_cast<uint64_t>(source->Utf8LengthV2(isolate)));
771772
return;
772773
}
773774

774-
size_t utf8_length;
775-
776-
// Fast path for external one-byte strings (common case for ASCII/Latin1)
777-
if (source->IsExternalOneByte()) {
778-
auto ext = source->GetExternalOneByteStringResource();
779-
utf8_length = simdutf::utf8_length_from_latin1(ext->data(), ext->length());
780-
args.GetReturnValue().Set(static_cast<uint64_t>(utf8_length));
775+
static constexpr int kSmallStringThreshold = 128;
776+
if (length <= kSmallStringThreshold) {
777+
args.GetReturnValue().Set(
778+
static_cast<uint64_t>(source->Utf8LengthV2(isolate)));
781779
return;
782780
}
783781

784-
// For non-external strings, use ValueView
785782
String::ValueView view(isolate, source);
786-
size_t length = view.length();
787-
788-
if (view.is_one_byte()) {
789-
auto data = reinterpret_cast<const char*>(view.data8());
790-
utf8_length = simdutf::utf8_length_from_latin1(data, length);
791-
} else {
792-
auto data = reinterpret_cast<const char16_t*>(view.data16());
793-
if (simdutf::validate_utf16(data, length)) {
794-
utf8_length = simdutf::utf8_length_from_utf16(data, length);
795-
} else {
796-
utf8_length = source->Utf8LengthV2(isolate);
797-
}
783+
auto data = reinterpret_cast<const char16_t*>(view.data16());
784+
if (simdutf::validate_utf16(data, length)) {
785+
args.GetReturnValue().Set(
786+
static_cast<uint64_t>(simdutf::utf8_length_from_utf16(data, length)));
787+
return;
798788
}
799-
800-
args.GetReturnValue().Set(static_cast<uint64_t>(utf8_length));
789+
args.GetReturnValue().Set(
790+
static_cast<uint64_t>(source->Utf8LengthV2(isolate)));
801791
}
802792

803793
uint32_t FastByteLengthUtf8(
@@ -812,33 +802,16 @@ uint32_t FastByteLengthUtf8(
812802

813803
int length = sourceStr->Length();
814804

815-
// For short inputs, use V8's path - function call overhead not worth it
816-
static constexpr int kSmallStringThreshold = 128;
817-
if (length <= kSmallStringThreshold) {
805+
if (sourceStr->IsOneByte()) {
818806
return sourceStr->Utf8LengthV2(isolate);
819807
}
820808

821-
// Fast path for external one-byte strings (common case for ASCII/Latin1)
822-
if (sourceStr->IsExternalOneByte()) {
823-
auto ext = sourceStr->GetExternalOneByteStringResource();
824-
return simdutf::utf8_length_from_latin1(ext->data(), ext->length());
825-
}
826-
827-
// For one-byte (Latin1/ASCII) strings, V8 is already fast and ValueView
828-
// creation has overhead. Use higher threshold before switching to simdutf.
829-
static constexpr int kOneByteLargeThreshold = 1024;
830-
if (sourceStr->IsOneByte() && length <= kOneByteLargeThreshold) {
809+
static constexpr int kSmallStringThreshold = 128;
810+
if (length <= kSmallStringThreshold) {
831811
return sourceStr->Utf8LengthV2(isolate);
832812
}
833813

834-
// For larger strings or two-byte strings, use ValueView + simdutf
835814
String::ValueView view(isolate, sourceStr);
836-
837-
if (view.is_one_byte()) {
838-
auto data = reinterpret_cast<const char*>(view.data8());
839-
return simdutf::utf8_length_from_latin1(data, length);
840-
}
841-
842815
auto data = reinterpret_cast<const char16_t*>(view.data16());
843816
if (simdutf::validate_utf16(data, length)) {
844817
return simdutf::utf8_length_from_utf16(data, length);

0 commit comments

Comments
 (0)