Skip to content

Commit ef28e76

Browse files
committed
src: optimize utf-8 byte length calculation using simdutf
1 parent 330e3ee commit ef28e76

1 file changed

Lines changed: 40 additions & 43 deletions

File tree

src/node_buffer.cc

Lines changed: 40 additions & 43 deletions
Original file line number | Diff line number | Diff line change
@@ -761,9 +761,33 @@ void StringWrite(const FunctionCallbackInfo<Value>& args) {
761761
void SlowByteLengthUtf8(const FunctionCallbackInfo<Value>& args) {
762762
CHECK(args[0]->IsString());
763763

764-
// Fast case: avoid StringBytes on UTF8 string. Jump to v8.
765-
size_t result = args[0].As<String>()->Utf8LengthV2(args.GetIsolate());
766-
args.GetReturnValue().Set(static_cast<uint64_t>(result));
764+
Isolate* isolate = args.GetIsolate();
765+
Local<String> source = args[0].As<String>();
766+
767+
int length = source->Length();
768+
769+
if (source->IsOneByte()) {
770+
args.GetReturnValue().Set(
771+
static_cast<uint64_t>(source->Utf8LengthV2(isolate)));
772+
return;
773+
}
774+
775+
static constexpr int kSmallStringThreshold = 128;
776+
if (length <= kSmallStringThreshold) {
777+
args.GetReturnValue().Set(
778+
static_cast<uint64_t>(source->Utf8LengthV2(isolate)));
779+
return;
780+
}
781+
782+
String::ValueView view(isolate, source);
783+
auto data = reinterpret_cast<const char16_t*>(view.data16());
784+
if (simdutf::validate_utf16(data, length)) {
785+
args.GetReturnValue().Set(
786+
static_cast<uint64_t>(simdutf::utf8_length_from_utf16(data, length)));
787+
return;
788+
}
789+
args.GetReturnValue().Set(
790+
static_cast<uint64_t>(source->Utf8LengthV2(isolate)));
767791
}
768792

769793
uint32_t FastByteLengthUtf8(
@@ -776,49 +800,23 @@ uint32_t FastByteLengthUtf8(
776800
CHECK(sourceValue->IsString());
777801
Local<String> sourceStr = sourceValue.As<String>();
778802

779-
if (!sourceStr->IsExternalOneByte()) {
803+
int length = sourceStr->Length();
804+
805+
if (sourceStr->IsOneByte()) {
780806
return sourceStr->Utf8LengthV2(isolate);
781807
}
782-
auto source = sourceStr->GetExternalOneByteStringResource();
783-
// For short inputs, the function call overhead to simdutf is maybe
784-
// not worth it, reserve simdutf for long strings.
785-
if (source->length() > 128) {
786-
return simdutf::utf8_length_from_latin1(source->data(), source->length());
787-
}
788-
789-
uint32_t length = source->length();
790-
const auto input = reinterpret_cast<const uint8_t*>(source->data());
791-
792-
uint32_t answer = length;
793-
uint32_t i = 0;
794-
795-
auto pop = [](uint64_t v) {
796-
return static_cast<size_t>(((v >> 7) & UINT64_C(0x0101010101010101)) *
797-
UINT64_C(0x0101010101010101) >>
798-
56);
799-
};
800808

801-
for (; i + 32 <= length; i += 32) {
802-
uint64_t v;
803-
memcpy(&v, input + i, 8);
804-
answer += pop(v);
805-
memcpy(&v, input + i + 8, 8);
806-
answer += pop(v);
807-
memcpy(&v, input + i + 16, 8);
808-
answer += pop(v);
809-
memcpy(&v, input + i + 24, 8);
810-
answer += pop(v);
811-
}
812-
for (; i + 8 <= length; i += 8) {
813-
uint64_t v;
814-
memcpy(&v, input + i, 8);
815-
answer += pop(v);
816-
}
817-
for (; i + 1 <= length; i += 1) {
818-
answer += input[i] >> 7;
809+
static constexpr int kSmallStringThreshold = 128;
810+
if (length <= kSmallStringThreshold) {
811+
return sourceStr->Utf8LengthV2(isolate);
819812
}
820813

821-
return answer;
814+
String::ValueView view(isolate, sourceStr);
815+
auto data = reinterpret_cast<const char16_t*>(view.data16());
816+
if (simdutf::validate_utf16(data, length)) {
817+
return simdutf::utf8_length_from_utf16(data, length);
818+
}
819+
return sourceStr->Utf8LengthV2(isolate);
822820
}
823821

824822
static CFunction fast_byte_length_utf8(CFunction::Make(FastByteLengthUtf8));
@@ -1252,8 +1250,7 @@ static void IsAscii(const FunctionCallbackInfo<Value>& args) {
12521250
env, "Cannot validate on a detached buffer");
12531251
}
12541252

1255-
args.GetReturnValue().Set(
1256-
!simdutf::validate_ascii_with_errors(abv.data(), abv.length()).error);
1253+
args.GetReturnValue().Set(simdutf::validate_ascii(abv.data(), abv.length()));
12571254
}
12581255

12591256
void SetBufferPrototype(const FunctionCallbackInfo<Value>& args) {

0 commit comments

Comments
 (0)