@@ -761,9 +761,33 @@ void StringWrite(const FunctionCallbackInfo<Value>& args) {
761761void SlowByteLengthUtf8 (const FunctionCallbackInfo<Value>& args) {
762762 CHECK (args[0 ]->IsString ());
763763
764- // Fast case: avoid StringBytes on UTF8 string. Jump to v8.
765- size_t result = args[0 ].As <String>()->Utf8LengthV2 (args.GetIsolate ());
766- args.GetReturnValue ().Set (static_cast <uint64_t >(result));
764+ Isolate* isolate = args.GetIsolate ();
765+ Local<String> source = args[0 ].As <String>();
766+
767+ int length = source->Length ();
768+
769+ if (source->IsOneByte ()) {
770+ args.GetReturnValue ().Set (
771+ static_cast <uint64_t >(source->Utf8LengthV2 (isolate)));
772+ return ;
773+ }
774+
775+ static constexpr int kSmallStringThreshold = 128 ;
776+ if (length <= kSmallStringThreshold ) {
777+ args.GetReturnValue ().Set (
778+ static_cast <uint64_t >(source->Utf8LengthV2 (isolate)));
779+ return ;
780+ }
781+
782+ String::ValueView view (isolate, source);
783+ auto data = reinterpret_cast <const char16_t *>(view.data16 ());
784+ if (simdutf::validate_utf16 (data, length)) {
785+ args.GetReturnValue ().Set (
786+ static_cast <uint64_t >(simdutf::utf8_length_from_utf16 (data, length)));
787+ return ;
788+ }
789+ args.GetReturnValue ().Set (
790+ static_cast <uint64_t >(source->Utf8LengthV2 (isolate)));
767791}
768792
769793uint32_t FastByteLengthUtf8 (
@@ -776,49 +800,23 @@ uint32_t FastByteLengthUtf8(
776800 CHECK (sourceValue->IsString ());
777801 Local<String> sourceStr = sourceValue.As <String>();
778802
779- if (!sourceStr->IsExternalOneByte ()) {
803+ int length = sourceStr->Length ();
804+
805+ if (sourceStr->IsOneByte ()) {
780806 return sourceStr->Utf8LengthV2 (isolate);
781807 }
782- auto source = sourceStr->GetExternalOneByteStringResource ();
783- // For short inputs, the function call overhead to simdutf is maybe
784- // not worth it, reserve simdutf for long strings.
785- if (source->length () > 128 ) {
786- return simdutf::utf8_length_from_latin1 (source->data (), source->length ());
787- }
788-
789- uint32_t length = source->length ();
790- const auto input = reinterpret_cast <const uint8_t *>(source->data ());
791-
792- uint32_t answer = length;
793- uint32_t i = 0 ;
794-
795- auto pop = [](uint64_t v) {
796- return static_cast <size_t >(((v >> 7 ) & UINT64_C (0x0101010101010101 )) *
797- UINT64_C (0x0101010101010101 ) >>
798- 56 );
799- };
800808
801- for (; i + 32 <= length; i += 32 ) {
802- uint64_t v;
803- memcpy (&v, input + i, 8 );
804- answer += pop (v);
805- memcpy (&v, input + i + 8 , 8 );
806- answer += pop (v);
807- memcpy (&v, input + i + 16 , 8 );
808- answer += pop (v);
809- memcpy (&v, input + i + 24 , 8 );
810- answer += pop (v);
811- }
812- for (; i + 8 <= length; i += 8 ) {
813- uint64_t v;
814- memcpy (&v, input + i, 8 );
815- answer += pop (v);
816- }
817- for (; i + 1 <= length; i += 1 ) {
818- answer += input[i] >> 7 ;
809+ static constexpr int kSmallStringThreshold = 128 ;
810+ if (length <= kSmallStringThreshold ) {
811+ return sourceStr->Utf8LengthV2 (isolate);
819812 }
820813
821- return answer;
814+ String::ValueView view (isolate, sourceStr);
815+ auto data = reinterpret_cast <const char16_t *>(view.data16 ());
816+ if (simdutf::validate_utf16 (data, length)) {
817+ return simdutf::utf8_length_from_utf16 (data, length);
818+ }
819+ return sourceStr->Utf8LengthV2 (isolate);
822820}
823821
// File-local CFunction object built from FastByteLengthUtf8 via
// CFunction::Make().
// NOTE(review): presumably this is what gets registered as the fast-call
// counterpart of SlowByteLengthUtf8 — confirm at the registration site.
824822static CFunction fast_byte_length_utf8 (CFunction::Make(FastByteLengthUtf8));
@@ -1252,8 +1250,7 @@ static void IsAscii(const FunctionCallbackInfo<Value>& args) {
12521250 env, " Cannot validate on a detached buffer" );
12531251 }
12541252
1255- args.GetReturnValue ().Set (
1256- !simdutf::validate_ascii_with_errors (abv.data (), abv.length ()).error );
1253+ args.GetReturnValue ().Set (simdutf::validate_ascii (abv.data (), abv.length ()));
12571254}
12581255
12591256void SetBufferPrototype (const FunctionCallbackInfo<Value>& args) {
0 commit comments