diff --git a/ext/bson/write.c b/ext/bson/write.c index c426129a1..42d565e77 100644 --- a/ext/bson/write.c +++ b/ext/bson/write.c @@ -34,6 +34,17 @@ static void pvt_put_bson_key(byte_buffer_t *b, VALUE string); static VALUE pvt_bson_byte_buffer_put_bson_partial_string(VALUE self, const char *str, int32_t length); static VALUE pvt_bson_byte_buffer_put_binary_string(VALUE self, const char *str, int32_t length); +/* Raise ArgumentError if length exceeds the BSON int32_t string-length limit. + * The bound is INT32_MAX - 5 rather than INT32_MAX because the binary-string + * path writes length + 5 bytes total (4-byte int32 length prefix + payload + + * 1-byte null terminator). Allowing length == INT32_MAX would make that + * addition overflow signed 32-bit arithmetic (undefined behavior in C). */ +static void pvt_check_string_length(long length) { + if (length > INT32_MAX - 5) { + rb_raise(rb_eArgError, "String length %ld exceeds BSON maximum", length); + } +} + static int fits_int32(int64_t i64){ return i64 >= INT32_MIN && i64 <= INT32_MAX; } @@ -211,7 +222,7 @@ static VALUE pvt_bson_encode_to_utf8(VALUE string) { VALUE encoding; VALUE utf8_string; const char *str; - int32_t length; + long length; existing_encoding_name = rb_funcall( rb_funcall(string, rb_intern("encoding"), 0), @@ -222,6 +233,7 @@ static VALUE pvt_bson_encode_to_utf8(VALUE string) { str = RSTRING_PTR(utf8_string); length = RSTRING_LEN(utf8_string); + pvt_check_string_length(length); rb_bson_utf8_validate(str, length, true, "String"); } else { @@ -238,17 +250,18 @@ VALUE rb_bson_byte_buffer_put_string(VALUE self, VALUE string) { VALUE utf8_string; const char *str; - int32_t length; + long length; utf8_string = pvt_bson_encode_to_utf8(string); /* At this point utf8_string contains valid utf-8 byte sequences only */ str = RSTRING_PTR(utf8_string); length = RSTRING_LEN(utf8_string); + pvt_check_string_length(length); RB_GC_GUARD(utf8_string); - return pvt_bson_byte_buffer_put_binary_string(self, str, length); + return pvt_bson_byte_buffer_put_binary_string(self, str, (int32_t)length); } /** @@ -271,7 +284,7 @@ VALUE rb_bson_byte_buffer_put_cstring(VALUE self, VALUE obj) { VALUE string; const char *str; - int32_t length; + long length; switch (TYPE(obj)) { case T_STRING: @@ -289,8 +302,9 @@ VALUE rb_bson_byte_buffer_put_cstring(VALUE self, VALUE obj) str = RSTRING_PTR(string); length = RSTRING_LEN(string); + pvt_check_string_length(length); RB_GC_GUARD(string); - return pvt_bson_byte_buffer_put_bson_partial_string(self, str, length); + return pvt_bson_byte_buffer_put_bson_partial_string(self, str, (int32_t)length); } /** @@ -317,10 +331,11 @@ VALUE rb_bson_byte_buffer_put_symbol(VALUE self, VALUE symbol) { VALUE symbol_str = rb_sym_to_s(symbol); const char *str = RSTRING_PTR(symbol_str); - const int32_t length = RSTRING_LEN(symbol_str); + long length = RSTRING_LEN(symbol_str); + pvt_check_string_length(length); RB_GC_GUARD(symbol_str); - return pvt_bson_byte_buffer_put_binary_string(self, str, length); + return pvt_bson_byte_buffer_put_binary_string(self, str, (int32_t)length); } /** @@ -328,9 +343,10 @@ VALUE rb_bson_byte_buffer_put_symbol(VALUE self, VALUE symbol) */ void pvt_put_bson_key(byte_buffer_t *b, VALUE string){ char *c_str = RSTRING_PTR(string); - size_t length = RSTRING_LEN(string); + long length = RSTRING_LEN(string); + pvt_check_string_length(length); - pvt_put_cstring(b, c_str, length, "Key"); + pvt_put_cstring(b, c_str, (int32_t)length, "Key"); } void pvt_replace_int32(byte_buffer_t *b, int32_t position, int32_t newval) diff --git a/spec/bson/string_length_overflow_spec.rb b/spec/bson/string_length_overflow_spec.rb new file mode 100644 index 000000000..b4ca9a1bd --- /dev/null +++ b/spec/bson/string_length_overflow_spec.rb @@ -0,0 +1,64 @@ +# frozen_string_literal: true + +# Copyright (C) 2026 MongoDB Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +require 'spec_helper' + +# Tests for the fix of RUBY-3894: Heap Buffer Overflow in put_string. +# +# BSON encodes string lengths as int32_t. A Ruby string longer than INT32_MAX +# bytes caused a silent integer truncation in the C extension, which then +# passed the resulting negative value to the UTF-8 validator as size_t, wrapping +# to near UINT64_MAX and driving reads far past the heap allocation. +# +# These tests require approximately 2 GB of free memory and only run when +# STRESS=1 is set in the environment. They are also MRI-only because the bug +# lives in the C native extension. +describe 'BSON string length overflow protection' do + before(:all) do + skip 'C native extension not used on JRuby' if BSON::Environment.jruby? + skip 'Set STRESS=1 to run tests that require ~2 GB of free memory' unless ENV['STRESS'] == '1' + end + + let(:buffer) { BSON::ByteBuffer.new } + + # 2**31 bytes is one byte past INT32_MAX, which is the smallest input that + # triggers the int32_t truncation. + let(:huge_string) { 'A' * (2**31) } + + describe '#put_string' do + it 'raises ArgumentError instead of overflowing' do + expect do + buffer.put_string(huge_string) + end.to raise_error(ArgumentError, /String length \d+ exceeds BSON maximum/) + end + end + + describe '#put_cstring' do + it 'raises ArgumentError instead of overflowing' do + expect do + buffer.put_cstring(huge_string) + end.to raise_error(ArgumentError, /String length \d+ exceeds BSON maximum/) + end + end + + describe '#put_symbol' do + it 'raises ArgumentError instead of overflowing' do + expect do + buffer.put_symbol(huge_string.to_sym) + end.to raise_error(ArgumentError, /String length \d+ exceeds BSON maximum/) + end + end +end