Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 25 additions & 9 deletions ext/bson/write.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,17 @@ static void pvt_put_bson_key(byte_buffer_t *b, VALUE string);
static VALUE pvt_bson_byte_buffer_put_bson_partial_string(VALUE self, const char *str, int32_t length);
static VALUE pvt_bson_byte_buffer_put_binary_string(VALUE self, const char *str, int32_t length);

/* Raise ArgumentError if length exceeds the BSON int32_t string-length limit.
* The bound is INT32_MAX - 5 rather than INT32_MAX because the binary-string
* path writes length + 5 bytes total (4-byte int32 length prefix + payload +
* 1-byte null terminator). Allowing length == INT32_MAX would make that
* addition overflow signed 32-bit arithmetic (undefined behavior in C). */
static void pvt_check_string_length(long length) {
if (length > INT32_MAX - 5) {
rb_raise(rb_eArgError, "String length %ld exceeds BSON maximum", length);
}
}

static int fits_int32(int64_t i64){
return i64 >= INT32_MIN && i64 <= INT32_MAX;
}
Expand Down Expand Up @@ -211,7 +222,7 @@ static VALUE pvt_bson_encode_to_utf8(VALUE string) {
VALUE encoding;
VALUE utf8_string;
const char *str;
int32_t length;
long length;

existing_encoding_name = rb_funcall(
rb_funcall(string, rb_intern("encoding"), 0),
Expand All @@ -222,6 +233,7 @@ static VALUE pvt_bson_encode_to_utf8(VALUE string) {

str = RSTRING_PTR(utf8_string);
length = RSTRING_LEN(utf8_string);
pvt_check_string_length(length);

rb_bson_utf8_validate(str, length, true, "String");
} else {
Expand All @@ -238,17 +250,18 @@ VALUE rb_bson_byte_buffer_put_string(VALUE self, VALUE string)
{
VALUE utf8_string;
const char *str;
int32_t length;
long length;

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we want to change this?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's because RSTRING_LEN returns long, not int32_t, so the result could (in extreme cases) be cast from long to a large negative int32_t, which could (potentially) cause problems.


utf8_string = pvt_bson_encode_to_utf8(string);
/* At this point utf8_string contains valid utf-8 byte sequences only */

str = RSTRING_PTR(utf8_string);
length = RSTRING_LEN(utf8_string);
pvt_check_string_length(length);

RB_GC_GUARD(utf8_string);

return pvt_bson_byte_buffer_put_binary_string(self, str, length);
return pvt_bson_byte_buffer_put_binary_string(self, str, (int32_t)length);
}

/**
Expand All @@ -271,7 +284,7 @@ VALUE rb_bson_byte_buffer_put_cstring(VALUE self, VALUE obj)
{
VALUE string;
const char *str;
int32_t length;
long length;

switch (TYPE(obj)) {
case T_STRING:
Expand All @@ -289,8 +302,9 @@ VALUE rb_bson_byte_buffer_put_cstring(VALUE self, VALUE obj)

str = RSTRING_PTR(string);
length = RSTRING_LEN(string);
pvt_check_string_length(length);
RB_GC_GUARD(string);
return pvt_bson_byte_buffer_put_bson_partial_string(self, str, length);
return pvt_bson_byte_buffer_put_bson_partial_string(self, str, (int32_t)length);
}

/**
Expand All @@ -317,20 +331,22 @@ VALUE rb_bson_byte_buffer_put_symbol(VALUE self, VALUE symbol)
{
VALUE symbol_str = rb_sym_to_s(symbol);
const char *str = RSTRING_PTR(symbol_str);
const int32_t length = RSTRING_LEN(symbol_str);
long length = RSTRING_LEN(symbol_str);
pvt_check_string_length(length);

RB_GC_GUARD(symbol_str);
return pvt_bson_byte_buffer_put_binary_string(self, str, length);
return pvt_bson_byte_buffer_put_binary_string(self, str, (int32_t)length);
}

/**
* Write a hash key to the byte buffer, validating it if requested
*/
void pvt_put_bson_key(byte_buffer_t *b, VALUE string){
char *c_str = RSTRING_PTR(string);
size_t length = RSTRING_LEN(string);
long length = RSTRING_LEN(string);
pvt_check_string_length(length);

pvt_put_cstring(b, c_str, length, "Key");
pvt_put_cstring(b, c_str, (int32_t)length, "Key");
}

void pvt_replace_int32(byte_buffer_t *b, int32_t position, int32_t newval)
Expand Down
64 changes: 64 additions & 0 deletions spec/bson/string_length_overflow_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# frozen_string_literal: true

# Copyright (C) 2026 MongoDB Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

require 'spec_helper'

# Tests for the fix of RUBY-3894: Heap Buffer Overflow in put_string.
#
# BSON encodes string lengths as int32_t. A Ruby string longer than INT32_MAX
# bytes caused a silent integer truncation in the C extension, which then
# passed the resulting negative value to the UTF-8 validator as size_t, wrapping
# to near UINT64_MAX and driving reads far past the heap allocation.
#
# These tests require approximately 2 GB of free memory and only run when
# STRESS=1 is set in the environment. They are also MRI-only because the bug
# lives in the C native extension.
describe 'BSON string length overflow protection' do
before(:all) do
skip 'C native extension not used on JRuby' if BSON::Environment.jruby?
skip 'Set STRESS=1 to run tests that require ~2 GB of free memory' unless ENV['STRESS'] == '1'
end

let(:buffer) { BSON::ByteBuffer.new }

# 2**31 bytes is one byte past INT32_MAX, which is the smallest input that
# triggers the int32_t truncation.
let(:huge_string) { 'A' * (2**31) }

describe '#put_string' do
it 'raises ArgumentError instead of overflowing' do
expect do
buffer.put_string(huge_string)
end.to raise_error(ArgumentError, /String length \d+ exceeds BSON maximum/)
end
end

describe '#put_cstring' do
it 'raises ArgumentError instead of overflowing' do
expect do
buffer.put_cstring(huge_string)
end.to raise_error(ArgumentError, /String length \d+ exceeds BSON maximum/)
end
end

describe '#put_symbol' do
it 'raises ArgumentError instead of overflowing' do
expect do
buffer.put_symbol(huge_string.to_sym)
end.to raise_error(ArgumentError, /String length \d+ exceeds BSON maximum/)
end
end
end
Loading