From 2491570c5a6c5acec63d4f29d72a5d0db4e87ec1 Mon Sep 17 00:00:00 2001
From: Christian Leopoldseder
Date: Mon, 27 Apr 2026 07:44:39 -0700
Subject: [PATCH] feat: GenAI SDK client(multimodal) - Add `to_batch_job_source` and `get_batch_job_destination` to `MultimodalDataset`

PiperOrigin-RevId: 906352851
---
 .../test_create_multimodal_datasets.py        |  9 ++++++
 .../genai/test_multimodal_datasets_genai.py   | 31 +++++++++++++++++++
 vertexai/_genai/_datasets_utils.py            |  7 +++++
 vertexai/_genai/types/common.py               | 22 +++++++++++++
 4 files changed, 69 insertions(+)

diff --git a/tests/unit/vertexai/genai/replays/test_create_multimodal_datasets.py b/tests/unit/vertexai/genai/replays/test_create_multimodal_datasets.py
index 7925380121..78a65b78b8 100644
--- a/tests/unit/vertexai/genai/replays/test_create_multimodal_datasets.py
+++ b/tests/unit/vertexai/genai/replays/test_create_multimodal_datasets.py
@@ -81,6 +81,15 @@ def mock_generate_multimodal_dataset_display_name():
         yield mock_generate
 
 
+@pytest.fixture
+def mock_get_batch_job_unique_name():
+    with mock.patch.object(
+        _datasets_utils, "get_batch_job_unique_name"
+    ) as mock_unique_name:
+        mock_unique_name.return_value = "12345678901234_abcde"
+        yield mock_unique_name
+
+
 def test_create_dataset(client):
     create_dataset_operation = client.datasets._create_multimodal_dataset(
         name="projects/vertex-sdk-dev/locations/us-central1",
diff --git a/tests/unit/vertexai/genai/test_multimodal_datasets_genai.py b/tests/unit/vertexai/genai/test_multimodal_datasets_genai.py
index c120bcc95c..699a46eb9e 100644
--- a/tests/unit/vertexai/genai/test_multimodal_datasets_genai.py
+++ b/tests/unit/vertexai/genai/test_multimodal_datasets_genai.py
@@ -36,6 +36,15 @@ def mock_import_bigframes():
         yield mock_import_bigframes
 
 
+@pytest.fixture
+def mock_get_batch_job_unique_name():
+    with mock.patch.object(
+        _datasets_utils, "get_batch_job_unique_name"
+    ) as mock_unique_name:
+        mock_unique_name.return_value = "12345678901234_abcde"
+        yield mock_unique_name
+
+
 class TestMultimodalDataset:
 
     def test_read_config(self):
@@ -157,6 +166,28 @@ def test_to_bigframes(self, mock_import_bigframes):
             "project.dataset.table"
         )
 
+    def test_get_batch_job_destination(self, mock_get_batch_job_unique_name):
+        dataset = types.MultimodalDataset(
+            name="projects/vertex-sdk-dev/locations/us-central1/datasets/12345",
+            display_name="test_multimodal_dataset",
+            metadata={
+                "inputConfig": {
+                    "bigquerySource": {
+                        "uri": "bq://target_project.target_dataset.target_table"
+                    },
+                },
+            },
+        )
+        destination = dataset.get_batch_job_destination()
+        assert (
+            destination.vertex_dataset.display_name
+            == "test_multimodal_dataset_batch_output_12345678901234_abcde"
+        )
+        assert (
+            destination.vertex_dataset.bigquery_destination
+            == "bq://target_project.target_dataset.target_table_batch_output_12345678901234_abcde"
+        )
+
 
 class TestGeminiRequestReadConfig:
     def test_single_turn_template(self):
diff --git a/vertexai/_genai/_datasets_utils.py b/vertexai/_genai/_datasets_utils.py
index bf2ffd7cf2..866cb18f85 100644
--- a/vertexai/_genai/_datasets_utils.py
+++ b/vertexai/_genai/_datasets_utils.py
@@ -242,6 +242,13 @@ def generate_multimodal_dataset_display_name() -> str:
     return f"MultimodalDataset {datetime.datetime.now().isoformat(sep=' ')}"
 
 
+def get_batch_job_unique_name() -> str:
+    """Generates a unique name suffix for a batch job destination."""
+    timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
+    unique_id = uuid.uuid4().hex[0:5]
+    return f"{timestamp}_{unique_id}"
+
+
 def save_dataframe_to_bigquery(
     dataframe: "bigframes.pandas.DataFrame",  # type: ignore # noqa: F821
     target_table_id: str,
diff --git a/vertexai/_genai/types/common.py b/vertexai/_genai/types/common.py
index 12e3a4549f..50a23f950b 100644
--- a/vertexai/_genai/types/common.py
+++ b/vertexai/_genai/types/common.py
@@ -15334,6 +15334,28 @@ def to_bigframes(
             raise ValueError("Multimodal dataset bigquery source uri is not set.")
         return bigframes.pandas.read_gbq_table(self.bigquery_uri.removeprefix("bq://"))
 
+    def to_batch_job_source(self) -> "genai_types.BatchJobSource":
+        """Converts the dataset to a BatchJobSource."""
+        return genai_types.BatchJobSource(
+            vertex_dataset_name=self.name,
+        )
+
+    def get_batch_job_destination(self) -> "genai_types.BatchJobDestination":
+        """Converts the dataset to a BatchJobDestination."""
+        from .. import _datasets_utils
+
+        unique_name = _datasets_utils.get_batch_job_unique_name()
+        bigquery_uri = self.bigquery_uri
+        if bigquery_uri is None:
+            raise ValueError("Multimodal dataset bigquery source uri is not set.")
+        curr_display_name = self.display_name or "genai_batch_job"
+        return genai_types.BatchJobDestination(
+            vertex_dataset=genai_types.VertexMultimodalDatasetDestination(
+                display_name=f"{curr_display_name}_batch_output_{unique_name}",
+                bigquery_destination=f"{bigquery_uri}_batch_output_{unique_name}",
+            )
+        )
+
 
 class MultimodalDatasetDict(TypedDict, total=False):
     """Represents a multimodal dataset."""