Commit e479d66

Merge pull request #53944 from GraemeMalcolm/main
Updated gen-ai image module
2 parents 70df3ce + 816f928 commit e479d66

12 files changed

Lines changed: 107 additions & 99 deletions

learn-pr/wwl-data-ai/develop-generative-ai-vision-apps/1-introduction.yml

Lines changed: 1 addition & 1 deletion

@@ -4,7 +4,7 @@ title: Introduction
 metadata:
   title: Introduction
   description: "Get started with vision-enabled generative AI models."
-  ms.date: 04/29/2025
+  ms.date: 03/23/2026
   author: gmalc
   ms.author: gmalc
   ms.topic: unit

learn-pr/wwl-data-ai/develop-generative-ai-vision-apps/2-deploy-multimodal-model.yml

Lines changed: 4 additions & 4 deletions

@@ -1,10 +1,10 @@
 ### YamlMime:ModuleUnit
 uid: learn.wwl.develop-generative-ai-vision-apps.deploy-multimodal-models
-title: Deploy a multimodal model
+title: Use a vision-capable model in the Microsoft Foundry portal
 metadata:
-  title: Deploy a multimodal model
-  description: "Deploy a multimodal model that can respond to image-based prompts."
-  ms.date: 04/29/2025
+  title: Use a vision-capable model in the Microsoft Foundry portal
+  description: "Learn how to use a vision-capable model in the Microsoft Foundry portal."
+  ms.date: 03/23/2026
   author: gmalc
   ms.author: gmalc
   ms.topic: unit

learn-pr/wwl-data-ai/develop-generative-ai-vision-apps/3-develop-visual-chat-app.yml

Lines changed: 2 additions & 3 deletions

@@ -3,12 +3,11 @@ uid: learn.wwl.develop-generative-ai-vision-apps.develop-visual-chat-apps
 title: Develop a vision-based chat app
 metadata:
   title: Develop a vision-based chat app
-  description: "Use Microsoft Foundry, Azure AI Model Inference, and Azure OpenAI SDKs to develop a vision-based chat app."
-  ms.date: 04/29/2025
+  description: "Use Microsoft Foundry and OpenAI APIs to develop a vision-based chat app."
+  ms.date: 03/23/2026
   author: gmalc
   ms.author: gmalc
   ms.topic: unit
 durationInMinutes: 5
 content: |
   [!include[](includes/3-develop-visual-chat-app.md)]
-
learn-pr/wwl-data-ai/develop-generative-ai-vision-apps/4-exercise.yml

Lines changed: 2 additions & 2 deletions

@@ -3,8 +3,8 @@ uid: learn.wwl.develop-generative-ai-vision-apps.exercise
 title: Exercise - Develop a vision-enabled chat app
 metadata:
   title: Exercise - Develop a vision-enabled chat app
-  description: "Get practical experience of deploying a multimodal model and creating a vision-enabled chat app."
-  ms.date: 04/29/2025
+  description: "Get practical experience of creating a vision-enabled chat app."
+  ms.date: 03/23/2026
   author: gmalc
   ms.author: gmalc
   ms.topic: unit

learn-pr/wwl-data-ai/develop-generative-ai-vision-apps/5-knowledge-check.yml

Lines changed: 34 additions & 35 deletions

@@ -4,45 +4,44 @@ title: Module assessment
 metadata:
   title: Module assessment
   description: "Check your learning on vision-enabled generative AI."
-  ms.date: 04/29/2025
+  ms.date: 03/23/2026
   author: gmalc
   ms.author: gmalc
   ms.topic: unit
 durationInMinutes: 3
 content: |
 quiz:
   questions:
-  - content: "Which kind of model can you use to respond to visual input?"
-    choices:
-    - content: "Only OpenAI GPT models"
-      isCorrect: false
-      explanation: "Incorrect."
-    - content: "Embedding models"
-      isCorrect: false
-      explanation: "Incorrect."
-    - content: "Multimodal models"
-      isCorrect: true
-      explanation: "Correct."
-  - content: "How can you submit a prompt that asks a model to analyze an image?"
-    choices:
-    - content: "Submit one prompt with an image-based message followed by another prompt with a text-based message."
-      isCorrect: false
-      explanation: "Incorrect."
-    - content: "Submit a prompt that contains a multi-part user message, containing both text content and image content."
-      isCorrect: true
-      explanation: "Correct."
-    - content: "Submit the image as the system message and the instruction or question as the user message."
-      isCorrect: false
-      explanation: "Incorrect."
-  - content: "How can you include an image in a message?"
-    choices:
-    - content: "As a URL or as binary data"
-      isCorrect: true
-      explanation: "Correct."
-    - content: "Only as a URL"
-      isCorrect: false
-      explanation: "Incorrect."
-    - content: "Only as binary data"
-      isCorrect: false
-      explanation: "Incorrect."
-
+  - content: "Which kind of model can you use to respond to visual input?"
+    choices:
+    - content: "Only OpenAI GPT models"
+      isCorrect: false
+      explanation: "Incorrect."
+    - content: "Embedding models"
+      isCorrect: false
+      explanation: "Incorrect."
+    - content: "Multimodal models"
+      isCorrect: true
+      explanation: "Correct."
+  - content: "How can you submit a prompt that asks a model to analyze an image?"
+    choices:
+    - content: "Submit one prompt with an image-based message followed by another prompt with a text-based message."
+      isCorrect: false
+      explanation: "Incorrect."
+    - content: "Submit a prompt that contains a multi-part user message, containing both text content and image content."
+      isCorrect: true
+      explanation: "Correct."
+    - content: "Submit the image as the system message and the instruction or question as the user message."
+      isCorrect: false
+      explanation: "Incorrect."
+  - content: "How can you include an image in a message?"
+    choices:
+    - content: "As a URL or as binary data"
+      isCorrect: true
+      explanation: "Correct."
+    - content: "Only as a URL"
+      isCorrect: false
+      explanation: "Incorrect."
+    - content: "Only as binary data"
+      isCorrect: false
+      explanation: "Incorrect."

learn-pr/wwl-data-ai/develop-generative-ai-vision-apps/6-summary.yml

Lines changed: 1 addition & 1 deletion

@@ -4,7 +4,7 @@ title: Summary
 metadata:
   title: Summary
   description: "Reflect on what you've learned about vision-enabled generative AI models."
-  ms.date: 04/29/2025
+  ms.date: 03/23/2026
   author: gmalc
   ms.author: gmalc
   ms.topic: unit
learn-pr/wwl-data-ai/develop-generative-ai-vision-apps/includes/2-deploy-multimodal-model.md

Lines changed: 3 additions & 5 deletions

@@ -1,12 +1,11 @@
 To handle prompts that include images, you need to deploy a *multimodal* generative AI model - in other words, a model that supports not only text-based input, but image-based (and in some cases, audio-based) input as well. Multimodal models available in Microsoft Foundry include (among others):
 
 - Microsoft **Phi-4-multimodal-instruct**
-- OpenAI **gpt-4o**
-- OpenAI **gpt-4o-mini**
-
+- OpenAI **gpt-4.1**
+- OpenAI **gpt-4.1-mini**
 
 > [!TIP]
-> To learn more about available models in Microsoft Foundry, see the **[Model catalog and collections in Microsoft Foundry portal](/azure/ai-foundry/how-to/model-catalog-overview)** article in the Microsoft Foundry documentation.
+> To learn more about available models in Microsoft Foundry, see the **[Microsoft Foundry Models overview](/azure/foundry/concepts/foundry-models-overview)** article in the Microsoft Foundry documentation.
 
 ## Testing multimodal models with image-based prompts
 
@@ -15,4 +14,3 @@ After deploying a multimodal model, you can test it in the chat playground in Mi
 ![Screenshot of the chat playground with an image-based prompt.](../media/image-prompt.png)
 
 In the chat playground, you can upload an image from a local file and add text to the message to elicit a response from a multimodal model.
-
18-
learn-pr/wwl-data-ai/develop-generative-ai-vision-apps/includes/3-develop-visual-chat-app.md

Lines changed: 47 additions & 32 deletions

@@ -1,47 +1,62 @@
 To develop a client app that engages in vision-based chats with a multimodal model, you can use the same basic techniques used for text-based chats. You require a connection to the endpoint where the model is deployed, and you use that endpoint to submit prompts that consist of messages to the model and process the responses.
 
-The key difference is that prompts for a vision-based chat include multi-part user messages that contain both a *text* (or *audio* where supported) content item and an *image* content item.
+The key difference is that prompts for a vision-based chat include multi-part user messages that contain both a *text* content item and an *image* content item.
 
 ![Diagram of a multi-part prompt being submitted to a model.](../media/multi-part-prompt.png)
 
-The JSON representation of a prompt that includes a multi-part user message looks something like this:
+## Submit an image-based prompt using the *Responses* API
 
-```json
-{
-    "messages": [
-        { "role": "system", "content": "You are a helpful assistant." },
-        { "role": "user", "content": [
-            {
-                "type": "text",
-                "text": "Describe this picture:"
-            },
-            {
-                "type": "image_url",
-                "image_url": {
-                    "url": "https://....."
-                }
-            }
+To include an image in a prompt using the *Responses* API, specify a URL for a web-based image file, or load a local image, encode its data in Base64 format, and submit a URL in the format `data:image/jpeg;base64,{image_data}` (replacing "jpeg" with "png" or other formats as appropriate).
+
+The following Python example shows how to submit an image in a prompt using the *Responses* API:
+
+```python
+# Read the image data from a local file
+image_path = Path("dragon-fruit.jpeg")
+image_format = "jpeg"
+with open(image_path, "rb") as image_file:
+    image_data = base64.b64encode(image_file.read()).decode("utf-8")
+
+data_url = f"data:image/{image_format};base64,{image_data}" # You can also use a web URL
+
+# Send the image data in a prompt to the model
+response = client.responses.create(
+    model="gpt-4.1",
+    input=[
+        {"role": "developer", "content": "You are an AI assistant for chefs planning recipes."},
+        {"role": "user", "content": [
+            { "type": "input_text", "text": "What desserts could I make with this?"},
+            { "type": "input_image", "image_url": data_url}
         ] }
     ]
-}
+)
+print(response.output_text)
 ```
 
-The image content item can be:
+## Submit an image-based prompt using the *ChatCompletions* API
 
-- A URL to an image file in a web site.
-- Binary image data
+When using the Azure OpenAI endpoint to submit prompts to models that don't support the *Responses* API, you can use the *ChatCompletions* API, like this:
 
-When using binary data to submit a local image file, the **image_url** content takes the form of a base64 encoded value in a data URL format:
+```python
+# Read the image data from a local file
+image_path = Path("orange.jpeg")
+image_format = "jpeg"
+with open(image_path, "rb") as image_file:
+    image_data = base64.b64encode(image_file.read()).decode("utf-8")
 
-```json
-{
-    "type": "image_url",
-    "image_url": {
-        "url": "data:image/jpeg;base64,<binary_image_data>"
-    }
-}
-```
+data_url = f"data:image/{image_format};base64,{image_data}" # You can also use a web URL
 
-Depending on the model type, and where you deployed it, you can use Microsoft Azure AI Model Inference or OpenAI APIs to submit vision-based prompts. These libraries also provide language-specific SDKs that abstract the underlying REST APIs.
+# Send the image data in a prompt to the model
+response = client.chat.completions.create(
+    model="Phi-4-multimodal-instruct",
+    messages=[
+        {"role": "system", "content": "You are an AI assistant for chefs planning recipes."},
+        { "role": "user", "content": [
+            { "type": "text", "text": "What can I make with this fruit?"},
+            { "type": "image_url", "image_url": {"url": data_url}}
+        ] }
+    ]
+)
+print(response.choices[0].message.content)
 
-In the exercise that follows in this module, you can use the Python or .NET SDK for the Azure AI Model Inference API and the OpenAI API to develop a vision-enabled chat application.
+```
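
Note that the snippets added in this include assume that `Path`, `base64`, and an authenticated `client` are already in scope; the commit doesn't show that setup. The following is a minimal sketch of it, assuming the `openai` Python package and an Azure OpenAI v1-compatible endpoint; the resource URL and key are placeholders rather than values from this commit:

```python
# Setup assumed by the snippets above (not part of this commit).
# The endpoint and key are placeholders - substitute the target URI
# and key from your own Microsoft Foundry project.
import base64
from pathlib import Path

from openai import OpenAI

client = OpenAI(
    base_url="https://<your-resource>.openai.azure.com/openai/v1/",  # placeholder endpoint
    api_key="<your-api-key>",  # placeholder key
)
```

With a client like this in scope, the `Path`, `base64`, and `client` references in both snippets should resolve: `client.responses.create` for models that support the *Responses* API, and `client.chat.completions.create` for those that don't.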

learn-pr/wwl-data-ai/develop-generative-ai-vision-apps/includes/4-exercise.md

Lines changed: 1 addition & 1 deletion

@@ -5,4 +5,4 @@ If you have an Azure subscription, you can complete this exercise to develop a v
 
 Launch the exercise and follow the instructions.
 
-[![Button to launch exercise.](../media/launch-exercise.png)](https://go.microsoft.com/fwlink/?linkid=2356207&azure-portal=true)
+[![Button to launch exercise.](../media/launch-exercise.png)](https://go.microsoft.com/fwlink/?linkid=2356866&azure-portal=true)

learn-pr/wwl-data-ai/develop-generative-ai-vision-apps/includes/6-summary.md

Lines changed: 1 addition & 2 deletions

@@ -3,5 +3,4 @@ In this module, you learned about vision-enabled generative AI models and how to
 Vision-enabled models let you create AI solutions that can understand images and respond to related questions or instructions. Beyond just identifying objects in pictures, some models can also use reasoning based on what they see. For instance, they can interpret a chart or assess if an object is damaged.
 
 > [!TIP]
-> For more information about working with multimodal models in Microsoft Foundry, see **[How to use image and audio in chat completions with Azure AI model inference](/azure/ai-foundry/model-inference/how-to/use-chat-multi-modal)** and **[Quickstart: Use images in your AI chats](/azure/ai-services/openai/gpt-v-quickstart)**.
-
+> For more information about analyzing images with the OpenAI Responses API, see **[Images and vision](https://developers.openai.com/api/docs/guides/images-vision?format=url#analyze-images?azure-portal=true)** in the OpenAI developer guide.
