-description: Introduction to fine-tuning a foundation model from the model catalog in Microsoft Foundry.
+description: Introduction to advanced fine-tuning decision-making for AI agents, covering method selection, synthetic data strategies, performance assessment, and model lifecycle management.
-title: Prepare your data to fine-tune a chat completion model
+title: Prepare training data for fine-tuning
 metadata:
-  title: Prepare your data to fine-tune a chat completion model
-  description: Learn how to prepare training data from real conversations or generate synthetic data using Microsoft Foundry's data generation tools for fine-tuning chat completion models.
+  title: Prepare training data for fine-tuning
+  description: Learn how to validate data formats for SFT, DPO, and RFT fine-tuning methods, apply data quality principles, and follow a systematic workflow to create or generate high-quality training datasets.
-title: Explore fine-tuning language models in Microsoft Foundry portal
+title: Design your optimization strategy and configure training hyperparameters
 metadata:
-  title: Explore fine-tuning foundation models in Microsoft Foundry portal
-  description: Explore fine-tuning foundation models from the model catalog in Microsoft Foundry portal.
+  title: Design your optimization strategy and configure training hyperparameters
+  description: Learn how to design an optimization strategy by evaluating baseline performance, setting measurable targets, splitting your dataset, and configuring hyperparameters for SFT, DPO, and RFT training jobs.
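The dataset split mentioned in this unit description can be sketched as a small, hypothetical Python helper. This is illustrative only — it is not part of Microsoft Foundry's tooling, and the function name and defaults are assumptions:

```python
import random

def split_dataset(records, validation_fraction=0.2, seed=42):
    """Shuffle reproducibly, then split records into train/validation lists.

    Illustrative sketch only; not a Microsoft Foundry API.
    """
    shuffled = records[:]                  # copy so the caller's list is untouched
    random.Random(seed).shuffle(shuffled)  # fixed seed -> reproducible split
    cut = int(len(shuffled) * (1 - validation_fraction))
    return shuffled[:cut], shuffled[cut:]

train, val = split_dataset(list(range(10)))
print(len(train), len(val))  # 8 2
```

A fixed seed makes the split repeatable, so baseline and fine-tuned runs are compared against the same held-out validation examples.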
-description: Knowledge check about fine-tuning a language model.
+description: Knowledge check covering fine-tuning method selection for agent quality problems, DPO preference pair data format requirements, and optimization strategy design.
 author: madiepev
 ms.author: madiepev
 ms.date: 02/25/2026
@@ -15,38 +15,38 @@ durationInMinutes: 3
 content: |
 quiz:
   questions:
-  - content: "How must data be formatted for fine-tuning?"
+  - content: "An AI agent generates trip plans that fail to consider how customer constraints like fitness level, experience, budget, and weather conditions interact with each other. Which fine-tuning method best addresses this problem?"
     choices:
-    - content: "JSONL"
-      isCorrect: true
-      explanation: "Correct. JSONL or JSON Lines is the expected input format."
-    - content: "YAML"
+    - content: "Supervised Fine-Tuning (SFT)"
       isCorrect: false
-      explanation: "Incorrect. In the context of the Microsoft Foundry, YAML is used to specify the configuration of jobs or flows."
-    - content: "HTML"
+      explanation: "Incorrect. SFT excels at format consistency and style adherence, but doesn't develop the multi-step reasoning needed to balance interacting constraints."
+    - content: "Reinforcement Fine-Tuning (RFT)"
+      isCorrect: true
+      explanation: "Correct. RFT develops complex reasoning capabilities by using a grader function to reward outputs that appropriately balance interacting constraints, making it the right choice for multi-step planning problems."
+    - content: "Direct Preference Optimization (DPO)"
       isCorrect: false
-      explanation: "Incorrect. HTML is a markup language for web pages."
-  - content: "What does fine-tuning optimize in your model?"
+      explanation: "Incorrect. DPO specializes in subjective preferences and tone alignment, not in developing reasoning logic for multi-step constraint balancing."
+  - content: "Which fine-tuning method requires training data structured as preference pairs, each containing a prompt alongside both a preferred and a non-preferred response?"
     choices:
-    - content: "What the model needs to know."
+    - content: "Supervised Fine-Tuning (SFT)"
       isCorrect: false
-      explanation: "Incorrect. To optimize what the model needs to know, RAG is more effective."
-    - content: "How the model needs to act."
-      isCorrect: true
-      explanation: "Correct. Fine-tuning can help to maximize the consistency of the model's behavior."
-    - content: "Which words aren't allowed."
+      explanation: "Incorrect. SFT requires labeled examples, where each example pairs a prompt with the single ideal response you want the model to produce."
+    - content: "Reinforcement Fine-Tuning (RFT)"
       isCorrect: false
-      explanation: "Incorrect. To filter specific content like words, you can use a content filter."
-  - content: "Which advanced option refers to one full cycle through the training dataset?"
+      explanation: "Incorrect. RFT requires prompts combined with a separate grader function that scores the model's generated responses during training."
+    - content: "Direct Preference Optimization (DPO)"
+      isCorrect: true
+      explanation: "Correct. DPO uses preference pairs to teach the model which response better reflects your values. Each example includes a prompt, a preferred response, and a non-preferred response."
+  - content: "What is the purpose of evaluating the base model before submitting a fine-tuning job?"
     choices:
-    - content: "seed"
+    - content: "To establish a baseline so you can measure whether fine-tuning improved performance."
+      isCorrect: true
+      explanation: "Correct. Running your evaluation dataset through the unmodified base model and recording metric scores gives you a reference point for comparing fine-tuned results."
+    - content: "To automatically generate labeled training examples from the base model's outputs."
       isCorrect: false
-      explanation: "Incorrect. The seed controls the reproducibility of the job."
-    - content: "batch_size"
+      explanation: "Incorrect. Evaluating the base model measures its current performance—it doesn't produce training data. Training data must be prepared separately before fine-tuning begins."
+    - content: "To determine the correct number of epochs to use during training."
       isCorrect: false
-      explanation: "Incorrect. The batch size specifies the number of training examples used."
-    - content: "n_epochs"
-      isCorrect: true
-      explanation: "Correct. When you set the number of epochs, you set the number of full cycles to run through the dataset."
+      explanation: "Incorrect. Baseline evaluation measures current model quality against your metrics. Epoch count is a hyperparameter you set based on training results, adjusted one at a time after evaluating each run."
-description: Summary of fine-tuning language models.
+description: Summary of advanced fine-tuning for AI agents, covering method selection, training data preparation, optimization strategy design, and hyperparameter configuration.