Merge pull request #53612 from DivyaGundreddy/LP160474-M4

v-regandowner · web-flow · commit 053c614d1e3b · 2026-02-25T12:35:26.000-05:00
transform-development-workflows-sql-server-2025
diff --git a/learn-pr/sqlserver/introduction-sql-server-t-sql-enhancements/01-introduction.yml b/learn-pr/sqlserver/introduction-sql-server-t-sql-enhancements/01-introduction.yml
@@ -0,0 +1,13 @@
+### YamlMime:ModuleUnit
+uid: learn.wwl.introduction-sql-server-t-sql-enhancements.introduction
+title: Introduction
+metadata:
+  title: Introduction
+  description: "Introduction"
+  ms.date: 10/14/2025
+  author: MScalopez
+  ms.author: calopez
+  ms.topic: unit
+durationInMinutes: 1
+content: |
+  [!include[](includes/01-introduction.md)]
diff --git a/learn-pr/sqlserver/introduction-sql-server-t-sql-enhancements/02-vector-ai-integration.yml b/learn-pr/sqlserver/introduction-sql-server-t-sql-enhancements/02-vector-ai-integration.yml
@@ -0,0 +1,13 @@
+### YamlMime:ModuleUnit
+uid: learn.wwl.introduction-sql-server-t-sql-enhancements.vector-ai-integration
+title: AI and vector integration
+metadata:
+  title: AI and vector integration
+  description: "Explore AI and vector features for embeddings, search, and external model integration."
+  ms.date: 10/14/2025
+  author: MScalopez
+  ms.author: calopez
+  ms.topic: unit
+durationInMinutes: 5
+content: |
+  [!include[](includes/02-vector-ai-integration.md)]
diff --git a/learn-pr/sqlserver/introduction-sql-server-t-sql-enhancements/03-pattern-matching-text.yml b/learn-pr/sqlserver/introduction-sql-server-t-sql-enhancements/03-pattern-matching-text.yml
@@ -0,0 +1,13 @@
+### YamlMime:ModuleUnit
+uid: learn.wwl.introduction-sql-server-t-sql-enhancements.pattern-matching-text-extraction
+title: Pattern matching and text extraction
+metadata:
+  title: Pattern matching and text extraction
+  description: "Use REGEXP and SUBSTRING to find, extract, and manipulate text patterns in T-SQL."
+  ms.date: 10/14/2025
+  author: MScalopez
+  ms.author: calopez
+  ms.topic: unit
+durationInMinutes: 7
+content: |
+  [!include[](includes/03-pattern-matching-text.md)]
diff --git a/learn-pr/sqlserver/introduction-sql-server-t-sql-enhancements/04-json-string-aggregation.yml b/learn-pr/sqlserver/introduction-sql-server-t-sql-enhancements/04-json-string-aggregation.yml
@@ -0,0 +1,13 @@
+### YamlMime:ModuleUnit
+uid: learn.wwl.introduction-sql-server-t-sql-enhancements.json-string-aggregation
+title: JSON and string aggregation
+metadata:
+  title: JSON and string aggregation
+  description: "Create JSON arrays, objects, and delimited strings with new T-SQL aggregation functions."
+  ms.date: 10/14/2025
+  author: MScalopez
+  ms.author: calopez
+  ms.topic: unit
+durationInMinutes: 6
+content: |
+  [!include[](includes/04-json-string-aggregation.md)]
diff --git a/learn-pr/sqlserver/introduction-sql-server-t-sql-enhancements/05-encoding-similarity.yml b/learn-pr/sqlserver/introduction-sql-server-t-sql-enhancements/05-encoding-similarity.yml
@@ -0,0 +1,13 @@
+### YamlMime:ModuleUnit
+uid: learn.wwl.introduction-sql-server-t-sql-enhancements.encoding-similarity
+title: Encoding and similarity functions
+metadata:
+  title: Encoding and similarity functions
+  description: "Encode data with Base64 and compare text using new string similarity functions."
+  ms.date: 10/14/2025
+  author: MScalopez
+  ms.author: calopez
+  ms.topic: unit
+durationInMinutes: 6
+content: |
+  [!include[](includes/05-encoding-similarity.md)]
diff --git a/learn-pr/sqlserver/introduction-sql-server-t-sql-enhancements/06-date-numeric-enhancements.yml b/learn-pr/sqlserver/introduction-sql-server-t-sql-enhancements/06-date-numeric-enhancements.yml
@@ -0,0 +1,13 @@
+### YamlMime:ModuleUnit
+uid: learn.wwl.introduction-sql-server-t-sql-enhancements.date-numeric-enhancements
+title: Date and numeric enhancements
+metadata:
+  title: Date and numeric enhancements
+  description: "Work with CURRENT_DATE, bigint DATEADD, and PRODUCT() for precise date and math operations."
+  ms.date: 10/14/2025
+  author: MScalopez
+  ms.author: calopez
+  ms.topic: unit
+durationInMinutes: 6
+content: |
+  [!include[](includes/06-date-numeric-enhancements.md)]
diff --git a/learn-pr/sqlserver/introduction-sql-server-t-sql-enhancements/07-knowledge-check.yml b/learn-pr/sqlserver/introduction-sql-server-t-sql-enhancements/07-knowledge-check.yml
@@ -0,0 +1,58 @@
+### YamlMime:ModuleUnit
+uid: learn.wwl.introduction-sql-server-t-sql-enhancements.knowledge-check
+title: Module assessment
+metadata:
+  title: Module assessment
+  description: "Knowledge check"
+  ms.date: 10/14/2025
+  author: MScalopez
+  ms.author: calopez
+  ms.topic: unit
+durationInMinutes: 5
+quiz:
+  title: Check your knowledge
+  questions:
+  - content: Descriptions contain codes like AB12345 embedded in text. The result must return the matched code text per row. Which option fits?
+    choices:
+    - content: REGEXP_SUBSTR
+      isCorrect: true
+      explanation: Returns the substring that matches the pattern, producing the code text directly.
+    - content: REGEXP_INSTR
+      isCorrect: false
+      explanation: Returns the starting position of the match, not the matched text.
+    - content: REGEXP_MATCHES
+      isCorrect: false
+      explanation: Returns all matches as a rowset; use when multiple rows of matches are needed.
+  - content: A report needs one JSON field per customer listing all product IDs in order. Which option aligns with that output?
+    choices:
+    - content: JSON_OBJECTAGG
+      isCorrect: false
+      explanation: Produces key-value objects, not arrays of values.
+    - content: STRING_CONCAT_WS
+      isCorrect: false
+      explanation: Creates delimited text, not structured JSON.
+    - content: JSON_ARRAYAGG
+      isCorrect: true
+      explanation: Aggregates values into a JSON array while preserving order.
+  - content: Similarity scores fluctuate across rows because vector magnitudes vary. The pipeline must make scores comparable across rows. What helps?
+    choices:
+    - content: VECTOR_NORMALIZE
+      isCorrect: true
+      explanation: Normalizes each vector to unit length so similarity is magnitude-invariant.
+    - content: VECTOR_DISTANCE
+      isCorrect: false
+      explanation: Computes distance; doesn't standardize magnitude.
+    - content: CREATE VECTOR INDEX
+      isCorrect: false
+      explanation: Speeds up search but doesn't change score scaling.
+  - content: A nightly job must store the current date as a partition key with no time component. Which choice avoids manual truncation?
+    choices:
+    - content: GETDATE
+      isCorrect: false
+      explanation: Includes time, which complicates partition matching.
+    - content: CURRENT_DATE
+      isCorrect: true
+      explanation: Returns the date only, suitable for date-based partitions.
+    - content: SYSDATETIME
+      isCorrect: false
+      explanation: Returns higher-precision datetime, not date-only.
diff --git a/learn-pr/sqlserver/introduction-sql-server-t-sql-enhancements/08-summary.yml b/learn-pr/sqlserver/introduction-sql-server-t-sql-enhancements/08-summary.yml
@@ -0,0 +1,13 @@
+### YamlMime:ModuleUnit
+uid: learn.wwl.introduction-sql-server-t-sql-enhancements.summary
+title: Summary
+metadata:
+  title: Summary
+  description: "Summary"
+  ms.date: 10/14/2025
+  author: MScalopez
+  ms.author: calopez
+  ms.topic: unit
+durationInMinutes: 1
+content: |
+  [!include[](includes/08-summary.md)]
diff --git a/learn-pr/sqlserver/introduction-sql-server-t-sql-enhancements/includes/01-introduction.md b/learn-pr/sqlserver/introduction-sql-server-t-sql-enhancements/includes/01-introduction.md
@@ -0,0 +1,24 @@
+SQL Server 2025 introduces a range of new T‑SQL features and enhancements that support modern workloads while keeping queries clear and maintainable. This module focuses on language additions for AI and vectors, pattern matching, JSON output, string processing, and improved date and numeric operations.
+
+In this module, we cover the following topics:
+
+- **Vector and AI integration**: Learn how to generate embeddings and work with vector data using functions and features such as `AI_GENERATE_EMBEDDINGS`, `AI_GENERATE_CHUNKS`, `VECTOR_DISTANCE`, `VECTOR_NORM`, `VECTOR_NORMALIZE`, `VECTORPROPERTY`, `CREATE EXTERNAL MODEL`, `CREATE VECTOR INDEX`, and `VECTOR_SEARCH`.
+- **Pattern matching and text extraction**: Use `REGEXP_LIKE`, `REGEXP_SUBSTR`, `REGEXP_REPLACE`, `REGEXP_INSTR`, `REGEXP_COUNT`, `REGEXP_MATCHES`, and `REGEXP_SPLIT_TO_TABLE`, plus the enhanced `SUBSTRING` behavior.
+- **JSON and string aggregation**: Build structured output with `JSON_ARRAYAGG` and `JSON_OBJECTAGG`, and create delimited text with `STRING_CONCAT_WS`. You can also use the `||` operator for string concatenation and `UNISTR` for Unicode escape sequences.
+- **Encoding and similarity functions**: Encode and decode text with `BASE64_ENCODE` and `BASE64_DECODE`, and compare strings with `STRING_SIMILARITY`, `EDIT_DISTANCE`, `EDIT_DISTANCE_SIMILARITY`, `JARO_WINKLER_DISTANCE`, and `JARO_WINKLER_SIMILARITY`.
+- **Date and numeric enhancements**: Work with `CURRENT_DATE`, `DATEADD` with `bigint`, and the `PRODUCT()` aggregate for multiplicative calculations.
+
+## Learning objectives
+
+Upon completing this module, you should be able to:
+
+- Understand the new and enhanced T‑SQL features in SQL Server 2025.  
+- Apply these capabilities to integrate AI, parse and format text, build JSON output, and support analytics.  
+- Choose the right function or operator to keep queries readable and efficient.
+
+## Prerequisites
+
+- SQL Server 2025  
+- Basic working knowledge of SQL Server and query processing  
+- Fundamental knowledge of Transact‑SQL (T‑SQL)  
+- Familiarity with functions, operators, and JSON handling in SQL Server
diff --git a/learn-pr/sqlserver/introduction-sql-server-t-sql-enhancements/includes/02-vector-ai-integration.md b/learn-pr/sqlserver/introduction-sql-server-t-sql-enhancements/includes/02-vector-ai-integration.md
@@ -0,0 +1,104 @@
+SQL Server 2025 introduces a new set of AI and vector functions that enable database developers to integrate AI-powered capabilities directly into T-SQL. These new capabilities make it possible to generate embeddings, calculate vector similarity, and search across AI-enriched data without leaving SQL Server. This level of integration reduces the need for external services, simplifies application architecture, and supports real-time intelligent workloads.
+
+## AI and Vector Functions Overview
+
+The new AI features in SQL Server 2025 fall into three main categories: AI generation, vector operations, and vector indexing and search.
+
+### AI Generation Functions
+
+- **AI_GENERATE_CHUNKS** – Splits large text or documents into semantically coherent chunks that can later be embedded or stored for retrieval-augmented generation (RAG) scenarios.  
+- **AI_GENERATE_EMBEDDINGS** – Generates embeddings from text input using an external model registered in SQL Server. These embeddings can be stored in tables for use in vector search, similarity analysis, or semantic ranking.
+
+### Vector Operations
+
+- **VECTOR_DISTANCE** – Computes the distance between two vector values, supporting distance metrics such as cosine, Euclidean, and dot product.  
+- **VECTOR_NORM** – Returns the vector norm (magnitude) for a given vector.  
+- **VECTOR_NORMALIZE** – Returns a normalized version of a vector, typically used before comparison or similarity searches.  
+- **VECTORPROPERTY** – Returns metadata about a vector, such as its dimensions or element type.
+
+### External Models and Vector Indexes
+
+SQL Server 2025 allows you to register and manage external AI models using T-SQL.  
+- **CREATE EXTERNAL MODEL / ALTER EXTERNAL MODEL / DROP EXTERNAL MODEL** – Manage AI models that are hosted locally or through supported model providers.  
+- **CREATE VECTOR INDEX** – Creates an index optimized for vector data to accelerate similarity searches.  
+- **VECTOR_SEARCH** – Performs similarity search operations on vector data using the vector index, returning the closest matches based on the selected distance metric.
+
+These capabilities allow SQL Server to serve as a foundation for retrieval-augmented generation, recommendation engines, and semantic search applications entirely within the database engine.
+
+### Half-precision vector storage and binary ingest
+
+Vectors can now use **half-precision floating-point (fp16)** elements to reduce memory usage and improve scan performance in embedding-heavy workloads.  
+You can also **bulk-load vectors** in binary format using `BULK INSERT` or `OPENROWSET(BULK ...)`, which simplifies importing large embedding sets created outside SQL Server.
+
+## Example Scenario: Building a Product Recommendation Query
+
+Imagine you work for a retail company that stores product descriptions in a SQL Server 2025 database. The marketing team wants to build a recommendation feature that suggests products semantically similar to a selected item. Using the new AI and vector features, you can generate embeddings for product descriptions, store them in a table, and perform similarity searches without external processing.
+
+### Create and Register the Model
+
+Before generating embeddings, you must register an external model.
+
+```sql
+-- Register an OpenAI embeddings endpoint as an external model.
+-- Assumes a database scoped credential named [https://api.openai.com] that stores
+-- the API key was already created with CREATE DATABASE SCOPED CREDENTIAL.
+CREATE EXTERNAL MODEL embedding_model
+WITH (
+      LOCATION = 'https://api.openai.com/v1/embeddings',
+      API_FORMAT = 'OpenAI',
+      MODEL_TYPE = EMBEDDINGS,
+      MODEL = 'text-embedding-3-small',
+      CREDENTIAL = [https://api.openai.com]
+);
+```
+
+### Generate and Store Embeddings
+
+Once the model is registered, you can generate embeddings for your product descriptions and store them in a new table.
+
+```sql
+CREATE TABLE ProductEmbeddings
+(
+    ProductID INT PRIMARY KEY,
+    Description NVARCHAR(MAX),
+    Embedding VECTOR(1536) -- dimension count must match the embedding model's output
+);
+
+-- Generate one embedding per product description.
+-- AI_GENERATE_EMBEDDINGS takes the source text first, followed by USE MODEL <external model name>.
+INSERT INTO ProductEmbeddings (ProductID, Description, Embedding)
+SELECT ProductID,
+       Description,
+       AI_GENERATE_EMBEDDINGS(Description USE MODEL embedding_model)
+FROM Products;
+```
+
+### Create a Vector Index and Run a Search
+
+To improve search performance, create a vector index to speed up similarity searches.
+
+```sql
+-- Build a vector index on the embedding column; METRIC selects the distance function
+-- the index is optimized for ('cosine', 'dot', or 'euclidean').
+CREATE VECTOR INDEX idx_ProductEmbedding
+ON ProductEmbeddings (Embedding)
+WITH (METRIC = 'cosine');
+```
+
+Now you can perform a semantic search for related products:
+
+```sql
+DECLARE @query NVARCHAR(MAX) = 'waterproof hiking backpack';
+-- Embed the search text with the same model that produced the stored embeddings
+DECLARE @vector VECTOR(1536) = AI_GENERATE_EMBEDDINGS(@query USE MODEL embedding_model);
+
+-- VECTOR_DISTANCE takes the metric as its first argument.
+-- The value is a cosine distance: smaller numbers mean closer matches, so sort ascending.
+SELECT TOP 5 ProductID, Description,
+       VECTOR_DISTANCE('cosine', Embedding, @vector) AS SimilarityScore
+FROM ProductEmbeddings
+ORDER BY SimilarityScore ASC;
+```
+
+### Results
+
+| ProductID | Description | SimilarityScore |
+|------------|--------------|----------------|
+| 105 | "Lightweight waterproof travel backpack" | 0.07 |
+| 116 | "Hiking pack with rain cover and hydration slot" | 0.10 |
+| 117 | "Compact outdoor day pack with water resistance" | 0.12 |
+| 101 | "Trail-ready backpack with external straps" | 0.15 |
+| 119 | "Travel and camping waterproof duffel" | 0.18 |
+
+This example demonstrates how to register an external AI model, generate embeddings from T-SQL, and perform a similarity search using built-in vector functions. The embedding requests are sent to the external model endpoint, but the vectors, the index, and the search itself all stay in SQL Server, which simplifies development and keeps the data secure and governed under existing database policies.
+
+## Summary
+
+SQL Server 2025 introduces native AI capabilities that allow developers to build intelligent database applications directly in T-SQL. Functions such as `AI_GENERATE_EMBEDDINGS`, `VECTOR_DISTANCE`, and `VECTOR_SEARCH` streamline integration with AI models while maintaining performance and security. Together, these features make SQL Server 2025 a strong platform for semantic search, recommendations, and context-aware analytics without relying on external compute pipelines.
diff --git a/learn-pr/sqlserver/introduction-sql-server-t-sql-enhancements/includes/03-pattern-matching-text.md b/learn-pr/sqlserver/introduction-sql-server-t-sql-enhancements/includes/03-pattern-matching-text.md
@@ -0,0 +1,107 @@
+SQL Server 2025 adds new regular expression (REGEXP) functions and enhances the `SUBSTRING` function to simplify working with structured text. These additions allow developers to perform flexible text searches, extract complex patterns, and clean or transform data directly in T-SQL without relying on client-side scripting or CLR functions.
+
+## Overview of Pattern Matching and Text Extraction Functions
+
+The new REGEXP family of functions supports regular expression evaluation directly inside SQL Server. Each function enables specific pattern-based text operations.
+
+### REGEXP_LIKE  
+Checks whether a string matches a regular expression pattern. Returns a Boolean value (true or false), so you use it as a predicate in clauses such as `WHERE`, `CASE WHEN`, or `CHECK`.
+
+### REGEXP_SUBSTR  
+Extracts the first substring that matches a regular expression pattern. Useful for pulling out specific information such as phone numbers, dates, or email addresses.
+
+### REGEXP_REPLACE  
+Searches a string for a pattern and replaces all occurrences that match the pattern. It can be used for data cleanup and formatting.
+
+### REGEXP_INSTR  
+Returns the starting position of the first substring that matches the pattern within the given text. Ideal for locating key markers in structured text.
+
+### REGEXP_COUNT
+Counts the number of matches of a regular expression pattern in a given string. This count is useful when you need to measure pattern frequency, such as counting digits, words, or symbols in text.
+
+### REGEXP_MATCHES
+Returns all substrings that match a regular expression pattern as a table result. Use it when you need multiple captures from a single string rather than only the first match.
+
+### REGEXP_SPLIT_TO_TABLE  
+Splits a string into multiple rows using a regular expression delimiter.
+
+### SUBSTRING Enhancement  
+The `SUBSTRING` function now supports an optional length parameter. When omitted, it automatically extracts from the specified start position to the end of the string, reducing the need for manual LEN() calculations.
+
+Together, these capabilities let you search, extract, and manipulate text patterns directly in SQL Server 2025 with concise, readable T-SQL.
+
+---
+
+## Example Scenario: Extracting and Cleaning Contact Data
+
+A marketing team maintains a database of customer messages in a column named `MessageText`. Many entries include phone numbers in different formats. You need to extract the first phone number from each message, normalize it to a standard format, and identify messages that don't contain a phone number.
+
+### Sample Data
+
+Let's assume you have the following sample data in a table called `CustomerMessages`:
+
+| MessageID | MessageText |
+|------------|-------------|
+| 1 | "Call me at (713) 555-1298 or office 555-8811." |
+| 2 | "Reach out to me: +1-832-555-7821 thank you!" |
+| 3 | "My number is 713-555-9876 ext. 33" |
+| 4 | "No phone listed yet." |
+
+### Query: Identify, Extract, and Standardize
+
+```sql
+-- Extract the first phone number pattern and format it consistently.
+-- The pattern allows an optional country code (for example, +1-) and an optional
+-- opening parenthesis before the area code, so the whole number is captured.
+SELECT MessageID,
+       REGEXP_SUBSTR(MessageText, '(\+?\d{1,2}[\-\s])?\(?\d{3}[)\-\s]*\d{3}[\-\s]*\d{4}') AS RawNumber,
+       -- Strip every non-digit character from the extracted number
+       REGEXP_REPLACE(
+           REGEXP_SUBSTR(MessageText, '(\+?\d{1,2}[\-\s])?\(?\d{3}[)\-\s]*\d{3}[\-\s]*\d{4}'),
+           '\D', ''
+       ) AS DigitsOnly,
+       -- REGEXP_LIKE is a Boolean predicate, so it's used directly in the WHEN clause
+       CASE 
+           WHEN REGEXP_LIKE(MessageText, '(\+?\d{1,2}[\-\s])?\(?\d{3}[)\-\s]*\d{3}[\-\s]*\d{4}') THEN 'Valid'
+           ELSE 'Missing'
+       END AS PhoneStatus
+FROM dbo.CustomerMessages;
+```
+
+### Results
+
+| MessageID | RawNumber | DigitsOnly | PhoneStatus |
+|------------|------------|-------------|--------------|
+| 1 | (713) 555-1298 | 7135551298 | Valid |
+| 2 | +1-832-555-7821 | 18325557821 | Valid |
+| 3 | 713-555-9876 | 7135559876 | Valid |
+| 4 | NULL | NULL | Missing |
+
+This example uses `REGEXP_SUBSTR` to extract the first matching pattern, `REGEXP_REPLACE` to strip non-numeric characters, and `REGEXP_LIKE` to verify valid numbers. The query standardizes phone numbers into a consistent digits-only format directly in T-SQL.
+
+---
+
+## Example 2: Splitting Data with REGEXP_SPLIT_TO_TABLE and SUBSTRING
+
+Suppose another table, `CustomerFeedback`, stores comma-separated tags describing customer interests. You want to separate them into individual rows and return each tag's text by calling `SUBSTRING` without a length argument.
+
+```sql
+-- Split each Tags value on commas (ignoring surrounding whitespace) into one row per tag.
+-- SUBSTRING is called without a length, so it returns everything from position 1 onward.
+SELECT fb.FeedbackID,
+       tag.value AS Tag,
+       SUBSTRING(tag.value, 1) AS FirstWord
+FROM CustomerFeedback AS fb
+CROSS APPLY REGEXP_SPLIT_TO_TABLE(fb.Tags, '\s*,\s*') AS tag;
+```
+
+This query splits each comma-delimited string into rows while using the new `SUBSTRING` behavior (without specifying length) to extract the entire remaining text from the start position.
+
+### Results
+
+| FeedbackID | Tag | FirstWord |
+|-------------|-----|-----------|
+| 1 | travel | travel |
+| 1 | photography | photography |
+| 2 | hiking | hiking |
+| 2 | camping | camping |
+
+---
+
+## Summary
+
+The new REGEXP functions and the enhanced SUBSTRING function in SQL Server 2025 deliver native pattern-matching and text-extraction capabilities. These additions eliminate the need for external string processing, making it easier to clean, parse, and analyze textual data inside the database engine. With these tools, developers can simplify ETL pipelines, improve data quality, and enable advanced text-driven analytics directly in T-SQL.
diff --git a/learn-pr/sqlserver/introduction-sql-server-t-sql-enhancements/includes/04-json-string-aggregation.md b/learn-pr/sqlserver/introduction-sql-server-t-sql-enhancements/includes/04-json-string-aggregation.md
diff --git a/learn-pr/sqlserver/introduction-sql-server-t-sql-enhancements/includes/05-encoding-similarity.md b/learn-pr/sqlserver/introduction-sql-server-t-sql-enhancements/includes/05-encoding-similarity.md
diff --git a/learn-pr/sqlserver/introduction-sql-server-t-sql-enhancements/includes/06-date-numeric-enhancements.md b/learn-pr/sqlserver/introduction-sql-server-t-sql-enhancements/includes/06-date-numeric-enhancements.md
diff --git a/learn-pr/sqlserver/introduction-sql-server-t-sql-enhancements/includes/08-summary.md b/learn-pr/sqlserver/introduction-sql-server-t-sql-enhancements/includes/08-summary.md
diff --git a/learn-pr/sqlserver/introduction-sql-server-t-sql-enhancements/index.yml b/learn-pr/sqlserver/introduction-sql-server-t-sql-enhancements/index.yml