diff --git a/tools/clang/lib/Headers/hlsl/dx/linalg.h b/tools/clang/lib/Headers/hlsl/dx/linalg.h
index 9b43dcc6cb..ff776f97b0 100644
--- a/tools/clang/lib/Headers/hlsl/dx/linalg.h
+++ b/tools/clang/lib/Headers/hlsl/dx/linalg.h
@@ -165,11 +165,19 @@ template <ComponentEnum CompTy> struct ComponentTypeTraits {
   static const uint ElementsPerScalar = 4;
 };
 
+template <typename T> struct TypeTraits { // Primary template: unmapped types.
+  static const ComponentEnum CompType =
+      (ComponentEnum)dxil::ComponentType::Invalid;
+};
+
 #define __MATRIX_SCALAR_COMPONENT_MAPPING(enum_val, type)                      \
   template <> struct ComponentTypeTraits<enum_val> {                           \
     using Type = type;                                                         \
     static const bool IsNativeScalar = true;                                   \
     static const uint ElementsPerScalar = 1;                                   \
+  }; /* Also map the scalar type back to its component enum. */                \
+  template <> struct TypeTraits<type> {                                        \
+    static const ComponentEnum CompType = enum_val;                            \
   };
 
 #if __HLSL_ENABLE_16_BIT
@@ -498,14 +506,61 @@ Multiply(Matrix<MatrixDT, M, K, MatrixUse::A, MatrixScope::Thread> MatrixA,
 template <typename OutputElTy, typename InputElTy, typename BiasElTy,
           SIZE_TYPE M, SIZE_TYPE K, ComponentEnum MatrixDT>
 // clang-format off
-typename hlsl::enable_if<hlsl::is_arithmetic<InputElTy>::value, vector<OutputElTy, M> >::type
+typename hlsl::enable_if<hlsl::is_arithmetic<InputElTy>::value &&
+                             __detail::TypeTraits<BiasElTy>::CompType ==
+                                 __detail::TypeTraits<OutputElTy>::CompType,
+                         vector<OutputElTy, M> >::type
 // clang-format on
 MultiplyAdd(Matrix<MatrixDT, M, K, MatrixUse::A, MatrixScope::Thread> MatrixA,
             vector<InputElTy, K> Vec, vector<BiasElTy, M> Bias) {
   vector<OutputElTy, M> Result;
-  __builtin_LinAlg_MatrixVectorMultiplyAdd(Result, MatrixA.__handle,
-                                           hlsl::is_signed<OutputElTy>::value,
-                                           Vec, MatrixDT, Bias, MatrixDT);
+  __builtin_LinAlg_MatrixVectorMultiplyAdd(
+      Result, MatrixA.__handle, hlsl::is_signed<OutputElTy>::value, Vec,
+      __detail::TypeTraits<InputElTy>::CompType, Bias, // from Vec element type
+      __detail::TypeTraits<OutputElTy>::CompType); // same CompType as Bias
+  return Result;
+}
+
+template <typename OutputElTy, typename InputElTy, typename BiasElTy,
+          SIZE_TYPE M, SIZE_TYPE K, ComponentEnum MatrixDT>
+// clang-format off
+typename hlsl::enable_if<hlsl::is_arithmetic<InputElTy>::value &&
+                             __detail::TypeTraits<BiasElTy>::CompType !=
+                                 __detail::TypeTraits<OutputElTy>::CompType,
+                         vector<OutputElTy, M> >::type
+// clang-format on
+MultiplyAdd(Matrix<MatrixDT, M, K, MatrixUse::A, MatrixScope::Thread> MatrixA,
+            vector<InputElTy, K> Vec, vector<BiasElTy, M> Bias) {
+  vector<OutputElTy, M> BiasVecConv; // Receives Bias as OutputElTy.
+  __builtin_LinAlg_Convert(BiasVecConv, Bias,
+                           __detail::TypeTraits<BiasElTy>::CompType,
+                           __detail::TypeTraits<OutputElTy>::CompType);
+  vector<OutputElTy, M> Result;
+  __builtin_LinAlg_MatrixVectorMultiplyAdd(
+      Result, MatrixA.__handle, hlsl::is_signed<OutputElTy>::value, Vec,
+      __detail::TypeTraits<InputElTy>::CompType, BiasVecConv, // converted bias
+      __detail::TypeTraits<OutputElTy>::CompType);
+  return Result;
+}
+
+template <typename OutputElTy, typename InputElTy, ComponentEnum InputInterp,
+          typename BiasElTy, SIZE_TYPE M, SIZE_TYPE VecK, SIZE_TYPE K,
+          ComponentEnum MatrixDT>
+// clang-format off
+typename hlsl::enable_if<
+    VecK == __detail::ScalarCountFromPackedComponents<InputInterp, K>::Value &&
+        __detail::TypeTraits<BiasElTy>::CompType ==
+            __detail::TypeTraits<OutputElTy>::CompType,
+    vector<OutputElTy, M> >::type
+// clang-format on
+MultiplyAdd(Matrix<MatrixDT, M, K, MatrixUse::A, MatrixScope::Thread> MatrixA,
+            InterpretedVector<InputElTy, VecK, InputInterp> InterpVec,
+            vector<BiasElTy, M> Bias) {
+  vector<OutputElTy, M> Result;
+  __builtin_LinAlg_MatrixVectorMultiplyAdd(
+      Result, MatrixA.__handle, hlsl::is_signed<OutputElTy>::value,
+      InterpVec.Data, InterpVec.Interpretation, Bias, // Bias used unconverted.
+      __detail::TypeTraits<OutputElTy>::CompType);
   return Result;
 }
 
@@ -514,55 +569,121 @@ template <typename OutputElTy, typename InputElTy, ComponentEnum InputInterp,
           ComponentEnum MatrixDT>
 // clang-format off
 typename hlsl::enable_if<
-    VecK == __detail::ScalarCountFromPackedComponents<InputInterp, K>::Value,
+    VecK == __detail::ScalarCountFromPackedComponents<InputInterp, K>::Value &&
+        __detail::TypeTraits<BiasElTy>::CompType !=
+            __detail::TypeTraits<OutputElTy>::CompType,
     vector<OutputElTy, M> >::type
 // clang-format on
 MultiplyAdd(Matrix<MatrixDT, M, K, MatrixUse::A, MatrixScope::Thread> MatrixA,
             InterpretedVector<InputElTy, VecK, InputInterp> InterpVec,
             vector<BiasElTy, M> Bias) {
+
+  vector<OutputElTy, M> BiasVecConv;
+  __builtin_LinAlg_Convert(BiasVecConv, Bias,
+                           __detail::TypeTraits<BiasElTy>::CompType,
+                           __detail::TypeTraits<OutputElTy>::CompType);
+
   vector<OutputElTy, M> Result;
   __builtin_LinAlg_MatrixVectorMultiplyAdd(
       Result, MatrixA.__handle, hlsl::is_signed<OutputElTy>::value,
-      InterpVec.Data, InterpVec.Interpretation, Bias, MatrixDT);
+      InterpVec.Data, InterpVec.Interpretation, BiasVecConv,
+      __detail::TypeTraits<OutputElTy>::CompType);
+  return Result;
+}
+
+template <typename OutputElTy, typename InputElTy, ComponentEnum BiasInterp,
+          SIZE_TYPE M, SIZE_TYPE K, ComponentEnum MatrixDT>
+// clang-format off
+typename hlsl::enable_if<hlsl::is_arithmetic<InputElTy>::value &&
+                      __detail::TypeTraits<OutputElTy>::CompType == BiasInterp,
+                         vector<OutputElTy, M> >::type
+// clang-format on
+MultiplyAdd(Matrix<MatrixDT, M, K, MatrixUse::A, MatrixScope::Thread> MatrixA,
+            vector<InputElTy, K> Vec, VectorRef<BiasInterp, M> BiasRef) {
+  using BiasOutputVecTy = vector<OutputElTy, M>;
+  BiasOutputVecTy BiasVec =
+      BiasRef.Buf.template Load<BiasOutputVecTy>(BiasRef.Offset);
+  BiasOutputVecTy Result;
+  // Vec is a native vector, so its interpretation is InputElTy, not MatrixDT.
+  __builtin_LinAlg_MatrixVectorMultiplyAdd(
+      Result, MatrixA.__handle, hlsl::is_signed<OutputElTy>::value, Vec,
+      __detail::TypeTraits<InputElTy>::CompType, BiasVec, BiasInterp);
   return Result;
 }
 
-template <typename OutputElTy, typename InputElTy, ComponentEnum BiasElTy,
+template <typename OutputElTy, typename InputElTy, ComponentEnum BiasInterp,
           SIZE_TYPE M, SIZE_TYPE K, ComponentEnum MatrixDT>
 // clang-format off
-typename hlsl::enable_if<hlsl::is_arithmetic<InputElTy>::value,
+typename hlsl::enable_if<hlsl::is_arithmetic<InputElTy>::value &&
+                      __detail::TypeTraits<OutputElTy>::CompType != BiasInterp,
                          vector<OutputElTy, M> >::type
 // clang-format on
 MultiplyAdd(Matrix<MatrixDT, M, K, MatrixUse::A, MatrixScope::Thread> MatrixA,
-            vector<InputElTy, K> Vec, VectorRef<BiasElTy, M> BiasRef) {
+            vector<InputElTy, K> Vec, VectorRef<BiasInterp, M> BiasRef) {
   using BiasVecTy =
-      vector<typename __detail::ComponentTypeTraits<BiasElTy>::Type, M>;
+      vector<typename __detail::ComponentTypeTraits<BiasInterp>::Type,
+             __detail::ScalarCountFromPackedComponents<BiasInterp, M>::Value>;
   BiasVecTy BiasVec = BiasRef.Buf.template Load<BiasVecTy>(BiasRef.Offset);
+
+  vector<OutputElTy, M> BiasVecConv; // Receives the bias as OutputElTy.
+  ComponentEnum OutputCompType = __detail::TypeTraits<OutputElTy>::CompType;
+  __builtin_LinAlg_Convert(BiasVecConv, BiasVec, BiasInterp, OutputCompType);
+
   vector<OutputElTy, M> Result;
-  __builtin_LinAlg_MatrixVectorMultiplyAdd(Result, MatrixA.__handle,
-                                           hlsl::is_signed<OutputElTy>::value,
-                                           Vec, MatrixDT, BiasVec, BiasElTy);
+  __builtin_LinAlg_MatrixVectorMultiplyAdd(
+      Result, MatrixA.__handle, hlsl::is_signed<OutputElTy>::value, Vec,
+      __detail::TypeTraits<InputElTy>::CompType, BiasVecConv, OutputCompType);
   return Result;
 }
 
 template <typename OutputElTy, typename InputElTy, ComponentEnum InputInterp,
-          ComponentEnum BiasElTy, SIZE_TYPE M, SIZE_TYPE VecK, SIZE_TYPE K,
+          ComponentEnum BiasInterp, SIZE_TYPE M, SIZE_TYPE VecK, SIZE_TYPE K,
           ComponentEnum MatrixDT>
 // clang-format off
 typename hlsl::enable_if<
-    VecK == __detail::ScalarCountFromPackedComponents<InputInterp, K>::Value,
+    VecK == __detail::ScalarCountFromPackedComponents<InputInterp, K>::Value &&
+    __detail::TypeTraits<OutputElTy>::CompType == BiasInterp,
     vector<OutputElTy, M> >::type
 // clang-format on
 MultiplyAdd(Matrix<MatrixDT, M, K, MatrixUse::A, MatrixScope::Thread> MatrixA,
             InterpretedVector<InputElTy, VecK, InputInterp> InterpVec,
-            VectorRef<BiasElTy, M> BiasRef) {
+            VectorRef<BiasInterp, M> BiasRef) {
+  using BiasOutputVecTy = vector<OutputElTy, M>; // Interp. matches OutputElTy.
+  BiasOutputVecTy BiasVec =
+      BiasRef.Buf.template Load<BiasOutputVecTy>(BiasRef.Offset);
+
+  vector<OutputElTy, M> Result;
+  __builtin_LinAlg_MatrixVectorMultiplyAdd(
+      Result, MatrixA.__handle, hlsl::is_signed<OutputElTy>::value,
+      InterpVec.Data, InterpVec.Interpretation, BiasVec, BiasInterp);
+  return Result;
+}
+
+template <typename OutputElTy, typename InputElTy, ComponentEnum InputInterp,
+          ComponentEnum BiasInterp, SIZE_TYPE M, SIZE_TYPE VecK, SIZE_TYPE K,
+          ComponentEnum MatrixDT>
+// clang-format off
+typename hlsl::enable_if<
+    VecK == __detail::ScalarCountFromPackedComponents<InputInterp, K>::Value &&
+    __detail::TypeTraits<OutputElTy>::CompType != BiasInterp,
+    vector<OutputElTy, M> >::type
+// clang-format on
+MultiplyAdd(Matrix<MatrixDT, M, K, MatrixUse::A, MatrixScope::Thread> MatrixA,
+            InterpretedVector<InputElTy, VecK, InputInterp> InterpVec,
+            VectorRef<BiasInterp, M> BiasRef) {
   using BiasVecTy =
-      vector<typename __detail::ComponentTypeTraits<BiasElTy>::Type, M>;
+      vector<typename __detail::ComponentTypeTraits<BiasInterp>::Type,
+             __detail::ScalarCountFromPackedComponents<BiasInterp, M>::Value>;
   BiasVecTy BiasVec = BiasRef.Buf.template Load<BiasVecTy>(BiasRef.Offset);
+
+  ComponentEnum OutputCompType = __detail::TypeTraits<OutputElTy>::CompType;
+  vector<OutputElTy, M> BiasVecConv; // Receives the bias as OutputElTy.
+  __builtin_LinAlg_Convert(BiasVecConv, BiasVec, BiasInterp, OutputCompType);
+
   vector<OutputElTy, M> Result;
   __builtin_LinAlg_MatrixVectorMultiplyAdd(
       Result, MatrixA.__handle, hlsl::is_signed<OutputElTy>::value,
-      InterpVec.Data, InterpVec.Interpretation, BiasVec, BiasElTy);
+      InterpVec.Data, InterpVec.Interpretation, BiasVecConv, OutputCompType);
   return Result;
 }
 
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/api/vectors.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/api/vectors.hlsl
index 58f19b887c..e952f2f721 100644
--- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/api/vectors.hlsl
+++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/api/vectors.hlsl
@@ -8,6 +8,7 @@ using MatrixATy = Matrix<ComponentType::F16, 8, 4, MatrixUse::A, MatrixScope::Th
 using MatrixAccum_8_8_Ty = Matrix<ComponentType::F16, 8, 8, MatrixUse::Accumulator, MatrixScope::Thread>;
 using MatrixAccum_8_4_Ty = Matrix<ComponentType::F16, 8, 4, MatrixUse::Accumulator, MatrixScope::Thread>;
 using Matrix_7_15_ATy = Matrix<ComponentType::F16, 7, 15, MatrixUse::A, MatrixScope::Thread>;
+using MatrixPacked_7_15_ATy = Matrix<ComponentType::F8_E4M3FN, 7, 15, MatrixUse::A, MatrixScope::Thread>;
 
 ByteAddressBuffer BAB : register(t0);
 
@@ -46,8 +47,10 @@ void main(uint ID : SV_GroupID) {
 
   // CHECK: %[[VEC_BIAS:.*]] = extractvalue %dx.types.ResRet.v8i16 %[[RAWLOAD]], 0
 
-  // CHECK: %[[VEC5:.*]] = call <8 x half> @dx.op.linAlgMatVecMulAdd.v8f16.mC8M8N4U0S0.v4f16.v8i16(i32 -2147483622,
-  // CHECK-SAME: %dx.types.LinAlgMatrixC8M8N4U0S0 %[[MAT1]], i1 true, <4 x half> %[[VEC20]], i32 8, <8 x i16> %[[VEC_BIAS]], i32 2)
+  // CHECK: %[[BIAS_CONV:.*]] = call <8 x half> @dx.op.linAlgConvert.v8f16.v8i16(i32 -2147483618, <8 x i16> %[[VEC_BIAS]], i32 2, i32 8)
+  // CHECK-SAME: ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation)
+  // CHECK: %[[VEC5:.*]] = call <8 x half> @dx.op.linAlgMatVecMulAdd.v8f16.mC8M8N4U0S0.v4f16.v8f16(i32 -2147483622,
+  // CHECK-SAME: %dx.types.LinAlgMatrixC8M8N4U0S0 %[[MAT1]], i1 true, <4 x half> %[[VEC20]], i32 8, <8 x half> %[[BIAS_CONV]], i32 8)
   // CHECK-SAME:; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation)
   VectorRef<ComponentType::I16, 8> memBias = {BAB, 4096};
   vector<half, 8> vec5 = MultiplyAdd<half>(Mat1, interpVec2, memBias);
@@ -58,8 +61,10 @@ void main(uint ID : SV_GroupID) {
 
   // CHECK: %[[VEC_BIAS:.*]] = extractvalue %dx.types.ResRet.v8i16 %[[RAWLOAD]], 0
 
-  // CHECK: %[[VEC6:.*]] = call <8 x half> @dx.op.linAlgMatVecMulAdd.v8f16.mC8M8N4U0S0.v4f16.v8i16(i32 -2147483622,
-  // CHECK-SAME: %dx.types.LinAlgMatrixC8M8N4U0S0 %[[MAT1]], i1 true, <4 x half> %[[VEC20]], i32 8, <8 x i16> %[[VEC_BIAS]], i32 2)
+  // CHECK: %[[BIAS_CONV:.*]] = call <8 x half> @dx.op.linAlgConvert.v8f16.v8i16(i32 -2147483618, <8 x i16> %[[VEC_BIAS]], i32 2, i32 8)
+  // CHECK-SAME: ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation)
+  // CHECK: %[[VEC6:.*]] = call <8 x half> @dx.op.linAlgMatVecMulAdd.v8f16.mC8M8N4U0S0.v4f16.v8f16(i32 -2147483622,
+  // CHECK-SAME: %dx.types.LinAlgMatrixC8M8N4U0S0 %[[MAT1]], i1 true, <4 x half> %[[VEC20]], i32 8, <8 x half> %[[BIAS_CONV]], i32 8)
   // CHECK-SAME: ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation)
   vector<half, 8> vec6 = MultiplyAdd<half>(Mat1, interpVec2, memBias);
 
@@ -77,13 +82,13 @@ void main(uint ID : SV_GroupID) {
   InterpretedVector<float, 8, ComponentType::F32> convertedVec;
   convertedVec = Convert<ComponentType::F32, ComponentType::F16>(vec6);
 
-  // CHECK: call <4 x i32> @dx.op.linAlgConvert.v4i32.v16f16(i32 -2147483618, <16 x half> %21, i32 8, i32 21)
+  // CHECK: call <4 x i32> @dx.op.linAlgConvert.v4i32.v16f16(i32 -2147483618, <16 x half> %{{[0-9]+}}, i32 8, i32 21)
   // CHECK: ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation)
   typedef vector<half, 16> half16;
   half16 srcF16 = BAB.Load<half16>(128);
   InterpretedVector<uint, 4, ComponentEnum::F8_E4M3FN> convertedPacked = Convert<ComponentEnum::F8_E4M3FN, ComponentEnum::F16>(srcF16);
 
-  // CHECK: call <1 x i32> @dx.op.linAlgConvert.v1i32.v3f16(i32 -2147483618, <3 x half> %25, i32 8, i32 21)
+  // CHECK: call <1 x i32> @dx.op.linAlgConvert.v1i32.v3f16(i32 -2147483618, <3 x half> %{{[0-9]+}}, i32 8, i32 21)
   // CHECK-SAME: ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation)
   half3 ThreeF16 = BAB.Load<half3>(256);
   InterpretedVector<uint, 1, ComponentEnum::F8_E4M3FN> convertedPacked2 =
@@ -112,16 +117,16 @@ void main(uint ID : SV_GroupID) {
 
   // CHECK: %[[LOAD1:.*]] = call %dx.types.ResRet.v7f16 @dx.op.rawBufferVectorLoad.v7f16(i32 303, %dx.types.Handle %{{[0-9]+}}, i32 512, i32 undef, i32 2)
   // CHECK-SAME: ; RawBufferVectorLoad(buf,index,elementOffset,alignment)
-  // CHECK: %[[MEM_BIAS1:.*]] = extractvalue %dx.types.ResRet.v7f16 %[[LOAD1]], 0
-  // CHECK: call <7 x half> @dx.op.linAlgMatVecMulAdd.v7f16.mC8M7N15U0S0.v15f16.v7f16(i32 -2147483622,
-  // CHECK-SAME: %dx.types.LinAlgMatrixC8M7N15U0S0 %[[MAT_7_15]], i1 true, <15 x half> %29, i32 8, <7 x half> %37, i32 8)
+  // CHECK-NEXT: %[[MEM_BIAS1:.*]] = extractvalue %dx.types.ResRet.v7f16 %[[LOAD1]], 0
+  // CHECK-NEXT: call <7 x half> @dx.op.linAlgMatVecMulAdd.v7f16.mC8M7N15U0S0.v15f16.v7f16(i32 -2147483622,
+  // CHECK-SAME: %dx.types.LinAlgMatrixC8M7N15U0S0 %[[MAT_7_15]], i1 true, <15 x half> %{{[0-9]+}}, i32 8, <7 x half> %[[MEM_BIAS1]], i32 8)
   // CHECK-SAME: ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation)
   VectorRef<ComponentType::F16, 7> memBias7 = {BAB, 512};
   vector<half, 7> vec9 = MultiplyAdd<half>(Mat_7_15, vecH15, memBias7);
 
   // CHECK: %[[LOAD2:.*]] = call %dx.types.ResRet.v7f16 @dx.op.rawBufferVectorLoad.v7f16(i32 303, %dx.types.Handle %{{[0-9]+}}, i32 512, i32 undef, i32 2)
   // CHECK-SAME: ; RawBufferVectorLoad(buf,index,elementOffset,alignment)
-  // CHECK: %[[MEM_BIAS2:.*]] = extractvalue %dx.types.ResRet.v7f16 %[[LOAD2]], 0
+  // CHECK-NEXT: %[[MEM_BIAS2:.*]] = extractvalue %dx.types.ResRet.v7f16 %[[LOAD2]], 0
   // CHECK-NEXT: %dx.types.LinAlgMatrixC8M7N15U0S0 %[[MAT_7_15]], i1 true, <15 x half> %{{[0-9]+}}, i32 8, <7 x half> %[[MEM_BIAS2]], i32 8)
   // CHECK-SAME: ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation)
   vector<half, 7> vec10 = MultiplyAdd<half>(Mat_7_15, interpVecH15, memBias7);
@@ -133,15 +138,51 @@ void main(uint ID : SV_GroupID) {
   InterpretedVector<uint, 4, ComponentEnum::F8_E4M3FN> interpVecH15Packed = Convert<ComponentEnum::F8_E4M3FN, ComponentEnum::F16>(vecH15);
 
   // CHECK: call <7 x half> @dx.op.linAlgMatVecMulAdd.v7f16.mC8M7N15U0S0.v4i32.v7f16(i32 -2147483622,
-  // CHECK-SAME: %dx.types.LinAlgMatrixC8M7N15U0S0 %[[MAT_7_15]], i1 true, <4 x i32> %43, i32 21, <7 x half> %31, i32 8)
+  // CHECK-SAME: %dx.types.LinAlgMatrixC8M7N15U0S0 %[[MAT_7_15]], i1 true, <4 x i32> %{{[0-9]+}}, i32 21, <7 x half> %{{[0-9]+}}, i32 8)
   // CHECK-SAME: ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation)
   vector<half, 7> vec11 = MultiplyAdd<half>(Mat_7_15, interpVecH15Packed, vecH7);
 
-  // CHECK: %[[LOAD3:.+]] = call %dx.types.ResRet.v7f16 @dx.op.rawBufferVectorLoad.v7f16(i32 303, %dx.types.Handle %45, i32 512, i32 undef, i32 2)
+  // CHECK: %[[LOAD3:.+]] = call %dx.types.ResRet.v7f16 @dx.op.rawBufferVectorLoad.v7f16(i32 303, %dx.types.Handle %{{[0-9]+}}, i32 512, i32 undef, i32 2)
   // CHECK-SAME: ; RawBufferVectorLoad(buf,index,elementOffset,alignment)
-  // CHECK-NEXT: %[[MEM_BIAS3:.*]] = extractvalue %dx.types.ResRet.v7f16 %46, 0
+  // CHECK-NEXT: %[[MEM_BIAS3:.*]] = extractvalue %dx.types.ResRet.v7f16 %[[LOAD3]], 0
   // CHECK-NEXT: call <7 x half> @dx.op.linAlgMatVecMulAdd.v7f16.mC8M7N15U0S0.v4i32.v7f16(i32 -2147483622,
   // CHECK-SAME: %dx.types.LinAlgMatrixC8M7N15U0S0 %[[MAT_7_15]], i1 true, <4 x i32> %[[INTERP_VEC_H15_PACKED]], i32 21, <7 x half> %[[MEM_BIAS3]], i32 8)
   // CHECK-SAME: ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation)
    vector<half, 7> vec12 = MultiplyAdd<half>(Mat_7_15, interpVecH15Packed, memBias7);
+
+  // Test Convert and MultiplyAdd with odd sizes and packed types
+
+  // CHECK: %[[MAT_7_15_PACKED:.*]] = call %dx.types.LinAlgMatrixC21M7N15U0S0 @dx.op.linAlgMatrixLoadFromDescriptor.mC21M7N15U0S0(i32 -2147483634,
+  // CHECK-SAME: %dx.types.Handle %{{[0-9]+}}, i32 0, i32 16, i32 1, i32 128)  ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout,align)
+  MatrixPacked_7_15_ATy Mat_7_15_Packed = MatrixPacked_7_15_ATy::Load<MatrixLayoutEnum::ColMajor>(BAB, 0, 16);
+
+  // CHECK: call <7 x half> @dx.op.linAlgMatVecMulAdd.v7f16.mC21M7N15U0S0.v15f16.v7f16(i32 -2147483622,
+  // CHECK-SAME: %dx.types.LinAlgMatrixC21M7N15U0S0 %[[MAT_7_15_PACKED]], i1 true, <15 x half> %{{[0-9]+}}, i32 8, <7 x half> %{{[0-9]+}}, i32 8)
+  // CHECK-SAME: ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation)
+  vector<half, 7> vec21 = MultiplyAdd<half>(Mat_7_15_Packed, vecH15, vecH7);
+
+  // CHECK: call <7 x half> @dx.op.linAlgMatVecMulAdd.v7f16.mC21M7N15U0S0.v4i32.v7f16(i32 -2147483622, %dx.types.LinAlgMatrixC21M7N15U0S0 %[[MAT_7_15_PACKED]],
+  // CHECK-SAME: i1 true, <4 x i32> %[[INTERP_VEC_H15_PACKED]], i32 21, <7 x half> %{{[0-9]+}}, i32 8)
+  // CHECK-SAME: ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation)
+  vector<half, 7> vec22 = MultiplyAdd<half>(Mat_7_15_Packed, interpVecH15Packed, vecH7);
+
+  // CHECK: %[[LOAD4:.*]] = call %dx.types.ResRet.v2i32 @dx.op.rawBufferVectorLoad.v2i32(i32 303, %dx.types.Handle %{{[0-9]+}}, i32 512, i32 undef, i32 4)
+  // CHECK-SAME: ; RawBufferVectorLoad(buf,index,elementOffset,alignment)
+  // CHECK-NEXT: %[[MEM_BIAS_PACKED1:.*]] = extractvalue %dx.types.ResRet.v2i32 %[[LOAD4]], 0
+  // CHECK-NEXT: %[[MEM_BIAS_CONV1:.*]] = call <7 x half> @dx.op.linAlgConvert.v7f16.v2i32(i32 -2147483618,
+  // CHECK-SAME: <2 x i32> %[[MEM_BIAS_PACKED1]], i32 21, i32 8)  ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation)
+  // CHECK-NEXT: call <7 x half> @dx.op.linAlgMatVecMulAdd.v7f16.mC21M7N15U0S0.v15f16.v7f16(i32 -2147483622,
+  // CHECK-SAME: %dx.types.LinAlgMatrixC21M7N15U0S0 %[[MAT_7_15_PACKED]], i1 true, <15 x half> %{{[0-9]+}}, i32 8, <7 x half> %[[MEM_BIAS_CONV1]], i32 8)
+  // CHECK-SAME: ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation)
+  VectorRef<ComponentType::F8_E4M3FN, 7> memBias7Packed = {BAB, 512};
+  vector<half, 7> vec23 = MultiplyAdd<half>(Mat_7_15_Packed, vecH15, memBias7Packed);
+
+  // CHECK: %[[LOAD5:.*]] = call %dx.types.ResRet.v2i32 @dx.op.rawBufferVectorLoad.v2i32(i32 303, %dx.types.Handle %{{[0-9]+}}, i32 512, i32 undef, i32 4)
+  // CHECK-SAME: ; RawBufferVectorLoad(buf,index,elementOffset,alignment)
+  // CHECK-NEXT: %[[MEM_BIAS_PACKED2:.*]] = extractvalue %dx.types.ResRet.v2i32 %[[LOAD5]], 0
+  // CHECK-NEXT: %[[MEM_BIAS_CONV2:.*]] = call <7 x half> @dx.op.linAlgConvert.v7f16.v2i32(i32 -2147483618, <2 x i32> %[[MEM_BIAS_PACKED2]], i32 21, i32 8)  ; LinAlgConvert(inputVector,inputInterpretation,outputInterpretation)
+  // CHECK-NEXT: call <7 x half> @dx.op.linAlgMatVecMulAdd.v7f16.mC21M7N15U0S0.v4i32.v7f16(i32 -2147483622,
+  // CHECK-SAME: %dx.types.LinAlgMatrixC21M7N15U0S0 %[[MAT_7_15_PACKED]], i1 true, <4 x i32> %[[INTERP_VEC_H15_PACKED]], i32 21, <7 x half> %[[MEM_BIAS_CONV2]], i32 8)
+  // CHECK-SAME: ; LinAlgMatVecMulAdd(matrix,isOutputSigned,inputVector,inputInterpretation,biasVector,biasInterpretation)
+  vector<half, 7> vec24 = MultiplyAdd<half>(Mat_7_15_Packed, interpVecH15Packed, memBias7Packed);
 }