From f9d223d65c2ec97b2bca202743937db2c3eca541 Mon Sep 17 00:00:00 2001 From: sushant-suse Date: Wed, 18 Feb 2026 21:18:22 +0530 Subject: [PATCH 1/7] feat #183: reach manifest parity with legacy JSON portal config --- src/docbuild/cli/cmd_metadata/metaprocess.py | 232 ++++++++++--------- src/docbuild/models/manifest.py | 6 + 2 files changed, 130 insertions(+), 108 deletions(-) diff --git a/src/docbuild/cli/cmd_metadata/metaprocess.py b/src/docbuild/cli/cmd_metadata/metaprocess.py index cc517b5d..6e98c825 100644 --- a/src/docbuild/cli/cmd_metadata/metaprocess.py +++ b/src/docbuild/cli/cmd_metadata/metaprocess.py @@ -54,33 +54,28 @@ def collect_files_flat( doctypes: Sequence[Doctype], basedir: Path | str, ) -> Generator[tuple[Doctype, str, list[Path]], Any, None]: - """Group files by (Product, Docset). - - Yields (Doctype, Docset, List[Path]) using a flattened iteration strategy. - """ basedir = Path(basedir) - # 1. FLATTEN THE GROUPS - # Create a stream of (Doctype, Docset) pairs. - # This removes the nested loop over doctypes and docsets. task_stream = ((dt, ds) for dt in doctypes for ds in dt.docset) - # 2. PROCESS EACH GROUP for dt, docset in task_stream: - # 3. COLLECT FILES (Flattened List Comprehension) - # This one-liner iterates over all languages in the doctype, constructs - # the path, and globs the files. It handles the 'LanguageCode' object correctly. + # SEARCH RECURSIVELY + all_files = list(basedir.rglob("DC-*")) + + # Case-insensitive filtering files = [ - f - for lang in dt.langs - for f in (basedir / lang.language / dt.product.value / docset).glob("DC-*") + f for f in all_files + if dt.product.value.lower() in [p.lower() for p in f.parts] + and docset.lower() in [p.lower() for p in f.parts] ] + + print(f"DEBUG: Filtered down to {len(files)} files for {dt.product.value}/{docset}") - # Only yield if we found something if files: yield dt, docset, files async def process_deliverable( + context: DocBuildContext, deliverable: Deliverable, *, repo_dir: Path, @@ -95,6 +90,7 @@ async def process_deliverable( checks out the correct branch, and then executes the DAPS command to generate metadata. + :param context: The DocBuildContext containing environment configuration. :param deliverable: The Deliverable object to process. :param repo_dir: The permanent repo path taken from the env config ``paths.repo_dir`` @@ -111,23 +107,21 @@ async def process_deliverable( """ log.info("> Processing deliverable: %s", deliverable.full_id) - meta_cache_dir = Path(meta_cache_dir) + # Tell the type-checker envconfig is definitely not None + if not context.envconfig: + log.error("Environment configuration is missing.") + return False + meta_cache_dir = Path(meta_cache_dir) bare_repo_path = repo_dir / deliverable.git.slug if not bare_repo_path.is_dir(): - log.error( - f"Bare repository not found for {deliverable.git.name} at {bare_repo_path}" - ) + log.error(f"Bare repository not found for {deliverable.git.name} at {bare_repo_path}") return False outputdir = meta_cache_dir / deliverable.relpath outputdir.mkdir(parents=True, exist_ok=True) - prefix = ( - f"{deliverable.productid}-{deliverable.docsetid}-" - f"{deliverable.lang}--{deliverable.dcfile}" - ) - + prefix = f"{deliverable.productid}-{deliverable.docsetid}-{deliverable.lang}--{deliverable.dcfile}" outputjson = outputdir / deliverable.dcfile try: @@ -135,75 +129,83 @@ async def process_deliverable( dir=str(tmp_repo_dir), prefix=f"clone-{prefix}_", ) as worktree_dir: - # 1. 
Ensure the bare repository exists/updated using ManagedGitRepo, - # then create a temporary worktree from that bare repo. mg = ManagedGitRepo(deliverable.git.url, repo_dir) - cloned = await mg.clone_bare() if not cloned: - raise RuntimeError( - "Failed to ensure bare repository for " - f"{deliverable.full_id} ({deliverable.git.url})" - ) + raise RuntimeError(f"Failed to ensure bare repository for {deliverable.full_id}") try: - # create_worktree expects a Path target_dir and branch name await mg.create_worktree(worktree_dir, deliverable.branch) - except Exception as e: - raise RuntimeError( - f"Failed to create worktree for {deliverable.full_id}: {e}" - ) from e + raise RuntimeError(f"Failed to create worktree for {deliverable.full_id}: {e}") - # The source file for daps might be in a subdirectory dcfile_path = Path(deliverable.subdir) / deliverable.dcfile - - # 2. Run the daps command - cmd = shlex.split( - dapstmpl.format( - builddir=str(worktree_dir), - dcfile=str(worktree_dir / dcfile_path), - output=str(outputjson), - ) + + # --- CONTAINER LOGIC START --- + # Safely access container config + container_image = None + if hasattr(context.envconfig, 'build') and context.envconfig.build.container: + container_image = context.envconfig.build.container.container + + # Format the internal DAPS command + raw_daps_cmd = dapstmpl.format( + builddir=str(worktree_dir), + dcfile=str(worktree_dir / dcfile_path), + output=str(outputjson), ) - # stdout.print(f' command: {cmd}') + + if container_image: + # Map the project root to /src inside the container + # to bypass macOS path complexity + root_cfg = str(context.envconfig.paths.root_config_dir) + host_cache = str(base_cache_dir) + host_worktree = str(worktree_dir) + + # Internal container paths + container_worktree = "/worktree" + container_output = "/output.json" + + cmd = [ + "docker", "run", "--rm", + "-v", f"{host_worktree}:{container_worktree}", # Mount source to /worktree + "-v", f"{outputjson}:{container_output}", # Mount specific file target + container_image, + "daps", "-vv", "metadata", + "--output", container_output, + f"{container_worktree}/{dcfile_path.name}" # Path inside container + ] + log.info("Executing DAPS in container for %s", deliverable.dcfile) + else: + cmd = shlex.split(raw_daps_cmd) + # --- CONTAINER LOGIC END --- + daps_process = await asyncio.create_subprocess_exec( *cmd, stdin=asyncio.subprocess.DEVNULL, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE, ) - _, stderr = await daps_process.communicate() + stdout_data, stderr_data = await daps_process.communicate() + if daps_process.returncode != 0: - # Raise an exception on failure. 
- raise RuntimeError( - f"DAPS command {' '.join(cmd)!r} failed for {deliverable.full_id}: " - f"{stderr.decode().strip()}" - ) + log.error("DAPS Error: %s", stderr_data.decode()) + raise RuntimeError(f"DAPS failed for {deliverable.full_id}") fmt = deliverable.format with edit_json(outputjson) as jsonconfig: - # Update the JSON metadata for the format fields - # We have only one, single deliverable per file jsonconfig["docs"][0]["dcfile"] = deliverable.dcfile jsonconfig["docs"][0]["format"]["html"] = deliverable.html_path if fmt.get("pdf"): jsonconfig["docs"][0]["format"]["pdf"] = deliverable.pdf_path if fmt.get("single-html"): - jsonconfig["docs"][0]["format"]["single-html"] = ( - deliverable.singlehtml_path - ) + jsonconfig["docs"][0]["format"]["single-html"] = deliverable.singlehtml_path if not jsonconfig["docs"][0]["lang"]: - # If lang is empty, set it and use only the language (no country) jsonconfig["docs"][0]["lang"] = deliverable.language - log.debug("Updated metadata JSON for %s at %s", deliverable.full_id, outputjson) - - # stdout.print(f'> Processed deliverable: {deliverable.pdlangdc}') + log.debug("Updated metadata JSON for %s", deliverable.full_id) return True - except RuntimeError as e: - # console_err.print(f'Error processing {deliverable.full_id}: {e}') + except Exception as e: log.error("Error processing %s: %s", deliverable.full_id, str(e)) return False @@ -282,6 +284,7 @@ async def process_doctype( coroutines = [ process_deliverable( + context, deliverable, repo_dir=repo_dir, tmp_repo_dir=tmp_repo_dir, @@ -328,80 +331,93 @@ def store_productdocset_json( ) -> None: """Collect all JSON file for product/docset and create a single file. - :param context: Beschreibung - :param doctypes: Beschreibung - :param stitchnode: Beschreibung + :param context: DocBuildContext object + :param doctypes: Sequence of Doctype objects + :param stitchnode: The stitched XML tree """ - meta_cache_dir = context.envconfig.paths.meta_cache_dir - - meta_cache_dir = Path(meta_cache_dir) + meta_cache_dir = Path(context.envconfig.paths.meta_cache_dir) for doctype, docset, files in collect_files_flat(doctypes, meta_cache_dir): - # files: list[Path] - # TODO: Create a Deliverable object? product = doctype.product.value - stdout.print(f" ❱ Processed group: {doctype} / {docset}") - # The XPath logic is encapsulated within the Doctype model + + # Version Formatting + version_str = str(docset) + if version_str.endswith(".0"): version_str = version_str[:-2] + productxpath = f"./{doctype.product_xpath_segment()}" productnode = stitchnode.find(productxpath) docsetxpath = f"./{doctype.docset_xpath_segment(docset)}" docsetnode = productnode.find(docsetxpath) - descriptions = Description.from_xml_node(productnode) - - categories = Category.from_xml_node(productnode) + + # --- PARITY FIX: Capture and Clean Descriptions --- + descriptions = list(Description.from_xml_node(productnode)) + legacy_tail = "
<br/><br/>The default view of this page is the ‘Table of Contents’ sorting order. To search for a particular document, you can narrow down the results using the ‘Filter as you type’ option. It dynamically filters the document titles and descriptions for what you enter.<br/><br/>
" + + for desc in descriptions: + if legacy_tail not in desc.description: + desc.description += legacy_tail + # Match Legacy HTML entities (literal & to &) + desc.description = desc.description.replace("& ", "& ") + + categories = list(Category.from_xml_node(productnode)) + # Match Legacy HTML entities in category titles + for cat in categories: + for trans in cat.translations: + trans.title = trans.title.replace("&", "&") manifest = Manifest( productname=productnode.find("name").text, - acronym=product, - version=docset, + acronym=productnode.find("acronym").text if productnode.find("acronym") is not None else product.upper(), + version=version_str, lifecycle=docsetnode.attrib.get("lifecycle") or "", + hide_productname=False, descriptions=descriptions, categories=categories, - # * hide-productname is False by default in the Manifest model - # * archives are empty lists by default + documents=[], + archives=[] ) for f in files: + # Path resolution for nested folders + actual_file = f if f.is_absolute() else meta_cache_dir / f + + # Skip if it's a directory + if not actual_file.is_file(): + continue + stdout.print(f" | {f.stem}") try: - with (meta_cache_dir / f).open(encoding="utf-8") as fh: + with actual_file.open(encoding="utf-8") as fh: loaded_doc_data = json.load(fh) + if not loaded_doc_data: - log.warning("Empty metadata file %s", f) continue + doc_model = Document.model_validate(loaded_doc_data) manifest.documents.append(doc_model) - except json.JSONDecodeError as e: - log.error("Error decoding metadata file %s: %s", f, e) - continue - - except ValidationError as e: - log.error("Error validating metadata file %s: %s", f, e) - continue - except Exception as e: - log.error("Error reading metadata file %s: %s", f, e) + log.error("Error reading metadata file %s: %s", actual_file, e) continue - # stdout.print(json.dumps(structure, indent=2), markup=True) jsondir = meta_cache_dir / product jsondir.mkdir(parents=True, exist_ok=True) - jsonfile = ( - jsondir / f"{docset}.json" - ) # e.g., /path/to/cache/product_id/docset_id.json - - # - jsonfile.write_text(manifest.model_dump_json(indent=2, by_alias=True)) - log.info( - "Wrote merged metadata JSON for %s/%s => %s", product, docset, jsonfile - ) + jsonfile = jsondir / f"{docset}.json" + + # Serialization for parity + # We use model_dump() + json.dump() instead of model_dump_json() + # and we remove exclude_defaults so that [], False, etc. ARE written to the file. + json_data = manifest.model_dump(by_alias=True, exclude_none=True) + + # --- CRITICAL: Use json.dumps with escaped entities for parity --- + with open(jsonfile, "w", encoding="utf-8") as jf: + # We don't use ensure_ascii=False here because legacy files + # seem to use escaped entities for some special characters + json.dump(json_data, jf, indent=2) + stdout.print(f" > Result: {jsonfile}") - # The Category model handles the ranking logic internally, - # so we need to reset the rank before processing a new product. Category.reset_rank() - async def process( context: DocBuildContext, doctypes: Sequence[Doctype] | None, @@ -463,13 +479,13 @@ async def process( d for failed_list in results_per_doctype for d in failed_list ] + # Force the merge regardless of processing success + store_productdocset_json(context, doctypes, stitchnode) + if all_failed_deliverables: console_err.print(f"Found {len(all_failed_deliverables)} failed deliverables:") for d in all_failed_deliverables: console_err.print(f"- {d.full_id}") - return 1 - - # Collect all JSON files and merge them into a single file. 
- store_productdocset_json(context, doctypes, stitchnode) + return 1 return 0 diff --git a/src/docbuild/models/manifest.py b/src/docbuild/models/manifest.py index 420833b5..34f9a614 100644 --- a/src/docbuild/models/manifest.py +++ b/src/docbuild/models/manifest.py @@ -11,6 +11,7 @@ SerializationInfo, field_serializer, field_validator, + ConfigDict, # model_validator, ) @@ -302,12 +303,17 @@ class Manifest(BaseModel): acronym: str version: str lifecycle: str | LifecycleFlag = Field(default=LifecycleFlag.unknown) + # Ensure this is defined exactly like this: hide_productname: bool = Field(default=False, alias="hide-productname") descriptions: list[Description] = Field(default_factory=list) categories: list[Category] = Field(default_factory=list) documents: list[Document] = Field(default_factory=list) archives: list[Archive] = Field(default_factory=list) + model_config = ConfigDict( + populate_by_name=True, + str_strip_whitespace=True + ) if __name__ == "__main__": # pragma: nocover from rich import print # noqa: A004 From 0b54f6ed5a15ccf3367f4363164afcf1b68a1ec4 Mon Sep 17 00:00:00 2001 From: sushant-suse Date: Wed, 18 Feb 2026 22:03:32 +0530 Subject: [PATCH 2/7] fea #183t: implement manifest parity with legacy JSON and fix nested collection Signed-off-by: sushant-suse --- changelog.d/183.feature.rst | 1 + src/docbuild/cli/cmd_metadata/metaprocess.py | 253 ++++++++++--------- src/docbuild/models/manifest.py | 4 +- tests/cli/cmd_metadata/test_cmd_metadata.py | 18 +- 4 files changed, 143 insertions(+), 133 deletions(-) create mode 100644 changelog.d/183.feature.rst diff --git a/changelog.d/183.feature.rst b/changelog.d/183.feature.rst new file mode 100644 index 00000000..98086c3d --- /dev/null +++ b/changelog.d/183.feature.rst @@ -0,0 +1 @@ +Implement manifest parity with legacy JSON portal configuration, including support for nested product metadata collection and containerized DAPS execution. \ No newline at end of file diff --git a/src/docbuild/cli/cmd_metadata/metaprocess.py b/src/docbuild/cli/cmd_metadata/metaprocess.py index 6e98c825..fc518f7d 100644 --- a/src/docbuild/cli/cmd_metadata/metaprocess.py +++ b/src/docbuild/cli/cmd_metadata/metaprocess.py @@ -9,7 +9,6 @@ from typing import Any from lxml import etree -from pydantic import ValidationError from rich.console import Console from ...config.xml.stitch import create_stitchfile @@ -54,25 +53,74 @@ def collect_files_flat( doctypes: Sequence[Doctype], basedir: Path | str, ) -> Generator[tuple[Doctype, str, list[Path]], Any, None]: + """Recursively collect all DC-metadata files from the cache directory. + + :param doctypes: Sequence of Doctype objects to filter by. + :param basedir: The base directory to start the recursive search. + :yield: A tuple containing the Doctype, docset ID, and list of matching Paths. 
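+
+    A small sketch of the expected layout and yield (the paths and names here
+    are hypothetical, not taken from a real configuration)::
+
+        # Given basedir/en-us/sles/15-SP6/DC-administration on disk,
+        # the generator would yield roughly:
+        #   (doctype_for_sles, "15-SP6", [Path(".../DC-administration")])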
+ """ basedir = Path(basedir) task_stream = ((dt, ds) for dt in doctypes for ds in dt.docset) for dt, docset in task_stream: - # SEARCH RECURSIVELY all_files = list(basedir.rglob("DC-*")) - + # Case-insensitive filtering files = [ - f for f in all_files - if dt.product.value.lower() in [p.lower() for p in f.parts] + f for f in all_files + if dt.product.value.lower() in [p.lower() for p in f.parts] and docset.lower() in [p.lower() for p in f.parts] ] - - print(f"DEBUG: Filtered down to {len(files)} files for {dt.product.value}/{docset}") if files: yield dt, docset, files +def _get_daps_command( + context: DocBuildContext, + worktree_dir: Path, + dcfile_path: Path, + outputjson: Path, + dapstmpl: str, +) -> list[str]: + """Construct the DAPS command for local or containerized execution.""" + container_image = None + if hasattr(context.envconfig, 'build') and context.envconfig.build.container: + container_image = context.envconfig.build.container.container + + if container_image: + container_worktree = "/worktree" + container_output = "/output.json" + return [ + "docker", "run", "--rm", + "-v", f"{worktree_dir}:{container_worktree}", + "-v", f"{outputjson}:{container_output}", + container_image, + "daps", "-vv", "metadata", + "--output", container_output, + f"{container_worktree}/{dcfile_path.name}" + ] + + raw_daps_cmd = dapstmpl.format( + builddir=str(worktree_dir), + dcfile=str(worktree_dir / dcfile_path), + output=str(outputjson), + ) + return shlex.split(raw_daps_cmd) + + +def _update_metadata_json(outputjson: Path, deliverable: Deliverable) -> None: + """Update the generated metadata JSON with deliverable-specific details.""" + fmt = deliverable.format + with edit_json(outputjson) as jsonconfig: + doc = jsonconfig["docs"][0] + doc["dcfile"] = deliverable.dcfile + doc["format"]["html"] = deliverable.html_path + if fmt.get("pdf"): + doc["format"]["pdf"] = deliverable.pdf_path + if fmt.get("single-html"): + doc["format"]["single-html"] = deliverable.singlehtml_path + if not doc.get("lang"): + doc["lang"] = deliverable.language async def process_deliverable( context: DocBuildContext, @@ -107,101 +155,48 @@ async def process_deliverable( """ log.info("> Processing deliverable: %s", deliverable.full_id) - # Tell the type-checker envconfig is definitely not None if not context.envconfig: log.error("Environment configuration is missing.") return False - meta_cache_dir = Path(meta_cache_dir) bare_repo_path = repo_dir / deliverable.git.slug if not bare_repo_path.is_dir(): - log.error(f"Bare repository not found for {deliverable.git.name} at {bare_repo_path}") + log.error("Bare repository not found for %s at %s", deliverable.git.name, bare_repo_path) return False - outputdir = meta_cache_dir / deliverable.relpath + outputdir = Path(meta_cache_dir) / deliverable.relpath outputdir.mkdir(parents=True, exist_ok=True) - - prefix = f"{deliverable.productid}-{deliverable.docsetid}-{deliverable.lang}--{deliverable.dcfile}" outputjson = outputdir / deliverable.dcfile try: async with PersistentOnErrorTemporaryDirectory( dir=str(tmp_repo_dir), - prefix=f"clone-{prefix}_", + prefix=f"clone-{deliverable.productid}-{deliverable.dcfile}_", ) as worktree_dir: mg = ManagedGitRepo(deliverable.git.url, repo_dir) - cloned = await mg.clone_bare() - if not cloned: + if not await mg.clone_bare(): raise RuntimeError(f"Failed to ensure bare repository for {deliverable.full_id}") try: await mg.create_worktree(worktree_dir, deliverable.branch) except Exception as e: - raise RuntimeError(f"Failed to create worktree for 
{deliverable.full_id}: {e}") - - dcfile_path = Path(deliverable.subdir) / deliverable.dcfile - - # --- CONTAINER LOGIC START --- - # Safely access container config - container_image = None - if hasattr(context.envconfig, 'build') and context.envconfig.build.container: - container_image = context.envconfig.build.container.container - - # Format the internal DAPS command - raw_daps_cmd = dapstmpl.format( - builddir=str(worktree_dir), - dcfile=str(worktree_dir / dcfile_path), - output=str(outputjson), + raise RuntimeError(f"Failed to create worktree for {deliverable.full_id}: {e}") from e + + cmd = _get_daps_command( + context, Path(worktree_dir), Path(deliverable.subdir) / deliverable.dcfile, outputjson, dapstmpl ) - if container_image: - # Map the project root to /src inside the container - # to bypass macOS path complexity - root_cfg = str(context.envconfig.paths.root_config_dir) - host_cache = str(base_cache_dir) - host_worktree = str(worktree_dir) - - # Internal container paths - container_worktree = "/worktree" - container_output = "/output.json" - - cmd = [ - "docker", "run", "--rm", - "-v", f"{host_worktree}:{container_worktree}", # Mount source to /worktree - "-v", f"{outputjson}:{container_output}", # Mount specific file target - container_image, - "daps", "-vv", "metadata", - "--output", container_output, - f"{container_worktree}/{dcfile_path.name}" # Path inside container - ] - log.info("Executing DAPS in container for %s", deliverable.dcfile) - else: - cmd = shlex.split(raw_daps_cmd) - # --- CONTAINER LOGIC END --- - - daps_process = await asyncio.create_subprocess_exec( - *cmd, - stdin=asyncio.subprocess.DEVNULL, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, + daps_proc = await asyncio.create_subprocess_exec( + *cmd, stdin=asyncio.subprocess.DEVNULL, + stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE, ) - stdout_data, stderr_data = await daps_process.communicate() - - if daps_process.returncode != 0: + _, stderr_data = await daps_proc.communicate() + + if daps_proc.returncode != 0: log.error("DAPS Error: %s", stderr_data.decode()) raise RuntimeError(f"DAPS failed for {deliverable.full_id}") - fmt = deliverable.format - with edit_json(outputjson) as jsonconfig: - jsonconfig["docs"][0]["dcfile"] = deliverable.dcfile - jsonconfig["docs"][0]["format"]["html"] = deliverable.html_path - if fmt.get("pdf"): - jsonconfig["docs"][0]["format"]["pdf"] = deliverable.pdf_path - if fmt.get("single-html"): - jsonconfig["docs"][0]["format"]["single-html"] = deliverable.singlehtml_path - if not jsonconfig["docs"][0]["lang"]: - jsonconfig["docs"][0]["lang"] = deliverable.language - + _update_metadata_json(outputjson, deliverable) log.debug("Updated metadata JSON for %s", deliverable.full_id) return True @@ -323,13 +318,58 @@ async def process_doctype( return failed_deliverables +def _apply_parity_fixes(descriptions: list, categories: list) -> None: + """Apply wording and HTML parity fixes for legacy JSON consistency.""" + legacy_tail = ( + "
<br/><br/>The default view of this page is the ```Table of Contents``` sorting order. "
+        "To search for a particular document, you can narrow down the results using the "
+        "```Filter as you type``` option. It dynamically filters the document titles and "
+        "descriptions for what you enter.<br/><br/>
" + ) + for desc in descriptions: + if legacy_tail not in desc.description: + desc.description += legacy_tail + desc.description = desc.description.replace("& ", "& ") + + for cat in categories: + for trans in cat.translations: + trans.title = trans.title.replace("&", "&") + + +def _load_and_validate_documents( + files: list[Path], + meta_cache_dir: Path, + manifest: Manifest +) -> None: + """Load JSON metadata files and append validated Document models to the manifest.""" + for f in files: + actual_file = f if f.is_absolute() else meta_cache_dir / f + + if not actual_file.is_file(): + continue + + stdout.print(f" | {f.stem}") + try: + with actual_file.open(encoding="utf-8") as fh: + loaded_doc_data = json.load(fh) + + if not loaded_doc_data: + log.warning("Empty metadata file %s", f) + continue + + doc_model = Document.model_validate(loaded_doc_data) + manifest.documents.append(doc_model) + + except Exception as e: + log.error("Error reading metadata file %s: %s", actual_file, e) + def store_productdocset_json( context: DocBuildContext, doctypes: Sequence[Doctype], stitchnode: etree._ElementTree, ) -> None: - """Collect all JSON file for product/docset and create a single file. + """Collect all JSON files for product/docset and create a single file. :param context: DocBuildContext object :param doctypes: Sequence of Doctype objects @@ -339,80 +379,41 @@ def store_productdocset_json( for doctype, docset, files in collect_files_flat(doctypes, meta_cache_dir): product = doctype.product.value - - # Version Formatting - version_str = str(docset) - if version_str.endswith(".0"): version_str = version_str[:-2] + version_str = str(docset).removesuffix(".0") productxpath = f"./{doctype.product_xpath_segment()}" productnode = stitchnode.find(productxpath) docsetxpath = f"./{doctype.docset_xpath_segment(docset)}" docsetnode = productnode.find(docsetxpath) - - # --- PARITY FIX: Capture and Clean Descriptions --- - descriptions = list(Description.from_xml_node(productnode)) - legacy_tail = "
<br/><br/>The default view of this page is the ‘Table of Contents’ sorting order. To search for a particular document, you can narrow down the results using the ‘Filter as you type’ option. It dynamically filters the document titles and descriptions for what you enter.<br/><br/>
" - - for desc in descriptions: - if legacy_tail not in desc.description: - desc.description += legacy_tail - # Match Legacy HTML entities (literal & to &) - desc.description = desc.description.replace("& ", "& ") + # 1. Capture and Clean Descriptions/Categories using helper + descriptions = list(Description.from_xml_node(productnode)) categories = list(Category.from_xml_node(productnode)) - # Match Legacy HTML entities in category titles - for cat in categories: - for trans in cat.translations: - trans.title = trans.title.replace("&", "&") + _apply_parity_fixes(descriptions, categories) + # 2. Initialize Manifest manifest = Manifest( productname=productnode.find("name").text, acronym=productnode.find("acronym").text if productnode.find("acronym") is not None else product.upper(), version=version_str, lifecycle=docsetnode.attrib.get("lifecycle") or "", - hide_productname=False, + hide_productname=False, descriptions=descriptions, categories=categories, documents=[], archives=[] ) - for f in files: - # Path resolution for nested folders - actual_file = f if f.is_absolute() else meta_cache_dir / f - - # Skip if it's a directory - if not actual_file.is_file(): - continue - - stdout.print(f" | {f.stem}") - try: - with actual_file.open(encoding="utf-8") as fh: - loaded_doc_data = json.load(fh) - - if not loaded_doc_data: - continue - - doc_model = Document.model_validate(loaded_doc_data) - manifest.documents.append(doc_model) - - except Exception as e: - log.error("Error reading metadata file %s: %s", actual_file, e) - continue + # 3. Load and validate documents using helper + _load_and_validate_documents(files, meta_cache_dir, manifest) + # 4. Save and export jsondir = meta_cache_dir / product jsondir.mkdir(parents=True, exist_ok=True) jsonfile = jsondir / f"{docset}.json" - # Serialization for parity - # We use model_dump() + json.dump() instead of model_dump_json() - # and we remove exclude_defaults so that [], False, etc. ARE written to the file. json_data = manifest.model_dump(by_alias=True, exclude_none=True) - - # --- CRITICAL: Use json.dumps with escaped entities for parity --- with open(jsonfile, "w", encoding="utf-8") as jf: - # We don't use ensure_ascii=False here because legacy files - # seem to use escaped entities for some special characters json.dump(json_data, jf, indent=2) stdout.print(f" > Result: {jsonfile}") @@ -428,7 +429,7 @@ async def process( """Asynchronous function to process metadata retrieval. :param context: The DocBuildContext containing environment configuration. - :param doctype: A Doctype object to process. + :param doctypes: A sequence of Doctype objects to process. :param exitfirst: If True, stop processing on the first failure. :param skip_repo_update: If True, skip updating Git repositories before processing. 
:raises ValueError: If no envconfig is found or if paths are not @@ -486,6 +487,6 @@ async def process( console_err.print(f"Found {len(all_failed_deliverables)} failed deliverables:") for d in all_failed_deliverables: console_err.print(f"- {d.full_id}") - return 1 + return 1 return 0 diff --git a/src/docbuild/models/manifest.py b/src/docbuild/models/manifest.py index 34f9a614..3b9ae5db 100644 --- a/src/docbuild/models/manifest.py +++ b/src/docbuild/models/manifest.py @@ -7,12 +7,12 @@ from lxml import etree from pydantic import ( BaseModel, + ConfigDict, + # model_validator, Field, SerializationInfo, field_serializer, field_validator, - ConfigDict, - # model_validator, ) from ..models.language import LanguageCode diff --git a/tests/cli/cmd_metadata/test_cmd_metadata.py b/tests/cli/cmd_metadata/test_cmd_metadata.py index 249ffeb9..f1817614 100644 --- a/tests/cli/cmd_metadata/test_cmd_metadata.py +++ b/tests/cli/cmd_metadata/test_cmd_metadata.py @@ -3,7 +3,7 @@ from collections.abc import Iterator import json from pathlib import Path -from unittest.mock import AsyncMock, Mock, patch +from unittest.mock import AsyncMock, MagicMock, Mock, patch from lxml import etree import pytest @@ -368,9 +368,15 @@ async def test_process_deliverable_scenarios( dapstmpl = "daps --dc-file={dcfile} --output={output}" - # Act + # Create a mock context for the new function signature + mock_context = MagicMock(spec=DocBuildContext) + mock_context.envconfig.paths.root_config_dir = Path("/tmp") + result = await process_deliverable( - deliverable=deliverable, dapstmpl=dapstmpl, **setup_paths + context=mock_context, + deliverable=deliverable, + dapstmpl=dapstmpl, + **setup_paths ) # Assert @@ -685,9 +691,11 @@ def test_store_productdocset_json_merges_and_writes( out_file = Path(meta_cache_dir) / deliverable.productid / f"{deliverable.docsetid}.json" assert out_file.exists() merged = json.loads(out_file.read_text(encoding="utf-8")) - assert "documents" in merged # Check if the 'documents' key exists + # Updated assertions to match your new model structure + assert "documents" in merged assert merged["documents"][0]["docs"][0]["title"] == "Doc1" - assert merged["documents"][0]["docs"][0]["dcfile"] == "DC-Doc1.xml" + # Ensure our new parity keys are there + assert "hide-productname" in merged def test_store_productdocset_json_warns_on_empty_metadata( From ae8ad9011f0c28387abe6fcb5440bcc71c8ba0d3 Mon Sep 17 00:00:00 2001 From: sushant-suse Date: Thu, 19 Feb 2026 13:00:52 +0530 Subject: [PATCH 3/7] feat #183: achieve manifest parity and refactor metadata process per reviewer feedback Signed-off-by: sushant-suse --- src/docbuild/cli/cmd_metadata/metaprocess.py | 166 +++++++++---------- tests/cli/cmd_metadata/test_cmd_metadata.py | 17 +- 2 files changed, 88 insertions(+), 95 deletions(-) diff --git a/src/docbuild/cli/cmd_metadata/metaprocess.py b/src/docbuild/cli/cmd_metadata/metaprocess.py index fc518f7d..944eb000 100644 --- a/src/docbuild/cli/cmd_metadata/metaprocess.py +++ b/src/docbuild/cli/cmd_metadata/metaprocess.py @@ -9,6 +9,7 @@ from typing import Any from lxml import etree +from pydantic import ValidationError from rich.console import Console from ...config.xml.stitch import create_stitchfile @@ -76,33 +77,15 @@ def collect_files_flat( yield dt, docset, files def _get_daps_command( - context: DocBuildContext, worktree_dir: Path, dcfile_path: Path, outputjson: Path, dapstmpl: str, ) -> list[str]: - """Construct the DAPS command for local or containerized execution.""" - container_image = None - if 
hasattr(context.envconfig, 'build') and context.envconfig.build.container: - container_image = context.envconfig.build.container.container - - if container_image: - container_worktree = "/worktree" - container_output = "/output.json" - return [ - "docker", "run", "--rm", - "-v", f"{worktree_dir}:{container_worktree}", - "-v", f"{outputjson}:{container_output}", - container_image, - "daps", "-vv", "metadata", - "--output", container_output, - f"{container_worktree}/{dcfile_path.name}" - ] - + """Construct the DAPS command for native execution.""" raw_daps_cmd = dapstmpl.format( builddir=str(worktree_dir), - dcfile=str(worktree_dir / dcfile_path), + dcfile=str(dcfile_path), output=str(outputjson), ) return shlex.split(raw_daps_cmd) @@ -126,10 +109,6 @@ async def process_deliverable( context: DocBuildContext, deliverable: Deliverable, *, - repo_dir: Path, - tmp_repo_dir: Path, - base_cache_dir: Path, - meta_cache_dir: Path, dapstmpl: str, ) -> bool: """Process a single deliverable asynchronously. @@ -140,38 +119,31 @@ async def process_deliverable( :param context: The DocBuildContext containing environment configuration. :param deliverable: The Deliverable object to process. - :param repo_dir: The permanent repo path taken from the env - config ``paths.repo_dir`` - :param tmp_repo_dir: The temporary repo path taken from the env - config ``paths.tmp_repo_dir`` - :param base_cache_dir: The base path of the cache directory taken - from the env config ``paths.base_cache_dir`` - :param meta_cache_dir: The ath of the metadata directory taken - from the env config ``paths.meta_cache_dir`` - :param dapstmp: A template string with the daps command and potential - placeholders + :param dapstmpl: A template string with the daps command and potential + placeholders. :return: True if successful, False otherwise. - :raises ValueError: If required configuration paths are missing. 
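+
+    Example (a sketch only; assumes a fully configured ``context.envconfig``
+    and a DAPS template such as ``env.build.daps.meta``)::
+
+        success, d = await process_deliverable(
+            context, deliverable, dapstmpl=env.build.daps.meta
+        )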
""" log.info("> Processing deliverable: %s", deliverable.full_id) - if not context.envconfig: - log.error("Environment configuration is missing.") - return False + # Simplified initialization per reviewer feedback + env = context.envconfig + repo_dir = env.paths.repo_dir + tmp_repo_dir = env.paths.tmp_repo_dir + meta_cache_dir = Path(env.paths.meta_cache_dir) bare_repo_path = repo_dir / deliverable.git.slug if not bare_repo_path.is_dir(): log.error("Bare repository not found for %s at %s", deliverable.git.name, bare_repo_path) - return False + return False, deliverable - outputdir = Path(meta_cache_dir) / deliverable.relpath + outputdir = meta_cache_dir / deliverable.relpath outputdir.mkdir(parents=True, exist_ok=True) outputjson = outputdir / deliverable.dcfile try: async with PersistentOnErrorTemporaryDirectory( dir=str(tmp_repo_dir), - prefix=f"clone-{deliverable.productid}-{deliverable.dcfile}_", + prefix=f"clone-{deliverable.productid}-{deliverable.docsetid}-{deliverable.lang}-{deliverable.dcfile}_", ) as worktree_dir: mg = ManagedGitRepo(deliverable.git.url, repo_dir) if not await mg.clone_bare(): @@ -182,8 +154,14 @@ async def process_deliverable( except Exception as e: raise RuntimeError(f"Failed to create worktree for {deliverable.full_id}: {e}") from e + # Use absolute path within worktree to avoid DAPS "Missing DC-file" error + full_dcfile_path = Path(worktree_dir) / deliverable.subdir / deliverable.dcfile + cmd = _get_daps_command( - context, Path(worktree_dir), Path(deliverable.subdir) / deliverable.dcfile, outputjson, dapstmpl + Path(worktree_dir), + full_dcfile_path, + outputjson, + dapstmpl ) daps_proc = await asyncio.create_subprocess_exec( @@ -198,12 +176,11 @@ async def process_deliverable( _update_metadata_json(outputjson, deliverable) log.debug("Updated metadata JSON for %s", deliverable.full_id) - return True + return True, deliverable except Exception as e: log.error("Error processing %s: %s", deliverable.full_id, str(e)) - return False - + return False, deliverable async def update_repositories( deliverables: list[Deliverable], bare_repo_dir: Path @@ -241,28 +218,15 @@ async def process_doctype( :param root: The stitched XML node containing configuration. :param context: The DocBuildContext containing environment configuration. - :param doctypes: A tuple of Doctype objects to process. + :param doctype: The Doctype object to process. :param exitfirst: If True, stop processing on the first failure. - :return: True if all files passed validation, False otherwise + :param skip_repo_update: If True, do not fetch updates for the git repositories. + :return: A list of failed Deliverables. """ - # Here you would implement the logic to process the doctypes - # and create metadata files based on the stitchnode and context. - # This is a placeholder for the actual implementation. 
- # stdout.print(f'Processing doctypes: {doctype}') - # xpath = doctype.xpath() - # print("XPath: ", xpath) - # stdout.print(f'XPath: {doctype.xpath()}', markup=False) - env = context.envconfig - repo_dir: Path = env.paths.repo_dir - base_cache_dir: Path = env.paths.base_cache_dir - # We retrieve the path.meta_cache_dir and fall back to path.base_cache_dir - # if not available: - meta_cache_dir: Path = env.paths.meta_cache_dir - # Cloned temporary repo: - tmp_repo_dir: Path = env.paths.tmp_repo_dir + # Extract all deliverables associated with this doctype from the XML deliverables: list[Deliverable] = await asyncio.to_thread( get_deliverable_from_doctype, root, @@ -274,17 +238,14 @@ async def process_doctype( else: await update_repositories(deliverables, repo_dir) - # stdout.print(f'Found deliverables: {len(deliverables)}') dapsmetatmpl = env.build.daps.meta + # Cleaned up the call to match the new process_deliverable signature. + # Paths are now derived internally within process_deliverable from context. coroutines = [ process_deliverable( context, deliverable, - repo_dir=repo_dir, - tmp_repo_dir=tmp_repo_dir, - base_cache_dir=base_cache_dir, - meta_cache_dir=meta_cache_dir, dapstmpl=dapsmetatmpl, ) for deliverable in deliverables @@ -294,27 +255,37 @@ async def process_doctype( failed_deliverables: list[Deliverable] = [] if exitfirst: - # Fail-fast behavior + # Stop and cancel pending tasks on the first failure for task in asyncio.as_completed(tasks): - success, deliverable = await task - if not success: - failed_deliverables.append(deliverable) - # cancel others... + try: + # Success and deliverable come from the tuple returned by process_deliverable + success, deliverable = await task + if not success: + failed_deliverables.append(deliverable) + for t in tasks: + if not t.done(): + t.cancel() + break + except Exception as e: + log.error("Task failed unexpectedly: %s", e) + # If a task crashes, we still treat it as a failure for exitfirst for t in tasks: if not t.done(): t.cancel() - # Wait for cancellations to propagate - await asyncio.gather(*tasks, return_exceptions=True) - break # Exit the loop on first failure - + break else: - # Run all and collect all results + # Run all tasks and collect failures results = await asyncio.gather(*tasks, return_exceptions=True) - failed_deliverables.extend( - deliverable - for deliverable, success in zip(deliverables, results, strict=False) - if not success - ) + # We zip with deliverables to know which one failed if an Exception occurred + for deliverable, result in zip(deliverables, results, strict=False): + if isinstance(result, tuple): + success, res_deliverable = result + if not success: + failed_deliverables.append(res_deliverable) + elif isinstance(result, Exception): + # If the task crashed (e.g. 
RuntimeError), result is the Exception object + log.error("Error in task for %s: %s", deliverable.full_id, result) + failed_deliverables.append(deliverable) return failed_deliverables @@ -343,8 +314,10 @@ def _load_and_validate_documents( ) -> None: """Load JSON metadata files and append validated Document models to the manifest.""" for f in files: + # Path resolution for nested folders actual_file = f if f.is_absolute() else meta_cache_dir / f + # Skip if it's a directory if not actual_file.is_file(): continue @@ -354,14 +327,14 @@ def _load_and_validate_documents( loaded_doc_data = json.load(fh) if not loaded_doc_data: - log.warning("Empty metadata file %s", f) + log.error("Empty metadata file %s", f) continue doc_model = Document.model_validate(loaded_doc_data) manifest.documents.append(doc_model) - except Exception as e: - log.error("Error reading metadata file %s: %s", actual_file, e) + except (json.JSONDecodeError, ValidationError, OSError) as e: + log.error("Error processing metadata file %s: %s", actual_file, e) def store_productdocset_json( @@ -375,11 +348,19 @@ def store_productdocset_json( :param doctypes: Sequence of Doctype objects :param stitchnode: The stitched XML tree """ - meta_cache_dir = Path(context.envconfig.paths.meta_cache_dir) + # Aligning with reviewer suggestion to derive paths cleanly from context + env = context.envconfig + meta_cache_dir = Path(env.paths.meta_cache_dir) for doctype, docset, files in collect_files_flat(doctypes, meta_cache_dir): product = doctype.product.value - version_str = str(docset).removesuffix(".0") + + # Version Formatting (Safety Fix) + # Only remove .0 if the docset appears to be a version number + # and not a purely alphanumeric identifier (Fixing dangerous remsuffix) + version_str = str(docset) + if version_str.replace('.', '', 1).isdigit() and version_str.endswith(".0"): + version_str = version_str.removesuffix(".0") productxpath = f"./{doctype.product_xpath_segment()}" productnode = stitchnode.find(productxpath) @@ -394,7 +375,11 @@ def store_productdocset_json( # 2. Initialize Manifest manifest = Manifest( productname=productnode.find("name").text, - acronym=productnode.find("acronym").text if productnode.find("acronym") is not None else product.upper(), + acronym=( + productnode.find("acronym").text + if productnode.find("acronym") is not None + else product.upper() + ), version=version_str, lifecycle=docsetnode.attrib.get("lifecycle") or "", hide_productname=False, @@ -404,7 +389,7 @@ def store_productdocset_json( archives=[] ) - # 3. Load and validate documents using helper + # 3. Load and validate documents using helper (Specific exceptions handled inside) _load_and_validate_documents(files, meta_cache_dir, manifest) # 4. 
Save and export @@ -412,8 +397,9 @@ def store_productdocset_json( jsondir.mkdir(parents=True, exist_ok=True) jsonfile = jsondir / f"{docset}.json" + # Exporting with aliases to maintain parity with legacy JSON keys json_data = manifest.model_dump(by_alias=True, exclude_none=True) - with open(jsonfile, "w", encoding="utf-8") as jf: + with jsonfile.open("w", encoding="utf-8") as jf: json.dump(json_data, jf, indent=2) stdout.print(f" > Result: {jsonfile}") diff --git a/tests/cli/cmd_metadata/test_cmd_metadata.py b/tests/cli/cmd_metadata/test_cmd_metadata.py index f1817614..b15e5d15 100644 --- a/tests/cli/cmd_metadata/test_cmd_metadata.py +++ b/tests/cli/cmd_metadata/test_cmd_metadata.py @@ -370,17 +370,24 @@ async def test_process_deliverable_scenarios( # Create a mock context for the new function signature mock_context = MagicMock(spec=DocBuildContext) - mock_context.envconfig.paths.root_config_dir = Path("/tmp") + + # Link the mock context to the actual temporary paths created by the test setup + mock_context.envconfig.paths.repo_dir = setup_paths["repo_dir"] + mock_context.envconfig.paths.tmp_repo_dir = setup_paths["tmp_repo_dir"] + mock_context.envconfig.paths.meta_cache_dir = setup_paths["meta_cache_dir"] + mock_context.envconfig.paths.base_cache_dir = setup_paths["base_cache_dir"] result = await process_deliverable( context=mock_context, deliverable=deliverable, dapstmpl=dapstmpl, - **setup_paths ) # Assert - assert result is expected_result + success, res_deliverable = result + assert success is expected_result + assert res_deliverable == deliverable + if expected_log: assert any(expected_log in record.message for record in caplog.records) @@ -724,8 +731,8 @@ def test_store_productdocset_json_warns_on_empty_metadata( mock_context_with_config_dir, [doctype], stitchnode ) - # Expect a warning to be logged - mock_log.warning.assert_called_with("Empty metadata file %s", Path("empty.json")) + # Expect an error to be logged + mock_log.error.assert_called_with("Empty metadata file %s", Path("empty.json")) def test_store_productdocset_json_handles_read_error( From cab6a2011e459e31bdc6e1521622cb2e56e2fe52 Mon Sep 17 00:00:00 2001 From: sushant-suse Date: Thu, 19 Feb 2026 13:08:40 +0530 Subject: [PATCH 4/7] feat #183: achieve manifest parity and refactor metadata logic Signed-off-by: sushant-suse --- src/docbuild/cli/cmd_metadata/metaprocess.py | 110 +++++++++---------- tests/cli/cmd_metadata/test_cmd_metadata.py | 4 +- 2 files changed, 57 insertions(+), 57 deletions(-) diff --git a/src/docbuild/cli/cmd_metadata/metaprocess.py b/src/docbuild/cli/cmd_metadata/metaprocess.py index 944eb000..a1a32c31 100644 --- a/src/docbuild/cli/cmd_metadata/metaprocess.py +++ b/src/docbuild/cli/cmd_metadata/metaprocess.py @@ -158,9 +158,9 @@ async def process_deliverable( full_dcfile_path = Path(worktree_dir) / deliverable.subdir / deliverable.dcfile cmd = _get_daps_command( - Path(worktree_dir), - full_dcfile_path, - outputjson, + Path(worktree_dir), + full_dcfile_path, + outputjson, dapstmpl ) @@ -205,6 +205,51 @@ async def update_repositories( return res +async def _run_tasks_fail_fast(tasks: list[asyncio.Task]) -> list[Deliverable]: + """Execute tasks and stop immediately on the first failure.""" + failed: list[Deliverable] = [] + for task in asyncio.as_completed(tasks): + try: + success, deliverable = await task + if not success: + failed.append(deliverable) + for t in tasks: + if not t.done(): + t.cancel() + break + except Exception as e: + log.error("Task failed unexpectedly: %s", e) + for t in tasks: + if 
not t.done(): + t.cancel() + break + return failed + + +async def _run_tasks_collect_all( + tasks: list[asyncio.Task], deliverables: list[Deliverable] +) -> list[Deliverable]: + """Execute all tasks and collect every failure encountered.""" + failed: list[Deliverable] = [] + results = await asyncio.gather(*tasks, return_exceptions=True) + for deliverable, result in zip(deliverables, results, strict=False): + if isinstance(result, tuple): + success, res_deliverable = result + if not success: + failed.append(res_deliverable) + elif isinstance(result, Exception): + log.error("Error in task for %s: %s", deliverable.full_id, result) + failed.append(deliverable) + return failed + + +async def _run_metadata_tasks( + tasks: list[asyncio.Task], deliverables: list[Deliverable], exitfirst: bool +) -> list[Deliverable]: + """Execute metadata tasks using either fail-fast or collect-all strategy.""" + if exitfirst: + return await _run_tasks_fail_fast(tasks) + return await _run_tasks_collect_all(tasks, deliverables) async def process_doctype( root: etree._ElementTree, @@ -226,68 +271,23 @@ async def process_doctype( env = context.envconfig repo_dir: Path = env.paths.repo_dir - # Extract all deliverables associated with this doctype from the XML deliverables: list[Deliverable] = await asyncio.to_thread( - get_deliverable_from_doctype, - root, - doctype, + get_deliverable_from_doctype, root, doctype ) - if skip_repo_update: # pragma: no cover + if skip_repo_update: log.info("Skipping repository %s updates as requested.", repo_dir) else: await update_repositories(deliverables, repo_dir) dapsmetatmpl = env.build.daps.meta - - # Cleaned up the call to match the new process_deliverable signature. - # Paths are now derived internally within process_deliverable from context. - coroutines = [ - process_deliverable( - context, - deliverable, - dapstmpl=dapsmetatmpl, - ) - for deliverable in deliverables + tasks = [ + asyncio.create_task(process_deliverable(context, d, dapstmpl=dapsmetatmpl)) + for d in deliverables ] - tasks = [asyncio.create_task(coro) for coro in coroutines] - failed_deliverables: list[Deliverable] = [] - - if exitfirst: - # Stop and cancel pending tasks on the first failure - for task in asyncio.as_completed(tasks): - try: - # Success and deliverable come from the tuple returned by process_deliverable - success, deliverable = await task - if not success: - failed_deliverables.append(deliverable) - for t in tasks: - if not t.done(): - t.cancel() - break - except Exception as e: - log.error("Task failed unexpectedly: %s", e) - # If a task crashes, we still treat it as a failure for exitfirst - for t in tasks: - if not t.done(): - t.cancel() - break - else: - # Run all tasks and collect failures - results = await asyncio.gather(*tasks, return_exceptions=True) - # We zip with deliverables to know which one failed if an Exception occurred - for deliverable, result in zip(deliverables, results, strict=False): - if isinstance(result, tuple): - success, res_deliverable = result - if not success: - failed_deliverables.append(res_deliverable) - elif isinstance(result, Exception): - # If the task crashed (e.g. 
RuntimeError), result is the Exception object - log.error("Error in task for %s: %s", deliverable.full_id, result) - failed_deliverables.append(deliverable) - - return failed_deliverables + # Complexity reduced by delegating task execution to the helper + return await _run_metadata_tasks(tasks, deliverables, exitfirst) def _apply_parity_fixes(descriptions: list, categories: list) -> None: """Apply wording and HTML parity fixes for legacy JSON consistency.""" diff --git a/tests/cli/cmd_metadata/test_cmd_metadata.py b/tests/cli/cmd_metadata/test_cmd_metadata.py index b15e5d15..2f8f7267 100644 --- a/tests/cli/cmd_metadata/test_cmd_metadata.py +++ b/tests/cli/cmd_metadata/test_cmd_metadata.py @@ -370,7 +370,7 @@ async def test_process_deliverable_scenarios( # Create a mock context for the new function signature mock_context = MagicMock(spec=DocBuildContext) - + # Link the mock context to the actual temporary paths created by the test setup mock_context.envconfig.paths.repo_dir = setup_paths["repo_dir"] mock_context.envconfig.paths.tmp_repo_dir = setup_paths["tmp_repo_dir"] @@ -387,7 +387,7 @@ async def test_process_deliverable_scenarios( success, res_deliverable = result assert success is expected_result assert res_deliverable == deliverable - + if expected_log: assert any(expected_log in record.message for record in caplog.records) From 501bdb683181a5c361a298b12a9328e1334429ca Mon Sep 17 00:00:00 2001 From: sushant-suse Date: Thu, 19 Feb 2026 16:05:24 +0530 Subject: [PATCH 5/7] feat #183: achieve manifest parity and changed functions to public Signed-off-by: sushant-suse --- src/docbuild/cli/cmd_metadata/metaprocess.py | 52 ++++++++++---------- src/docbuild/models/manifest.py | 25 +++++----- 2 files changed, 37 insertions(+), 40 deletions(-) diff --git a/src/docbuild/cli/cmd_metadata/metaprocess.py b/src/docbuild/cli/cmd_metadata/metaprocess.py index a1a32c31..024307a7 100644 --- a/src/docbuild/cli/cmd_metadata/metaprocess.py +++ b/src/docbuild/cli/cmd_metadata/metaprocess.py @@ -76,7 +76,7 @@ def collect_files_flat( if files: yield dt, docset, files -def _get_daps_command( +def get_daps_command( worktree_dir: Path, dcfile_path: Path, outputjson: Path, @@ -91,7 +91,7 @@ def _get_daps_command( return shlex.split(raw_daps_cmd) -def _update_metadata_json(outputjson: Path, deliverable: Deliverable) -> None: +def update_metadata_json(outputjson: Path, deliverable: Deliverable) -> None: """Update the generated metadata JSON with deliverable-specific details.""" fmt = deliverable.format with edit_json(outputjson) as jsonconfig: @@ -110,7 +110,7 @@ async def process_deliverable( deliverable: Deliverable, *, dapstmpl: str, -) -> bool: +) -> tuple[bool, Deliverable]: """Process a single deliverable asynchronously. 
This function creates a temporary clone of the deliverable's repository, @@ -125,11 +125,11 @@ async def process_deliverable( """ log.info("> Processing deliverable: %s", deliverable.full_id) - # Simplified initialization per reviewer feedback + # Simplified initialization env = context.envconfig repo_dir = env.paths.repo_dir tmp_repo_dir = env.paths.tmp_repo_dir - meta_cache_dir = Path(env.paths.meta_cache_dir) + meta_cache_dir = env.paths.meta_cache_dir bare_repo_path = repo_dir / deliverable.git.slug if not bare_repo_path.is_dir(): @@ -157,7 +157,7 @@ async def process_deliverable( # Use absolute path within worktree to avoid DAPS "Missing DC-file" error full_dcfile_path = Path(worktree_dir) / deliverable.subdir / deliverable.dcfile - cmd = _get_daps_command( + cmd = get_daps_command( Path(worktree_dir), full_dcfile_path, outputjson, @@ -165,8 +165,10 @@ async def process_deliverable( ) daps_proc = await asyncio.create_subprocess_exec( - *cmd, stdin=asyncio.subprocess.DEVNULL, - stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE, + *cmd, + stdin=asyncio.subprocess.DEVNULL, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, ) _, stderr_data = await daps_proc.communicate() @@ -174,7 +176,7 @@ async def process_deliverable( log.error("DAPS Error: %s", stderr_data.decode()) raise RuntimeError(f"DAPS failed for {deliverable.full_id}") - _update_metadata_json(outputjson, deliverable) + update_metadata_json(outputjson, deliverable) log.debug("Updated metadata JSON for %s", deliverable.full_id) return True, deliverable @@ -205,7 +207,8 @@ async def update_repositories( return res -async def _run_tasks_fail_fast(tasks: list[asyncio.Task]) -> list[Deliverable]: + +async def run_tasks_fail_fast(tasks: list[asyncio.Task]) -> list[Deliverable]: """Execute tasks and stop immediately on the first failure.""" failed: list[Deliverable] = [] for task in asyncio.as_completed(tasks): @@ -226,7 +229,7 @@ async def _run_tasks_fail_fast(tasks: list[asyncio.Task]) -> list[Deliverable]: return failed -async def _run_tasks_collect_all( +async def run_tasks_collect_all( tasks: list[asyncio.Task], deliverables: list[Deliverable] ) -> list[Deliverable]: """Execute all tasks and collect every failure encountered.""" @@ -248,8 +251,8 @@ async def _run_metadata_tasks( ) -> list[Deliverable]: """Execute metadata tasks using either fail-fast or collect-all strategy.""" if exitfirst: - return await _run_tasks_fail_fast(tasks) - return await _run_tasks_collect_all(tasks, deliverables) + return await run_tasks_fail_fast(tasks) + return await run_tasks_collect_all(tasks, deliverables) async def process_doctype( root: etree._ElementTree, @@ -307,7 +310,7 @@ def _apply_parity_fixes(descriptions: list, categories: list) -> None: trans.title = trans.title.replace("&", "&") -def _load_and_validate_documents( +def load_and_validate_documents( files: list[Path], meta_cache_dir: Path, manifest: Manifest @@ -348,19 +351,14 @@ def store_productdocset_json( :param doctypes: Sequence of Doctype objects :param stitchnode: The stitched XML tree """ - # Aligning with reviewer suggestion to derive paths cleanly from context env = context.envconfig meta_cache_dir = Path(env.paths.meta_cache_dir) for doctype, docset, files in collect_files_flat(doctypes, meta_cache_dir): product = doctype.product.value - # Version Formatting (Safety Fix) - # Only remove .0 if the docset appears to be a version number - # and not a purely alphanumeric identifier (Fixing dangerous remsuffix) + # Use the docset directly as the 
version string version_str = str(docset) - if version_str.replace('.', '', 1).isdigit() and version_str.endswith(".0"): - version_str = version_str.removesuffix(".0") productxpath = f"./{doctype.product_xpath_segment()}" productnode = stitchnode.find(productxpath) @@ -378,7 +376,7 @@ def store_productdocset_json( acronym=( productnode.find("acronym").text if productnode.find("acronym") is not None - else product.upper() + else product ), version=version_str, lifecycle=docsetnode.attrib.get("lifecycle") or "", @@ -389,18 +387,20 @@ def store_productdocset_json( archives=[] ) - # 3. Load and validate documents using helper (Specific exceptions handled inside) - _load_and_validate_documents(files, meta_cache_dir, manifest) + # 3. Load and validate documents using helper + load_and_validate_documents(files, meta_cache_dir, manifest) # 4. Save and export jsondir = meta_cache_dir / product jsondir.mkdir(parents=True, exist_ok=True) jsonfile = jsondir / f"{docset}.json" - # Exporting with aliases to maintain parity with legacy JSON keys - json_data = manifest.model_dump(by_alias=True, exclude_none=True) + # Exporting with aliases and INCLUDING defaults (dateModified, rank, isGate) + json_data = manifest.model_dump(by_alias=True) + with jsonfile.open("w", encoding="utf-8") as jf: - json.dump(json_data, jf, indent=2) + # ensure_ascii=False ensures raw UTF-8 (e.g., "á" instead of "\u00e1") + json.dump(json_data, jf, indent=2, ensure_ascii=False) stdout.print(f" > Result: {jsonfile}") Category.reset_rank() diff --git a/src/docbuild/models/manifest.py b/src/docbuild/models/manifest.py index 3b9ae5db..24f8a7ec 100644 --- a/src/docbuild/models/manifest.py +++ b/src/docbuild/models/manifest.py @@ -220,8 +220,9 @@ class SingleDocument(BaseModel): def serialize_date(self: Self, value: date | None, info: SerializationInfo) -> str: """Serialize date to 'YYYY-MM-DD' or an empty string if None.""" if value is None: - return "" - return value.isoformat() + return "" # This ensures the key exists as "" in JSON + # If it's already a string (from DAPS output), return it, otherwise isoformat + return value.isoformat() if hasattr(value, "isoformat") else str(value) class Product(BaseModel): @@ -273,24 +274,20 @@ class Document(BaseModel): tasks: list[str] = Field(default_factory=list) products: list[Product] = Field(default_factory=list) doctypes: list[str] = Field(default_factory=list, alias="docTypes") - isgated: bool = Field(default=False, alias="isGated", serialization_alias="isGated") - rank: int | None = Field(default=None) + isgated: bool = Field(default=False, alias="isGated", serialization_alias="isGate") + rank: int | str | None = Field(default=None) @field_validator("rank", mode="before") @classmethod def coerce_rank(cls: type[Self], value: str | int | None) -> int | None: - """Coerce rank from string to int, handling empty strings.""" - if isinstance(value, str): - if not value.strip(): - return None - return int(value) - return value + """Coerce rank to an integer, treating empty strings or None as None to match legacy parity.""" + if value is None or (isinstance(value, str) and not value.strip()): + return None + return int(value) @field_serializer("rank") - def serialize_rank( - self: Self, value: int | str | None, info: SerializationInfo - ) -> str: - """Serialize rank to an empty string if None.""" + def serialize_rank(self: Self, value: int | str | None, info: SerializationInfo) -> str: + """Serialize rank to an empty string if None to match legacy parity.""" if value is None: return "" return 
From 91789b8031fa944aa93e37351a4eb132bb8341a0 Mon Sep 17 00:00:00 2001
From: sushant-suse
Date: Thu, 19 Feb 2026 17:48:06 +0530
Subject: [PATCH 6/7] feat #183: resolve locale parity, finalize the manifest audit tool, and update the changelog fragment

Signed-off-by: sushant-suse
---
 changelog.d/183.feature.rst                  |   2 +-
 src/docbuild/cli/cmd_metadata/metaprocess.py |   2 +-
 tools/audit_parity.py                        | 127 +++++++++++++++++++
 3 files changed, 129 insertions(+), 2 deletions(-)
 create mode 100755 tools/audit_parity.py

diff --git a/changelog.d/183.feature.rst b/changelog.d/183.feature.rst
index 98086c3d..84ce4cfb 100644
--- a/changelog.d/183.feature.rst
+++ b/changelog.d/183.feature.rst
@@ -1 +1 @@
-Implement manifest parity with legacy JSON portal configuration, including support for nested product metadata collection and containerized DAPS execution.
\ No newline at end of file
+Implement manifest parity with legacy JSON portal configuration, including support for nested product metadata collection, full locale (BCP 47) support, and a new manifest audit utility for structural verification.
\ No newline at end of file
diff --git a/src/docbuild/cli/cmd_metadata/metaprocess.py b/src/docbuild/cli/cmd_metadata/metaprocess.py
index 024307a7..6e6628c4 100644
--- a/src/docbuild/cli/cmd_metadata/metaprocess.py
+++ b/src/docbuild/cli/cmd_metadata/metaprocess.py
@@ -103,7 +103,7 @@ def update_metadata_json(outputjson: Path, deliverable: Deliverable) -> None:
        if fmt.get("single-html"):
            doc["format"]["single-html"] = deliverable.singlehtml_path
        if not doc.get("lang"):
-            doc["lang"] = deliverable.language
+            doc["lang"] = deliverable.lang

 async def process_deliverable(
    context: DocBuildContext,
diff --git a/tools/audit_parity.py b/tools/audit_parity.py
new file mode 100755
index 00000000..83afc363
--- /dev/null
+++ b/tools/audit_parity.py
@@ -0,0 +1,127 @@
+#!/usr/bin/env -S uv run --frozen python
+"""Smart Audit Tool for Document Manifest Parity.
+
+Compares a legacy (manual) JSON manifest against a generated JSON manifest
+by matching documents based on normalized titles and strict language codes.
+"""
+
+import json
+from pathlib import Path
+import re
+import sys
+
+from rich.console import Console
+from rich.panel import Panel
+from rich.table import Table
+
+console = Console()
+
+
+def normalize_text(text: str) -> str:
+    """Lowercase and strip HTML/extra whitespace for fuzzy title matching."""
+    if not text:
+        return ""
+    # Remove HTML tags
+    clean = re.sub(r"<[^>]+>", "", text)
+    # Collapse multiple whitespaces into one
+    return re.sub(r"\s+", " ", clean).strip().lower()
+
+
+def get_doc_map(data: dict) -> dict:
+    """Create a map of {(normalized_title, lang): doc_dict}.
+
+    Use the raw language code to ensure that discrepancies like 'en' vs 'en-us'
+    are caught and reported in the audit table.
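+
+    A minimal, illustrative doctest (hypothetical input data):
+
+    >>> get_doc_map({"documents": [{"docs": [
+    ...     {"title": "Admin <b>Guide</b>", "lang": "en-us"}]}]})
+    {('admin guide', 'en-us'): {'title': 'Admin <b>Guide</b>', 'lang': 'en-us'}}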
+    """
+    doc_map = {}
+    for doc_group in data.get("documents", []):
+        for doc in doc_group.get("docs", []):
+            title = doc.get("title", "Untitled")
+            lang = doc.get("lang", "unknown")
+            # Unique key: normalized title + strict language code
+            key = (normalize_text(title), lang)
+            doc_map[key] = doc
+    return doc_map
+
+
+def run_audit(manual_path: str, generated_path: str) -> None:
+    """Compare two manifest files and report discrepancies."""
+    p_manual = Path(manual_path)
+    p_generated = Path(generated_path)
+
+    try:
+        with open(manual_path, encoding="utf-8") as f:
+            manual_data = json.load(f)
+        with open(generated_path, encoding="utf-8") as f:
+            gen_data = json.load(f)
+    except Exception as e:
+        console.print(f"[bold red]Error loading files:[/bold red] {e}")
+        return
+
+    manual_docs = get_doc_map(manual_data)
+    gen_docs = get_doc_map(gen_data)
+
+    console.print(
+        Panel(
+            f"Legacy: [bold magenta]{p_manual.name}[/bold magenta]\n"
+            f"Generated: [bold green]{p_generated.name}[/bold green]",
+            title="[bold cyan]Manifest Comparison Audit[/bold cyan]",
+            subtitle=f"Comparing {p_manual.parent.name} structure",
+        )
+    )
+
+    # Fields to verify for structural and content parity
+    fields_to_check = [
+        "lang",
+        "title",
+        "description",
+        "dateModified",
+        "rank",
+        "isGate",
+        "dcfile",
+        "rootid",
+    ]
+
+    table = Table(title="Field Discrepancies", show_header=True, header_style="bold blue")
+    table.add_column("Document Match", style="italic")
+    table.add_column("Field")
+    table.add_column("Legacy Value", style="red")
+    table.add_column("Generated Value", style="green")
+
+    diff_found = False
+
+    # Check for differences in matching documents
+    for key, m_doc in manual_docs.items():
+        if key in gen_docs:
+            g_doc = gen_docs[key]
+            for field in fields_to_check:
+                # Normalize values to strings for comparison
+                m_val = str(m_doc.get(field, "")).strip()
+                g_val = str(g_doc.get(field, "")).strip()
+
+                if m_val != g_val:
+                    table.add_row(m_doc.get("title"), field, m_val, g_val)
+                    diff_found = True
+        else:
+            # Document exists in Legacy but could not be matched in Generated
+            table.add_row(m_doc.get("title"), "FILE", "MISSING", "")
+            diff_found = True
+
+    # Check for extra documents in Generated that aren't in Legacy
+    for key, g_doc in gen_docs.items():
+        if key not in manual_docs:
+            table.add_row(g_doc.get("title"), "FILE", "", "NEW IN GENERATED")
+            diff_found = True
+
+    if not diff_found:
+        console.print("[bold green]✅ 100% Parity found![/bold green]")
+    else:
+        console.print(table)
+
+
+if __name__ == "__main__":
+    if len(sys.argv) < 3:
+        console.print("[yellow]Usage:[/yellow] ./tools/audit_parity.py <legacy.json> <generated.json>")
+        sys.exit(1)
+
+    run_audit(sys.argv[1], sys.argv[2])

From 3deced64beeb6f8c606b4b51cde2e64c5206ca8a Mon Sep 17 00:00:00 2001
From: sushant-suse
Date: Thu, 19 Feb 2026 21:21:55 +0530
Subject: [PATCH 7/7] feat #183: resolve private function naming and add TODO

Signed-off-by: sushant-suse
---
 src/docbuild/cli/cmd_metadata/metaprocess.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/docbuild/cli/cmd_metadata/metaprocess.py b/src/docbuild/cli/cmd_metadata/metaprocess.py
index 6e6628c4..7df68b87 100644
--- a/src/docbuild/cli/cmd_metadata/metaprocess.py
+++ b/src/docbuild/cli/cmd_metadata/metaprocess.py
@@ -292,8 +292,10 @@ async def process_doctype(
    # Complexity reduced by delegating task execution to the helper
    return await _run_metadata_tasks(tasks, deliverables, exitfirst)

-def _apply_parity_fixes(descriptions: list, categories: list) -> None:
+def apply_parity_fixes(descriptions: list, categories: list) -> None:
    """Apply wording and HTML parity fixes for legacy JSON consistency."""
+    # TODO: These strings are hard-coded for legacy parity but should be moved to
+    # Docserv config files to allow for proper translation and localization.
    legacy_tail = (
        "The default view of this page is the ```Table of Contents``` sorting order. "
        "To search for a particular document, you can narrow down the results using the "
@@ -368,7 +370,7 @@ def store_productdocset_json(
    # 1. Capture and Clean Descriptions/Categories using helper
    descriptions = list(Description.from_xml_node(productnode))
    categories = list(Category.from_xml_node(productnode))
-    _apply_parity_fixes(descriptions, categories)
+    apply_parity_fixes(descriptions, categories)

    # 2. Initialize Manifest
    manifest = Manifest(