Skip to content

Commit 2d2078f

Browse files
authored
feat(cli): add 'check files' command with optimized git grouping (#163)
* feat(cli): add 'check files' command with optimized git grouping (#78) Signed-off-by: sushant-suse <[email protected]> * feat(check): implement doctype filtering and pinpoint missing files (#78) Signed-off-by: sushant-suse <[email protected]> * refactor(check): use Deliverable models and implement contextual error reporting (#78) Signed-off-by: sushant-suse <[email protected]> * refactor(check): added expanduser() (#78) Signed-off-by: sushant-suse <[email protected]> * feat(check): add success message and finalize path handling (#78) Signed-off-by: sushant-suse <[email protected]> --------- Signed-off-by: sushant-suse <[email protected]>
1 parent 5bc7685 commit 2d2078f

7 files changed

Lines changed: 313 additions & 0 deletions

File tree

changelog.d/78.feature.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Added ``check files`` subcommand to verify that all DC files defined in XML configurations exist in their respective remote repositories. The implementation includes an optimization to group checks by repository, significantly reducing Git network overhead.
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
import asyncio
2+
from typing import TYPE_CHECKING
3+
4+
import click
5+
6+
from docbuild.cli.context import DocBuildContext
7+
8+
# Import Doctype for the type hint
9+
from docbuild.models.doctype import Doctype
10+
11+
from ..callback import validate_doctypes
12+
from .process import process_check_files
13+
14+
15+
@click.group(name="check")
def cmd_check() -> None:
    """Check the environment or configuration for consistency."""
    # Pure command group: it only hosts subcommands such as ``files``.
    # The docstring is the whole body, so no ``pass`` is required.
19+
20+
21+
@cmd_check.command(name="files")
@click.argument("doctypes", nargs=-1, callback=validate_doctypes)
@click.pass_obj
def check_files(ctx: DocBuildContext, doctypes: tuple[Doctype, ...]) -> None:
    """Verify that DC files exist. Optional: specify 'product/version/lang'."""
    # ``doctypes`` arrives as a tuple of Doctype objects (converted by the
    # ``validate_doctypes`` callback); an empty tuple means "no filter".
    # The async worker is driven to completion on a fresh event loop.
    missing: list[str] = asyncio.run(process_check_files(ctx, doctypes))
    if not missing:
        return

    # Render the missing entries as a dash-prefixed list, skipping blanks.
    entries = [str(item) for item in missing if item]
    bullet_list = "\n- ".join(entries)
    raise click.ClickException(
        f"DC file verification failed. The following files are missing:\n- {bullet_list}"
    )
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
"""Logic for checking DC file availability in remote repositories."""
2+
3+
from collections.abc import Sequence
4+
import logging
5+
from pathlib import Path
6+
from typing import cast
7+
8+
from docbuild.cli.cmd_metadata.metaprocess import get_deliverable_from_doctype
9+
from docbuild.cli.context import DocBuildContext
10+
from docbuild.config.xml.stitch import create_stitchfile
11+
from docbuild.constants import DEFAULT_DELIVERABLES
12+
from docbuild.models.config.env import EnvConfig
13+
from docbuild.models.deliverable import Deliverable
14+
from docbuild.models.doctype import Doctype
15+
from docbuild.utils.git import ManagedGitRepo
16+
17+
log = logging.getLogger(__name__)
18+
19+
20+
async def _verify_repository_files(
    repo_url: str,
    branch: str,
    deliverables: list[Deliverable],
    repo_root: Path,
) -> list[str]:
    """Clone a repository and check for the existence of specific DC files.

    :param repo_url: URL of the repository, handed to ``ManagedGitRepo``.
    :param branch: Branch whose file listing is inspected.
    :param deliverables: Deliverables whose ``dcfile`` is expected on
        ``branch``. The first entry supplies the short repository name
        used in log and result messages.
    :param repo_root: Directory handed to ``ManagedGitRepo`` as its root.
    :return: One ``[repo] product/version/lang:file`` string per DC file
        that could not be confirmed. If the bare clone fails, every
        deliverable is reported. An empty list means nothing is missing.
    """
    # Nothing to verify; also avoids an IndexError on deliverables[0].
    if not deliverables:
        return []

    # Use the first deliverable to get the repo's abbreviated name (surl)
    repo_surl = deliverables[0].git.surl
    log.info("Checking Repo: %s [%s]", repo_surl, branch)

    def describe(deli: Deliverable) -> str:
        # Format: [repo] product/version/lang:file
        return (
            f"[{repo_surl}] "
            f"{deli.productid}/{deli.docsetid}/{deli.lang}:{deli.dcfile}"
        )

    repo_handler = ManagedGitRepo(repo_url, repo_root)

    if not await repo_handler.clone_bare():
        # Without a clone nothing can be confirmed, so report everything.
        log.error("Repository inaccessible: %s", repo_surl)
        return [describe(d) for d in deliverables]

    # Build the lookup once: set membership is O(1) per deliverable,
    # whereas testing against the raw list is O(number of files) each time.
    # NOTE(review): ls_tree returns an empty list on failure as well, so a
    # bad branch name reports every file as missing -- confirm this is the
    # intended behaviour.
    available_files = set(await repo_handler.ls_tree(branch))

    missing: list[str] = []
    for d in deliverables:
        display_name = describe(d)
        if d.dcfile in available_files:
            log.info("Found: %s", display_name)
        else:
            log.error("Missing: %s", display_name)
            missing.append(display_name)
    return missing
50+
51+
52+
async def process_check_files(
    ctx: DocBuildContext,
    doctypes: Sequence[Doctype] | None
) -> list[str]:
    """Check that the DC file of every selected deliverable exists upstream.

    :param ctx: Context whose ``envconfig.paths`` supplies ``config_dir``
        (searched recursively for ``*.xml``) and ``repo_dir``.
    :param doctypes: Doctypes to restrict the check to. ``None`` or an
        empty sequence falls back to ``DEFAULT_DELIVERABLES``.
    :return: Descriptions of the missing DC files. The list is empty when
        nothing is missing, when no XML files are found, or when no
        deliverables match.
    """
    log.info("Starting DC file availability check...")

    paths = cast(EnvConfig, ctx.envconfig).paths
    config_dir = paths.config_dir.expanduser()
    repo_root = paths.repo_dir.expanduser()

    # Step 1: gather the XML configuration and stitch it into one tree.
    xml_files = [*config_dir.rglob("*.xml")]
    if not xml_files:
        log.warning(f"No XML files found in {config_dir}")
        return []

    stitch_tree = await create_stitchfile(xml_files)

    # Step 2: fall back to the default doctype when no filter was given.
    selected = doctypes if doctypes else [Doctype.from_str(DEFAULT_DELIVERABLES)]

    # Step 3: resolve every doctype into its Deliverable objects.
    all_deliverables: list[Deliverable] = [
        deli
        for dt in selected
        for deli in get_deliverable_from_doctype(stitch_tree, dt)
    ]
    if not all_deliverables:
        log.error("No deliverables found for the selected doctypes.")
        return []

    # Step 4: bucket by (repository URL, branch) so each pair is only
    # cloned and listed once.
    by_repo: dict[tuple[str, str], list[Deliverable]] = {}
    for deli in all_deliverables:
        bucket = by_repo.setdefault((deli.git.url, deli.branch or "main"), [])
        bucket.append(deli)

    # Step 5: verify one repository/branch pair after the other.
    missing_files: list[str] = []
    for (url, branch), members in by_repo.items():
        missing_files.extend(
            await _verify_repository_files(url, branch, members, repo_root)
        )

    # Only announce success when nothing was reported as missing.
    if not missing_files:
        log.info("All DC files are available in remote repositories.")

    return missing_files

src/docbuild/cli/cmd_cli.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
from ..utils.pidlock import LockAcquisitionError, PidFileLock
2727
from .cmd_build import build
2828
from .cmd_c14n import c14n
29+
from .cmd_check import cmd_check
2930
from .cmd_config import config
3031
from .cmd_metadata import metadata
3132
from .cmd_repo import repo
@@ -228,3 +229,4 @@ def cli(
228229
cli.add_command(repo)
229230
cli.add_command(metadata)
230231
cli.add_command(validate)
232+
cli.add_command(cmd_check)

src/docbuild/utils/git.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,3 +199,43 @@ async def fetch_updates(self: Self) -> bool:
199199
except RuntimeError as e:
200200
log.error("Failed to fetch updates for '%s': %s", self.slug, e)
201201
return False
202+
203+
async def ls_tree(self: Self, branch: str, recursive: bool = True) -> list[str]:
    """List all files in a specific branch of the bare repository.

    :param branch: The branch name to inspect.
    :param recursive: Whether to list files in subdirectories.
    :return: A list of file paths found in the branch. The list is empty
        when the bare repository does not exist, when git produces no
        output, or when the git command fails (the failure is logged).
    """
    if not self.bare_repo_path.exists():
        log.warning(
            "Cannot run ls-tree: Bare repository does not exist at %s",
            self.bare_repo_path,
        )
        return []

    # -z makes git terminate each entry with NUL and disables path quoting.
    # Without it, names containing non-ASCII or special characters come back
    # C-quoted and would never match the plain path a caller looks for.
    args = ["ls-tree", "--name-only", "-z"]
    if recursive:
        args.append("-r")
    args.append(branch)

    try:
        # execute_git_command already handles the 'git' prefix and uses
        # the bare_repo_path as the current working directory.
        self.result = await execute_git_command(
            *args,
            cwd=self.bare_repo_path,
            gitconfig=self._gitconfig,
        )
    except RuntimeError as e:
        log.error(
            "Failed to run ls-tree on branch '%s' in '%s': %s",
            branch, self.slug, e
        )
        return []

    stdout = self.result.stdout
    if not stdout:
        return []

    # Entries are NUL-terminated, so the final split element is empty.
    return [path for path in stdout.split("\0") if path]

tests/cli/cmd_check/__init__.py

Whitespace-only changes.
Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
from unittest.mock import AsyncMock, MagicMock, patch
2+
3+
import pytest
4+
5+
from docbuild.cli.cmd_check import process
6+
from docbuild.cli.cmd_check.process import process_check_files
7+
8+
9+
@pytest.fixture
def mock_deliverable():
    """Provide a stand-in Deliverable exposing the attributes the checker reads."""
    fake = MagicMock()
    # Repository coordinates
    fake.git.url = "https://github.com/org/repo.git"
    fake.git.surl = "gh://org/repo"
    fake.branch = "main"
    # Identity of the deliverable and the DC file expected in the repo
    fake.productid = "sles"
    fake.docsetid = "16.0"
    fake.lang = "en-us"
    fake.dcfile = "README.md"
    return fake
21+
22+
@patch.object(process, "create_stitchfile", new_callable=AsyncMock)
@patch.object(process, "get_deliverable_from_doctype")
@patch.object(process, "ManagedGitRepo")
async def test_process_check_files_all_found(
    mock_repo_class, mock_get_deli, mock_stitch, tmp_path, mock_deliverable
):
    """An empty result is returned when every DC file is listed in the repo."""
    # Arrange: stitch tree and deliverable lookup
    mock_stitch.return_value = MagicMock()
    mock_get_deli.return_value = [mock_deliverable]

    # Arrange: a repository that clones fine and contains the DC file
    repo = AsyncMock()
    repo.clone_bare.return_value = True
    repo.ls_tree.return_value = ["README.md"]
    mock_repo_class.return_value = repo

    # Arrange: a context pointing at a config directory with one XML file
    config_dir = tmp_path / "config.d"
    config_dir.mkdir()
    (config_dir / "test.xml").write_text("<xml/>")
    ctx = MagicMock()
    paths = ctx.envconfig.paths
    paths.config_dir = config_dir
    paths.repo_dir = tmp_path / "repos"

    # Act
    result = await process_check_files(ctx, doctypes=None)

    # Assert
    assert result == []
    repo.clone_bare.assert_called_once()
    repo.ls_tree.assert_called_with("main")
53+
54+
@patch.object(process, "create_stitchfile", new_callable=AsyncMock)
@patch.object(process, "get_deliverable_from_doctype")
@patch.object(process, "ManagedGitRepo")
async def test_process_check_files_missing(
    mock_repo_class, mock_get_deli, mock_stitch, tmp_path, mock_deliverable
):
    """A DC file absent from the repo listing is reported in the result."""
    mock_stitch.return_value = MagicMock()
    mock_get_deli.return_value = [mock_deliverable]

    # The repository is reachable, but README.md is not part of its tree.
    repo = AsyncMock()
    repo.clone_bare.return_value = True
    repo.ls_tree.return_value = ["LICENSE"]
    mock_repo_class.return_value = repo

    config_dir = tmp_path / "config.d"
    config_dir.mkdir()
    (config_dir / "test.xml").write_text("<xml/>")
    ctx = MagicMock()
    paths = ctx.envconfig.paths
    paths.config_dir = config_dir
    paths.repo_dir = tmp_path / "repos"

    result = await process_check_files(ctx, doctypes=None)

    # The entry uses the "[repo] product/version/lang:file" format.
    expected_error = "[gh://org/repo] sles/16.0/en-us:README.md"
    assert expected_error in result
81+
82+
@patch.object(process, "create_stitchfile", new_callable=AsyncMock)
@patch.object(process, "get_deliverable_from_doctype")
@patch.object(process, "ManagedGitRepo")
async def test_process_git_failure(
    mock_repo_class, mock_get_deli, mock_stitch, tmp_path, mock_deliverable
):
    """A failed bare clone reports the repository's deliverable as missing."""
    mock_stitch.return_value = MagicMock()
    mock_get_deli.return_value = [mock_deliverable]

    # Simulate a repository that cannot be cloned.
    repo = AsyncMock()
    repo.clone_bare.return_value = False
    mock_repo_class.return_value = repo

    config_dir = tmp_path / "config.d"
    config_dir.mkdir()
    (config_dir / "test.xml").write_text("<xml/>")
    ctx = MagicMock()
    paths = ctx.envconfig.paths
    paths.config_dir = config_dir
    paths.repo_dir = tmp_path / "repos"

    result = await process_check_files(ctx, doctypes=None)

    expected_error = "[gh://org/repo] sles/16.0/en-us:README.md"
    assert expected_error in result
107+
108+
@patch.object(process, "create_stitchfile", new_callable=AsyncMock)
@patch.object(process, "get_deliverable_from_doctype")
async def test_process_no_deliverables_found(mock_get_deli, mock_stitch, tmp_path):
    """An empty result is returned when no deliverable matches the doctypes."""
    mock_stitch.return_value = MagicMock()
    # The lookup yields nothing for any doctype.
    mock_get_deli.return_value = []

    config_dir = tmp_path / "config.d"
    config_dir.mkdir()
    (config_dir / "test.xml").write_text("<xml/>")
    ctx = MagicMock()
    ctx.envconfig.paths.config_dir = config_dir

    result = await process_check_files(ctx, doctypes=None)

    assert result == []
123+
124+
@patch.object(process, "create_stitchfile", new_callable=AsyncMock)
async def test_process_no_xml_files(mock_stitch, tmp_path):
    """An empty result is returned when the config directory has no XML files."""
    # An existing but empty configuration directory.
    empty_dir = tmp_path / "empty"
    empty_dir.mkdir()
    ctx = MagicMock()
    ctx.envconfig.paths.config_dir = empty_dir

    result = await process_check_files(ctx, doctypes=None)

    assert result == []

0 commit comments

Comments
 (0)