Skip to content

Commit a3f5b80

Browse files
Fix #96: Centralize Git operations (#99)
The ManagedGitRepo manages a bare repository and temporary worktrees. It stores the repo URL among other attributes in a class dict variable. This class variable acts as a flag when the repository was updated or cloned. There are other changes in this commit: * Introduce execute_git_command for general git commands * Introduce ManagedGitRepo as a class that manages bare repos * Introduce shell.py module and collect all shell related functions there * If repo does exist, fetch updates * Add test cases * Add news file --------- Co-authored-by: Sushant Gaurav <[email protected]>
1 parent 6146278 commit a3f5b80

8 files changed

Lines changed: 568 additions & 264 deletions

File tree

changelog.d/99.feature.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Centralize Git and shell operations for better code reuse across modules

src/docbuild/cli/cmd_repo/process.py

Lines changed: 7 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -6,71 +6,14 @@
66

77
from ...cli.context import DocBuildContext
88
from ...config.xml.stitch import create_stitchfile
9+
from ...constants import GITLOGGER_NAME
910
from ...models.repo import Repo
1011
from ...utils.contextmgr import make_timer
11-
from ...constants import GITLOGGER_NAME
12+
from ...utils.git import ManagedGitRepo
1213

1314
log = logging.getLogger(GITLOGGER_NAME)
1415

1516

16-
async def clone_repo(repo: Repo, base_dir: Path) -> bool:
17-
"""Clone a GitHub repository into the specified base directory."""
18-
repo_path = base_dir / repo.slug
19-
20-
# Ensure the parent directory exists
21-
repo_path.parent.mkdir(parents=True, exist_ok=True)
22-
23-
if repo_path.exists():
24-
log.info('Repository %r already exists at %s', repo.name, repo_path)
25-
return True
26-
27-
log.info("Cloning '%s' into '%s'...", repo, str(repo_path))
28-
29-
# Use create_subprocess_exec for security (avoids shell injection)
30-
# and pass arguments as a sequence.
31-
cmd_args = [
32-
'git',
33-
'-c',
34-
'color.ui=never',
35-
'clone',
36-
'--bare',
37-
'--progress',
38-
str(repo),
39-
str(repo_path),
40-
]
41-
42-
process = await asyncio.create_subprocess_exec(
43-
*cmd_args,
44-
stdout=asyncio.subprocess.PIPE,
45-
stderr=asyncio.subprocess.PIPE,
46-
env={
47-
'LANG': 'C',
48-
'LC_ALL': 'C',
49-
'GIT_TERMINAL_PROMPT': '0',
50-
'GIT_PROGRESS_FORCE': '1',
51-
},
52-
)
53-
54-
# Wait for the process to finish and capture output
55-
stdout, stderr = await process.communicate()
56-
57-
if process.returncode == 0:
58-
log.info("Cloned '%s' successfully", repo)
59-
return True
60-
else:
61-
log.error(
62-
"Failed to clone '%s' (exit code %i)",
63-
repo,
64-
process.returncode,
65-
)
66-
if stderr:
67-
# Log each line of stderr with a prefix for better readability
68-
error_output = stderr.decode().strip()
69-
for line in error_output.splitlines():
70-
log.error(' [git] %s', line)
71-
return False
72-
73-
7417
async def process(context: DocBuildContext, repos: tuple[str, ...]) -> int:
7518
"""Process the cloning of repositories.
7619
@@ -119,12 +62,15 @@ async def process(context: DocBuildContext, repos: tuple[str, ...]) -> int:
11962

12063
timer = make_timer('git-clone-repos')
12164
with timer() as t:
122-
tasks = [clone_repo(repo, repo_dir) for repo in unique_git_repos]
65+
tasks = [
66+
ManagedGitRepo(str(repo), repo_dir).clone_bare()
67+
for repo in unique_git_repos
68+
]
12369
results = await asyncio.gather(*tasks)
12470

12571
log.info('Elapsed time: %0.3f seconds', t.elapsed)
12672

12773
# Return 0 for success (all clones succeeded), 1 for failure.
12874
if all(results):
12975
return 0
130-
return 1
76+
return 1

src/docbuild/cli/cmd_validate/process.py

Lines changed: 3 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from ...constants import XMLDATADIR
1616
from ...utils.decorators import RegistryDecorator
1717
from ...utils.paths import calc_max_len
18+
from ...utils.shell import run_command
1819
from ..context import DocBuildContext
1920

2021
# Cast to help with type checking
@@ -76,31 +77,6 @@ def display_results(
7677
console_err.print(f' {message}')
7778

7879

79-
async def run_command(
80-
*args: str, env: dict[str, str] | None = None
81-
) -> tuple[int, str, str]:
82-
"""Run an external command and capture its output.
83-
84-
:param args: The command and its arguments separated as tuple elements.
85-
:param env: A dictionary of environment variables for the new process.
86-
:return: A tuple of (returncode, stdout, stderr).
87-
:raises FileNotFoundError: if the command is not found.
88-
"""
89-
process = await asyncio.create_subprocess_exec(
90-
*args,
91-
stdout=asyncio.subprocess.PIPE,
92-
stderr=asyncio.subprocess.PIPE,
93-
env=env,
94-
)
95-
stdout, stderr = await process.communicate()
96-
97-
# After .communicate() returns, the process has terminated and the
98-
# returncode is guaranteed to be set to an integer.
99-
assert process.returncode is not None
100-
101-
return process.returncode, stdout.decode(), stderr.decode()
102-
103-
10480
async def validate_rng(
10581
xmlfile: Path,
10682
rng_schema_path: Path = PRODUCT_CONFIG_SCHEMA,
@@ -121,7 +97,7 @@ async def validate_rng(
12197
:return: A tuple containing a boolean success status and any output message.
12298
"""
12399
jing_cmd = ['jing']
124-
if idcheck:
100+
if idcheck:
125101
jing_cmd.append('-i')
126102
if rng_schema_path.suffix == '.rnc':
127103
jing_cmd.append('-c')
@@ -261,7 +237,7 @@ async def process_file(
261237
rng_success, rng_output = await asyncio.to_thread(
262238
validate_rng_lxml,
263239
path_obj,
264-
XMLDATADIR / 'product-config-schema.rng',
240+
XMLDATADIR / 'product-config-schema.rng',
265241
)
266242
elif validation_method == 'jing':
267243
# Use existing jing-based validator (.rnc or .rng)

src/docbuild/models/repo.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ class Repo:
5252
"""The abbreviated name of the repository (e.g., 'org/repo')."""
5353

5454
def __init__(self, value: str) -> None:
55-
"""Initialize a repository.
55+
"""Initialize a repository model from a URL or a short name.
5656
5757
This initializer understands:
5858

src/docbuild/utils/git.py

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
"""Git helper function."""
2+
3+
import logging
4+
from pathlib import Path
5+
from typing import ClassVar, Self
6+
7+
from ..constants import GITLOGGER_NAME
8+
from ..models.repo import Repo
9+
from ..utils.shell import execute_git_command
10+
11+
log = logging.getLogger(GITLOGGER_NAME)
12+
13+
14+
class ManagedGitRepo:
15+
"""Manages a bare repository and its temporary worktrees."""
16+
17+
#: Class variable to indicate the update state of a repo
18+
_is_updated: ClassVar[dict[Repo, bool]] = {}
19+
20+
def __init__(self: Self, remote_url: str, permanent_root: Path) -> None:
21+
"""Initialize the managed repository.
22+
23+
:param remote_url: The remote URL of the repository.
24+
:param permanent_root: The root directory for storing permanent bare clones.
25+
"""
26+
self._repo_model = Repo(remote_url)
27+
self._permanent_root = permanent_root
28+
# The Repo model handles the "sluggification" of the URL
29+
self.bare_repo_path = self._permanent_root / self._repo_model.slug
30+
# Initialize attribute for output:
31+
self.stdout = self.stderr = None
32+
# Add repo into class variable
33+
type(self)._is_updated.setdefault(self._repo_model, False)
34+
35+
def __repr__(self: Self) -> str:
36+
"""Return a string representation of the ManagedGitRepo."""
37+
return (
38+
f'{self.__class__.__name__}(remote_url={self.remote_url!r}, '
39+
f"bare_repo_path='{self.bare_repo_path!s}')"
40+
)
41+
42+
@property
43+
def slug(self: Self) -> str:
44+
"""Return the slug of the repository."""
45+
return self._repo_model.slug
46+
47+
@property
48+
def remote_url(self: Self) -> str:
49+
"""Return the remote URL of the repository."""
50+
return self._repo_model.url
51+
52+
@property
53+
def permanent_root(self: Self) -> Path:
54+
"""Return the permanent root directory for the repository."""
55+
return self._permanent_root
56+
57+
async def _initial_clone(self: Self) -> bool:
58+
"""Execute the initial 'git clone --bare' command.
59+
60+
This is a helper for `clone_bare` and assumes the destination
61+
directory does not exist.
62+
63+
:returns: True if the clone was successful, False on error.
64+
"""
65+
url = self._repo_model.url
66+
try:
67+
self.stdout, self.stderr = await execute_git_command(
68+
'clone',
69+
'--bare',
70+
'--progress',
71+
str(url),
72+
str(self.bare_repo_path),
73+
cwd=self._permanent_root,
74+
)
75+
log.info("Cloned '%s' successfully", url)
76+
return True
77+
78+
except RuntimeError as e:
79+
log.error("Failed to clone '%s': %s", url, e)
80+
return False
81+
82+
async def clone_bare(self: Self) -> bool:
83+
"""Clone the remote repository as a bare repository.
84+
85+
If the repository already exists, it logs a message and returns.
86+
87+
:returns: True if successful, False otherwise.
88+
"""
89+
url = self._repo_model.url
90+
cls = type(self)
91+
92+
if cls._is_updated.get(self._repo_model, False):
93+
log.info('Repository %r already processed this run.', self._repo_model.name)
94+
return True
95+
96+
result = False
97+
if self.bare_repo_path.exists():
98+
log.info(
99+
'Repository already exists, fetching updates for %r',
100+
self._repo_model.name,
101+
)
102+
result = await self.fetch_updates()
103+
else:
104+
log.info("Cloning '%s' into '%s'...", url, self.bare_repo_path)
105+
result = await self._initial_clone()
106+
107+
if result:
108+
cls._is_updated[self._repo_model] = True
109+
110+
return result
111+
112+
async def create_worktree(
113+
self: Self,
114+
target_dir: Path,
115+
branch: str,
116+
*,
117+
is_local: bool = True,
118+
options: list[str] | None = None,
119+
) -> None:
120+
"""Create a temporary worktree from the bare repository."""
121+
if not self.bare_repo_path.exists():
122+
raise FileNotFoundError(
123+
'Cannot create worktree. Bare repository does not exist at: '
124+
f'{self.bare_repo_path}'
125+
)
126+
127+
clone_args = ['clone']
128+
if is_local:
129+
clone_args.append('--local')
130+
clone_args.extend(['--branch', branch])
131+
if options:
132+
clone_args.extend(options)
133+
clone_args.extend([str(self.bare_repo_path), str(target_dir)])
134+
135+
self.stdout, self.stderr = await execute_git_command(
136+
*clone_args, cwd=target_dir.parent
137+
)
138+
139+
async def fetch_updates(self: Self) -> bool:
140+
"""Fetch updates from the remote to the bare repository.
141+
142+
:return: True if successful, False otherwise.
143+
"""
144+
if not self.bare_repo_path.exists():
145+
log.warning(
146+
'Cannot fetch updates: Bare repository does not exist at %s',
147+
self.bare_repo_path,
148+
)
149+
return False
150+
151+
log.info("Fetching updates for '%s'", self.slug)
152+
try:
153+
self.stdout, self.stderr = await execute_git_command(
154+
'fetch', '--all', cwd=self.bare_repo_path
155+
)
156+
log.info("Successfully fetched updates for '%s'", self.slug)
157+
return True
158+
159+
except RuntimeError as e:
160+
log.error("Failed to fetch updates for '%s': %s", self.slug, e)
161+
return False

0 commit comments

Comments
 (0)