diff --git a/onecode/cli/utils.py b/onecode/cli/utils.py
index 69b2220..a6727f0 100644
--- a/onecode/cli/utils.py
+++ b/onecode/cli/utils.py
@@ -5,7 +5,6 @@
 import json
 import os
 from collections import OrderedDict
-from glob import iglob
 from typing import Dict, List, Optional
 
 import pydash
@@ -17,6 +16,7 @@
 from ..base.decorator import check_type
 from ..base.enums import Env
 from ..base.project import Project
+from ..utils.module import get_call_graph_entry_files
 
 
 @check_type
@@ -130,6 +130,50 @@ def run():
         json.dump(flows, f, indent=4)
 
 
+def _flow_module_name(name: str) -> str:
+    """
+    Strip the leading ``flows`` package prefix from a PyCG graph key.
+
+    Both the dotted (``flows.``) and the Windows path-like (``flows\\``) forms are
+    handled; any other name is returned unchanged.
+
+    """
+    for prefix in ('flows.', 'flows\\'):
+        if name.startswith(prefix):
+            return name[len(prefix):]
+    return name
+
+
+def _resolve_graph_key(name: str, graph: Dict) -> str:
+    """
+    Resolve a PyCG graph key across platform naming differences.
+
+    On Windows, PyCG keeps path separators in module namespaces (``flows\\step1.run``)
+    while Linux uses dots (``flows.step1.run``). Relative imports inside ``flows/``
+    are often keyed as ``utils.xx`` while the analyzed module lives under
+    ``flows\\utils.xx``.
+
+    Args:
+        name: Graph key to look up, in either naming style.
+        graph: PyCG call graph, keyed by fully qualified function name.
+
+    Returns:
+        The first matching key found in ``graph``, or ``name`` unchanged when no
+        variant matches.
+
+    """
+    if name in graph:
+        return name
+
+    # `name` itself was ruled out above: only the prefixed variants remain.
+    module_name = _flow_module_name(name)
+    for candidate in (f'flows.{module_name}', f'flows\\{module_name}'):
+        if candidate in graph:
+            return candidate
+
+    return name
+
+
 # check_type decorator not compatible with recursive calls
 def extract_calls(
     entry_point: str,
@@ -162,9 +206,7 @@ def extract_calls(
         for ent in Project().registered_elements
     }
 
-    # PyCG is not exactly equivalent on Windows vs Linux wrt to graph keys
-    if os.name == 'nt' and not entry_point.startswith('flows\\'):
-        entry_point = f'flows\\{entry_point}'
+    entry_point = _resolve_graph_key(entry_point, graph)
 
     if entry_point in graph:
         for fn in graph[entry_point]:
@@ -183,7 +225,8 @@ def extract_calls(
                 if verbose:
                     print(f" >> ({entry_point}) function {fn['normed']} ⏩")
 
-                extract_calls(fn['normed'], graph, calls)
+                next_point = _resolve_graph_key(fn['normed'], graph)
+                extract_calls(next_point, graph, calls)
 
 
 @check_type
@@ -214,13 +257,7 @@ def process_call_graph(
         raise FileNotFoundError('Ensure you are at the root of your OneCode project')
 
     statements = OrderedDict()
-    entry_files = [
-        filename for filename in iglob(
-            os.path.join(project_path, 'flows', '**', '*.py'), recursive=True
-        ) if filename != '__init__.py' and not filename.startswith(
-            os.path.join(project_path, 'flows', 'onecode_ext')
-        )
-    ]
+    entry_files = get_call_graph_entry_files(project_path)
 
     cg = CallGraphGenerator(
         entry_files,
@@ -238,10 +275,7 @@ def process_call_graph(
                 print(f"Processing {label}...")
 
             calls = []
-            if os.name == 'nt':
-                extract_calls(f"{file}.run", flow_graph, calls, verbose)
-            else:
-                extract_calls(f"flows.{file}.run", flow_graph, calls, verbose)
+            extract_calls(f"flows.{file}.run", flow_graph, calls, verbose)
 
             statements[label] = {
                 "entry_point": file,
diff --git a/onecode/utils/module.py b/onecode/utils/module.py
index 4698681..089a131 100644
--- a/onecode/utils/module.py
+++ b/onecode/utils/module.py
@@ -2,10 +2,11 @@
 # SPDX-License-Identifier: MIT
 
+import ast
 import importlib
+import json
 import os
 import sys
 from collections import OrderedDict
-from glob import iglob
 from pathlib import Path
 from types import ModuleType
 from typing import Dict, List, Optional, Union
@@ -16,6 +17,7 @@
 from pycg.utils.constants import CALL_GRAPH_OP
 
 from ..base.decorator import check_type
+from ..base.enums import Env
 
 
 @check_type
@@ -56,6 +58,136 @@ def register_ext_module(
     return module
 
 
+def _is_onecode_ext_path(path: str) -> bool:
+    """Tell whether ``path`` contains an ``onecode_ext`` folder component."""
+    parts = path.replace('\\', '/').split('/')
+    return 'onecode_ext' in parts
+
+
+def _resolve_relative_import(flow_py: str, node: ast.ImportFrom) -> List[str]:
+    """
+    Map a relative ``from ... import ...`` statement to the files it may load.
+
+    Args:
+        flow_py: Path of the Python file containing the import statement.
+        node: Relative import node (``node.level >= 1``) found in ``flow_py``.
+
+    Returns:
+        Existing ``module.py`` and ``package/__init__.py`` files matching the
+        import, excluding anything located under an ``onecode_ext`` folder.
+
+    """
+    base_dir = os.path.dirname(flow_py)
+    module_parts = node.module.split('.') if node.module else []
+    rel_path = os.path.normpath(
+        os.path.join(base_dir, *(['..'] * (node.level - 1)), *module_parts)
+    )
+
+    # ``from . import helper`` and ``from .pkg import submodule`` name a module in
+    # the alias list rather than in ``node.module``: probe those names too.
+    targets = [rel_path]
+    targets.extend(
+        os.path.join(rel_path, alias.name)
+        for alias in node.names
+        if alias.name != '*'
+    )
+
+    candidates = []
+    for target in targets:
+        if os.path.isfile(target + '.py'):
+            candidates.append(target + '.py')
+        init_py = os.path.join(target, '__init__.py')
+        if os.path.isfile(init_py):
+            candidates.append(init_py)
+
+    return [path for path in candidates if not _is_onecode_ext_path(path)]
+
+
+def _collect_flow_helper_files(flow_files: List[str]) -> List[str]:
+    """
+    Collect the local modules transitively reached through relative imports.
+
+    Files that cannot be read or parsed are skipped: the collection is best-effort.
+
+    Args:
+        flow_files: Paths of the flow scripts to start from.
+
+    Returns:
+        Absolute paths of the helper files, excluding ``flow_files`` themselves.
+
+    """
+    helpers = []
+    seen = {os.path.abspath(path) for path in flow_files}
+
+    pending = list(flow_files)
+    while pending:
+        flow_py = pending.pop()
+        try:
+            with open(flow_py, 'r', encoding='utf-8') as f:
+                tree = ast.parse(f.read(), filename=flow_py)
+        except (OSError, SyntaxError, UnicodeDecodeError):
+            continue
+
+        for node in ast.walk(tree):
+            if not isinstance(node, ast.ImportFrom) or node.level == 0:
+                continue
+
+            for helper_py in _resolve_relative_import(flow_py, node):
+                helper_py = os.path.abspath(helper_py)
+                if helper_py in seen:
+                    continue
+                seen.add(helper_py)
+                helpers.append(helper_py)
+                pending.append(helper_py)
+
+    return helpers
+
+
+@check_type
+def get_call_graph_entry_files(project_path: str) -> List[str]:
+    """
+    Return the Python entry files used for static call-graph analysis.
+
+    Analysis starts from ``main.py`` and includes only the flow scripts registered in
+    ``.onecode.json`` — the same flows executed at runtime by ``main.py`` — plus any
+    helper modules they import locally under ``flows/``. This avoids scanning unrelated
+    project files (e.g. ``.venv``, unused scripts, or data helpers).
+
+    Args:
+        project_path: Path to the root of the OneCode project.
+
+    Returns:
+        Absolute paths to the entry Python files.
+
+    Raises:
+        FileNotFoundError: if ``main.py`` or ``.onecode.json`` is missing.
+
+    """
+    project_path = os.path.abspath(project_path)
+    main_py = os.path.join(project_path, 'main.py')
+    if not os.path.isfile(main_py):
+        raise FileNotFoundError('main.py not found at project root')
+
+    config_file = os.path.join(project_path, Env.ONECODE_CONFIG_FILE)
+    if not os.path.isfile(config_file):
+        raise FileNotFoundError('Ensure you are at the root of your OneCode project')
+
+    entry_files = [main_py]
+    flow_files = []
+    with open(config_file, 'r', encoding='utf-8') as f:
+        flows = json.load(f)
+
+    for flow in flows:
+        flow_py = os.path.join(project_path, 'flows', f"{flow['file']}.py")
+        if os.path.isfile(flow_py):
+            entry_files.append(flow_py)
+            flow_files.append(flow_py)
+
+    entry_files.extend(_collect_flow_helper_files(flow_files))
+
+    return entry_files
+
+
 @check_type
 def get_imported_modules(scripts_folder: str) -> List[str]:
     """
@@ -68,7 +200,7 @@ def get_imported_modules(scripts_folder: str) -> List[str]:
         List of modules names imported by the Python scripts.
 
     """
-    entry_files = list(iglob(os.path.join(scripts_folder, '**', '*.py'), recursive=True))
+    entry_files = get_call_graph_entry_files(scripts_folder)
     cg = CallGraphGenerator(
         entry_files,
         scripts_folder,
diff --git a/tests/unit/cli/test_utils.py b/tests/unit/cli/test_utils.py
index d18b97b..cb52b5a 100644
--- a/tests/unit/cli/test_utils.py
+++ b/tests/unit/cli/test_utils.py
@@ -5,6 +5,52 @@
 
 from onecode import Project, register_ext_module
 from onecode.cli import process_call_graph
+from onecode.cli.utils import _resolve_graph_key, extract_calls
+
+
+def test_resolve_graph_key_windows_style():
+    graph = {
+        'flows\\step1.run': [],
+        'flows\\step2.run': [
+            {
+                'normed': 'utils.xx',
+                'code': 'xx()',
+            }
+        ],
+        'flows\\utils.xx': [
+            {
+                'normed': 'onecode.slider',
+                'code': "onecode.slider('My slider\"1', 0.5, max=6)",
+            }
+        ],
+    }
+
+    assert _resolve_graph_key('flows.step1.run', graph) == 'flows\\step1.run'
+    assert _resolve_graph_key('utils.xx', graph) == 'flows\\utils.xx'
+
+
+def test_extract_calls_resolves_windows_helper_modules():
+    graph = {
+        'flows\\step2.run': [
+            {
+                'normed': 'utils.xx',
+                'code': 'xx()',
+            }
+        ],
+        'flows\\utils.xx': [
+            {
+                'normed': 'onecode.slider',
+                'code': "onecode.slider('My slider\"1', 0.5, max=6)",
+            }
+        ],
+    }
+
+    Project().reset()
+    calls = []
+    extract_calls('flows.step2.run', graph, calls)
+
+    assert len(calls) == 1
+    assert calls[0]['func'] == 'onecode.slider'
 
 
 def test_invalid_call_graph():
diff --git a/tests/unit/utils/test_call_graph_entry.py b/tests/unit/utils/test_call_graph_entry.py
new file mode 100644
index 0000000..f24c832
--- /dev/null
+++ b/tests/unit/utils/test_call_graph_entry.py
@@ -0,0 +1,41 @@
+import os
+
+import pytest
+from datatest import working_directory
+
+from onecode import get_call_graph_entry_files
+
+
+@working_directory(__file__)
+def test_get_call_graph_entry_files_flow_1():
+    project_path = os.path.join('..', '..', 'data', 'flow_1')
+    entry_files = get_call_graph_entry_files(project_path)
+
+    assert entry_files[0].endswith('main.py')
+    assert {os.path.basename(f) for f in entry_files[1:]} == {
+        'step1.py', 'step2.py', 'step3.py', 'utils.py'
+    }
+    assert 'unused.py' not in {os.path.basename(f) for f in entry_files}
+
+
+@working_directory(__file__)
+def test_get_call_graph_entry_files_flow_modules():
+    project_path = os.path.join('..', '..', 'data', 'flow_modules')
+    entry_files = get_call_graph_entry_files(project_path)
+
+    assert entry_files == [
+        os.path.abspath(os.path.join(project_path, 'main.py')),
+        os.path.abspath(os.path.join(project_path, 'flows', 'flow_modules.py')),
+    ]
+
+
+def test_get_call_graph_entry_files_missing_main(tmp_path):
+    with pytest.raises(FileNotFoundError, match='main.py not found'):
+        get_call_graph_entry_files(str(tmp_path))
+
+
+def test_get_call_graph_entry_files_missing_config(tmp_path):
+    (tmp_path / 'main.py').write_text('print("hello")\n')
+
+    with pytest.raises(FileNotFoundError, match='Ensure you are at the root'):
+        get_call_graph_entry_files(str(tmp_path))