Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 22 additions & 3 deletions plexe/utils/dashboard/discovery.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
"""
Experiment discovery and metadata extraction for dashboard.

Scans workdir at correct depth (dataset_name/timestamp/) and loads checkpoint metadata.
Scans at flat, 1-level, or 2-level depth:
- workdir/checkpoints/ (flat — default standalone local runs)
- workdir/{dataset_name}/checkpoints/ (1-level)
- workdir/{dataset_name}/{timestamp}/checkpoints/ (2-level)
"""

import json
Expand Down Expand Up @@ -57,6 +60,19 @@ def discover_experiments(workdir: Path) -> list[ExperimentMetadata]:
logger.warning(f"Workdir does not exist: {workdir}")
return experiments

# Flat layout: checkpoints/ directly under workdir (standalone local runs)
if (workdir / "checkpoints").is_dir():
try:
experiments.append(
_extract_metadata(
dataset_name=workdir.name,
timestamp=workdir.name,
experiment_path=workdir,
)
)
except Exception as e:
logger.warning(f"Failed to extract metadata from {workdir}: {e}")

# Scan first level (dataset names)
for dataset_dir in workdir.iterdir():
if not dataset_dir.is_dir():
Expand Down Expand Up @@ -134,13 +150,16 @@ def _extract_metadata(dataset_name: str, timestamp: str, experiment_path: Path)
}

current_phase = latest_checkpoint.get("phase")
phase_number = phase_map.get(current_phase, 0)
phase_key = (
current_phase.split("_", 1)[1] if current_phase and current_phase.split("_", 1)[0].isdigit() else current_phase
)
phase_number = phase_map.get(phase_key, 0)

# Determine status
checkpoint_status = latest_checkpoint.get("status", "completed")
if checkpoint_status == "in_progress":
status = "running"
elif current_phase == "package_final_model" and checkpoint_status == "completed":
elif phase_key == "package_final_model" and checkpoint_status == "completed":
status = "completed"
else:
# Check if there's an error or if it looks abandoned
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "plexe"
version = "1.4.4"
version = "1.4.5"
description = "An agentic framework for building ML models from natural language"
authors = [
"Marcello De Bernardi <[email protected]>",
Expand Down
45 changes: 45 additions & 0 deletions tests/unit/utils/dashboard/test_discovery.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
"""Unit tests for dashboard experiment discovery."""

import json
from pathlib import Path

from plexe.utils.dashboard.discovery import discover_experiments


def _write_checkpoint(
    checkpoints_dir: Path,
    filename: str,
    phase: str,
    intent: str = "test intent",
    *,
    status: str = "completed",
) -> None:
    """Write a minimal checkpoint JSON fixture into ``checkpoints_dir``.

    The directory (including any missing parents) is created if needed, then a
    single UTF-8 encoded JSON file named ``filename`` is written into it.

    Args:
        checkpoints_dir: Directory that will hold the checkpoint file.
        filename: Name of the JSON file to create inside ``checkpoints_dir``.
        phase: Value stored under the top-level ``"phase"`` key.
        intent: Value stored under ``context.intent``.
        status: Value stored under the top-level ``"status"`` key. Defaults to
            ``"completed"``; pass another value (e.g. ``"in_progress"``) to
            build fixtures for other checkpoint states.
    """
    # exist_ok=True lets a test write several checkpoints into one directory.
    checkpoints_dir.mkdir(parents=True, exist_ok=True)
    payload = {
        "phase": phase,
        "status": status,
        "context": {
            "intent": intent,
            "experiment_id": "local",
            "metric": {"name": "roc_auc"},
        },
    }
    (checkpoints_dir / filename).write_text(json.dumps(payload), encoding="utf-8")


def test_discover_flat_workdir_layout(tmp_path: Path) -> None:
    """A workdir holding ``checkpoints/`` directly is found as one experiment."""
    final_phase = "06_package_final_model"
    _write_checkpoint(tmp_path / "checkpoints", f"{final_phase}.json", final_phase)

    found = discover_experiments(tmp_path)

    # Exactly one experiment, rooted at the workdir itself.
    assert len(found) == 1
    experiment = found[0]
    assert experiment.path == tmp_path
    # The numeric prefix of the phase name is expected to map to phase 6.
    assert experiment.phase_number == 6
    assert experiment.status == "completed"
    assert experiment.intent == "test intent"


def test_discover_nested_one_level_layout(tmp_path: Path) -> None:
    """An experiment under ``workdir/<dataset>/checkpoints/`` is still found."""
    dataset_dir = tmp_path / "weatherAUS"
    baseline_phase = "03_build_baselines"
    _write_checkpoint(dataset_dir / "checkpoints", f"{baseline_phase}.json", baseline_phase)

    found = discover_experiments(tmp_path)

    # Only the nested dataset directory should be reported.
    assert len(found) == 1
    experiment = found[0]
    assert experiment.path == dataset_dir
    assert experiment.phase_number == 3