diff --git a/.readthedocs.yaml b/.readthedocs.yaml deleted file mode 100644 index 3cc55ba..0000000 --- a/.readthedocs.yaml +++ /dev/null @@ -1,19 +0,0 @@ -# Read the Docs configuration file -# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details - -# Required -version: 2 - -# Set the OS, Python version, and other tools you might need -build: - os: "ubuntu-22.04" - tools: - python: "3.12" - commands: - # Install uv and dependencies - - pip install uv - - uv sync - # Fetch data from MLflow (uses DATABRICKS_TOKEN secret) - #- uv run python main.py --fetch - # Build Sphinx documentation - - uv run sphinx-build -b html docs/source $READTHEDOCS_OUTPUT/html diff --git a/Experiments/01-Validation/README.rst b/Experiments/01-Validation/README.rst deleted file mode 100644 index a94d1b0..0000000 --- a/Experiments/01-Validation/README.rst +++ /dev/null @@ -1,12 +0,0 @@ -01 - Validation of the solvers -====================================== - -Description ------------ -Here we validate the different solvers. -Configuration -------------- - -.. literalinclude:: ../conf/experiment/validation.yaml - :language: yaml - :caption: experiment/conf/experiment/validation.yaml diff --git a/Experiments/01-Validation/plot_validation.py b/Experiments/01-Validation/plot_validation.py deleted file mode 100644 index e69de29..0000000 diff --git a/Experiments/GALLERY_HEADER.rst b/Experiments/GALLERY_HEADER.rst deleted file mode 100644 index d2cefa1..0000000 --- a/Experiments/GALLERY_HEADER.rst +++ /dev/null @@ -1,4 +0,0 @@ -Example Scripts -=============== - -Example scripts. diff --git a/README.md b/README.md index 0f132d8..17d3577 100644 --- a/README.md +++ b/README.md @@ -12,75 +12,53 @@ Comparing Finite Volume and Spectral methods for the incompressible Navier-Stoke uv sync ``` -## Running Solvers +## Usage -The project uses [Hydra](https://hydra.cc/) for configuration management. 
Run solvers via `run_solver.py`: +```bash +# Run solver + generate plots (default) +uv run python main.py -m +experiment/validation/ghia=fv -### Using Experiment Configs +# Regenerate plots only (no solving) +uv run python main.py -m +experiment/validation/ghia=fv plot_only=true -Pre-defined experiment configurations are in `conf/experiment/`: +# Single run (testing) +uv run python main.py solver=fv N=32 Re=100 -```bash -uv run python run_solver.py -m +experiment=fv_validation +# Custom sweeps +uv run python main.py -m solver=fv N=16,32,64 Re=100,400 ``` -for only plots: pass plot_only=true - -Overwriting at runtime: -uv run python run_solver.py -m +experiment=fv_validation N=16,32,64 Re=100 +## Configuration - -### Configuration Structure +The project uses [Hydra](https://hydra.cc/) for configuration. Structure: ``` conf/ -├── config.yaml # Main config (N, Re, tolerance, etc.) +├── config.yaml # Main config (N, Re, tolerance) ├── solver/ -│ ├── fv.yaml # FV-specific (alpha_uv, alpha_p, scheme) -│ └── spectral.yaml # Spectral-specific (CFL, beta_squared) +│ ├── fv.yaml # Finite Volume settings +│ └── spectral/ # Spectral solver variants ├── experiment/ -│ ├── quick_test.yaml # Fast debugging runs -│ ├── fv_validation.yaml # FV benchmark settings -│ └── spectral_validation.yaml -├── mlflow/ -│ ├── local.yaml # File-based tracking (default) -│ └── coolify.yaml # Remote server (Coolify) -└── hydra/ - └── launcher/ - └── joblib.yaml # Parallel launcher (all cores) +│ └── validation/ghia/ # Ghia benchmark experiments +└── mlflow/ + ├── local.yaml # File-based tracking (default) + └── coolify.yaml # Remote server ``` ## MLflow -Results are tracked with [MLflow](https://mlflow.org/). 
Two tracking modes are available: - -### Local Files (Default) - -File-based tracking in `./mlruns` - no setup required: +Results are tracked with [MLflow](https://mlflow.org/): ```bash -uv run python run_solver.py solver=fv mlflow=local +# Local UI +uv run mlflow ui -# View UI -uv run main.py --mlflow-ui +# Remote server +# https://kni.dk/mlflow-ana-p3/ ``` -### Remote Server (Coolify) - -[mlflow-server](https://kni.dk/mlflow-ana-p3/#/experiments) -```bash -# Setup credentials (one-time) -cp .env.template .env -# Edit .env with your credentials - -# Run solver -uv run python run_solver.py solver=fv mlflow=coolify -``` - - ## References -- [High-Re solutions for incompressible flow (Ghia et al.)](https://www.sciencedirect.com/science/article/pii/0021999182900584) - Benchmark data -- [Chebyshev pseudospectral multigrid method](https://www.sciencedirect.com/science/article/pii/S0045793009001121) - Spectral method -- [The 2D lid-driven cavity problem revisited](https://www.researchgate.net/publication/222433759_The_2D_lid-driven_cavity_problem_revisited) - Conserved quantities -- [P_N-P_{N-2} spectral method](https://www.sciencedirect.com/science/article/pii/S0743731518305549) - Pressure formulation +- [High-Re solutions for incompressible flow (Ghia et al.)](https://www.sciencedirect.com/science/article/pii/0021999182900584) +- [Chebyshev pseudospectral multigrid method](https://www.sciencedirect.com/science/article/pii/S0045793009001121) +- [The 2D lid-driven cavity problem revisited](https://www.researchgate.net/publication/222433759_The_2D_lid-driven_cavity_problem_revisited) diff --git a/conf/config.yaml b/conf/config.yaml index 8cebd61..9f0ebf3 100644 --- a/conf/config.yaml +++ b/conf/config.yaml @@ -1,27 +1,9 @@ # Main Hydra configuration for LDC solvers # -# STANDARD USAGE - Always use -m (multirun mode): -# -# Validation: -# uv run python run_solver.py -m +experiment=validation/ghia -# -# Benchmarking: -# uv run python run_solver.py -m 
+experiment=benchmarking/multigrid_comparison -# -# Quick testing: -# uv run python run_solver.py -m +experiment=testing/quick_test -# -# Custom sweeps: -# uv run python run_solver.py -m solver=fv N=16,32,64 Re=100,400 -# -# Override machine for HPC: -# uv run python run_solver.py -m machine=hpc +experiment=validation/ghia -# -# Regenerate plots (separate tool): -# uv run python plot_runs.py +experiment=validation/ghia -# -# Single runs (testing only, no -m flag needed): -# uv run python run_solver.py solver=fv N=32 Re=100 +# Usage: +# uv run python main.py -m +experiment/validation/ghia=fv # solve + plot +# uv run python main.py -m +experiment/validation/ghia=fv plot_only=true # plot only +# uv run python main.py solver=fv N=32 Re=100 # single run defaults: - problem: ldc @@ -30,22 +12,24 @@ defaults: - machine: local - _self_ -# Grid size (must be set by experiment or command line) +# Grid size N: 32 -# Solver control (taken from solver config) -tolerance: ${solver.tolerance} -max_iterations: ${solver.max_iterations} +# Solver control +tolerance: 1.0e-6 +max_iterations: 100000 -# Default experiment name (typically overridden by experiment config) +# Experiment naming experiment_name: LDC-Dev sweep_name: dev-run +# Plot-only mode (skip solving, regenerate plots from existing runs) +plot_only: false + # ============================================================================= # Hydra output paths and callbacks # ============================================================================= hydra: - # Always use multirun mode by default mode: MULTIRUN run: dir: hydra_outputs/runs/${now:%d-%m-%y}/${now:%H:%M:%S} diff --git a/conf/experiment/validation/corner_treatment.yaml b/conf/experiment/validation/corner_treatment.yaml deleted file mode 100644 index 57b0e34..0000000 --- a/conf/experiment/validation/corner_treatment.yaml +++ /dev/null @@ -1,27 +0,0 @@ -# @package _global_ -# Corner Treatment Comparison: Smoothing vs Subtraction Method -# Compares the two 
approaches for handling corner singularities in spectral LDC solver -# -# Run with: -# uv run python run_solver.py -m +experiment=validation/corner_treatment -# -# References: -# - Subtraction method: Zhang & Xi (2010), Botella & Peyret (1998) -# - Smoothing: Simple cosine smoothing near corners - -defaults: - - override /solver: spectral/sg - -# MLflow experiment -experiment_name: LDC-CornerTreatment -sweep_name: corner-comparison-Re${Re} - -hydra: - sweeper: - params: - # Compare both corner treatment methods - solver.corner_treatment: smoothing,subtraction - # Test across multiple grid sizes to see convergence behavior - N: 16,32,64 - # Reynolds number (can add more: 100,400,1000) - Re: 100 diff --git a/conf/experiment/validation/ghia/fv.yaml b/conf/experiment/validation/ghia/fv.yaml index 1c5d93f..92dd63d 100644 --- a/conf/experiment/validation/ghia/fv.yaml +++ b/conf/experiment/validation/ghia/fv.yaml @@ -13,5 +13,5 @@ hydra: sweeper: params: # Sweep over FV grid sizes - N: 32,64 + N: 32 Re: 100 diff --git a/conf/experiment/validation/ghia/spectral.yaml b/conf/experiment/validation/ghia/spectral.yaml index 8209408..6b5d2f9 100644 --- a/conf/experiment/validation/ghia/spectral.yaml +++ b/conf/experiment/validation/ghia/spectral.yaml @@ -15,6 +15,6 @@ hydra: sweeper: params: # Sweep over spectral solver types - solver: spectral/sg,spectral/fsg + solver: spectral/fsg #,spectral/fsg Re: 100 - N: 15 + N: 27 diff --git a/conf/experiment/validation/hpc-fv.yaml b/conf/experiment/validation/hpc-fv.yaml new file mode 100644 index 0000000..a918097 --- /dev/null +++ b/conf/experiment/validation/hpc-fv.yaml @@ -0,0 +1,17 @@ +# @package _global_ +# Ghia validation - Finite Volume solver + +defaults: + - override /solver: fv + +# MLflow experiment +experiment_name: LDC-Validation +sweep_name: Hpc-baseline-fv + +hydra: + sweeper: + params: + N: 128 + Re: 100, 400, 1000 + # Relaxation parameter sweep + diff --git a/conf/solver/fv.yaml b/conf/solver/fv.yaml index 
88eb623..dbdc302 100644 --- a/conf/solver/fv.yaml +++ b/conf/solver/fv.yaml @@ -2,13 +2,21 @@ _target_: solvers.fv.solver.FVSolver name: fv +# Problem parameters (interpolated from root config) +Re: ${Re} +lid_velocity: ${lid_velocity} +Lx: ${Lx} +Ly: ${Ly} +nx: ${N} +ny: ${N} + # Solver control -tolerance: 1.0e-6 -max_iterations: 10000 # FV typically needs more iterations than spectral +tolerance: ${tolerance} +max_iterations: ${max_iterations} # FV-specific parameters -convection_scheme: Upwind +convection_scheme: TVD limiter: MUSCL -alpha_uv: 0.6 # velocity under-relaxation -alpha_p: 0.4 # pressure under-relaxation -linear_solver_tol: 1.0e-6 # SciPy linear solver tolerance +alpha_uv: 0.4 # velocity under-relaxation +alpha_p: 0.2 # pressure under-relaxation +linear_solver_tol: 1.0e-9 diff --git a/conf/solver/spectral/sg.yaml b/conf/solver/spectral/sg.yaml index c2036db..3890f95 100644 --- a/conf/solver/spectral/sg.yaml +++ b/conf/solver/spectral/sg.yaml @@ -3,9 +3,17 @@ _target_: solvers.spectral.sg.SGSolver name: spectral +# Problem parameters (interpolated from root config) +Re: ${Re} +lid_velocity: ${lid_velocity} +Lx: ${Lx} +Ly: ${Ly} +nx: ${N} +ny: ${N} + # Solver control -tolerance: 1.0e-6 -max_iterations: 30000 # Spectral methods converge slowly for high Re +tolerance: ${tolerance} +max_iterations: ${max_iterations} # Spectral-specific parameters basis_type: chebyshev # "chebyshev" or "legendre" diff --git a/docs/reports/TexReport b/docs/reports/TexReport index 7b5c1fe..5a8cb05 160000 --- a/docs/reports/TexReport +++ b/docs/reports/TexReport @@ -1 +1 @@ -Subproject commit 7b5c1feee054fbc5e233ce1880e137cddefcb23e +Subproject commit 5a8cb0597bb168e8efb8778c165d3bcf3a9374b0 diff --git a/docs/source/_templates/autosummary/class.rst b/docs/source/_templates/autosummary/class.rst index b829f26..eb843ef 100644 --- a/docs/source/_templates/autosummary/class.rst +++ b/docs/source/_templates/autosummary/class.rst @@ -31,7 +31,3 @@ {% endif %} {% endblock %} - {% 
block examples %} - .. include:: ../gen_modules/backreferences/{{ fullname }}.examples - :start-line: 1 - {% endblock %} diff --git a/docs/source/api/base_solver.rst b/docs/source/api/base_solver.rst deleted file mode 100644 index e897e66..0000000 --- a/docs/source/api/base_solver.rst +++ /dev/null @@ -1,12 +0,0 @@ -Base Solver (``solvers.base``) -============================== - -Abstract base class for lid-driven cavity solvers. - -.. currentmodule:: solvers.base - -.. autosummary:: - :toctree: ../generated - :nosignatures: - - LidDrivenCavitySolver diff --git a/docs/source/api/datastructures.rst b/docs/source/api/datastructures.rst deleted file mode 100644 index 9d2478d..0000000 --- a/docs/source/api/datastructures.rst +++ /dev/null @@ -1,19 +0,0 @@ -Data Structures (``solvers.datastructures``) -============================================ - -Configuration and result data structures for solvers. - -.. currentmodule:: solvers.datastructures - -.. autosummary:: - :toctree: ../generated - :nosignatures: - - Parameters - FVParameters - SpectralParameters - Metrics - Fields - TimeSeries - FVSolverFields - SpectralSolverFields diff --git a/docs/source/api/fv_solver.rst b/docs/source/api/fv_solver.rst deleted file mode 100644 index fe4954a..0000000 --- a/docs/source/api/fv_solver.rst +++ /dev/null @@ -1,12 +0,0 @@ -Finite Volume Solver (``solvers.fv.solver``) -============================================ - -Finite volume solver using the SIMPLE algorithm with PETSc. - -.. currentmodule:: solvers.fv.solver - -.. autosummary:: - :toctree: ../generated - :nosignatures: - - FVSolver diff --git a/docs/source/api/solvers.rst b/docs/source/api/solvers.rst deleted file mode 100644 index f799138..0000000 --- a/docs/source/api/solvers.rst +++ /dev/null @@ -1,14 +0,0 @@ -Solvers (``solvers``) -===================== - -Solvers for lid-driven cavity flow simulations. - -.. currentmodule:: solvers - -.. 
autosummary:: - :toctree: ../generated - :nosignatures: - - LidDrivenCavitySolver - FVSolver - SpectralSolver diff --git a/docs/source/api/spectral_solver.rst b/docs/source/api/spectral_solver.rst deleted file mode 100644 index e3b003b..0000000 --- a/docs/source/api/spectral_solver.rst +++ /dev/null @@ -1,12 +0,0 @@ -Spectral Solver (``solvers.spectral.solver``) -============================================= - -Spectral solver using Chebyshev collocation with artificial compressibility. - -.. currentmodule:: solvers.spectral.solver - -.. autosummary:: - :toctree: ../generated - :nosignatures: - - SpectralSolver diff --git a/docs/source/api_reference.rst b/docs/source/api_reference.rst index d3a04f1..609cb90 100644 --- a/docs/source/api_reference.rst +++ b/docs/source/api_reference.rst @@ -4,9 +4,36 @@ API Reference ============= -This page provides an overview of the solver modules. +Solvers +------- -.. toctree:: - :maxdepth: 2 +.. currentmodule:: solvers - api/solvers +.. autosummary:: + :toctree: generated + :template: autosummary/class.rst + :nosignatures: + + LidDrivenCavitySolver + FVSolver + SGSolver + FSGSolver + VMGSolver + FMGSolver + +Data Structures +--------------- + +.. 
autosummary:: + :toctree: generated + :template: autosummary/class.rst + :nosignatures: + + Parameters + Metrics + Fields + TimeSeries + FVParameters + FVSolverFields + SpectralParameters + SpectralSolverFields diff --git a/docs/source/conf.py b/docs/source/conf.py index 778c333..3774690 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -26,7 +26,6 @@ "sphinx.ext.viewcode", "numpydoc", "sphinx_copybutton", - "sphinx_gallery.gen_gallery", ] root_doc = "index" @@ -59,36 +58,6 @@ templates_path = ["_templates"] numpydoc_use_plots = False # Don't auto-generate plots from Examples -# -- Sphinx Gallery configuration -------------------------------------------- - -sphinx_gallery_conf = { - "examples_dirs": "../../Experiments", # Path to example scripts - "gallery_dirs": "example_gallery", # Output directory for gallery - "filename_pattern": "/plot_", # Pattern to match which scripts to execute - "download_all_examples": False, # No download buttons - "remove_config_comments": True, # Clean up notebook outputs - "abort_on_example_error": False, # Continue if examples fail - "plot_gallery": True, # Enable plot gallery generation - "capture_repr": ("_repr_html_", "__repr__"), # Capture output representations - "matplotlib_animations": True, # Support matplotlib animations - # Remove Jupyter cell markers (# %%) from rendered output - "first_notebook_cell": None, # Don't add a first cell - "last_notebook_cell": None, # Don't add a last cell - "notebook_images": False, # Don't embed images in notebooks - # Cross-referencing: Create "Examples using X" in API docs - "backreferences_dir": "gen_modules/backreferences", - "doc_module": ( - "solvers", - "utils", - ), # Generate backreferences for our packages - "inspect_global_variables": True, # Detect classes/functions used in examples - # Make code clickable: Link to API docs when code mentions package functions - "reference_url": { - "solvers": None, # None = use local docs (not external URL) - "utils": None, - }, -} 
- # -- Intersphinx configuration ----------------------------------------------- intersphinx_mapping = { diff --git a/docs/source/configuration.rst b/docs/source/configuration.rst index 22366dc..b36bbf4 100644 --- a/docs/source/configuration.rst +++ b/docs/source/configuration.rst @@ -1,107 +1,127 @@ -Experiment Configuration -======================== +Configuration +============= This guide explains the Hydra configuration system used for experiment management. -Configuration Hierarchy +Configuration Structure ----------------------- -The configuration system uses a hierarchical structure where settings can be -defined at multiple levels and overridden as needed: - .. code-block:: text conf/ - ├── config.yaml # Base configuration (defaults) + ├── config.yaml # Main config (defaults, grid size, tolerance) + ├── problem/ + │ └── ldc.yaml # Physics (Re, lid velocity, domain size) ├── solver/ - │ ├── fv.yaml # Finite Volume solver settings - │ └── spectral.yaml # Spectral solver settings + │ ├── fv.yaml # Finite Volume solver + │ └── spectral/ # Spectral solver variants + │ ├── sg.yaml # Single Grid + │ ├── fsg.yaml # Full Single Grid MG + │ ├── vmg.yaml # V-cycle MultiGrid + │ └── fmg.yaml # Full MultiGrid ├── experiment/ - │ ├── quick_test.yaml # Fast debugging - │ ├── sweep_test.yaml # Sweep testing - │ ├── fv_validation.yaml # FV benchmark - │ └── spectral_validation.yaml - ├── mlflow/ - │ ├── local.yaml # Local file tracking - │ └── coolify.yaml # Remote server - └── hydra/ - └── launcher/ - └── joblib.yaml # Parallel execution + │ ├── validation/ghia/ # Ghia benchmark validation + │ │ ├── fv.yaml + │ │ └── spectral.yaml + │ └── benchmarking/ + │ └── timings.yaml + ├── machine/ + │ ├── local.yaml # Local machine settings + │ └── hpc.yaml # DTU HPC cluster + └── mlflow/ + ├── local.yaml # File-based tracking (default) + └── coolify.yaml # Remote server Base Configuration ------------------ -The main ``config.yaml`` defines default values for all parameters: +The 
main ``config.yaml`` defines defaults and grid parameters: .. code-block:: yaml # conf/config.yaml defaults: + - problem: ldc - solver: fv - mlflow: local + - machine: local - _self_ - # Grid and physics - N: 32 # Grid size (cells for FV, polynomial order for spectral) - Re: 100 # Reynolds number - lid_velocity: 1.0 # Lid velocity - Lx: 1.0 # Domain width - Ly: 1.0 # Domain height - - # Solver control + N: 32 # Grid size tolerance: 1.0e-6 # Convergence tolerance - max_iterations: 500 # Maximum iterations + max_iterations: 10000 # Maximum iterations + + experiment_name: LDC-Dev + sweep_name: dev-run + plot_only: false # Set true to regenerate plots without solving + +Problem Configuration +--------------------- - # Experiment tracking - experiment_name: LDC-Solver - sweep_name: sweep # Parent run name for multirun sweeps +Physics parameters are defined in ``conf/problem/ldc.yaml``: + +.. code-block:: yaml + + # @package _global_ + Re: 100 # Reynolds number + lid_velocity: 1.0 # Velocity of moving lid + Lx: 1.0 # Domain width + Ly: 1.0 # Domain height Solver Configurations --------------------- -Each solver has its own configuration file with solver-specific parameters. +Solvers use Hydra interpolation (``${...}``) to inherit parameters from the root config. **Finite Volume** (``conf/solver/fv.yaml``): .. 
code-block:: yaml + # @package solver + _target_: solvers.fv.solver.FVSolver name: fv - convection_scheme: upwind # upwind, central, quick - limiter: none # none, minmod, vanLeer - alpha_uv: 0.7 # Velocity under-relaxation - alpha_p: 0.3 # Pressure under-relaxation - linear_solver_tol: 1.0e-6 # PETSc solver tolerance -**Spectral** (``conf/solver/spectral.yaml``): + # Interpolated from root config + Re: ${Re} + nx: ${N} + ny: ${N} + tolerance: ${tolerance} + + # FV-specific parameters + convection_scheme: TVD # TVD or upwind + limiter: MUSCL # MUSCL, minmod, vanLeer + alpha_uv: 0.6 # Velocity under-relaxation + alpha_p: 0.4 # Pressure under-relaxation + +**Spectral Single Grid** (``conf/solver/spectral/sg.yaml``): .. code-block:: yaml + # @package solver + _target_: solvers.spectral.sg.SGSolver name: spectral - basis_type: chebyshev-gauss-lobatto # Basis functions - CFL: 0.5 # CFL number for time stepping - beta_squared: 1.0 # Artificial compressibility - corner_smoothing: true # Smooth corner singularities -Experiment Configurations -------------------------- + Re: ${Re} + nx: ${N} + ny: ${N} -Experiment configs override base settings for specific use cases. They use -``# @package _global_`` to merge into the root config. + # Spectral-specific parameters + basis_type: chebyshev # chebyshev or legendre + CFL: 0.5 + beta_squared: 5.0 # Artificial compressibility -**Quick Test** (``conf/experiment/quick_test.yaml``): + # Corner singularity treatment + corner_treatment: smoothing # smoothing or subtraction + corner_smoothing: 0.15 -.. code-block:: yaml +Other spectral variants (``fsg.yaml``, ``vmg.yaml``, ``fmg.yaml``) add multigrid acceleration. - # @package _global_ - experiment_name: Quick-Test - sweep_name: quick-test-sweep +Experiment Configurations +------------------------- - N: 16 - Re: 100 - tolerance: 1.0e-4 - max_iterations: 100 +Experiments override base settings and define parameter sweeps. 
-**Validation Sweep** (``conf/experiment/fv_validation.yaml``): +**Ghia Validation** (``conf/experiment/validation/ghia/fv.yaml``): .. code-block:: yaml @@ -109,151 +129,83 @@ Experiment configs override base settings for specific use cases. They use defaults: - override /solver: fv - experiment_name: FV-Validation - sweep_name: fv-validation-sweep + experiment_name: LDC-Validation + sweep_name: ghia-Re${Re} - N: 64 - Re: 100 - tolerance: 1.0e-7 - max_iterations: 50000 - - # Define sweep parameters for multirun hydra: sweeper: params: - N: 32,64,128 - Re: 100,400,1000 - -Creating Custom Experiments ---------------------------- - -To create a new experiment configuration: - -1. Create a new YAML file in ``conf/experiment/``: - -.. code-block:: yaml - - # conf/experiment/my_experiment.yaml - # @package _global_ - - experiment_name: My-Experiment - sweep_name: my-sweep - - # Override any parameters - N: 48 - Re: 400 - tolerance: 1.0e-8 - max_iterations: 10000 - - # Optionally define sweep parameters - hydra: - sweeper: - params: - N: 32,48,64 - Re: 100,400 - -2. Run with your experiment: - -.. code-block:: bash + N: 32, 64 + Re: 100 - # Single run - uv run python run_solver.py +experiment=my_experiment solver=fv - - # Sweep (uses hydra.sweeper.params if defined) - uv run python run_solver.py -m +experiment=my_experiment - -MLflow Integration +Command Line Usage ------------------ -Experiment Name -^^^^^^^^^^^^^^^ - -The ``experiment_name`` field determines the MLflow experiment where runs are logged: - -.. code-block:: yaml - - experiment_name: FV-Validation # Creates/uses this MLflow experiment - -Sweep Name (Parent Runs) -^^^^^^^^^^^^^^^^^^^^^^^^ +Single Runs +^^^^^^^^^^^ -When running parameter sweeps (``-m`` flag), a parent run is automatically created -to group all child runs. The ``sweep_name`` field controls the parent run's name: - -.. code-block:: yaml +.. 
code-block:: bash - sweep_name: fv-validation-sweep + # Default FV solver + uv run python main.py solver=fv N=32 Re=100 -This creates a hierarchy in MLflow: + # Spectral solver (single grid) + uv run python main.py solver=spectral/sg N=31 Re=100 -.. code-block:: text + # Spectral with V-cycle multigrid + uv run python main.py solver=spectral/vmg N=31 Re=1000 - fv-validation-sweep (parent) - ├── fv_N32_Re100 (child) - ├── fv_N32_Re400 (child) - ├── fv_N64_Re100 (child) - └── ... +Parameter Sweeps +^^^^^^^^^^^^^^^^ -You can also override the sweep name from the command line: +Use ``-m`` (multirun) flag: .. code-block:: bash - uv run python run_solver.py -m sweep_name=custom-sweep solver=fv N=16,32,64 - -Command Line Usage ------------------- - -Basic Overrides -^^^^^^^^^^^^^^^ - -Override any parameter from the command line: - -.. code-block:: bash + # Command-line sweep + uv run python main.py -m solver=fv N=16,32,64 Re=100,400 - # Override single parameters - uv run python run_solver.py solver=spectral N=31 Re=1000 + # Use experiment config + uv run python main.py -m +experiment/validation/ghia=fv - # Override multiple parameters - uv run python run_solver.py solver=fv N=64 Re=400 tolerance=1e-8 + # Override experiment parameters + uv run python main.py -m +experiment/validation/ghia=fv Re=400,1000 -Using Experiments -^^^^^^^^^^^^^^^^^ +Plot-Only Mode +^^^^^^^^^^^^^^ -Load an experiment configuration with ``+experiment=``: +Regenerate plots from existing MLflow runs without re-solving: .. code-block:: bash - # Load experiment config - uv run python run_solver.py +experiment=fv_validation + uv run python main.py -m +experiment/validation/ghia=fv plot_only=true - # Load experiment and override parameters - uv run python run_solver.py +experiment=fv_validation N=128 Re=1000 - -Parameter Sweeps -^^^^^^^^^^^^^^^^ +Viewing Configuration +^^^^^^^^^^^^^^^^^^^^^ -Use ``-m`` (multirun) to sweep over parameters: +Print resolved config without running: .. 
code-block:: bash - # Sweep from command line - uv run python run_solver.py -m solver=fv N=16,32,64 Re=100,400 + uv run python main.py --cfg job + uv run python main.py +experiment/validation/ghia=fv --cfg job - # Use experiment's predefined sweep - uv run python run_solver.py -m +experiment=fv_validation +MLflow Integration +------------------ - # Parallel sweep with joblib - uv run python run_solver.py -m hydra/launcher=joblib solver=fv,spectral N=16,32,64 +Runs are automatically tracked in MLflow. The ``experiment_name`` determines the +MLflow experiment, and ``sweep_name`` creates parent runs for grouping sweeps: -Viewing Configuration -^^^^^^^^^^^^^^^^^^^^^ +.. code-block:: text -Print the resolved configuration without running: + LDC-Validation (experiment) + └── ghia-Re100 (parent run) + ├── fv_N32 (child) + └── fv_N64 (child) -.. code-block:: bash +View results: - # Show resolved config - uv run python run_solver.py --cfg job +.. code-block:: bash - # Show config with experiment - uv run python run_solver.py +experiment=fv_validation --cfg job + uv run mlflow ui + # Open http://localhost:5000 diff --git a/docs/source/index.rst b/docs/source/index.rst index 8d3ffb6..2047367 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -8,52 +8,34 @@ Lid-Driven Cavity Flow: Finite Volume and Spectral Methods This documentation provides computational experiments, API reference, and implementation details for solving the lid-driven cavity problem using finite volume and spectral methods. -For the full codebase, please visit the `GitHub repository `_. +For the full codebase, visit the `GitHub repository `_. -Contents --------- +Quick Start +----------- -:doc:`example_gallery/index` - Gallery of computational experiments and visualizations for lid-driven cavity flow. -:doc:`configuration` - Experiment configuration structure and customization guide. -:doc:`usage` - Running solvers locally and on HPC clusters. 
-:doc:`api_reference` - Complete API reference for solver modules. +.. code-block:: bash -.. toctree:: - :maxdepth: 2 - :hidden: - :caption: Examples + # Install + uv sync - example_gallery/index + # Run FV validation + uv run python main.py -m +experiment/validation/ghia=fv + + # View results + uv run mlflow ui + +Contents +-------- .. toctree:: :maxdepth: 2 - :hidden: - :titlesonly: :caption: User Guide - configuration usage + configuration .. toctree:: :maxdepth: 2 - :hidden: - :titlesonly: :caption: Reference api_reference - -Installation ------------- - -The package requires Python 3.12 and uses ``uv`` for dependency management. - -Run the setup script from the project root:: - - bash setup.sh - -This will create a virtual environment and install all dependencies including PETSc and petsc4py. - diff --git a/docs/source/usage.rst b/docs/source/usage.rst index dac6728..19d57a7 100644 --- a/docs/source/usage.rst +++ b/docs/source/usage.rst @@ -4,38 +4,54 @@ Usage Guide This guide covers running solvers locally with Hydra configuration management and on the DTU HPC cluster. -Hydra Configuration -------------------- +Quick Start +----------- -The project uses `Hydra `_ for configuration management. -All solver runs are executed via ``run_solver.py``. +.. code-block:: bash -Basic Usage -^^^^^^^^^^^ + # Run FV solver with validation experiment + uv run python main.py -m +experiment/validation/ghia=fv -.. 
code-block:: bash + # Run spectral solver + uv run python main.py -m +experiment/validation/ghia=spectral - # Finite Volume solver (32x32 cells, Re=100) - uv run python run_solver.py solver=fv N=32 Re=100 + # Single run (testing) + uv run python main.py solver=fv N=32 Re=100 - # Spectral solver (N=15 gives 16x16 nodes, Re=100) - uv run python run_solver.py solver=spectral N=15 Re=100 + # Regenerate plots without re-solving + uv run python main.py -m +experiment/validation/ghia=fv plot_only=true -Using Experiment Configs -^^^^^^^^^^^^^^^^^^^^^^^^ + # View MLflow UI + uv run mlflow ui -Pre-defined experiment configurations are in ``conf/experiment/``: +Hydra Configuration +------------------- -.. code-block:: bash +The project uses `Hydra `_ for configuration management. - # Quick test (small grid, few iterations) - uv run python run_solver.py +experiment=quick_test solver=fv +Configuration Structure +^^^^^^^^^^^^^^^^^^^^^^^ - # FV validation (default settings for benchmarking) - uv run python run_solver.py +experiment=fv_validation +.. code-block:: text - # Spectral validation - uv run python run_solver.py +experiment=spectral_validation + conf/ + ├── config.yaml # Main config (N, Re, tolerance) + ├── problem/ + │ └── ldc.yaml # Physics (Re, domain size) + ├── solver/ + │ ├── fv.yaml # Finite Volume settings + │ └── spectral/ # Spectral solver variants + │ ├── sg.yaml # Single Grid + │ ├── fsg.yaml # Full Single Grid MG + │ ├── vmg.yaml # V-cycle MultiGrid + │ └── fmg.yaml # Full MultiGrid + ├── experiment/ + │ └── validation/ghia/ # Ghia benchmark experiments + │ ├── fv.yaml + │ └── spectral.yaml + └── mlflow/ + ├── local.yaml # File-based tracking (default) + └── coolify.yaml # Remote server Parameter Sweeps ^^^^^^^^^^^^^^^^ @@ -44,178 +60,90 @@ Run multiple configurations with Hydra's multirun (``-m``): .. 
code-block:: bash - # Sweep over grid sizes (sequential) - uv run python run_solver.py -m solver=fv N=16,32,64 Re=100 + # Sweep over grid sizes + uv run python main.py -m solver=fv N=16,32,64 Re=100 # Sweep over Reynolds numbers - uv run python run_solver.py -m solver=spectral N=31 Re=100,400,1000 - -Parallel Sweeps (Joblib) -^^^^^^^^^^^^^^^^^^^^^^^^ - -Run sweeps in parallel using all CPU cores with the Joblib launcher: - -.. code-block:: bash - - # Parallel sweep over grid sizes - uv run python run_solver.py -m hydra/launcher=joblib solver=fv N=16,32,64 Re=100 - - # Parallel sweep over solvers - uv run python run_solver.py -m hydra/launcher=joblib solver=fv,spectral N=32 Re=100 - - # Parallel multi-dimensional sweep (solver x N x Re = 12 jobs) - uv run python run_solver.py -m hydra/launcher=joblib solver=fv,spectral N=16,32,64 Re=100,400 - - # Control parallelism (e.g., 4 concurrent jobs) - uv run python run_solver.py -m hydra/launcher=joblib hydra.launcher.n_jobs=4 solver=fv N=16,32,64 - -Configuration Structure -^^^^^^^^^^^^^^^^^^^^^^^ - -.. code-block:: text - - conf/ - ├── config.yaml # Main config (N, Re, tolerance, etc.) 
- ├── solver/ - │ ├── fv.yaml # FV-specific (alpha_uv, alpha_p, scheme) - │ └── spectral.yaml # Spectral-specific (CFL, beta_squared) - ├── experiment/ - │ ├── quick_test.yaml # Fast debugging runs - │ ├── fv_validation.yaml # FV benchmark settings - │ └── spectral_validation.yaml - ├── mlflow/ - │ ├── local.yaml # File-based tracking (default) - │ └── coolify.yaml # Remote server - └── hydra/ - └── launcher/ - └── joblib.yaml # Parallel launcher (all cores) - -Nested Runs for Sweeps -^^^^^^^^^^^^^^^^^^^^^^ + uv run python main.py -m solver=fv N=32 Re=100,400,1000 -Parameter sweeps automatically create a parent-child run hierarchy in MLflow: + # Multi-dimensional sweep + uv run python main.py -m solver=fv,spectral/sg N=16,32 Re=100,400 -- **Parent run**: Created before sweep starts, logs sweep configuration -- **Child runs**: Each parameter combination nested under the parent - -This makes it easy to: - -- View all runs from a sweep together in the MLflow UI -- Compare metrics across parameter combinations -- Track sweep-level metadata (HPC job ID, sweep config) +Sweeps automatically create parent-child run hierarchies in MLflow for easy comparison. MLflow Tracking ^^^^^^^^^^^^^^^ -Results are tracked with `MLflow <https://mlflow.org/>`_. Two modes available: - -**Local Files (Default):** +Results are tracked with `MLflow <https://mlflow.org/>`_: .. code-block:: bash - uv run python run_solver.py solver=fv mlflow=local + # Local file-based tracking (default) + uv run python main.py solver=fv N=32 Re=100 - # View UI - uv run main.py --mlflow-ui + # View results + uv run mlflow ui + # Open http://localhost:5000 -**Remote Server:** + # Remote server (configure .env first) + uv run python main.py solver=fv N=32 Re=100 mlflow=coolify -.. 
code-block:: bash +HPC Cluster (DTU LSF) +--------------------- - # Setup credentials (one-time) - cp .env.template .env - # Edit .env with your credentials - - # Run solver - uv run python run_solver.py solver=fv mlflow=coolify - -HPC Cluster (DTU) ------------------ +Running experiments on the DTU HPC cluster using LSF job arrays. -This section covers running parameter sweeps on the DTU HPC cluster using LSF. - -Initial Setup -^^^^^^^^^^^^^ +Setup +^^^^^ 1. Clone the repository on the HPC cluster -2. Navigate into the repo root -3. Set up MLflow credentials: +2. Configure MLflow credentials: .. code-block:: bash cp .env.template .env # Edit .env with your credentials -Submitting Jobs -^^^^^^^^^^^^^^^ - -Submit jobs using bsub with Hydra: - -.. code-block:: bash - - # Single job - bsub -q hpc -W 1:00 -n 4 -R "rusage[mem=4GB]" \ - "uv run python run_solver.py solver=fv N=32 Re=100 mlflow=coolify" - - # Sequential parameter sweep - bsub -q hpc -W 4:00 -n 4 -R "rusage[mem=4GB]" \ - "uv run python run_solver.py -m solver=fv N=16,32,64 Re=100,400 mlflow=coolify" - -Parallel Sweeps on HPC +Submitting Experiments ^^^^^^^^^^^^^^^^^^^^^^ -Use the Joblib launcher to run parameter combinations in parallel on a single node: +Use the ``hpc_submit.py`` script to submit experiment sweeps as job arrays: .. code-block:: bash - # Parallel sweep using all cores on the node - bsub -q hpc -W 2:00 -n 16 -R "rusage[mem=2GB]" -R "span[hosts=1]" \ - "uv run python run_solver.py -m hydra/launcher=joblib solver=fv,spectral N=16,32,64 Re=100,400 mlflow=coolify" - - # Control number of parallel jobs (e.g., 8 concurrent) - bsub -q hpc -W 2:00 -n 8 -R "rusage[mem=4GB]" -R "span[hosts=1]" \ - "uv run python run_solver.py -m hydra/launcher=joblib hydra.launcher.n_jobs=8 solver=fv N=16,32,64,128 Re=100,400,1000 mlflow=coolify" - -.. 
note:: + # Preview job script (dry run) + uv run python scripts/hpc_submit.py +experiment/validation/ghia=fv --dry-run - The ``-R "span[hosts=1]"`` flag ensures all cores are allocated on a single node. - This is required because joblib uses local multiprocessing - it cannot distribute - work across multiple nodes. Without this flag, LSF might split your cores across - nodes, leaving some unusable. + # Submit FV validation (2 jobs: N=32, N=64) + uv run python scripts/hpc_submit.py +experiment/validation/ghia=fv -Monitoring Jobs -^^^^^^^^^^^^^^^ - -Check the status of your running jobs: + # Submit with custom resources + uv run python scripts/hpc_submit.py +experiment/validation/ghia=spectral \ + --queue gpuv100 --time 4:00 --cores 8 --mem 8GB -.. code-block:: bash +The script: - bstat +1. Parses sweep parameters from the experiment config +2. Generates all parameter combinations +3. Submits an LSF job array where each job runs one configuration Example output: .. code-block:: text - JOBID USER QUEUE JOB_NAME NALLOC STAT START_TIME ELAPSED - 27198794 s214960 hpc *N19-Re100 4 RUN Nov 27 23:11 0:01:39 - 27198795 s214960 hpc *N23-Re100 4 RUN Nov 27 23:11 0:01:39 - -Killing Jobs -^^^^^^^^^^^^ + Experiment: +experiment/validation/ghia=fv + Sweep: {'N': ['32', '64'], 'Re': ['100']} + Jobs: 2 + [1] {'N': '32', 'Re': '100'} + [2] {'N': '64', 'Re': '100'} -Kill jobs by name or ID: +Job Management +^^^^^^^^^^^^^^ .. code-block:: bash - # Kill a specific job by name - bkill -J LDC-N32-Re100 - - # Kill a job by ID - bkill 27198795 - - # Kill all your jobs - bkill 0 - -.. tip:: + bstat # Check job status + bkill 12345 # Kill specific job + bkill 0 # Kill all your jobs - Track job progress in the MLflow UI - each run logs the LSF job ID as a tag. +Logs are written to ``logs/_.out``. 
diff --git a/main.py b/main.py index 55266f7..1c1588e 100644 --- a/main.py +++ b/main.py @@ -1,164 +1,138 @@ -#!/usr/bin/env python3 -"""Main entry point for project management - CLI driven.""" +""" +LDC Solver - Unified entry point for solving and plotting. -import argparse +Usage: + uv run python main.py -m +experiment/validation/ghia=fv + uv run python main.py -m +experiment/validation/ghia=fv plot_only=true + uv run python main.py solver=fv N=32 Re=100 +""" + +import logging +import os import sys +import tempfile from pathlib import Path -# Ensure src directory is in python path -sys.path.append(str(Path(__file__).parent / "src")) +import hydra +import mlflow +from dotenv import load_dotenv +from hydra.utils import instantiate +from omegaconf import DictConfig, OmegaConf -from utilities import runners -from utilities.config import get_repo_root, clean_all +load_dotenv() +sys.path.insert(0, str(Path(__file__).parent / "src")) +log = logging.getLogger(__name__) -def build_docs(): - """Build Sphinx documentation.""" - import subprocess - repo_root = get_repo_root() - docs_dir = repo_root / "docs" - source_dir = docs_dir / "source" - build_dir = docs_dir / "build" +def get_experiment_name(cfg: DictConfig) -> str: + """Build full experiment name with optional prefix.""" + name = cfg.experiment_name + prefix = cfg.mlflow.get("project_prefix", "") + if prefix and not name.startswith("/"): + return f"{prefix}/{name}" + return name - print("\nBuilding Sphinx documentation...") - if not source_dir.exists(): - print(f" Error: Documentation source directory not found: {source_dir}") - return False +def setup_mlflow(cfg: DictConfig) -> str: + """Setup MLflow tracking and return experiment name.""" + tracking_uri = cfg.mlflow.get("tracking_uri", "./mlruns") + if str(cfg.mlflow.get("mode", "")).lower() in ("files", "local"): + os.environ.pop("MLFLOW_TRACKING_URI", None) + os.environ["MLFLOW_TRACKING_URI"] = str(tracking_uri) + mlflow.set_tracking_uri(tracking_uri) + 
experiment_name = get_experiment_name(cfg) try: - result = subprocess.run( - [ - "uv", - "run", - "sphinx-build", - "-M", - "html", - str(source_dir), - str(build_dir), - ], - capture_output=True, - text=True, - timeout=300, - cwd=str(repo_root), - ) - - if result.returncode == 0: - print(" ✓ Documentation built successfully") - print(f" → Open: {build_dir / 'html' / 'index.html'}\n") - return True - else: - print(f" ✗ Documentation build failed (exit {result.returncode})") - if result.stderr: - print(f" Error: {result.stderr[:500]}") - return False - - except subprocess.TimeoutExpired: - print(" ✗ Documentation build timed out") - return False - except FileNotFoundError: - print(" ✗ sphinx-build not found. Install with: uv sync") - return False - except Exception as e: - print(f" ✗ Documentation build failed: {e}") - return False - - -def main(): - """Main CLI entry point.""" - parser = argparse.ArgumentParser( - description="Project management for MPI Poisson Solver", - formatter_class=argparse.RawDescriptionHelpFormatter, - ) - - actions = parser.add_argument_group("Actions") - actions.add_argument( - "--docs", action="store_true", help="Build Sphinx HTML documentation" - ) - actions.add_argument( - "--compute", action="store_true", help="Run all compute scripts (sequentially)" - ) - actions.add_argument( - "--plot", action="store_true", help="Run all plotting scripts (in parallel)" - ) - actions.add_argument( - "--copy-plots", action="store_true", help="Copy plots to report directory" - ) - actions.add_argument( - "--clean", action="store_true", help="Clean all generated files and caches" - ) - actions.add_argument( - "--setup-mlflow", - action="store_true", - help="Interactive MLflow setup (login to Databricks)", - ) - actions.add_argument( - "--mlflow-ui", action="store_true", help="Start local MLflow UI (./mlruns)" - ) - - if len(sys.argv) == 1: - parser.print_help() - sys.exit(1) + mlflow.set_experiment(experiment_name) + except Exception as exc: + 
experiment_name = f"{experiment_name}-restored" + log.warning(f"MLflow set_experiment failed ({exc}); using '{experiment_name}'") + mlflow.set_experiment(experiment_name) - args = parser.parse_args() + return experiment_name - # Execute commands in logical order - if args.clean: - clean_all() - if args.setup_mlflow: - import mlflow +def find_existing_run(cfg: DictConfig) -> str: + """Find existing MLflow run matching config parameters.""" + experiment = mlflow.get_experiment_by_name(get_experiment_name(cfg)) + if not experiment: + raise ValueError(f"Experiment not found: {cfg.experiment_name}") - print("\nSetting up MLflow...") - mlflow.login(backend="databricks", interactive=True) - - if args.compute: - runners.run_compute_scripts() - - if args.plot: - runners.run_plot_scripts() - - if args.copy_plots: - runners.copy_to_report() - - if args.mlflow_ui: - import socket - import subprocess - import threading - import webbrowser - - def is_port_free(port): - with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: - return s.connect_ex(("localhost", port)) != 0 - - # Find available port - port = 5001 - while not is_port_free(port) and port < 5010: - port += 1 - - url = f"http://localhost:{port}" - print(f"\nStarting MLflow UI at {url}") - print("Press Ctrl+C to stop\n") - - # Open browser after short delay - def open_browser(): - import time - - time.sleep(2) - webbrowser.open(url) + runs = mlflow.search_runs( + experiment_ids=[experiment.experiment_id], + filter_string=f"params.Re = '{cfg.Re}' AND params.nx = '{cfg.N}' AND attributes.status = 'FINISHED'", + max_results=1, + ) + if runs.empty: + raise ValueError(f"No matching run found for N={cfg.N}, Re={cfg.Re}") + + run_id = runs.iloc[0]["run_id"] + log.info(f"Found existing run: {run_id[:8]}") + return run_id + + +def run_solver(cfg: DictConfig) -> str: + """Run solver and log to MLflow. 
Returns run_id.""" + solver = instantiate(cfg.solver, _convert_="partial") + solver_name = cfg.solver.name + + # Run name: spectral uses N+1 (Chebyshev points) + N_display = cfg.N + 1 if solver_name.startswith("spectral") else cfg.N + run_name = f"{solver_name}_N{N_display}" + + # Parent run tagging for sweeps + parent_run_id = os.environ.get("MLFLOW_PARENT_RUN_ID") + tags = {"solver": solver_name} + if parent_run_id: + tags.update({"mlflow.parentRunId": parent_run_id, "parent_run_id": parent_run_id, "sweep": "child"}) + + with mlflow.start_run(run_name=run_name, tags=tags, nested=bool(parent_run_id)) as run: + mlflow.log_params(solver.params.to_mlflow()) + mlflow.log_dict(OmegaConf.to_container(cfg), "config.yaml") + + log.info(f"Solving: {solver_name} N={cfg.N} Re={cfg.Re}") + solver.solve() + + mlflow.log_metrics(solver.metrics.to_mlflow()) + if solver.time_series: + batch = solver.time_series.to_mlflow_batch() + if batch: + mlflow.tracking.MlflowClient().log_batch(run.info.run_id, metrics=batch) + + with tempfile.TemporaryDirectory() as tmpdir: + vtk_path = Path(tmpdir) / "solution.vts" + solver.to_vtk().save(str(vtk_path)) + mlflow.log_artifact(str(vtk_path)) + + log.info(f"Done: {solver.metrics.iterations} iter, converged={solver.metrics.converged}, time={solver.metrics.wall_time_seconds:.2f}s") + return run.info.run_id + + +def generate_plots(cfg: DictConfig, run_id: str): + """Generate plots for a completed run.""" + from shared.plotting.ldc import generate_plots_for_run + + generate_plots_for_run( + run_id=run_id, + tracking_uri=cfg.mlflow.get("tracking_uri", "./mlruns"), + output_dir=Path(hydra.core.hydra_config.HydraConfig.get().runtime.output_dir), + solver_name=cfg.solver.name, + N=cfg.N, + Re=cfg.Re, + parent_run_id=os.environ.get("MLFLOW_PARENT_RUN_ID"), + upload_to_mlflow=True, + ) - threading.Thread(target=open_browser, daemon=True).start() - # Run in foreground (blocks until Ctrl+C) - try: - subprocess.run(["uv", "run", "mlflow", "ui", "--port", 
str(port)]) - except KeyboardInterrupt: - print("\nMLflow UI stopped.") +@hydra.main(config_path="conf", config_name="config", version_base=None) +def main(cfg: DictConfig) -> None: + """Main entry point.""" + log.info(f"Solver: {cfg.solver.name}, N={cfg.N}, Re={cfg.Re}") + log.info(f"MLflow experiment: {setup_mlflow(cfg)}") - if args.docs: - if not build_docs(): - sys.exit(1) + run_id = find_existing_run(cfg) if cfg.get("plot_only") else run_solver(cfg) + generate_plots(cfg, run_id) if __name__ == "__main__": diff --git a/plot_runs.py b/plot_runs.py deleted file mode 100644 index 6a6d142..0000000 --- a/plot_runs.py +++ /dev/null @@ -1,337 +0,0 @@ -""" -Plot generation script for LDC experiments. - -Finds parent runs for an experiment and generates: -1. Individual plots for all child runs -2. Comparison plots for each parent run - -Usage: - # Plot all runs in an experiment - uv run python plot_runs.py experiment_name=LDC-Validation - - # Plot runs for a specific parent run ID - uv run python plot_runs.py parent_run_id=abc123 - - # Plot using experiment config - uv run python plot_runs.py +experiment=fv_validation - - # Plot with multirun (regenerate plots for sweep) - uv run python plot_runs.py -m +experiment=fv_validation -""" - -import logging -import sys -from pathlib import Path -from typing import Optional - -import hydra -import mlflow -from dotenv import load_dotenv -from omegaconf import DictConfig - -# Add src to path -sys.path.insert(0, str(Path(__file__).parent / "src")) - -load_dotenv() -log = logging.getLogger(__name__) - - -def find_parent_runs_for_experiment( - experiment_name: str, tracking_uri: str -) -> list[dict]: - """Find all parent runs for an experiment. 
- - Parameters - ---------- - experiment_name : str - MLflow experiment name - tracking_uri : str - MLflow tracking URI - - Returns - ------- - list[dict] - List of parent run info dicts with run_id, name, Re (if tagged) - """ - mlflow.set_tracking_uri(tracking_uri) - - # Search for parent runs - runs = mlflow.search_runs( - experiment_names=[experiment_name], - filter_string="tags.sweep = 'parent'", - order_by=["start_time DESC"], - ) - - if runs.empty: - log.warning(f"No parent runs found in experiment: {experiment_name}") - return [] - - parent_runs = [] - for _, row in runs.iterrows(): - parent_info = { - "run_id": row["run_id"], - "name": row["tags.mlflow.runName"], - } - # Extract Re if tagged - if "tags.Re" in row and row["tags.Re"]: - parent_info["Re"] = int(row["tags.Re"]) - - parent_runs.append(parent_info) - - log.info(f"Found {len(parent_runs)} parent run(s) in {experiment_name}") - return parent_runs - - -def plot_all_runs_for_parent( - parent_run_id: str, - tracking_uri: str, - output_dir: Path, - upload_to_mlflow: bool = True, -) -> dict: - """Generate all plots for a parent run and its children. 
- - Parameters - ---------- - parent_run_id : str - Parent run ID - tracking_uri : str - MLflow tracking URI - output_dir : Path - Output directory for plots - upload_to_mlflow : bool - Whether to upload plots to MLflow - - Returns - ------- - dict - Summary with child_plots (list) and comparison_plot (Path or None) - """ - from shared.plotting.ldc import ( - find_sibling_runs, - generate_plots_for_run, - plot_ghia_comparison, - upload_plots_to_mlflow, - ) - - mlflow.set_tracking_uri(tracking_uri) - output_dir = Path(output_dir) - output_dir.mkdir(parents=True, exist_ok=True) - - # Get parent run info - client = mlflow.tracking.MlflowClient() - parent_run = client.get_run(parent_run_id) - parent_name = parent_run.info.run_name or parent_run_id[:8] - - log.info(f"Generating plots for parent run: {parent_name} ({parent_run_id[:8]})") - - # Find all child runs - siblings = find_sibling_runs(parent_run_id, tracking_uri) - - if not siblings: - log.warning(f" No child runs found for parent: {parent_name}") - return {"child_plots": [], "comparison_plot": None} - - log.info(f" Found {len(siblings)} child run(s)") - - # Filter to finished runs - finished_siblings = [s for s in siblings if s.get("status") == "FINISHED"] - if len(finished_siblings) < len(siblings): - log.warning( - f" Only {len(finished_siblings)}/{len(siblings)} runs finished, " - "plotting finished runs only" - ) - - # Generate individual plots for each child - child_plot_results = [] - for i, sibling in enumerate(finished_siblings, 1): - run_id = sibling["run_id"] - solver = sibling.get("solver", "unknown") - N = sibling["N"] - Re = sibling["Re"] - - log.info(f" [{i}/{len(finished_siblings)}] Plotting {solver} N={N} Re={Re}") - - child_output_dir = output_dir / parent_name / f"{solver}_N{N}_Re{Re}" - child_output_dir.mkdir(parents=True, exist_ok=True) - - try: - plot_paths = generate_plots_for_run( - run_id=run_id, - tracking_uri=tracking_uri, - output_dir=child_output_dir, - solver_name=solver, - N=N, - 
Re=Re, - parent_run_id=parent_run_id, - upload_to_mlflow=upload_to_mlflow, - ) - child_plot_results.append( - {"run_id": run_id, "plots": plot_paths, "status": "success"} - ) - log.info(f" Generated {len(plot_paths)} plot(s)") - except Exception as e: - log.error(f" Failed to generate plots for {run_id}: {e}") - child_plot_results.append({"run_id": run_id, "status": "failed", "error": str(e)}) - - # Generate comparison plot for parent - comparison_plot = None - if len(finished_siblings) >= 2: - log.info(f" Generating comparison plot for parent: {parent_name}") - comparison_dir = output_dir / parent_name / "comparison" - comparison_dir.mkdir(parents=True, exist_ok=True) - - try: - comparison_plot = plot_ghia_comparison( - finished_siblings, tracking_uri, comparison_dir - ) - - if comparison_plot and upload_to_mlflow: - upload_plots_to_mlflow( - parent_run_id, [comparison_plot], tracking_uri, "plots" - ) - log.info(f" Uploaded comparison plot to parent run") - except Exception as e: - log.error(f" Failed to generate comparison plot: {e}") - else: - log.warning( - f" Only {len(finished_siblings)} finished run(s), " - "skipping comparison plot (need at least 2)" - ) - - summary = { - "parent_run_id": parent_run_id, - "parent_name": parent_name, - "child_plots": child_plot_results, - "comparison_plot": comparison_plot, - } - - log.info( - f"Completed plotting for {parent_name}: " - f"{len([r for r in child_plot_results if r['status'] == 'success'])} child runs, " - f"comparison={'yes' if comparison_plot else 'no'}" - ) - - return summary - - -def plot_experiment( - experiment_name: str, - tracking_uri: str, - output_dir: Path, - parent_run_ids: Optional[list[str]] = None, - upload_to_mlflow: bool = True, -) -> list[dict]: - """Generate all plots for an experiment. 
- - Parameters - ---------- - experiment_name : str - MLflow experiment name - tracking_uri : str - MLflow tracking URI - output_dir : Path - Output directory for plots - parent_run_ids : list[str], optional - Specific parent run IDs to plot (if None, plots all parents in experiment) - upload_to_mlflow : bool - Whether to upload plots to MLflow - - Returns - ------- - list[dict] - List of summaries for each parent run - """ - mlflow.set_tracking_uri(tracking_uri) - output_dir = Path(output_dir) - output_dir.mkdir(parents=True, exist_ok=True) - - # Find parent runs if not provided - if parent_run_ids is None: - parent_runs = find_parent_runs_for_experiment(experiment_name, tracking_uri) - parent_run_ids = [p["run_id"] for p in parent_runs] - - if not parent_run_ids: - log.warning("No parent runs to plot") - return [] - - log.info(f"Generating plots for {len(parent_run_ids)} parent run(s)") - - # Plot each parent run - summaries = [] - for i, parent_run_id in enumerate(parent_run_ids, 1): - log.info(f"[{i}/{len(parent_run_ids)}] Processing parent run {parent_run_id[:8]}") - - summary = plot_all_runs_for_parent( - parent_run_id=parent_run_id, - tracking_uri=tracking_uri, - output_dir=output_dir, - upload_to_mlflow=upload_to_mlflow, - ) - summaries.append(summary) - - # Print summary - log.info("\n" + "=" * 80) - log.info("PLOTTING SUMMARY") - log.info("=" * 80) - for summary in summaries: - success_count = len([r for r in summary["child_plots"] if r["status"] == "success"]) - total_count = len(summary["child_plots"]) - log.info( - f"{summary['parent_name']}: {success_count}/{total_count} child runs plotted, " - f"comparison={'yes' if summary['comparison_plot'] else 'no'}" - ) - - return summaries - - -@hydra.main(config_path="conf", config_name="config", version_base=None) -def main(cfg: DictConfig) -> None: - """Hydra entry point for plot generation. - - Supports multiple modes: - 1. Plot by experiment name: experiment_name=LDC-Validation - 2. 
Plot by parent run ID: parent_run_id=abc123 - 3. Plot using experiment config: +experiment=fv_validation - """ - tracking_uri = cfg.mlflow.get("tracking_uri", "./mlruns") - - # Determine output directory - output_dir = Path(hydra.core.hydra_config.HydraConfig.get().runtime.output_dir) - - # Mode 1: Explicit parent_run_id provided - if cfg.get("parent_run_id"): - parent_run_id = cfg.parent_run_id - log.info(f"Plotting for parent run: {parent_run_id}") - - plot_all_runs_for_parent( - parent_run_id=parent_run_id, - tracking_uri=tracking_uri, - output_dir=output_dir, - upload_to_mlflow=cfg.get("upload_to_mlflow", True), - ) - return - - # Mode 2: Use experiment_name from config - experiment_name = cfg.experiment_name - project_prefix = cfg.mlflow.get("project_prefix", "") - if project_prefix and not experiment_name.startswith("/"): - full_experiment_name = f"{project_prefix}/{experiment_name}" - else: - full_experiment_name = experiment_name - - log.info(f"Plotting for experiment: {full_experiment_name}") - - # Check if specific parent_run_ids provided as list - parent_run_ids = cfg.get("parent_run_ids", None) - - plot_experiment( - experiment_name=full_experiment_name, - tracking_uri=tracking_uri, - output_dir=output_dir, - parent_run_ids=parent_run_ids, - upload_to_mlflow=cfg.get("upload_to_mlflow", True), - ) - - -if __name__ == "__main__": - main() diff --git a/pyproject.toml b/pyproject.toml index 462ee75..9b319d7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,8 +27,6 @@ dependencies = [ "pydata-sphinx-theme>=0.14.0", "numpydoc>=1.6.0", "sphinx-copybutton>=0.5.2", - "sphinx-gallery>=0.14.0", - "pillow>=10.0.0", # Required for sphinx-gallery image handling # Development dependencies "ruff>=0.1.0", "pytest>=7.4.0", diff --git a/run_solver.py b/run_solver.py deleted file mode 100644 index c944621..0000000 --- a/run_solver.py +++ /dev/null @@ -1,242 +0,0 @@ -""" -LDC Solver Runner - Hydra + MLflow integration for FV and Spectral solvers. 
- -STANDARD USAGE - Always use -m (multirun mode): - # Validation experiments - uv run python run_solver.py -m +experiment=validation/ghia - - # Benchmarking experiments - uv run python run_solver.py -m +experiment=benchmarking/multigrid_comparison - - # Quick testing - uv run python run_solver.py -m +experiment=testing/quick_test - - # Custom sweeps - uv run python run_solver.py -m solver=fv N=16,32,64 Re=100,400 - -Multirun mode provides: - - Parent runs for organizing results - - Automatic plot generation (individual + comparisons) - - Everything uploaded to MLflow - -Single runs (testing only - no plots generated): - uv run python run_solver.py solver=fv N=32 Re=100 - uv run python run_solver.py solver=spectral/sg N=15 Re=100 - -Plot generation (separate tool): - uv run python plot_runs.py +experiment=validation/ghia - uv run python plot_runs.py parent_run_id=abc123 - -MLflow modes: - local - file-based ./mlruns (default) - coolify - remote server (requires .env with credentials) -""" - -import logging -import os -import sys -from pathlib import Path - -import hydra -import mlflow -from dotenv import load_dotenv -from hydra.utils import instantiate -from mlflow.tracking import MlflowClient -from omegaconf import DictConfig, OmegaConf - -# Load .env file (for MLflow credentials) -load_dotenv() - -# Add src to path for imports -sys.path.insert(0, str(Path(__file__).parent / "src")) - -log = logging.getLogger(__name__) - - -# ============================================================================= -# Solver Factory -# ============================================================================= - - -def create_solver(cfg: DictConfig): - """Instantiate solver using Hydra's instantiate on solver subtree. - - Common parameters from root config are passed to the solver constructor. 
- """ - return instantiate( - cfg.solver, - Re=cfg.Re, - lid_velocity=cfg.lid_velocity, - Lx=cfg.Lx, - Ly=cfg.Ly, - nx=cfg.N, - ny=cfg.N, - max_iterations=cfg.max_iterations, - tolerance=cfg.tolerance, - _convert_="partial", - ) - - -# ============================================================================= -# MLflow Logging -# ============================================================================= - - -def setup_mlflow(cfg: DictConfig) -> str: - """Setup MLflow tracking and return experiment name.""" - tracking_uri = cfg.mlflow.get("tracking_uri", "./mlruns") - # If defaulting to local file backend, clear any env override - if str(cfg.mlflow.get("mode", "")).lower() in ("files", "local"): - os.environ.pop("MLFLOW_TRACKING_URI", None) - os.environ["MLFLOW_TRACKING_URI"] = str(tracking_uri) - mlflow.set_tracking_uri(tracking_uri) - - # Build experiment name with optional project prefix - experiment_name = cfg.experiment_name - project_prefix = cfg.mlflow.get("project_prefix", "") - if project_prefix and not experiment_name.startswith("/"): - experiment_name = f"{project_prefix}/{experiment_name}" - - try: - mlflow.set_experiment(experiment_name) - except Exception as exc: - # If experiment was previously deleted, fall back to a new name - fallback = f"{experiment_name}-restored" - log.warning( - "MLflow set_experiment failed for '%s' (%s); falling back to '%s'", - experiment_name, - exc, - fallback, - ) - experiment_name = fallback - mlflow.set_experiment(experiment_name) - return experiment_name - - -def log_params(solver): - """Log solver params to MLflow using dataclass to_mlflow method.""" - mlflow.log_params(solver.params.to_mlflow()) - - -def log_metrics_and_timeseries(solver, run_id: str): - """Log final metrics and timeseries to MLflow.""" - # Final metrics (using dataclass to_mlflow method) - mlflow.log_metrics(solver.metrics.to_mlflow()) - - # Timeseries (batch logging using dataclass to_mlflow_batch method) - if solver.time_series is not None: 
- batch_metrics = solver.time_series.to_mlflow_batch() - if batch_metrics: - MlflowClient().log_batch(run_id=run_id, metrics=batch_metrics) - - -def log_fields(solver): - """Save solution fields as zarr arrays to MLflow artifacts.""" - import tempfile - - import zarr - - fields = solver.fields - - with tempfile.TemporaryDirectory() as tmpdir: - # Save each field as separate zarr array - for name in ["x", "y", "u", "v", "p"]: - arr = getattr(fields, name) - zarr_path = Path(tmpdir) / f"{name}.zarr" - zarr.save(zarr_path, arr) - mlflow.log_artifact(str(zarr_path), artifact_path="fields") - - log.info("Logged fields: x, y, u, v, p (zarr)") - - -# ============================================================================= -# Main Entry Point -# ============================================================================= - - -@hydra.main(config_path="conf", config_name="config", version_base=None) -def main(cfg: DictConfig) -> None: - """Hydra entry point - runs solver with MLflow tracking.""" - - # Check if running in multirun mode - try: - import hydra.core.hydra_config - hydra_cfg = hydra.core.hydra_config.HydraConfig.get() - is_multirun = hydra_cfg.mode.name == "MULTIRUN" - - # Warn if using experiment config without multirun - # Check if sweep params are defined in config (indicates experiment config) - has_sweep_params = "sweeper" in OmegaConf.to_container(cfg.get("hydra", {}), resolve=False) - - if not is_multirun and has_sweep_params: - log.warning( - "\n" + "="*80 + "\n" - "WARNING: You're using an experiment config without multirun mode!\n" - "Experiment configs are designed for sweeps. 
Add -m flag:\n" - " uv run python run_solver.py -m +experiment=...\n" - "\nContinuing with single run (no plots will be generated)...\n" - + "="*80 - ) - except Exception: - is_multirun = False - - log.info(f"Solver: {cfg.solver.name}, N={cfg.N}, Re={cfg.Re}") - - # Setup MLflow - experiment_name = setup_mlflow(cfg) - log.info(f"MLflow experiment: {experiment_name}") - - # Create solver - solver = create_solver(cfg) - - # Build run name (Re is in parent run, not child run name) - solver_name = cfg.solver.name - if solver_name.startswith("spectral"): - run_name = f"{solver_name}_N{cfg.N + 1}" - else: - run_name = f"{solver_name}_N{cfg.N}" - - # Check for parent run (from sweep callback) - parent_run_id = os.environ.get("MLFLOW_PARENT_RUN_ID") - - # Run with MLflow tracking - # Use nested=True when parent run is active (same process in local multirun) - run_tags = {"solver": solver_name} # Always tag with solver name - nested = False - if parent_run_id: - run_tags["mlflow.parentRunId"] = parent_run_id - run_tags["parent_run_id"] = ( - parent_run_id # Also store as regular tag for querying - ) - run_tags["sweep"] = "child" - nested = True # Required when parent run is active in same process - - with mlflow.start_run(run_name=run_name, tags=run_tags, nested=nested) as run: - log_params(solver) - - # Log Hydra config as artifact - mlflow.log_dict(OmegaConf.to_container(cfg), "config.yaml") - - # Tag with HPC job info if available - job_id = os.environ.get("LSB_JOBID") - if job_id: - mlflow.set_tag("lsf.job_id", job_id) - mlflow.set_tag("lsf.job_name", os.environ.get("LSB_JOBNAME", "")) - - # Solve - log.info("Starting solver...") - solver.solve() - - # Log results - log_metrics_and_timeseries(solver, run.info.run_id) - log_fields(solver) - - log.info( - f"Done: {solver.metrics.iterations} iter, " - f"converged={solver.metrics.converged}, " - f"time={solver.metrics.wall_time_seconds:.2f}s" - ) - - -if __name__ == "__main__": - main() diff --git a/scripts/hpc_submit.py 
b/scripts/hpc_submit.py new file mode 100644 index 0000000..4061870 --- /dev/null +++ b/scripts/hpc_submit.py @@ -0,0 +1,213 @@ +#!/usr/bin/env python +""" +HPC Job Array Submission Script. + +Submits Hydra experiment sweeps as LSF job arrays on DTU HPC. + +Usage: + uv run python scripts/hpc_submit.py +experiment/validation/ghia=fv --dry-run + uv run python scripts/hpc_submit.py +experiment/validation/ghia=fv +""" + +import argparse +import itertools +import os +import subprocess +import sys +from pathlib import Path + +import yaml +from dotenv import load_dotenv + + +def create_parent_run(experiment: str, sweep_params: dict) -> str: + """Create MLflow parent run before submitting HPC jobs. + + This avoids race conditions when multiple jobs start simultaneously. + """ + import mlflow + + load_dotenv() + + # Load experiment config to get experiment_name and sweep_name + if "=" in experiment: + group, name = experiment.rsplit("=", 1) + yaml_path = Path("conf") / f"{group}/{name}.yaml" + else: + yaml_path = Path("conf") / f"{experiment}.yaml" + + with open(yaml_path) as f: + config = yaml.safe_load(f) + + # Load mlflow coolify config + mlflow_config_path = Path("conf/mlflow/coolify.yaml") + with open(mlflow_config_path) as f: + mlflow_config = yaml.safe_load(f) + + # Setup MLflow + tracking_uri = os.environ.get("MLFLOW_TRACKING_URI", mlflow_config.get("tracking_uri")) + mlflow.set_tracking_uri(tracking_uri) + + # Build experiment name + experiment_name = config.get("experiment_name", "LDC-Validation") + project_prefix = mlflow_config.get("project_prefix", "") + if project_prefix and not experiment_name.startswith("/"): + full_experiment_name = f"{project_prefix}/{experiment_name}" + else: + full_experiment_name = experiment_name + + mlflow.set_experiment(full_experiment_name) + + # Create parent run + sweep_name = config.get("sweep_name", experiment.replace("/", "_")) + with mlflow.start_run(run_name=sweep_name) as parent_run: + mlflow.set_tag("sweep", "parent") + 
mlflow.log_dict({"sweep_params": sweep_params}, "sweep_config.yaml") + + return parent_run.info.run_id + + +def parse_sweep_params(experiment_path: str) -> dict[str, list]: + """Parse sweep parameters from experiment YAML.""" + if experiment_path.startswith("+"): + experiment_path = experiment_path[1:] + + if "=" in experiment_path: + group, name = experiment_path.rsplit("=", 1) + yaml_path = Path("conf") / f"{group}/{name}.yaml" + else: + yaml_path = Path("conf") / f"{experiment_path}.yaml" + + if not yaml_path.exists(): + raise FileNotFoundError(f"Experiment config not found: {yaml_path}") + + with open(yaml_path) as f: + config = yaml.safe_load(f) + + sweep_params = config.get("hydra", {}).get("sweeper", {}).get("params", {}) + if not sweep_params: + raise ValueError(f"No sweep parameters found in {yaml_path}") + + parsed = {} + for key, value in sweep_params.items(): + if isinstance(value, str): + parsed[key] = [v.strip() for v in value.split(",")] + elif isinstance(value, list): + parsed[key] = [str(v) for v in value] + else: + parsed[key] = [str(value)] + + return parsed + + +def generate_combinations(sweep_params: dict[str, list]) -> list[dict[str, str]]: + """Generate all parameter combinations.""" + keys = list(sweep_params.keys()) + values = [sweep_params[k] for k in keys] + return [dict(zip(keys, combo)) for combo in itertools.product(*values)] + + +def get_command_for_index(experiment: str, combinations: list[dict], index: int) -> str: + """Get the command for a specific job index (1-indexed).""" + combo = combinations[index - 1] # Convert to 0-indexed + overrides = " ".join(f"{k}={v}" for k, v in combo.items()) + return f"uv run python main.py +{experiment} {overrides} mlflow=coolify" + + +def main(): + parser = argparse.ArgumentParser(description="Submit HPC job array") + parser.add_argument("experiment", help="Experiment (e.g., +experiment/validation/ghia=fv)") + parser.add_argument("--queue", "-q", default="hpc", help="LSF queue (default: hpc)") + 
parser.add_argument("--time", "-W", default="1:00", help="Wall time (default: 1:00)") + parser.add_argument("--cores", "-n", type=int, default=4, help="Cores per job (default: 4)") + parser.add_argument("--mem", default="6GB", help="Memory per core (default: 4GB)") + parser.add_argument("--dry-run", action="store_true", help="Show commands without submitting") + parser.add_argument("--test-index", type=int, help="Test: show command for specific index") + + args = parser.parse_args() + + # Parse and generate combinations + experiment = args.experiment.lstrip("+") + sweep_params = parse_sweep_params(args.experiment) + combinations = generate_combinations(sweep_params) + + print(f"Experiment: {args.experiment}") + print(f"Sweep: {sweep_params}") + print(f"Jobs: {len(combinations)}") + + # Test mode: show command for specific index + if args.test_index: + if args.test_index < 1 or args.test_index > len(combinations): + print(f"Error: index must be 1-{len(combinations)}", file=sys.stderr) + sys.exit(1) + cmd = get_command_for_index(experiment, combinations, args.test_index) + print(f"\n[{args.test_index}] {combinations[args.test_index - 1]}") + print(f"Command: {cmd}") + return + + # Show all combinations + for i, combo in enumerate(combinations, 1): + print(f" [{i}] {combo}") + + if args.dry_run: + print("\n--- Commands (dry run) ---") + for i in range(1, len(combinations) + 1): + print(f"[{i}] {get_command_for_index(experiment, combinations, i)}") + return + + # Submit job array + job_name = experiment.replace("/", "_").replace("=", "_") + n_jobs = len(combinations) + + # Build bash arrays for parameter mapping + # Replace dots with underscores for valid bash variable names + param_keys = list(combinations[0].keys()) + array_defs = [] + for key in param_keys: + vals = " ".join(c[key] for c in combinations) + bash_var = key.upper().replace(".", "_") + array_defs.append(f'{bash_var}=({vals})') + + # Build overrides: use bash var name but original key for hydra + 
overrides_parts = [] + for k in param_keys: + bash_var = k.upper().replace(".", "_") + overrides_parts.append(f'{k}=${{{bash_var}[$I]}}') + overrides = " ".join(overrides_parts) + + # Create parent run BEFORE submitting jobs to avoid race condition + parent_run_id = create_parent_run(experiment, sweep_params) + print(f"Created parent run: {parent_run_id}") + + script = f"""#!/bin/bash +mkdir -p logs +export MLFLOW_PARENT_RUN_ID={parent_run_id} +{chr(10).join(array_defs)} +I=$((LSB_JOBINDEX - 1)) +uv run python main.py +{experiment} {overrides} mlflow=coolify +""" + + bsub_cmd = [ + "bsub", + "-J", f"{job_name}[1-{n_jobs}]", + "-q", args.queue, + "-W", args.time, + "-n", str(args.cores), + "-R", f"rusage[mem={args.mem}]", + "-R", "span[hosts=1]", + "-o", "logs/%J_%I.out", + "-e", "logs/%J_%I.err", + ] + + print(f"\nSubmitting: {' '.join(bsub_cmd)}") + result = subprocess.run(bsub_cmd, input=script, text=True, capture_output=True) + + if result.returncode == 0: + print(result.stdout) + else: + print(f"Error: {result.stderr}", file=sys.stderr) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/src/shared/plotting/__init__.py b/src/shared/plotting/__init__.py deleted file mode 100644 index 148ec8a..0000000 --- a/src/shared/plotting/__init__.py +++ /dev/null @@ -1,16 +0,0 @@ -"""Plotting utilities for scientific visualizations. - -This module provides LaTeX formatting utilities for labels and parameters. -""" - -from .formatters import ( - format_scientific_latex, - format_parameter_range, - build_parameter_string, -) - -__all__ = [ - "format_scientific_latex", - "format_parameter_range", - "build_parameter_string", -] diff --git a/src/shared/plotting/formatters.py b/src/shared/plotting/formatters.py deleted file mode 100644 index 02dcc6e..0000000 --- a/src/shared/plotting/formatters.py +++ /dev/null @@ -1,137 +0,0 @@ -"""Formatting utilities for scientific plot labels and annotations. 
- -Provides LaTeX-compatible formatting for: -- Scientific notation (e.g., 1.00 × 10⁻³) -- Parameter ranges (e.g., N ∈ [10, 100]) -- Parameter strings for titles/legends -""" - -from __future__ import annotations - -from typing import Any - - -def format_scientific_latex(value: float | str, precision: int = 2) -> str: - """Format a value as LaTeX scientific notation. - - Parameters - ---------- - value : float or str - Value to format. If str and equals '?', returns '?' - precision : int, default 2 - Number of decimal places for mantissa - - Returns - ------- - str - LaTeX-formatted string in the form 'mantissa \\times 10^{exponent}' - - Examples - -------- - >>> format_scientific_latex(0.001) - '1.00 \\times 10^{-3}' - >>> format_scientific_latex(1.5e-6, precision=1) - '1.5 \\times 10^{-6}' - """ - if value == "?": - return "?" - - value_str = f"{float(value):.{precision}e}" - mantissa, exp = value_str.split("e") - exp_int = int(exp) - return rf"{mantissa} \times 10^{{{exp_int}}}" - - -def format_parameter_range( - values: list | tuple, - name: str, - latex: bool = True, -) -> str: - """Format a parameter range for display. - - Parameters - ---------- - values : list or tuple - Parameter values (should be sorted) - name : str - Parameter name (e.g., 'N', 'L', 'dt') - latex : bool, default True - Whether to use LaTeX formatting - - Returns - ------- - str - Formatted string - - Examples - -------- - >>> format_parameter_range([10, 20, 30], 'N') - '$N \\in [10, 30]$' - """ - if len(values) == 0: - return f"{name} = ?" 
- - if len(values) == 1: - val = values[0] - if latex: - return rf"${name} = {val}$" - return f"{name} = {val}" - - min_val, max_val = min(values), max(values) - - # Format based on type - if isinstance(min_val, int) and isinstance(max_val, int): - range_str = f"[{min_val}, {max_val}]" - else: - range_str = f"[{min_val:.1f}, {max_val:.1f}]" - - if latex: - return rf"${name} \in {range_str}$" - return f"{name} ∈ {range_str}" - - -def build_parameter_string( - params: dict[str, Any], - separator: str = ", ", - latex: bool = True, -) -> str: - """Build a parameter string from a dictionary. - - Parameters - ---------- - params : dict - Dictionary of parameter names and values - separator : str, default ', ' - Separator between parameters - latex : bool, default True - Whether to use LaTeX formatting (wraps each param in $ $) - - Returns - ------- - str - Formatted parameter string - - Examples - -------- - >>> build_parameter_string({'N': 100, 'dt': 0.001}) - '$N = 100$, $dt = 1.00 \\times 10^{-3}$' - """ - parts = [] - for name, value in params.items(): - if isinstance(value, (list, tuple)): - parts.append(format_parameter_range(value, name, latex=latex)) - else: - # Handle special formatting for timestep-like parameters - if "dt" in name.lower() or "delta" in name.lower(): - value_str = format_scientific_latex(value) - if latex: - parts.append(rf"${name} = {value_str}$") - else: - parts.append(f"{name} = {value_str}") - else: - if latex: - parts.append(rf"${name} = {value}$") - else: - parts.append(f"{name} = {value}") - - return separator.join(parts) diff --git a/src/shared/plotting/ldc/__init__.py b/src/shared/plotting/ldc/__init__.py index 8cdb7b2..92132a5 100644 --- a/src/shared/plotting/ldc/__init__.py +++ b/src/shared/plotting/ldc/__init__.py @@ -1,66 +1,36 @@ """ -LDC Plotting Package - Modular structure for lid-driven cavity plots. +LDC Plotting Package. 
-This package provides: -- Individual run plots (fields, streamlines, vorticity, centerlines, convergence) -- Comparison plots (Ghia benchmark comparisons) -- MLflow integration for artifact management -- High-level orchestration functions - -Main API --------- -generate_plots_for_run : function - Generate all plots for a single completed run -generate_comparison_plots_for_sweep : function - Generate comparison plots for sweep results -main : function - Hydra entry point for standalone CLI usage +Provides plot generation for lid-driven cavity solver results. """ from .convergence import plot_convergence -from .data_loading import ( - fields_to_dataframe, - load_fields_from_zarr, - restructure_fields, -) +from .data_loading import fields_to_dataframe, load_fields_from_zarr, restructure_fields from .fields import plot_fields, plot_streamlines, plot_vorticity from .mlflow_utils import ( download_mlflow_artifacts, - find_matching_run, find_sibling_runs, load_timeseries_from_mlflow, upload_plots_to_mlflow, ) -from .orchestrator import ( - generate_comparison_plots_for_sweep, - generate_plots_for_run, - main, -) +from .orchestrator import generate_comparison_plots_for_sweep, generate_plots_for_run +from .validation import plot_ghia_comparison # Import style module to trigger sns.set_theme() on package import from . 
import style # noqa: F401 -from .validation import plot_centerlines, plot_ghia_comparison - __all__ = [ - # High-level API (most commonly used) "generate_plots_for_run", "generate_comparison_plots_for_sweep", - "main", - # Individual plot functions "plot_fields", "plot_streamlines", "plot_vorticity", - "plot_centerlines", "plot_convergence", "plot_ghia_comparison", - # MLflow utilities - "find_matching_run", "find_sibling_runs", "download_mlflow_artifacts", "load_timeseries_from_mlflow", "upload_plots_to_mlflow", - # Data utilities "load_fields_from_zarr", "restructure_fields", "fields_to_dataframe", diff --git a/src/shared/plotting/ldc/data_loading.py b/src/shared/plotting/ldc/data_loading.py index d9af990..1d6bea9 100644 --- a/src/shared/plotting/ldc/data_loading.py +++ b/src/shared/plotting/ldc/data_loading.py @@ -1,30 +1,72 @@ """ Data Loading and Transformation for LDC Plotting. -Handles loading fields from zarr, restructuring data, -and converting to DataFrame format. +Handles loading solution from VTS files and converting to formats +needed by plotting functions. """ from pathlib import Path import numpy as np import pandas as pd -import zarr +import pyvista as pv + + +def load_solution(artifact_dir: Path) -> pv.StructuredGrid: + """Load solution as PyVista StructuredGrid. + + The VTS file contains all fields (u, v, p, vorticity, velocity_magnitude) + and metadata (Re, N, solver name). 
+ + Parameters + ---------- + artifact_dir : Path + Directory containing solution.vts + + Returns + ------- + pv.StructuredGrid + Solution grid with all fields + """ + vtk_path = artifact_dir / "solution.vts" + + if not vtk_path.exists(): + # Fallback to legacy location + vtk_path = artifact_dir / "fields" / "solution.vts" + + if not vtk_path.exists(): + raise FileNotFoundError(f"VTK file not found in {artifact_dir}") + + return pv.read(vtk_path) def load_fields_from_zarr(artifact_dir: Path) -> dict: - """Load solution fields from zarr artifacts.""" - fields_dir = artifact_dir / "fields" - if not fields_dir.exists(): - raise FileNotFoundError(f"Fields directory not found: {fields_dir}") - - fields = {} - for name in ["x", "y", "u", "v", "p"]: - zarr_path = fields_dir / f"{name}.zarr" - if zarr_path.exists(): - fields[name] = zarr.load(zarr_path) - else: - raise FileNotFoundError(f"Field not found: {zarr_path}") + """Load solution fields from VTK artifact. + + Note: Legacy function name kept for backwards compatibility. + Use load_solution() for new code. 
+ """ + grid = load_solution(artifact_dir) + + # Extract coordinates from structured grid + points = grid.points + x = points[:, 0] + y = points[:, 1] + + # Extract fields + fields = { + "x": x, + "y": y, + "u": grid["u"], + "v": grid["v"], + "p": grid["pressure"], + } + + # Include derived fields if available + if "vorticity" in grid.array_names: + fields["vorticity"] = grid["vorticity"] + if "velocity_magnitude" in grid.array_names: + fields["velocity_magnitude"] = grid["velocity_magnitude"] return fields diff --git a/src/shared/plotting/ldc/fields.py b/src/shared/plotting/ldc/fields.py index b63c07d..51456fe 100644 --- a/src/shared/plotting/ldc/fields.py +++ b/src/shared/plotting/ldc/fields.py @@ -11,6 +11,7 @@ import matplotlib.pyplot as plt import numpy as np import pandas as pd +import pyvista as pv from scipy.interpolate import RectBivariateSpline log = logging.getLogger(__name__) @@ -107,8 +108,8 @@ def plot_streamlines( # Smooth contours with coolwarm colormap cf = ax.contourf(X_fine, Y_fine, vel_mag, levels=40, cmap="coolwarm") - # Semi-transparent white streamlines to show velocity magnitude through them - ax.streamplot( + # Semi-transparent white streamlines (RGBA for transparency) + stream = ax.streamplot( x_fine, y_fine, U_interp, @@ -117,8 +118,7 @@ def plot_streamlines( linewidth=1.5, arrowsize=1.3, arrowstyle="->", - color="white", - alpha=0.7, + color=(1, 1, 1, 0.7), # RGBA white with 70% opacity zorder=2, ) @@ -202,3 +202,171 @@ def plot_vorticity( plt.close(fig) return output_path + + +def plot_streamlines_pyvista( + fields_df: pd.DataFrame, Re: float, solver: str, N: int, output_dir: Path +) -> Path: + """Generate beautiful streamline plot using PyVista with ParaView theme. + + Creates a visually striking visualization with velocity magnitude field + and streamlines, with transparent background for easy compositing. 
+ """ + x_unique = np.sort(fields_df["x"].unique()) + y_unique = np.sort(fields_df["y"].unique()) + nx, ny = len(x_unique), len(y_unique) + + sorted_df = fields_df.sort_values(["y", "x"]) + U = sorted_df["u"].values.reshape(ny, nx) + V = sorted_df["v"].values.reshape(ny, nx) + + # Interpolate to finer grid for smoother visualization + n_fine = 200 + x_fine = np.linspace(x_unique[0], x_unique[-1], n_fine) + y_fine = np.linspace(y_unique[0], y_unique[-1], n_fine) + + U_interp = RectBivariateSpline(y_unique, x_unique, U)(y_fine, x_fine) + V_interp = RectBivariateSpline(y_unique, x_unique, V)(y_fine, x_fine) + + # Create 3D grid (z=0 plane) + X, Y = np.meshgrid(x_fine, y_fine) + Z = np.zeros_like(X) + + # Create structured grid + grid = pv.StructuredGrid(X, Y, Z) + + # Add velocity as vector field (3D with w=0) + vel_mag = np.sqrt(U_interp**2 + V_interp**2) + vectors = np.zeros((n_fine * n_fine, 3)) + vectors[:, 0] = U_interp.ravel() + vectors[:, 1] = V_interp.ravel() + vectors[:, 2] = 0.0 + + grid["velocity"] = vectors + grid["velocity_magnitude"] = vel_mag.ravel() + grid.set_active_vectors("velocity") + + # Create seed points for streamlines + n_seeds = 15 + seed_points_left = np.column_stack([ + np.full(n_seeds, x_fine[3]), + np.linspace(y_fine[3], y_fine[-4], n_seeds), + np.zeros(n_seeds) + ]) + seed_points_bottom = np.column_stack([ + np.linspace(x_fine[3], x_fine[-4], n_seeds), + np.full(n_seeds, y_fine[3]), + np.zeros(n_seeds) + ]) + seed_points = np.vstack([seed_points_left, seed_points_bottom]) + seeds = pv.PolyData(seed_points) + + # Generate streamlines + streamlines = grid.streamlines_from_source( + seeds, + vectors="velocity", + max_steps=2000, + integration_direction="both", + ) + + # Set up PyVista plotter with ParaView theme + pv.set_plot_theme("paraview") + plotter = pv.Plotter(off_screen=True, window_size=[1400, 1200]) + + # Add velocity magnitude field as background surface + surface = grid.extract_surface() + plotter.add_mesh( + surface, + 
scalars="velocity_magnitude", + cmap="turbo", + show_edges=False, + lighting=False, + scalar_bar_args={ + "title": "Velocity Magnitude |u|", + "vertical": True, + "position_x": 0.85, + "position_y": 0.2, + "width": 0.08, + "height": 0.6, + "title_font_size": 16, + "label_font_size": 14, + "fmt": "%.3f", + "n_labels": 5, + }, + ) + + # Add streamlines as white tubes on top + if streamlines.n_points > 0: + tubes = streamlines.tube(radius=0.004, n_sides=8) + plotter.add_mesh( + tubes, + color="white", + opacity=0.85, + smooth_shading=True, + specular=0.3, + ) + + # Set up camera for clean 2D view + plotter.camera_position = "xy" + plotter.camera.zoom(1.15) + + # Enable anti-aliasing for smooth edges + plotter.enable_anti_aliasing("ssaa") + + # Save with transparent background + output_path = output_dir / "streamlines_3d.png" + plotter.screenshot(output_path, transparent_background=True, scale=2) + plotter.close() + + return output_path + + +def export_fields_to_vtk( + fields_df: pd.DataFrame, Re: float, solver: str, N: int, output_dir: Path +) -> Path: + """Export solution fields to VTK format for ParaView visualization. + + Creates a structured grid VTK file with pressure, velocity components, + velocity magnitude, and vorticity fields. 
+ """ + x_unique = np.sort(fields_df["x"].unique()) + y_unique = np.sort(fields_df["y"].unique()) + nx, ny = len(x_unique), len(y_unique) + + sorted_df = fields_df.sort_values(["y", "x"]) + P = sorted_df["p"].values.reshape(ny, nx) + U = sorted_df["u"].values.reshape(ny, nx) + V = sorted_df["v"].values.reshape(ny, nx) + + # Create 3D grid (z=0 plane) + X, Y = np.meshgrid(x_unique, y_unique) + Z = np.zeros_like(X) + + # Create structured grid + grid = pv.StructuredGrid(X, Y, Z) + + # Add scalar fields + grid["pressure"] = P.ravel() + grid["u"] = U.ravel() + grid["v"] = V.ravel() + grid["velocity_magnitude"] = np.sqrt(U**2 + V**2).ravel() + + # Compute and add vorticity + U_spline = RectBivariateSpline(y_unique, x_unique, U) + V_spline = RectBivariateSpline(y_unique, x_unique, V) + dvdx = V_spline(y_unique, x_unique, dx=1) + dudy = U_spline(y_unique, x_unique, dy=1) + vorticity = dvdx - dudy + grid["vorticity"] = vorticity.ravel() + + # Add velocity vector field + vectors = np.zeros((nx * ny, 3)) + vectors[:, 0] = U.ravel() + vectors[:, 1] = V.ravel() + grid["velocity"] = vectors + + # Save as VTK + output_path = output_dir / "solution.vtk" + grid.save(output_path) + + return output_path diff --git a/src/shared/plotting/ldc/mlflow_utils.py b/src/shared/plotting/ldc/mlflow_utils.py index 24bee90..c05547e 100644 --- a/src/shared/plotting/ldc/mlflow_utils.py +++ b/src/shared/plotting/ldc/mlflow_utils.py @@ -1,106 +1,27 @@ """ -MLflow Interaction Utilities for LDC Plotting. +MLflow utilities for LDC plotting. -Handles finding runs, downloading artifacts, loading timeseries data, -and uploading plots to MLflow. +Handles downloading artifacts, loading timeseries, and uploading plots. 
""" import logging import tempfile from pathlib import Path -from typing import Optional import mlflow import pandas as pd -from omegaconf import DictConfig log = logging.getLogger(__name__) -def find_matching_run(cfg: DictConfig, tracking_uri: str) -> tuple[str, Optional[str]]: - """Find MLflow run matching the config parameters. - - Returns - ------- - tuple[str, Optional[str]] - (run_id, parent_run_id) - parent_run_id is None if not a sweep child - """ - mlflow.set_tracking_uri(tracking_uri) - client = mlflow.tracking.MlflowClient() - - # Get experiment - experiment_name = cfg.experiment_name - project_prefix = cfg.mlflow.get("project_prefix", "") - if project_prefix and not experiment_name.startswith("/"): - experiment_name = f"{project_prefix}/{experiment_name}" - - experiment = client.get_experiment_by_name(experiment_name) - if experiment is None: - raise ValueError(f"Experiment not found: {experiment_name}") - - # Build filter string for matching runs - solver_name = cfg.solver.name - N = cfg.N - Re = cfg.Re - - filter_parts = [ - f"params.Re = '{Re}'", - f"params.nx = '{N}'", - f"params.ny = '{N}'", - "attributes.status = 'FINISHED'", - ] - - # Add solver-specific filter - if solver_name == "spectral": - filter_parts.append(f"params.basis_type = '{cfg.solver.basis_type}'") - elif solver_name == "fv": - filter_parts.append( - f"params.convection_scheme = '{cfg.solver.convection_scheme}'" - ) - - filter_string = " AND ".join(filter_parts) - - log.info(f"Searching in experiment: {experiment_name}") - log.info(f"Filter: solver={solver_name}, N={N}, Re={Re}") - - # Search for runs - runs = client.search_runs( - experiment_ids=[experiment.experiment_id], - filter_string=filter_string, - order_by=["attributes.start_time DESC"], - max_results=10, - ) - - if not runs: - raise ValueError( - f"No matching runs found for solver={solver_name}, N={N}, Re={Re}\n" - f"Filter used: {filter_string}" - ) - - # Return most recent matching run - run = runs[0] - parent_run_id = 
run.data.tags.get("parent_run_id") - - log.info(f"Found run: {run.info.run_name} (id: {run.info.run_id[:8]}...)") - if parent_run_id: - log.info(f" Parent run: {parent_run_id[:8]}...") - - return run.info.run_id, parent_run_id - - def find_sibling_runs(parent_run_id: str, tracking_uri: str) -> list[dict]: - """Find all child runs of a parent (siblings in a sweep). - - Returns list of dicts with run info for comparison plotting. - """ + """Find all child runs of a parent (siblings in a sweep).""" mlflow.set_tracking_uri(tracking_uri) client = mlflow.tracking.MlflowClient() - # Get parent run to find experiment parent_run = client.get_run(parent_run_id) experiment_id = parent_run.info.experiment_id - # Find all FINISHED children of this parent filter_string = ( f"tags.parent_run_id = '{parent_run_id}' AND attributes.status = 'FINISHED'" ) @@ -108,36 +29,25 @@ def find_sibling_runs(parent_run_id: str, tracking_uri: str) -> list[dict]: runs = client.search_runs( experiment_ids=[experiment_id], filter_string=filter_string, - order_by=["params.nx ASC"], # Sort by N for nice legend order + order_by=["params.nx ASC"], max_results=50, ) siblings = [] for run in runs: run_name = run.info.run_name or "" - - # Extract solver name from run_name (format: {solver}_N{n} or {solver}_N{n}_Re{re}) - # Examples: "fv_N32", "spectral_N33", "spectral_fsg_N16" - if "_N" in run_name: - solver_name = run_name.rsplit("_N", 1)[ - 0 - ] # rsplit to handle underscores in solver name - else: - solver_name = "unknown" - - siblings.append( - { - "run_id": run.info.run_id, - "run_name": run_name, - "N": int(run.data.params.get("nx", 0)), - "Re": float(run.data.params.get("Re", 0)), - "solver": solver_name, - "status": run.info.status, - } - ) - - finished = sum(1 for s in siblings if s["status"] == "FINISHED") - log.info(f"Found {len(siblings)} sibling runs in sweep ({finished} finished)") + solver_name = run_name.rsplit("_N", 1)[0] if "_N" in run_name else "unknown" + + siblings.append({ + 
"run_id": run.info.run_id, + "run_name": run_name, + "N": int(run.data.params.get("nx", 0)), + "Re": float(run.data.params.get("Re", 0)), + "solver": solver_name, + "status": run.info.status, + }) + + log.info(f"Found {len(siblings)} sibling runs in sweep") return siblings @@ -160,15 +70,9 @@ def load_timeseries_from_mlflow(run_id: str, tracking_uri: str) -> pd.DataFrame: mlflow.set_tracking_uri(tracking_uri) client = mlflow.tracking.MlflowClient() - # Get metric history metrics_to_fetch = [ - "residual", - "u_residual", - "v_residual", - "continuity_residual", - "energy", - "enstrophy", - "palinstrophy", + "residual", "u_residual", "v_residual", "continuity_residual", + "energy", "enstrophy", "palinstrophy", ] data = {} @@ -176,16 +80,13 @@ def load_timeseries_from_mlflow(run_id: str, tracking_uri: str) -> pd.DataFrame: try: history = client.get_metric_history(run_id, metric_name) if history: - data[metric_name] = [ - m.value for m in sorted(history, key=lambda x: x.step) - ] + data[metric_name] = [m.value for m in sorted(history, key=lambda x: x.step)] except Exception: - pass # Metric might not exist + pass if not data: return pd.DataFrame() - # Create DataFrame with iteration index max_len = max(len(v) for v in data.values()) df = pd.DataFrame({k: v + [None] * (max_len - len(v)) for k, v in data.items()}) df["iteration"] = range(len(df)) @@ -193,24 +94,19 @@ def load_timeseries_from_mlflow(run_id: str, tracking_uri: str) -> pd.DataFrame: return df -def upload_plots_to_mlflow( - run_id: str, plot_paths: list, tracking_uri: str, artifact_subdir: str = "plots" -): +def upload_plots_to_mlflow(run_id: str, plot_paths: list, tracking_uri: str): """Upload generated plots to MLflow run as artifacts.""" mlflow.set_tracking_uri(tracking_uri) valid_paths = [p for p in plot_paths if p and p.exists()] - # Check if we're already in an active run active_run = mlflow.active_run() if active_run and active_run.info.run_id == run_id: - # Already in the correct run, just log 
artifacts for path in valid_paths: - mlflow.log_artifact(str(path), artifact_path=artifact_subdir) - log.info(f"Uploaded: {artifact_subdir}/{path.name}") + mlflow.log_artifact(str(path)) + log.info(f"Uploaded: {path.name}") else: - # Start/resume run to upload artifacts with mlflow.start_run(run_id=run_id, nested=True): for path in valid_paths: - mlflow.log_artifact(str(path), artifact_path=artifact_subdir) - log.info(f"Uploaded: {artifact_subdir}/{path.name}") + mlflow.log_artifact(str(path)) + log.info(f"Uploaded: {path.name}") diff --git a/src/shared/plotting/ldc/orchestrator.py b/src/shared/plotting/ldc/orchestrator.py index 0b659d5..8bc6b04 100644 --- a/src/shared/plotting/ldc/orchestrator.py +++ b/src/shared/plotting/ldc/orchestrator.py @@ -1,32 +1,31 @@ """ -High-level Plot Generation Orchestration for LDC. +Plot Generation for LDC runs. -Coordinates the generation of all plots for individual runs and sweep comparisons. -Provides both a direct API for programmatic use and a Hydra entry point for CLI. +Provides functions to generate plots for individual runs and sweep comparisons. 
""" import logging from pathlib import Path from typing import Optional -import hydra import mlflow -from dotenv import load_dotenv -from omegaconf import DictConfig from .convergence import plot_convergence from .data_loading import fields_to_dataframe, load_fields_from_zarr -from .fields import plot_fields, plot_streamlines, plot_vorticity +from .fields import ( + plot_fields, + plot_streamlines, + plot_streamlines_pyvista, + plot_vorticity, +) from .mlflow_utils import ( download_mlflow_artifacts, - find_matching_run, find_sibling_runs, load_timeseries_from_mlflow, upload_plots_to_mlflow, ) -from .validation import plot_centerlines, plot_ghia_comparison +from .validation import plot_ghia_comparison -load_dotenv() log = logging.getLogger(__name__) @@ -40,34 +39,7 @@ def generate_plots_for_run( parent_run_id: Optional[str] = None, upload_to_mlflow: bool = True, ) -> list[Path]: - """Generate all plots for a completed run. - - Called directly from run_solver.py after solver completes. - - Parameters - ---------- - run_id : str - MLflow run ID - tracking_uri : str - MLflow tracking URI - output_dir : Path - Directory to save plots - solver_name : str - Solver name (e.g., "spectral", "spectral_fsg", "fv") - N : int - Grid size parameter - Re : float - Reynolds number - parent_run_id : str, optional - Parent run ID if this is part of a sweep - upload_to_mlflow : bool - Whether to upload plots to MLflow - - Returns - ------- - list[Path] - List of generated plot paths - """ + """Generate all plots for a completed run.""" output_dir = Path(output_dir) output_dir.mkdir(parents=True, exist_ok=True) @@ -83,19 +55,11 @@ def generate_plots_for_run( plot_paths = [] plot_paths.append(plot_fields(fields_df, Re, solver_name, N, output_dir)) plot_paths.append(plot_streamlines(fields_df, Re, solver_name, N, output_dir)) + plot_paths.append(plot_streamlines_pyvista(fields_df, Re, solver_name, N, output_dir)) plot_paths.append(plot_vorticity(fields_df, Re, solver_name, N, 
output_dir)) - plot_paths.append(plot_centerlines(fields_df, Re, solver_name, N, output_dir)) plot_paths.append(plot_convergence(timeseries_df, Re, solver_name, N, output_dir)) ghia_path = plot_ghia_comparison( - [ - { - "run_id": run_id, - "N": N, - "Re": Re, - "solver": solver_name, - "status": "FINISHED", - } - ], + [{"run_id": run_id, "N": N, "Re": Re, "solver": solver_name, "status": "FINISHED"}], tracking_uri, output_dir, ) @@ -105,7 +69,6 @@ def generate_plots_for_run( plot_paths = [p for p in plot_paths if p is not None] log.info(f"Generated {len(plot_paths)} plots for run") - # Upload to individual run if upload_to_mlflow: upload_plots_to_mlflow(run_id, plot_paths, tracking_uri) @@ -119,26 +82,7 @@ def generate_comparison_plots_for_sweep( output_dir: Path, upload_to_mlflow: bool = True, ) -> dict[str, Path]: - """Generate comparison plots for all parent runs after sweep completes. - - Called from MLflow callback's on_multirun_end after all jobs finish. - - Parameters - ---------- - parent_run_ids : list[str] - List of parent run IDs (one per Re value) - tracking_uri : str - MLflow tracking URI - output_dir : Path - Base output directory for comparison plots - upload_to_mlflow : bool - Whether to upload plots to MLflow - - Returns - ------- - dict[str, Path] - Mapping of parent_run_id to comparison plot path - """ + """Generate comparison plots for all parent runs after sweep completes.""" mlflow.set_tracking_uri(tracking_uri) output_dir = Path(output_dir) output_dir.mkdir(parents=True, exist_ok=True) @@ -148,25 +92,21 @@ def generate_comparison_plots_for_sweep( for parent_run_id in parent_run_ids: log.info(f"Generating comparison plot for parent run: {parent_run_id[:8]}...") - # Find all children of this parent siblings = find_sibling_runs(parent_run_id, tracking_uri) if len(siblings) < 2: log.warning(f" Only {len(siblings)} child run(s), skipping comparison") continue - # Check all siblings are finished unfinished = [s for s in siblings if s.get("status") 
!= "FINISHED"] if unfinished: log.warning(f" {len(unfinished)} run(s) still not finished, skipping") continue - # Get parent run info for naming client = mlflow.tracking.MlflowClient() parent_run = client.get_run(parent_run_id) parent_name = parent_run.info.run_name or parent_run_id[:8] - # Create comparison plot comparison_dir = output_dir / parent_name comparison_dir.mkdir(exist_ok=True) @@ -177,81 +117,8 @@ def generate_comparison_plots_for_sweep( log.info(f" Created comparison plot: {comparison_path.name}") if upload_to_mlflow: - upload_plots_to_mlflow( - parent_run_id, [comparison_path], tracking_uri, "plots" - ) + upload_plots_to_mlflow(parent_run_id, [comparison_path], tracking_uri) log.info(" Uploaded to parent run") log.info(f"Generated {len(results)} comparison plot(s)") return results - - -@hydra.main(config_path="../../conf", config_name="config", version_base=None) -def main(cfg: DictConfig) -> None: - """Hydra entry point - finds matching run and generates plots.""" - - tracking_uri = cfg.mlflow.get("tracking_uri", "./mlruns") - solver_name = cfg.solver.name - N = cfg.N - Re = cfg.Re - - # Find matching MLflow run - run_id, parent_run_id = find_matching_run(cfg, tracking_uri) - - # Setup output directory - output_dir = Path(hydra.core.hydra_config.HydraConfig.get().runtime.output_dir) - output_dir.mkdir(parents=True, exist_ok=True) - - # Download artifacts for this run - artifact_dir = download_mlflow_artifacts(run_id, tracking_uri) - fields = load_fields_from_zarr(artifact_dir) - fields_df = fields_to_dataframe(fields) - timeseries_df = load_timeseries_from_mlflow(run_id, tracking_uri) - - log.info(f"Generating plots for {solver_name} N={N} Re={Re}") - - # ========================================================================== - # Individual run plots - # ========================================================================== - plot_paths = [] - - plot_paths.append(plot_fields(fields_df, Re, solver_name, N, output_dir)) - 
plot_paths.append(plot_streamlines(fields_df, Re, solver_name, N, output_dir)) - plot_paths.append(plot_vorticity(fields_df, Re, solver_name, N, output_dir)) - plot_paths.append(plot_centerlines(fields_df, Re, solver_name, N, output_dir)) - plot_paths.append(plot_convergence(timeseries_df, Re, solver_name, N, output_dir)) - - plot_paths = [p for p in plot_paths if p is not None] - log.info(f"Generated {len(plot_paths)} plots for run") - - # Upload to individual run - if cfg.get("upload_to_mlflow", True): - upload_plots_to_mlflow(run_id, plot_paths, tracking_uri) - - # ========================================================================== - # Comparison plot for parent (if this is part of a sweep) - # ========================================================================== - if parent_run_id: - log.info("This run is part of a sweep - generating comparison plot for parent") - - siblings = find_sibling_runs(parent_run_id, tracking_uri) - - if len(siblings) > 1: - comparison_dir = output_dir / "comparison" - comparison_dir.mkdir(exist_ok=True) - - comparison_path = plot_ghia_comparison( - siblings, tracking_uri, comparison_dir - ) - - if comparison_path and cfg.get("upload_to_mlflow", True): - upload_plots_to_mlflow( - parent_run_id, [comparison_path], tracking_uri, "plots" - ) - log.info("Comparison plot uploaded to parent run") - - log.info("Done!") - - -if __name__ == "__main__": - main() diff --git a/src/shared/plotting/ldc/validation.py b/src/shared/plotting/ldc/validation.py index 2c07061..3621be4 100644 --- a/src/shared/plotting/ldc/validation.py +++ b/src/shared/plotting/ldc/validation.py @@ -1,7 +1,7 @@ """ Validation and Comparison Plots for LDC. -Generates centerline velocity profiles and Ghia benchmark comparisons. +Generates Ghia benchmark comparisons. 
""" import logging @@ -11,9 +11,8 @@ import numpy as np import pandas as pd import seaborn as sns -from scipy.interpolate import RectBivariateSpline -from spectral import spectral_interpolate +from solvers.spectral.basis import spectral_interpolate from utilities.config.paths import get_repo_root from .data_loading import load_fields_from_zarr, restructure_fields @@ -22,61 +21,6 @@ log = logging.getLogger(__name__) -def plot_centerlines( - fields_df: pd.DataFrame, Re: float, solver: str, N: int, output_dir: Path -) -> Path: - """Plot velocity profiles along centerlines.""" - x_unique = np.sort(fields_df["x"].unique()) - y_unique = np.sort(fields_df["y"].unique()) - nx, ny = len(x_unique), len(y_unique) - - sorted_df = fields_df.sort_values(["y", "x"]) - U = sorted_df["u"].values.reshape(ny, nx) - V = sorted_df["v"].values.reshape(ny, nx) - - U_spline = RectBivariateSpline(y_unique, x_unique, U) - V_spline = RectBivariateSpline(y_unique, x_unique, V) - - n_points = 200 - y_line = np.linspace(y_unique[0], y_unique[-1], n_points) - x_line = np.linspace(x_unique[0], x_unique[-1], n_points) - - x_center = (x_unique[0] + x_unique[-1]) / 2 - y_center = (y_unique[0] + y_unique[-1]) / 2 - - u_vertical = U_spline(y_line, x_center).ravel() - v_horizontal = V_spline(y_center, x_line).ravel() - - fig, axes = plt.subplots(1, 2, figsize=(12, 5)) - - axes[0].plot(u_vertical, y_line, linewidth=2) - axes[0].set_xlabel(r"$u$", fontsize=11) - axes[0].set_ylabel(r"$y$", fontsize=11) - axes[0].set_title(r"\textbf{$u$-velocity along vertical centerline}", fontsize=12) - axes[0].axvline(x=0, color="gray", linestyle="--", alpha=0.5, linewidth=1) - - axes[1].plot(x_line, v_horizontal, linewidth=2) - axes[1].set_xlabel(r"$x$", fontsize=11) - axes[1].set_ylabel(r"$v$", fontsize=11) - axes[1].set_title(r"\textbf{$v$-velocity along horizontal centerline}", fontsize=12) - axes[1].axhline(y=0, color="gray", linestyle="--", alpha=0.5, linewidth=1) - - solver_label = solver.upper().replace("_", 
r"\_") - fig.suptitle( - rf"\textbf{{Centerline Profiles}} --- {solver_label}, $N={N}$, $\mathrm{{Re}}={Re:.0f}$", - fontsize=13, - y=0.98, - ) - - plt.tight_layout() - - output_path = output_dir / "centerlines.pdf" - fig.savefig(output_path, dpi=300, bbox_inches="tight") - plt.close(fig) - - return output_path - - def _build_method_label(sibling: dict) -> str: """Build a unified method label from solver name. diff --git a/src/shared/plotting/plotting.py b/src/shared/plotting/plotting.py deleted file mode 100644 index 0f27262..0000000 --- a/src/shared/plotting/plotting.py +++ /dev/null @@ -1,42 +0,0 @@ -"""Plotting utilities for spectral method visualizations.""" - -from __future__ import annotations - -from pathlib import Path - - -def get_repo_root() -> Path: - """Get repository root directory, handling both local and sphinx-gallery execution. - - Returns the repository root by detecting the presence of pyproject.toml. - Works in both local execution (via main.py) and sphinx-gallery contexts. 
- - Returns - ------- - Path - Absolute path to the repository root - - """ - try: - # Try to get caller's __file__ if available (local execution) - import inspect - - frame = inspect.currentframe().f_back - caller_file = frame.f_globals.get("__file__") - if caller_file: - current = Path(caller_file).resolve().parent - else: - # __file__ not available (sphinx-gallery) - current = Path.cwd() - except (AttributeError, KeyError): - # Fallback to cwd - current = Path.cwd() - - # Walk up until we find pyproject.toml (marks repo root) - for parent in [current] + list(current.parents): - if (parent / "pyproject.toml").exists(): - return parent - - # Fallback: assume 2 levels up from script directory - # Works for Exercises/exercise_X/script.py structure - return current.parent.parent if caller_file else current.parent.parent diff --git a/src/solvers/base.py b/src/solvers/base.py index a0d74f6..ee2d832 100644 --- a/src/solvers/base.py +++ b/src/solvers/base.py @@ -4,8 +4,10 @@ import logging import os import time +from pathlib import Path import numpy as np +import pyvista as pv import mlflow from dataclasses import asdict @@ -341,25 +343,81 @@ def _compute_enstrophy(self) -> float: return 0.5 * float(np.sum(omega * omega) * dA) def _compute_palinstrophy(self) -> float: - """Compute palinstrophy: P = ∫ (∂ω/∂x)² + (∂ω/∂y)² dA.""" + """Compute palinstrophy: P = 0.5 * ∫ ||∇ω||² dA.""" omega = self._compute_vorticity() domega_dx, domega_dy = self._compute_gradient(omega) dA = self._get_cell_area() - return float(np.sum(domega_dx**2 + domega_dy**2) * dA) + return 0.5 * float(np.sum(domega_dx**2 + domega_dy**2) * dA) + + def _compute_gradient( + self, field: np.ndarray, bc_walls: float = 0.0, bc_lid: float = None + ) -> tuple: + """Compute gradient of scalar field using finite differences. 
+ + Uses proper ghost cell values for Dirichlet BCs: + ghost = 2 * wall_value - interior_value + + Parameters + ---------- + field : np.ndarray + Scalar field values at cell centers + bc_walls : float + Dirichlet BC value at walls (bottom, left, right). Default 0. + bc_lid : float or None + Dirichlet BC value at top lid. If None, uses bc_walls. + """ + if bc_lid is None: + bc_lid = bc_walls - def _compute_gradient(self, field: np.ndarray) -> tuple: - """Compute gradient of scalar field using finite differences.""" dx, dy = self.dx_min, self.dy_min shape = getattr(self, "shape_full", (self.params.nx, self.params.ny)) - field_2d = np.pad(field.reshape(shape), 1, mode="edge") - df_dx = (field_2d[1:-1, 2:] - field_2d[1:-1, :-2]) / (2 * dx) - df_dy = (field_2d[2:, 1:-1] - field_2d[:-2, 1:-1]) / (2 * dy) + field_2d = field.reshape(shape) # shape = (ny, nx) + ny, nx = shape + + # Create padded array with proper ghost cell values for Dirichlet BCs + # Ghost value = 2 * BC_value - interior_value + field_padded = np.zeros((ny + 2, nx + 2), dtype=field.dtype) + field_padded[1:-1, 1:-1] = field_2d + + # Bottom boundary (j=0 in original, row 0 in padded is ghost) + field_padded[0, 1:-1] = 2 * bc_walls - field_2d[0, :] + + # Top boundary (j=ny-1 in original, row ny+1 in padded is ghost) + field_padded[-1, 1:-1] = 2 * bc_lid - field_2d[-1, :] + + # Left boundary (i=0 in original, col 0 in padded is ghost) + field_padded[1:-1, 0] = 2 * bc_walls - field_2d[:, 0] + + # Right boundary (i=nx-1 in original, col nx+1 in padded is ghost) + field_padded[1:-1, -1] = 2 * bc_walls - field_2d[:, -1] + + # Corners (average of adjacent ghost values) + field_padded[0, 0] = 0.5 * (field_padded[0, 1] + field_padded[1, 0]) + field_padded[0, -1] = 0.5 * (field_padded[0, -2] + field_padded[1, -1]) + field_padded[-1, 0] = 0.5 * (field_padded[-1, 1] + field_padded[-2, 0]) + field_padded[-1, -1] = 0.5 * (field_padded[-1, -2] + field_padded[-2, -1]) + + # Central differences + df_dx = (field_padded[1:-1, 
2:] - field_padded[1:-1, :-2]) / (2 * dx) + df_dy = (field_padded[2:, 1:-1] - field_padded[:-2, 1:-1]) / (2 * dy) return df_dx.ravel(), df_dy.ravel() def _compute_vorticity(self) -> np.ndarray: - """Compute vorticity ω = ∂v/∂x - ∂u/∂y using finite differences.""" - dv_dx, _ = self._compute_gradient(self.arrays.v) - _, du_dy = self._compute_gradient(self.arrays.u) + """Compute vorticity ω = ∂v/∂x - ∂u/∂y using finite differences. + + Uses proper boundary conditions for lid-driven cavity: + - u: bc_walls=0, bc_lid=lid_velocity + - v: bc_walls=0, bc_lid=0 + """ + # Get lid velocity from params (default 1.0 for lid-driven cavity) + lid_velocity = getattr(self.params, "lid_velocity", 1.0) + + # v has zero BC on all walls including lid + dv_dx, _ = self._compute_gradient(self.arrays.v, bc_walls=0.0, bc_lid=0.0) + + # u has zero BC on walls, lid_velocity on top + _, du_dy = self._compute_gradient(self.arrays.u, bc_walls=0.0, bc_lid=lid_velocity) + return dv_dx - du_dy def _get_cell_area(self) -> float: @@ -372,6 +430,127 @@ def _get_cell_area(self) -> float: n = len(self.arrays.u) return 1.0 / n + # ========================================================================= + # VTK Export - to_vtk() creates StructuredGrid with all fields + # ========================================================================= + + def to_vtk(self) -> pv.StructuredGrid: + """Export solution to VTK StructuredGrid with all fields and metadata. + + Creates a structured grid with: + - Primary fields: u, v, p + - Derived fields: velocity_magnitude, vorticity, velocity (vector) + - Metadata: Re, N, solver name + + Subclasses may override to use native differentiation for derived fields. 
+ + Returns + ------- + pv.StructuredGrid + Solution on structured grid, ready for VTS export + """ + # Get unique sorted coordinates + x_unique = np.sort(np.unique(self.fields.x)) + y_unique = np.sort(np.unique(self.fields.y)) + nx, ny = len(x_unique), len(y_unique) + + # Reshape fields to 2D grid: U_2d[j, i] = u at (x_unique[i], y_unique[j]) + indices = np.lexsort((self.fields.x, self.fields.y)) + u_sorted = self.fields.u[indices] + v_sorted = self.fields.v[indices] + p_sorted = self.fields.p[indices] + + U_2d = u_sorted.reshape(ny, nx) + V_2d = v_sorted.reshape(ny, nx) + P_2d = p_sorted.reshape(ny, nx) + + # Create 3D grid (z=0 plane) + X, Y = np.meshgrid(x_unique, y_unique) + Z = np.zeros_like(X) + grid = pv.StructuredGrid(X, Y, Z) + + # Add primary fields - use Fortran order to match VTK's column-major point ordering + grid["u"] = U_2d.ravel("F") + grid["v"] = V_2d.ravel("F") + grid["pressure"] = P_2d.ravel("F") + + # Add derived fields + grid["velocity_magnitude"] = np.sqrt(U_2d**2 + V_2d**2).ravel("F") + + # Compute vorticity using native differentiation + vorticity = self._compute_vorticity_for_export(U_2d, V_2d, x_unique, y_unique) + grid["vorticity"] = vorticity.ravel("F") + + # Add velocity vector field + vectors = np.zeros((nx * ny, 3)) + vectors[:, 0] = U_2d.ravel("F") + vectors[:, 1] = V_2d.ravel("F") + grid["velocity"] = vectors + + # Add metadata + grid.field_data["Re"] = np.array([self.params.Re]) + grid.field_data["N"] = np.array([self.params.nx]) + grid.field_data["solver"] = np.array([self.params.name]) + + return grid + + def _compute_vorticity_for_export( + self, U_2d: np.ndarray, V_2d: np.ndarray, x: np.ndarray, y: np.ndarray + ) -> np.ndarray: + """Compute vorticity for VTK export. Override for native differentiation. + + Default uses scipy RectBivariateSpline for smooth derivatives. 
+ + Parameters + ---------- + U_2d, V_2d : np.ndarray + 2D velocity arrays (ny, nx) + x, y : np.ndarray + 1D coordinate arrays + + Returns + ------- + np.ndarray + Vorticity field (ny, nx) + """ + from scipy.interpolate import RectBivariateSpline + + U_spline = RectBivariateSpline(y, x, U_2d) + V_spline = RectBivariateSpline(y, x, V_2d) + dvdx = V_spline(y, x, dx=1) + dudy = U_spline(y, x, dy=1) + return dvdx - dudy + + def compute_global_quantities(self) -> dict: + """Compute global quantities E, Z, P for the current solution. + + Returns + ------- + dict + {'E': kinetic_energy, 'Z': enstrophy, 'P': palinstrophy} + """ + return { + "E": self._compute_energy(), + "Z": self._compute_enstrophy(), + "P": self._compute_palinstrophy(), + } + + def save_vtk(self, filepath: Path): + """Save solution to VTS file. + + Parameters + ---------- + filepath : Path + Output file path (should have .vts extension) + """ + filepath = Path(filepath) + filepath.parent.mkdir(parents=True, exist_ok=True) + + grid = self.to_vtk() + grid.save(str(filepath)) + + log.info(f"Saved VTS to {filepath}") + # ======================================================================== # MLflow Integration # ======================================================================== diff --git a/src/solvers/fv/linear_solvers/scipy_solver.py b/src/solvers/fv/linear_solvers/scipy_solver.py index 363492f..26c5e53 100644 --- a/src/solvers/fv/linear_solvers/scipy_solver.py +++ b/src/solvers/fv/linear_solvers/scipy_solver.py @@ -1,6 +1,7 @@ -"""Scipy-based linear solver using BiCGSTAB.""" +"""Scipy-based linear solver using BiCGSTAB with PyAMG preconditioning.""" import numpy as np +import pyamg from scipy.sparse import csr_matrix from scipy.sparse.linalg import bicgstab @@ -8,12 +9,11 @@ def scipy_solver( A_csr: csr_matrix, b_np: np.ndarray, - M=None, # Unused, kept for API compatibility + M=None, tolerance=1e-6, max_iterations=1000, - remove_nullspace=False, ): - """Solve A x = b using scipy BiCGSTAB. 
+ """Solve A x = b using scipy BiCGSTAB with PyAMG preconditioning. Parameters ---------- @@ -21,29 +21,27 @@ def scipy_solver( Sparse matrix in CSR format. b_np : np.ndarray Right-hand side vector. - M : unused - Kept for API compatibility, ignored. + M : LinearOperator, optional + Preconditioner. If None, builds AMG preconditioner automatically. tolerance : float, optional Convergence tolerance (default: 1e-6). max_iterations : int, optional Maximum iterations (default: 1000). - remove_nullspace : bool, optional - If True, removes the mean from RHS and solution (for pressure eq). Returns ------- x_np : np.ndarray Solution vector. - None - Placeholder for API compatibility. + M : LinearOperator + Preconditioner for reuse in subsequent solves. """ - # Handle nullspace if requested (for pressure Poisson equation) - b = b_np.copy() - if remove_nullspace: - b = b - np.mean(b) + # Build AMG preconditioner if not provided + if M is None: + ml = pyamg.smoothed_aggregation_solver(A_csr, max_coarse=10) + M = ml.aspreconditioner() - # Solve using BiCGSTAB - x, info = bicgstab(A_csr, b, rtol=tolerance, atol=0, maxiter=max_iterations) + # Solve using BiCGSTAB with AMG preconditioner + x, info = bicgstab(A_csr, b_np, M=M, rtol=tolerance, atol=0, maxiter=max_iterations) if info != 0: if info > 0: @@ -52,8 +50,4 @@ def scipy_solver( else: raise RuntimeError(f"BiCGSTAB failed (info={info})") - # Remove nullspace component from solution if requested - if remove_nullspace: - x = x - np.mean(x) - - return x, None + return x, M diff --git a/src/solvers/fv/solver.py b/src/solvers/fv/solver.py index 8a02a64..8a5d792 100644 --- a/src/solvers/fv/solver.py +++ b/src/solvers/fv/solver.py @@ -83,6 +83,27 @@ def __init__(self, **kwargs): self.dx_min = self.params.Lx / self.params.nx self.dy_min = self.params.Ly / self.params.ny + # Pre-assemble pressure correction matrix (constant for structured mesh) + self.A_p = self._build_pressure_correction_matrix() + + def 
_build_pressure_correction_matrix(self): + """Build pressure correction matrix once (constant for structured mesh). + + The matrix structure and values only depend on mesh geometry and rho, + which are constant. We also apply the pressure pinning here. + """ + row, col, data = assemble_pressure_correction_matrix(self.mesh, self.rho) + A_p = csr_matrix((data, (row, col)), shape=(self.n_cells, self.n_cells)) + + # Pin pressure at cell 0 to make system non-singular + # Use lil_matrix for efficient row/col modification, then convert back + A_p_lil = A_p.tolil() + A_p_lil[0, :] = 0.0 + A_p_lil[:, 0] = 0.0 + A_p_lil[0, 0] = 1.0 + + return A_p_lil.tocsr() + def _solve_momentum_equation( self, component_idx, phi, grad_phi, phi_prev_iter, grad_p_component, M ): @@ -196,18 +217,16 @@ def step(self): mdot_calculation(self.mesh, self.rho, a.U_star_rc, out=a.mdot_star) - # Assemble pressure correction matrix - row, col, data = assemble_pressure_correction_matrix(self.mesh, self.rho) + # Pressure correction RHS (matrix is pre-built in __init__) rhs_p = -compute_divergence_from_face_fluxes(self.mesh, a.mdot_star) + rhs_p[0] = 0.0 # Pin pressure at cell 0 - # Solve pressure correction with scipy (handles nullspace internally) - A_p = csr_matrix((data, (row, col)), shape=(self.n_cells, self.n_cells)) + # Solve pressure correction with PyAMG-preconditioned BiCGSTAB p_prime, a.M_p = scipy_solver( - A_p, + self.A_p, rhs_p, M=a.M_p, tolerance=self.params.linear_solver_tol, - remove_nullspace=True, ) # Velocity and pressure corrections - reuse buffers diff --git a/src/solvers/metrics.py b/src/solvers/metrics.py deleted file mode 100644 index 598e787..0000000 --- a/src/solvers/metrics.py +++ /dev/null @@ -1,101 +0,0 @@ -"""Shared metrics and formatting utilities for solvers.""" - -from __future__ import annotations - -from typing import Any - -import numpy as np -import pandas as pd - - -# ----------------------------------------------------------------------------- -# Norms / errors -# 
----------------------------------------------------------------------------- - - -def discrete_l2_norm(values: np.ndarray, h: float) -> float: - """Approximate L2 norm using composite trapezoidal rule.""" - return np.sqrt(h * np.sum(np.abs(values) ** 2)) - - -def discrete_l2_error( - f_exact: np.ndarray, f_num: np.ndarray, interval_length: float -) -> float: - """Compute discrete L2 error between exact and numerical solutions.""" - diff = f_num - f_exact - h = interval_length / f_exact.size - return np.sqrt(h) * np.linalg.norm(diff) - - -def discrete_linf_error(f_exact: np.ndarray, f_num: np.ndarray) -> float: - """Compute discrete L-infinity (maximum) error.""" - return np.max(np.abs(f_num - f_exact)) - - -# ----------------------------------------------------------------------------- -# Formatting helpers -# ----------------------------------------------------------------------------- - - -def format_dt_latex(dt: float | str) -> str: - """Format a timestep value as LaTeX scientific notation.""" - if dt == "?": - return "?" - - dt_str = f"{float(dt):.2e}" - mantissa, exp = dt_str.split("e") - exp_int = int(exp) - return rf"{mantissa} \times 10^{{{exp_int}}}" - - -def extract_metadata( - df: pd.DataFrame, - cols: list[str] | None = None, - row_idx: int = 0, -) -> dict[str, Any]: - """Extract metadata from a DataFrame (assumes constant columns).""" - if cols is None: - cols = df.columns.tolist() - return {col: df[col].iloc[row_idx] for col in cols if col in df.columns} - - -def format_parameter_range( - values: list | tuple, - name: str, - latex: bool = True, -) -> str: - """Format a parameter range for display.""" - if len(values) == 0: - return f"{name} = ?" 
- - if len(values) == 1: - val = values[0] - return rf"${name} = {val}$" if latex else f"{name} = {val}" - - min_val, max_val = min(values), max(values) - if isinstance(min_val, int) and isinstance(max_val, int): - range_str = f"[{min_val}, {max_val}]" - else: - range_str = f"[{min_val:.1f}, {max_val:.1f}]" - - return rf"${name} \in {range_str}$" if latex else f"{name} ∈ {range_str}" - - -def build_parameter_string( - params: dict[str, Any], - separator: str = ", ", - latex: bool = True, -) -> str: - """Build a parameter string from a dictionary.""" - parts = [] - for name, value in params.items(): - if isinstance(value, (list, tuple)): - parts.append(format_parameter_range(value, name, latex=latex)) - elif "dt" in name.lower() or "delta t" in name: - value_str = format_dt_latex(value) - parts.append( - rf"${name} = {value_str}$" if latex else f"{name} = {value_str}" - ) - else: - parts.append(rf"${name} = {value}$" if latex else f"{name} = {value}") - return separator.join(parts) diff --git a/src/solvers/spectral/sg.py b/src/solvers/spectral/sg.py index b21369c..2857b5c 100644 --- a/src/solvers/spectral/sg.py +++ b/src/solvers/spectral/sg.py @@ -458,3 +458,102 @@ def _compute_algebraic_residuals(self): "v_residual": np.linalg.norm(self.arrays.R_v), "continuity_residual": np.linalg.norm(self.arrays.R_p), } + + def _compute_vorticity_for_export( + self, U_2d: np.ndarray, V_2d: np.ndarray, x: np.ndarray, y: np.ndarray + ) -> np.ndarray: + """Compute vorticity using spectral differentiation. + + Override base class to use spectral differentiation matrices + for higher accuracy. 
+ + Parameters + ---------- + U_2d, V_2d : np.ndarray + 2D velocity arrays (ny, nx) - note: different from internal (nx+1, ny+1) + x, y : np.ndarray + 1D coordinate arrays + + Returns + ------- + np.ndarray + Vorticity field (ny, nx) + """ + # Use internal spectral differentiation on the full grid arrays + # The fields are already finalized in self.arrays + dv_dx = self.Dx @ self.arrays.v + du_dy = self.Dy @ self.arrays.u + vorticity = dv_dx - du_dy + + # Reshape to match the expected output (ny, nx) from VTK grid ordering + # Internal shape is (Nx+1, Ny+1), but VTK uses (Ny+1, Nx+1) ordering + vort_2d = vorticity.reshape(self.shape_full) # (Nx+1, Ny+1) + return vort_2d.T # Transpose to (Ny+1, Nx+1) for VTK + + def _compute_vorticity(self) -> np.ndarray: + """Compute vorticity using spectral differentiation. + + Override base class finite difference implementation. + """ + dv_dx = self.Dx @ self.arrays.v + du_dy = self.Dy @ self.arrays.u + return dv_dx - du_dy + + def _compute_gradient( + self, field: np.ndarray, bc_walls: float = 0.0, bc_lid: float = None + ) -> tuple: + """Compute gradient using spectral differentiation. + + Override base class finite difference implementation. + BC parameters are ignored since spectral methods handle BCs through + the differentiation matrices and boundary point values. + """ + df_dx = self.Dx @ field + df_dy = self.Dy @ field + return df_dx, df_dy + + def _compute_quadrature_weights(self) -> np.ndarray: + """Compute 2D quadrature weights for integration on Gauss-Lobatto grid. + + Uses trapezoidal rule weights based on non-uniform node spacing. + Returns weights as 1D array matching self.arrays.u ordering. 
+ """ + # Get 1D nodes + x_nodes = self.basis_x.nodes(self.params.nx + 1) + y_nodes = self.basis_y.nodes(self.params.ny + 1) + + # Compute 1D trapezoidal weights + def trapezoidal_weights(nodes): + n = len(nodes) + w = np.zeros(n) + for i in range(1, n - 1): + w[i] = 0.5 * (nodes[i + 1] - nodes[i - 1]) + w[0] = 0.5 * (nodes[1] - nodes[0]) + w[-1] = 0.5 * (nodes[-1] - nodes[-2]) + return w + + wx = trapezoidal_weights(x_nodes) + wy = trapezoidal_weights(y_nodes) + + # 2D weights via outer product, then flatten to match array ordering + # shape_full = (nx+1, ny+1) with indexing='ij', so W[i,j] = wx[i] * wy[j] + W_2d = np.outer(wx, wy) + return W_2d.ravel() + + def _compute_energy(self) -> float: + """Compute kinetic energy using spectral quadrature: E = 0.5 * ∫(u² + v²) dA.""" + W = self._compute_quadrature_weights() + return 0.5 * float(np.sum(W * (self.arrays.u**2 + self.arrays.v**2))) + + def _compute_enstrophy(self) -> float: + """Compute enstrophy using spectral quadrature: Z = 0.5 * ∫ω² dA.""" + omega = self._compute_vorticity() + W = self._compute_quadrature_weights() + return 0.5 * float(np.sum(W * omega**2)) + + def _compute_palinstrophy(self) -> float: + """Compute palinstrophy using spectral quadrature: P = 0.5 * ∫||∇ω||² dA.""" + omega = self._compute_vorticity() + domega_dx, domega_dy = self._compute_gradient(omega) + W = self._compute_quadrature_weights() + return 0.5 * float(np.sum(W * (domega_dx**2 + domega_dy**2))) diff --git a/src/spectral/__init__.py b/src/spectral/__init__.py deleted file mode 100644 index 9a22bef..0000000 --- a/src/spectral/__init__.py +++ /dev/null @@ -1,58 +0,0 @@ -"""Compatibility layer for spectral utilities (forwarded to new locations).""" - -from solvers.spectral.basis.spectral import ( - ChebyshevLobattoBasis, - FourierEquispacedBasis, - LegendreLobattoBasis, - chebyshev_diff_matrix, - chebyshev_gauss_lobatto_nodes, - fourier_diff_matrix_complex, - fourier_diff_matrix_cotangent, - fourier_diff_matrix_on_interval, - 
legendre_diff_matrix, - legendre_mass_matrix, -) -from solvers.spectral.basis.polynomial import spectral_interpolate -from solvers.spectral.operators.transfer_operators import ( - FFTProlongation, - FFTRestriction, - InjectionRestriction, - PolynomialProlongation, - TransferOperators, - create_transfer_operators, -) -from solvers.spectral.operators.corner import ( - CornerTreatment, - SmoothingTreatment, - SubtractionTreatment, - create_corner_treatment, -) - -__all__ = [ - # Spectral bases - "LegendreLobattoBasis", - "ChebyshevLobattoBasis", - "FourierEquispacedBasis", - # Differentiation matrices - "legendre_diff_matrix", - "legendre_mass_matrix", - "chebyshev_diff_matrix", - "chebyshev_gauss_lobatto_nodes", - "fourier_diff_matrix_cotangent", - "fourier_diff_matrix_complex", - "fourier_diff_matrix_on_interval", - # Interpolation - "spectral_interpolate", - # Transfer operators (multigrid) - "TransferOperators", - "create_transfer_operators", - "FFTProlongation", - "FFTRestriction", - "PolynomialProlongation", - "InjectionRestriction", - # Corner singularity treatment - "CornerTreatment", - "SmoothingTreatment", - "SubtractionTreatment", - "create_corner_treatment", -] diff --git a/src/spectral/utils/__init__.py b/src/spectral/utils/__init__.py deleted file mode 100644 index 0f6df8e..0000000 --- a/src/spectral/utils/__init__.py +++ /dev/null @@ -1,31 +0,0 @@ -"""Compatibility wrappers for migrated utilities.""" - -from utilities.io import ensure_output_dir, load_simulation_data, save_simulation_data -from solvers.metrics import ( - extract_metadata, - format_dt_latex, - format_parameter_range, - build_parameter_string, - discrete_l2_error, - discrete_l2_norm, - discrete_linf_error, -) -from shared.plotting.plotting import get_repo_root - -__all__ = [ - # I/O - "ensure_output_dir", - "load_simulation_data", - "save_simulation_data", - # Formatting - "extract_metadata", - "format_dt_latex", - "format_parameter_range", - "build_parameter_string", - # Plotting - 
"get_repo_root", - # Norms - "discrete_l2_error", - "discrete_l2_norm", - "discrete_linf_error", -] diff --git a/src/utilities/__init__.py b/src/utilities/__init__.py index 2c1546d..0dc418b 100644 --- a/src/utilities/__init__.py +++ b/src/utilities/__init__.py @@ -1,10 +1,3 @@ -"""Cross-project utilities (Hydra/MLflow/HPC, config, IO, plotting).""" +"""Cross-project utilities (Hydra/MLflow/HPC, config).""" # Keep __init__ lightweight to avoid circular imports during Hydra callback loading. -from utilities.io import load_simulation_data, save_simulation_data, ensure_output_dir # noqa: F401 - -__all__ = [ - "load_simulation_data", - "save_simulation_data", - "ensure_output_dir", -] diff --git a/src/utilities/config/__init__.py b/src/utilities/config/__init__.py index 5794c7f..9e6ce04 100644 --- a/src/utilities/config/__init__.py +++ b/src/utilities/config/__init__.py @@ -1,9 +1,5 @@ -"""Configuration utilities. - -This package contains configuration utilities. -""" +"""Configuration utilities.""" from .paths import get_repo_root -from .clean import clean_all -__all__ = ["get_repo_root", "clean_all"] +__all__ = ["get_repo_root"] diff --git a/src/utilities/config/clean.py b/src/utilities/config/clean.py deleted file mode 100644 index ca3904a..0000000 --- a/src/utilities/config/clean.py +++ /dev/null @@ -1,269 +0,0 @@ -"""Cleanup utilities for generated files and caches. - -Provides configurable cleanup of build artifacts, caches, -and generated data files. -""" - -import shutil -from pathlib import Path -from typing import List, Tuple, Optional - -from .paths import get_repo_root - - -def _remove_item(path: Path) -> Tuple[bool, Optional[str]]: - """Remove a file or directory. 
- - Returns - ------- - tuple - (success, error_message) - """ - try: - if path.is_dir(): - shutil.rmtree(path) - else: - path.unlink() - return True, None - except Exception as e: - return False, str(e) - - -def clean_directories( - directories: Optional[List[str]] = None, - repo_root: Optional[Path] = None, -) -> Tuple[int, int]: - """Clean specified directories. - - Parameters - ---------- - directories : list of str, optional - Directories to clean (relative to repo root). - Uses defaults if not specified. - repo_root : Path, optional - Repository root path. - - Returns - ------- - tuple - (cleaned_count, failed_count) - """ - if repo_root is None: - repo_root = get_repo_root() - - if directories is None: - directories = [ - "docs/build", - "docs/source/example_gallery", - "docs/source/generated", - "docs/source/gen_modules", - "plots", - "build", - "dist", - ".pytest_cache", - ".ruff_cache", - ".mypy_cache", - ] - - cleaned, failed = 0, 0 - for d in directories: - path = repo_root / d - if path.exists(): - success, _ = _remove_item(path) - cleaned += success - failed += not success - - return cleaned, failed - - -def clean_files( - files: Optional[List[str]] = None, - repo_root: Optional[Path] = None, -) -> Tuple[int, int]: - """Clean specified files. - - Parameters - ---------- - files : list of str, optional - Files to clean (relative to repo root). - repo_root : Path, optional - Repository root path. 
- - Returns - ------- - tuple - (cleaned_count, failed_count) - """ - if repo_root is None: - repo_root = get_repo_root() - - if files is None: - files = [ - "docs/source/sg_execution_times.rst", - ] - - cleaned, failed = 0, 0 - for f in files: - path = repo_root / f - if path.exists(): - success, _ = _remove_item(path) - cleaned += success - failed += not success - - return cleaned, failed - - -def clean_patterns( - patterns: Optional[List[str]] = None, - repo_root: Optional[Path] = None, -) -> Tuple[int, int]: - """Clean files/directories matching patterns recursively. - - Parameters - ---------- - patterns : list of str, optional - Glob patterns to match. - repo_root : Path, optional - Repository root path. - - Returns - ------- - tuple - (cleaned_count, failed_count) - """ - if repo_root is None: - repo_root = get_repo_root() - - if patterns is None: - patterns = [ - "__pycache__", - "*.pyc", - ".DS_Store", - "mlruns", - "multirun", - "output", - "outputs", - ] - - cleaned, failed = 0, 0 - for pattern in patterns: - for path in repo_root.rglob(pattern): - success, _ = _remove_item(path) - cleaned += success - failed += not success - - return cleaned, failed - - -def clean_data_directory( - data_dir: str = "data", - preserve: Optional[List[str]] = None, - repo_root: Optional[Path] = None, -) -> Tuple[int, int]: - """Clean data directory contents, preserving specific files. - - Parameters - ---------- - data_dir : str - Data directory relative to repo root. - preserve : list of str, optional - Filenames to preserve. - repo_root : Path, optional - Repository root path. 
- - Returns - ------- - tuple - (cleaned_count, failed_count) - """ - if repo_root is None: - repo_root = get_repo_root() - - if preserve is None: - preserve = ["README.md", ".gitkeep"] - - data_path = repo_root / data_dir - if not data_path.exists(): - return 0, 0 - - cleaned, failed = 0, 0 - for item in data_path.iterdir(): - if item.name not in preserve: - success, _ = _remove_item(item) - cleaned += success - failed += not success - - return cleaned, failed - - -def clean_experiment_outputs( - experiments_dir: str = "Experiments", - output_dir_name: str = "output", - repo_root: Optional[Path] = None, -) -> Tuple[int, int]: - """Clean output directories in experiment folders. - - Parameters - ---------- - experiments_dir : str - Experiments directory relative to repo root. - output_dir_name : str - Name of output subdirectories to clean. - repo_root : Path, optional - Repository root path. - - Returns - ------- - tuple - (cleaned_count, failed_count) - """ - if repo_root is None: - repo_root = get_repo_root() - - exp_path = repo_root / experiments_dir - if not exp_path.exists(): - return 0, 0 - - cleaned, failed = 0, 0 - for output_dir in exp_path.glob(f"*/{output_dir_name}"): - success, _ = _remove_item(output_dir) - cleaned += success - failed += not success - - return cleaned, failed - - -def clean_all() -> None: - """Clean all generated files and caches.""" - print("\nCleaning all generated files and caches...") - - total_cleaned = 0 - total_failed = 0 - - c, f = clean_directories() - total_cleaned += c - total_failed += f - - c, f = clean_files() - total_cleaned += c - total_failed += f - - c, f = clean_patterns() - total_cleaned += c - total_failed += f - - c, f = clean_data_directory() - total_cleaned += c - total_failed += f - - c, f = clean_experiment_outputs() - total_cleaned += c - total_failed += f - - if total_cleaned: - print(f" ✓ Cleaned {total_cleaned} items") - if total_failed: - print(f" ✗ Failed to clean {total_failed} items") - if not 
total_cleaned and not total_failed: - print(" Nothing to clean") - print() diff --git a/src/utilities/hpc/__init__.py b/src/utilities/hpc/__init__.py deleted file mode 100644 index 61736b6..0000000 --- a/src/utilities/hpc/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -"""HPC job management utilities.""" - -# This package previously contained custom job generation tools. -# They have been replaced by Hydra for configuration and parameter sweeping. diff --git a/src/utilities/hpc/sweeper.py b/src/utilities/hpc/sweeper.py deleted file mode 100644 index 71d8048..0000000 --- a/src/utilities/hpc/sweeper.py +++ /dev/null @@ -1,202 +0,0 @@ -""" -HPC Sweeper: Generates LSF Job Arrays from YAML configuration. - -Key Features: -1. Smart Grouping: Automatically groups jobs into Arrays based on identical resource requirements. -2. Runtime Lookup: Uses 'lookup.py' to resolve arguments at runtime, keeping the YAML as the source of truth. -3. Pack Generation: Creates a master submission script that submits the Universal Runner Template. 
-""" - -import itertools -from pathlib import Path -from typing import Any, Dict, List, Tuple - -from .jobgen import load_config - -# Use the universal template path relative to project root -RUNNER_TEMPLATE = Path("src/utilities/hpc/runner_template.sh") - - -def get_combinations(group_config: Dict[str, Any]) -> List[Dict[str, Any]]: - """Generate all flattened parameter combinations for a group.""" - static_args = group_config.get("static_args", {}) - sweep = group_config.get("sweep", {}) - sweep_paired = group_config.get("sweep_paired", {}) - - paired_combinations = [{}] - if sweep_paired: - keys = list(sweep_paired.keys()) - values = list(sweep_paired.values()) - paired_combinations = [dict(zip(keys, v)) for v in zip(*values)] - - regular_combinations = [{}] - if sweep: - keys = list(sweep.keys()) - values = list(sweep.values()) - regular_combinations = [dict(zip(keys, v)) for v in itertools.product(*values)] - - all_combos = [] - for paired in paired_combinations: - for regular in regular_combinations: - all_combos.append({**static_args, **paired, **regular}) - - return all_combos - - -def extract_resources( - combo: Dict[str, Any], resource_template: Dict[str, Any] -) -> Tuple[frozenset, Dict[str, Any]]: - """ - Determine the resource signature for a specific combination. 
- """ - resources = resource_template.copy() - # Update resources from combo if keys match - for key, value in combo.items(): - if key in resources: - resources[key] = value - - signature = frozenset(resources.items()) - return signature, resources - - -def generate_arrays(config_path: Path, output_dir: Path = None): - """Parse config and generate array scripts.""" - config = load_config(config_path) - - # Determine Output Directories from Config - base_dir = config_path.parent - - job_script_dir_str = config.get("job_script_output_dir") - pack_dir_str = config.get("packs_output_dir") - - if job_script_dir_str: - job_script_dir = Path(job_script_dir_str) - # Ensure path is handled relative to CWD if not absolute - else: - job_script_dir = base_dir / "generated_jobs" - - if pack_dir_str: - pack_dir = Path(pack_dir_str) - else: - pack_dir = base_dir / "generated_packs" - - if not job_script_dir.exists(): - job_script_dir.mkdir(parents=True, exist_ok=True) - if not pack_dir.exists(): - pack_dir.mkdir(parents=True, exist_ok=True) - - print(f"Configuration: {config_path}") - print(f"Index Maps Dir: {job_script_dir}") - print(f"Submit Script Dir: {pack_dir}") - - submission_lines = [] - submission_lines.append("#!/bin/sh") - submission_lines.append(f"# Generated from {config_path}") - submission_lines.append("# Submits Universal Runner Template") - submission_lines.append("") - - for group_name, group_config in config.items(): - if not isinstance(group_config, dict): - continue - - if ( - "sweep" not in group_config - and "static_args" not in group_config - and "sweep_paired" not in group_config - ): - continue - - print(f"Processing group: {group_name}") - - combos = get_combinations(group_config) - if not combos: - continue - - res_template = group_config.get("resources", {}) - - # Group by Resources - groups = {} - - for i, combo in enumerate(combos): - idx = i + 1 # 1-based LSF index - sig, res = extract_resources(combo, res_template) - - if sig not in groups: - 
groups[sig] = {"resources": res, "indices": []} - groups[sig]["indices"].append(idx) - - base_cmd = group_config.get("command_prefix", "python main.py") - - for i, (sig, data) in enumerate(groups.items()): - indices = data["indices"] - resources = data["resources"] - - suffix = f"_sub{i}" if len(groups) > 1 else "" - job_name = f"{group_name}{suffix}" - num_jobs = len(indices) - - # 1. Generate Mapping File (.idx) - map_file = job_script_dir / f"{job_name}.idx" - with open(map_file, "w") as f: - for global_idx in indices: - f.write(f"{global_idx}\n") - - # 2. Generate Submission Command - # Construct BSUB CLI args - # Resources - bsub_args = [] - bsub_args.append(f'-J "{job_name}[1-{num_jobs}]"') - bsub_args.append(f"-q {resources.get('queue', 'hpcintro')}") - bsub_args.append(f"-W {resources.get('walltime', '00:10')}") - bsub_args.append(f"-n {resources.get('n_cores', 1)}") - bsub_args.append(f'-R "rusage[mem={resources.get("mem", "4GB")}]"') - bsub_args.append('-R "span[ptile=24]"') # Default, could be configurable - - # Output logs - bsub_args.append("-o logs/lsf/%J_%I.out") - bsub_args.append("-e logs/lsf/%J_%I.err") - - # Environment Variables - # Use -env "VAR=VAL,VAR2=VAL2" - env_vars = [] - env_vars.append(f"SWEEP_CONFIG={config_path}") - env_vars.append(f"SWEEP_GROUP={group_name}") - env_vars.append(f"SWEEP_MAP_FILE={map_file}") - env_vars.append(f"SWEEP_CMD='{base_cmd}'") - - # Quote the env vars string properly - env_string = ",".join(env_vars) - bsub_args.append(f'-env "{env_string}"') - - # Command to submit the template - # Note: We assume RUNNER_TEMPLATE is executable or we run it via sh? - # LSF runs the script. - # bsub < script is standard. 
- - cmd = f"bsub {' '.join(bsub_args)} < {RUNNER_TEMPLATE}" - submission_lines.append(cmd) - - print(f" ✓ Queued Array {job_name} (Size: {num_jobs})") - - # Write Master Submit Script - if len(submission_lines) > 4: # Header is 4 lines - submit_file_name = config_path.stem + "_submit.sh" - submit_file_path = pack_dir / submit_file_name - - with open(submit_file_path, "w") as f: - for line in submission_lines: - f.write(line + "\n") - - print(f"\nSubmission Script generated: {submit_file_path}") - print(f"Run: sh {submit_file_path}") - - -if __name__ == "__main__": - import argparse - - parser = argparse.ArgumentParser() - parser.add_argument("--config", type=Path, required=True) - parser.add_argument("--out", type=Path, default=None) - args = parser.parse_args() - - generate_arrays(args.config, args.out) diff --git a/src/utilities/hydra/__init__.py b/src/utilities/hydra/__init__.py deleted file mode 100644 index f2a54b6..0000000 --- a/src/utilities/hydra/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -"""Hydra utilities and callbacks.""" - -from utilities.hydra.callbacks import MLflowLogCallback - -__all__ = ["MLflowLogCallback"] diff --git a/src/utilities/hydra/callbacks.py b/src/utilities/hydra/callbacks.py deleted file mode 100644 index 5e711c2..0000000 --- a/src/utilities/hydra/callbacks.py +++ /dev/null @@ -1,87 +0,0 @@ -"""Hydra callbacks for MLflow integration. - -This module provides callbacks to integrate Hydra's job logging with MLflow. -The MLflowLogCallback uploads the Hydra job log file as an MLflow artifact -after each job completes, allowing you to view job output (including MPI -report-bindings) in the MLflow UI. -""" - -import logging -from pathlib import Path -from typing import Any - -from hydra.core.utils import JobReturn -from hydra.experimental.callback import Callback -from omegaconf import DictConfig - -log = logging.getLogger(__name__) - - -class MLflowLogCallback(Callback): - """Callback to log Hydra job output to MLflow as an artifact. 
- - This callback runs after each Hydra job completes and uploads the - job log file to the active MLflow run as an artifact. - - Configuration (in hydra/callbacks/mlflow_log.yaml): - - .. code-block:: yaml - - # @package _global_ - hydra: - callbacks: - mlflow_log: - _target_: utilities.hydra.callbacks.MLflowLogCallback - artifact_path: logs - - The log file will be uploaded to the "logs" artifact path in MLflow. - """ - - def __init__(self, artifact_path: str = "logs") -> None: - """Initialize the callback. - - Parameters - ---------- - artifact_path : str - MLflow artifact subdirectory for log files (default: "logs") - """ - self.artifact_path = artifact_path - - def on_job_end( - self, config: DictConfig, job_return: JobReturn, **kwargs: Any - ) -> None: - """Upload job log to MLflow after job completes. - - Parameters - ---------- - config : DictConfig - The job config - job_return : JobReturn - Return value from the job (contains status, return value, etc.) - kwargs : Any - Additional keyword arguments - """ - try: - import mlflow - from hydra.core.hydra_config import HydraConfig - - # Check if MLflow run is active - if not mlflow.active_run(): - log.debug("No active MLflow run, skipping log upload") - return - - # Get the job log file path from Hydra - hc = HydraConfig.get() - output_dir = Path(hc.runtime.output_dir) - job_name = hc.job.name - log_file = output_dir / f"{job_name}.log" - - if log_file.exists(): - mlflow.log_artifact(str(log_file), artifact_path=self.artifact_path) - log.info(f"Uploaded job log to MLflow: {log_file.name}") - else: - log.debug(f"Job log not found: {log_file}") - - except Exception as e: - # Don't fail the job if logging fails - log.warning(f"Failed to upload job log to MLflow: {e}") diff --git a/src/utilities/io.py b/src/utilities/io.py deleted file mode 100644 index ada81e7..0000000 --- a/src/utilities/io.py +++ /dev/null @@ -1,111 +0,0 @@ -"""I/O utilities for loading and saving simulation data.""" - -from __future__ import 
annotations - -from pathlib import Path -from typing import Literal - -import pandas as pd - - -def load_simulation_data( - data_dir: Path | str, - filename_base: str, - prefer: Literal["parquet", "pickle"] = "parquet", -) -> pd.DataFrame: - """Load simulation data with automatic fallback between parquet and pickle. - - Parameters - ---------- - data_dir : Path or str - Directory containing the data files - filename_base : str - Base filename without extension (e.g., 'kdv_two_soliton') - prefer : {'parquet', 'pickle'} - Preferred format to try first - - Returns - ------- - pd.DataFrame - Loaded dataframe - - Raises - ------ - FileNotFoundError - If neither parquet nor pickle file exists - - """ - data_dir = Path(data_dir) - parquet_path = data_dir / f"{filename_base}.parquet" - pickle_path = data_dir / f"{filename_base}.pkl" - - if prefer == "parquet": - primary, secondary = parquet_path, pickle_path - primary_loader, secondary_loader = pd.read_parquet, pd.read_pickle - primary_fmt, secondary_fmt = "parquet", "pickle" - else: - primary, secondary = pickle_path, parquet_path - primary_loader, secondary_loader = pd.read_pickle, pd.read_parquet - primary_fmt, secondary_fmt = "pickle", "parquet" - - if primary.exists(): - print(f"Loading {primary_fmt} data: {primary}") - return primary_loader(primary) - elif secondary.exists(): - print( - f"{primary_fmt.capitalize()} not found; loading {secondary_fmt} data: {secondary}" - ) - return secondary_loader(secondary) - else: - raise FileNotFoundError( - f"No dataset found at {data_dir / filename_base}.{{parquet,pkl}}. " - f"Run the corresponding compute script first." - ) - - -def save_simulation_data( - df: pd.DataFrame, - output_path: Path | str, - format: Literal["parquet", "pickle"] = "parquet", -) -> None: - """Save simulation data to disk. 
- - Parameters - ---------- - df : pd.DataFrame - DataFrame to save - output_path : Path or str - Output file path (should include extension) - format : {'parquet', 'pickle'} - Output format - - """ - output_path = Path(output_path) - - if format == "parquet": - df.to_parquet(output_path, index=False) - elif format == "pickle": - df.to_pickle(output_path) - else: - raise ValueError(f"Unsupported format: {format}") - - print(f"Saved {format} data → {output_path} ({df.shape})") - - -def ensure_output_dir(path: Path | str) -> Path: - """Ensure output directory exists, creating it if necessary. - - Parameters - ---------- - path : Path or str - Directory path to create - - Returns - ------- - Path - The created/existing directory path - - """ - path = Path(path) - path.mkdir(parents=True, exist_ok=True) - return path diff --git a/src/utilities/mlflow/__init__.py b/src/utilities/mlflow/__init__.py index 650af38..784cd99 100644 --- a/src/utilities/mlflow/__init__.py +++ b/src/utilities/mlflow/__init__.py @@ -1,33 +1,5 @@ -"""MLflow utilities for experiment tracking and artifact management. 
+"""MLflow utilities for experiment tracking.""" -Provides: -- Context manager for MLflow run orchestration -- Granular logging functions for parameters, metrics, time-series, and artifacts -- Run fetching and filtering -- Artifact downloading with naming conventions -- Log uploading for HPC jobs -""" +from .io import setup_mlflow_tracking -from .io import ( - setup_mlflow_tracking, - start_mlflow_run_context, - log_parameters, - log_metrics_dict, - log_timeseries_metrics, - log_artifact_file, - load_runs, - download_artifacts, -) -from .logs import upload_logs - -__all__ = [ - "setup_mlflow_tracking", - "start_mlflow_run_context", - "log_parameters", - "log_metrics_dict", - "log_timeseries_metrics", - "log_artifact_file", - "load_runs", - "download_artifacts", - "upload_logs", -] +__all__ = ["setup_mlflow_tracking"] diff --git a/src/utilities/mlflow/callback.py b/src/utilities/mlflow/callback.py index 3118043..60ef0d2 100644 --- a/src/utilities/mlflow/callback.py +++ b/src/utilities/mlflow/callback.py @@ -34,6 +34,34 @@ def __init__(self) -> None: None # Store sweep dir while HydraConfig available ) + def _find_recent_parent_runs(self) -> list[str]: + """Find parent runs from this sweep based on sweep_name pattern.""" + import mlflow + + if not self._full_experiment_name or not self._base_sweep_name: + return [] + + try: + # Search for parent runs matching the sweep_name pattern + # If sweep_name contains ${Re}, search for all Re variants + sweep_pattern = self._base_sweep_name.replace("${Re}", "%").replace("{Re}", "%") + + runs = mlflow.search_runs( + experiment_names=[self._full_experiment_name], + filter_string=f"tags.sweep = 'parent' AND tags.`mlflow.runName` LIKE '{sweep_pattern}'", + order_by=["start_time DESC"], + max_results=10, + ) + + if runs.empty: + return [] + + return runs["run_id"].tolist() + + except Exception as e: + log.warning(f"Error finding parent runs: {e}") + return [] + def _find_existing_parent( self, experiment_name: str, sweep_name: str 
) -> Optional[str]: @@ -189,45 +217,40 @@ def on_job_start(self, config: DictConfig, **kwargs) -> None: os.environ["MLFLOW_PARENT_RUN_ID"] = parent_id def on_multirun_end(self, config: DictConfig, **kwargs) -> None: - """Clean up after sweep completes and generate plots.""" + """Clean up after sweep completes and generate comparison plots.""" if os.environ.get("MLFLOW_SWEEP_ACTIVE") != "1": return - # Clean up env var os.environ.pop("MLFLOW_PARENT_RUN_ID", None) os.environ.pop("MLFLOW_SWEEP_ACTIVE", None) log.info("Multirun sweep completed") - # Generate plots using plot_runs.py + # Generate comparison plots for all parent runs try: - import sys from pathlib import Path + from shared.plotting.ldc import generate_comparison_plots_for_sweep - # Add repo root to path for imports - repo_root = Path(__file__).parent.parent.parent.parent - if str(repo_root) not in sys.path: - sys.path.insert(0, str(repo_root)) - - # Use stored sweep directory for output - if self._sweep_dir: - output_dir = Path(self._sweep_dir) / "plots" - else: - output_dir = Path("outputs") / "plots" + output_dir = Path(self._sweep_dir) / "plots" if self._sweep_dir else Path("outputs") / "plots" output_dir.mkdir(parents=True, exist_ok=True) - from plot_runs import plot_experiment - - log.info(f"Generating plots for experiment: {self._full_experiment_name}") - - # Plot all parent runs and their children - plot_experiment( - experiment_name=self._full_experiment_name, - tracking_uri=self._tracking_uri, - output_dir=output_dir, - parent_run_ids=None, # Find all parent runs automatically - upload_to_mlflow=True, - ) - + # Find parent runs - _parent_runs may be empty due to joblib multiprocessing + # So we search MLflow directly for parent runs in this experiment + parent_run_ids = list(self._parent_runs.values()) + if not parent_run_ids: + parent_run_ids = self._find_recent_parent_runs() + + log.info(f"Parent runs for comparison plots: {parent_run_ids}") + if parent_run_ids: + 
generate_comparison_plots_for_sweep( + parent_run_ids=parent_run_ids, + tracking_uri=self._tracking_uri, + output_dir=output_dir, + upload_to_mlflow=True, + ) + else: + log.warning("No parent runs found for comparison plots") except Exception as e: - log.warning(f"Failed to generate plots: {e}") + log.warning(f"Failed to generate comparison plots: {e}") + import traceback + log.warning(traceback.format_exc()) diff --git a/src/utilities/mlflow/io.py b/src/utilities/mlflow/io.py index 2c16cdc..bb7ff73 100644 --- a/src/utilities/mlflow/io.py +++ b/src/utilities/mlflow/io.py @@ -1,33 +1,12 @@ -"""MLflow I/O utilities for experiment tracking, and fetching runs and artifacts. +"""MLflow I/O utilities for experiment tracking.""" -This module provides helpers for: -- Setting up MLflow tracking (local or Databricks) via environment variables. -- Orchestrating MLflow runs (context manager for parent/nested runs). -- Logging parameters, metrics, and artifacts. -- Retrieving experiment data from MLflow. -""" - -import os -import time -from dataclasses import asdict from pathlib import Path -from typing import List, Optional -import argparse -from contextlib import contextmanager import mlflow -import pandas as pd - -# Conditional import for type hint to avoid circular dependency -try: - from Poisson.solver import JacobiPoisson -except ImportError: - JacobiPoisson = None def setup_mlflow_tracking(mode: str = "databricks"): - """ - Configures MLflow tracking. + """Configure MLflow tracking. Parameters ---------- @@ -44,10 +23,6 @@ def setup_mlflow_tracking(mode: str = "databricks"): "MLflow Databricks setup failed. Ensure credentials are configured." ) from e elif mode == "local": - # Use default local file-based backend (./mlruns) - # Setting it to None or "" often defaults to ./mlruns, but explicit is better if env var is set differently. - # However, the standard way to 'unset' to default is just not setting it, or setting it to a local path. 
- # Let's explicitly set it to the local ./mlruns directory to be safe and clear. mlruns_path = Path.cwd() / "mlruns" mlruns_uri = f"file://{mlruns_path}" mlflow.set_tracking_uri(mlruns_uri) @@ -56,310 +31,3 @@ def setup_mlflow_tracking(mode: str = "databricks"): print( f"WARNING: Unknown MLflow mode '{mode}'. Using existing URI: {mlflow.get_tracking_uri()}" ) - - -def get_mlflow_client() -> mlflow.tracking.MlflowClient: - """Get an MLflow tracking client.""" - return mlflow.tracking.MlflowClient() - - -@contextmanager -def start_mlflow_run_context( - experiment_name: str, - parent_run_name: str, - child_run_name: str, - project_prefix: str = "/Shared/LSM-PoissonMPI-v3", - args: Optional[argparse.Namespace] = None, -): - """ - Context manager to start a nested MLflow run. - """ - if mlflow.get_tracking_uri() == "databricks" and not experiment_name.startswith( - "/" - ): - original_experiment_name = experiment_name - experiment_name = f"{project_prefix}/{experiment_name}" - print( - f"DEBUG: Adjusted experiment name for Databricks: {original_experiment_name} -> {experiment_name}" - ) - - print(f"DEBUG: Attempting to set MLflow experiment: {experiment_name}") - mlflow.set_experiment(experiment_name) - print(f"INFO: Using MLflow experiment: {experiment_name}") - - client = get_mlflow_client() - exp = mlflow.get_experiment_by_name(experiment_name) - if exp is None: - try: - exp_id = client.create_experiment(experiment_name) - exp = client.get_experiment(exp_id) - print( - f"DEBUG: Created new MLflow experiment: {experiment_name} with ID {exp_id}" - ) - except Exception as e: - print(f"ERROR: Failed to create MLflow experiment '{experiment_name}': {e}") - raise # Re-raise to ensure failure is visible - - parent_runs = client.search_runs( - experiment_ids=[exp.experiment_id], - filter_string=f"tags.mlflow.runName = '{parent_run_name}' AND tags.is_parent = 'true'", - max_results=1, - ) - parent_run_id = parent_runs[0].info.run_id if parent_runs else None - - with 
mlflow.start_run( - run_id=parent_run_id, run_name=parent_run_name, tags={"is_parent": "true"} - ) as _parent_mlflow_run: # noqa: F841 - with mlflow.start_run(run_name=child_run_name, nested=True) as child_mlflow_run: - # Tag run with environment (HPC vs local) for easy filtering - env = ( - "hpc" - if os.environ.get("LSB_JOBID") or os.environ.get("SLURM_JOB_ID") - else "local" - ) - mlflow.set_tag("environment", env) - - print( - f"INFO: Started MLflow run '{child_mlflow_run.info.run_name}' ({child_mlflow_run.info.run_id}) [{env}]" - ) - if args and args.job_name: - try: - from Poisson import get_project_root - - project_root = get_project_root() - log_path = project_root / args.log_dir - log_path.mkdir(parents=True, exist_ok=True) - run_id_file = log_path / f"{args.job_name}.runid" - with open(run_id_file, "w") as f: - f.write(child_mlflow_run.info.run_id) - print(f" ✓ Saved run ID to {run_id_file}") - except Exception as e: - print(f" ✗ WARNING: Could not save run ID to file: {e}") - yield child_mlflow_run - - -def log_parameters(params: dict): - """Log a dictionary of parameters to the active MLflow run.""" - mlflow.log_params(params) - - -def log_metrics_dict(metrics: dict): - """Log a dictionary of metrics to the active MLflow run, filtering out None values.""" - filtered_metrics = {k: v for k, v in metrics.items() if v is not None} - mlflow.log_metrics(filtered_metrics) - - -def log_timeseries_metrics(timeseries_data: object): - """Log time series data as step-based metrics to the active MLflow run.""" - if not mlflow.active_run(): - return - client = get_mlflow_client() - run_id = mlflow.active_run().info.run_id - timestamp = int(time.time() * 1000) - metrics_to_log = [] - ts_dict = asdict(timeseries_data) - for name, values in ts_dict.items(): - if values: - for step, value in enumerate(values): - try: - val = float(value) - metrics_to_log.append( - mlflow.entities.Metric(name, val, timestamp, step) - ) - except (ValueError, TypeError): - continue - if 
metrics_to_log: - for i in range(0, len(metrics_to_log), 1000): - chunk = metrics_to_log[i : i + 1000] - client.log_batch(run_id=run_id, metrics=chunk, synchronous=True) - print(f" ✓ Logged {len(metrics_to_log)} time-series metrics.") - - -def log_artifact_file(filepath: Path): - """Log a file as an artifact to the active MLflow run.""" - if filepath.exists(): - mlflow.log_artifact(str(filepath)) - print(f" ✓ Logged artifact: {filepath.name}") - else: - print(f" ✗ WARNING: Artifact file not found at {filepath}") - - -def log_lsf_logs(job_name: Optional[str], log_dir: str = "logs/lsf"): - """ - Upload LSF .out and .err log files as MLflow artifacts. - - Parameters - ---------- - job_name : str or None - The LSF job name (used to find log files) - log_dir : str - Directory containing LSF logs (default: logs/lsf) - """ - if not job_name: - return - - try: - from Poisson import get_project_root - - project_root = get_project_root() - except ImportError: - project_root = Path.cwd() - - log_path = project_root / log_dir - - for ext in [".out", ".err"]: - log_file = log_path / f"{job_name}{ext}" - if log_file.exists(): - mlflow.log_artifact(str(log_file), artifact_path="lsf_logs") - print(f" ✓ Logged LSF log: {log_file.name}") - # Don't warn if not found - logs may not exist yet during local testing - - -def load_runs( - experiment: str, - converged_only: bool = True, - exclude_parent_runs: bool = True, - project_prefix: str = "/Shared/LSM-PoissonMPI-v3", -) -> pd.DataFrame: - """Load runs from ALL MLflow experiments matching the name. 
- - Parameters - ---------- - experiment : str - Experiment name (will be prefixed for Databricks) - converged_only : bool - Only include converged runs - exclude_parent_runs : bool - Exclude parent runs (keep only child/nested runs) - project_prefix : str - Databricks workspace prefix for experiment names - """ - # Apply prefix for Databricks - if mlflow.get_tracking_uri() == "databricks" and not experiment.startswith("/"): - full_experiment_name = f"{project_prefix}/{experiment}" - else: - full_experiment_name = experiment - - # Find ALL experiments matching this name (there can be duplicates) - client = get_mlflow_client() - all_experiments = client.search_experiments( - filter_string=f"name = '{full_experiment_name}'" - ) - - if not all_experiments: - return pd.DataFrame() - - experiment_ids = [exp.experiment_id for exp in all_experiments] - - # Build filter string - filters = [] - if converged_only: - filters.append("metrics.converged = 1") - - filter_string = " and ".join(filters) if filters else "" - - # Fetch runs from ALL matching experiments - df = mlflow.search_runs( - experiment_ids=experiment_ids, - filter_string=filter_string, - order_by=["start_time DESC"], - ) - - # Filter out parent runs in pandas (MLflow filter doesn't handle None well) - if exclude_parent_runs and "tags.is_parent" in df.columns: - df = df[df["tags.is_parent"] != "true"] - - return df - - -def download_artifacts( - experiment_name: str, - output_dir: Path, - exclude_parent_runs: bool = True, - force: bool = False, - max_workers: int = 8, -) -> List[Path]: - """ - Download artifacts from the newest run per run name in an experiment. - - When multiple runs have the same name, only artifacts from the most recent - run are downloaded to avoid duplicates and ensure latest data. 
- - Parameters - ---------- - experiment_name : str - MLflow experiment name - output_dir : Path - Local directory to download to - exclude_parent_runs : bool - Skip parent runs (default True) - force : bool - Re-download even if file exists locally (default False) - max_workers : int - Number of parallel download threads (default 8) - """ - from concurrent.futures import ThreadPoolExecutor, as_completed - - output_dir = Path(output_dir) - output_dir.mkdir(parents=True, exist_ok=True) - - client = get_mlflow_client() - exp = client.get_experiment_by_name(experiment_name) - if not exp: - print(f" - Experiment '{experiment_name}' not found.") - return [] - - # Order by start_time DESC to get newest first - runs = client.search_runs( - experiment_ids=[exp.experiment_id], - order_by=["start_time DESC"], - ) - if not runs: - return [] - - # Filter out parent runs - if exclude_parent_runs: - runs = [r for r in runs if r.data.tags.get("is_parent") != "true"] - - # Keep only the newest run per run name (first occurrence since sorted DESC) - seen_names = set() - unique_runs = [] - for run in runs: - run_name = run.info.run_name or run.info.run_id - if run_name not in seen_names: - seen_names.add(run_name) - unique_runs.append(run) - - # Collect all artifacts to download - download_tasks = [] - for run in unique_runs: - run_id = run.info.run_id - artifacts = client.list_artifacts(run_id) - for artifact in artifacts: - # Check if already exists locally (skip if not forcing) - local_file = output_dir / artifact.path - if not force and local_file.exists(): - continue - download_tasks.append((run_id, artifact.path)) - - if not download_tasks: - return [] - - # Download in parallel - downloaded = [] - - def download_one(task): - run_id, artifact_path = task - return client.download_artifacts(run_id, artifact_path, str(output_dir)) - - with ThreadPoolExecutor(max_workers=max_workers) as executor: - futures = {executor.submit(download_one, task): task for task in download_tasks} - 
for future in as_completed(futures): - try: - local_path = future.result() - downloaded.append(Path(local_path)) - except Exception as e: - task = futures[future] - print(f" ✗ Failed to download {task[1]}: {e}") - - return downloaded diff --git a/src/utilities/mlflow/logs.py b/src/utilities/mlflow/logs.py deleted file mode 100644 index 4b58cb6..0000000 --- a/src/utilities/mlflow/logs.py +++ /dev/null @@ -1,119 +0,0 @@ -"""Log uploading utilities for HPC jobs. - -Uploads stdout/stderr logs to MLflow after job completion. -""" - -import time -from pathlib import Path -from typing import Optional - -import mlflow - - -def upload_logs( - job_name: str, - log_dir: str = "logs", - experiment_name: str = "HPC-Experiment", - run_id: Optional[str] = None, -) -> bool: - """Upload job logs to MLflow. - - Parameters - ---------- - job_name : str - Job name (used to find log files). - log_dir : str - Directory containing log files. - experiment_name : str - MLflow experiment name (used if creating new run). - run_id : str, optional - Existing run ID to attach logs to. If not provided, - attempts to read from {log_dir}/{job_name}.runid file. - - Returns - ------- - bool - True if upload succeeded. - """ - log_path = Path(log_dir) - run_id_file = log_path / f"{job_name}.runid" - out_log = log_path / f"{job_name}.out" - err_log = log_path / f"{job_name}.err" - - # Give filesystem time to sync if job just finished - time.sleep(2) - - # Try to get run_id from file if not provided - if run_id is None and run_id_file.exists(): - try: - with open(run_id_file, "r") as f: - run_id = f.read().strip() - print(f"Found Run ID: {run_id}") - except Exception as e: - print(f"Error reading run ID file: {e}") - - try: - active_run = None - - if run_id: - active_run = mlflow.start_run(run_id=run_id, log_system_metrics=False) - else: - print("Run ID file not found. 
Creating new run for startup failure.") - - if mlflow.get_experiment_by_name(experiment_name) is None: - try: - mlflow.create_experiment(name=experiment_name) - except Exception: - pass # concurrent creation might fail - - mlflow.set_experiment(experiment_name) - active_run = mlflow.start_run(run_name=f"{job_name} (Startup Failure)") - mlflow.set_tag("status", "startup_failure") - - with active_run: - if out_log.exists(): - print(f"Uploading stdout: {out_log}") - mlflow.log_artifact(str(out_log), artifact_path="logs") - else: - print(f"Warning: stdout log not found at {out_log}") - - if err_log.exists(): - print(f"Uploading stderr: {err_log}") - mlflow.log_artifact(str(err_log), artifact_path="logs") - else: - print(f"Warning: stderr log not found at {err_log}") - - print("Log upload complete.") - return True - - except Exception as e: - print(f"Failed to upload logs to MLflow: {e}") - return False - - -def main(): - """CLI entry point for log uploading.""" - import argparse - - parser = argparse.ArgumentParser(description="Upload job logs to MLflow") - parser.add_argument("--job-name", type=str, required=True, help="Job name") - parser.add_argument( - "--log-dir", type=str, default="logs", help="Directory containing logs" - ) - parser.add_argument( - "--experiment-name", - type=str, - default="HPC-Experiment", - help="MLflow experiment name", - ) - args = parser.parse_args() - - upload_logs( - job_name=args.job_name, - log_dir=args.log_dir, - experiment_name=args.experiment_name, - ) - - -if __name__ == "__main__": - main() diff --git a/src/utilities/mlflow/upload_logs.py b/src/utilities/mlflow/upload_logs.py deleted file mode 100644 index 24a7885..0000000 --- a/src/utilities/mlflow/upload_logs.py +++ /dev/null @@ -1,105 +0,0 @@ -""" -Script to upload LSF logs to MLflow. 
- -This script scans a directory for *.runid files (created by the experiment runners), -reads the MLflow Run ID, finds the corresponding .out and .err files, and uploads -them as artifacts to that run. - -Processed files are moved to a 'processed' subdirectory. - -Usage: - uv run python src/utilities/mlflow/upload_logs.py --log-dir logs/lsf -""" - -import argparse -import shutil -from pathlib import Path -import mlflow -from utilities.mlflow.io import setup_mlflow_tracking - - -def upload_logs(log_dir: Path, dry_run: bool = False): - """ - Uploads LSF logs to MLflow. - - Parameters - ---------- - log_dir : Path - Directory containing .runid, .out, and .err files. - dry_run : bool - If True, does not perform upload or move files. - """ - log_dir = Path(log_dir) - if not log_dir.exists(): - print(f"Log directory not found: {log_dir}") - return - - processed_dir = log_dir / "processed" - if not dry_run: - processed_dir.mkdir(exist_ok=True) - - # Find all runid files - runid_files = list(log_dir.glob("*.runid")) - if not runid_files: - print(f"No .runid files found in {log_dir}") - return - - print(f"Found {len(runid_files)} pending log sets in {log_dir}...") - client = mlflow.tracking.MlflowClient() - - for runid_file in runid_files: - job_name = runid_file.stem - out_file = log_dir / f"{job_name}.out" - err_file = log_dir / f"{job_name}.err" - - # Check if log files exist - if not out_file.exists() or not err_file.exists(): - print(f" [SKIP] {job_name}: Missing .out or .err file.") - continue - - try: - # Read Run ID - with open(runid_file, "r") as f: - run_id = f.read().strip() - - print(f" [PROCESSING] {job_name} (Run ID: {run_id})") - - if not dry_run: - # Verify run exists - try: - client.get_run(run_id) # Just check if run exists - except Exception: - print(f" ! Run {run_id} not found in MLflow. 
Skipping.") - continue - - # Upload artifacts - print(f" Uploading {out_file.name}...") - client.log_artifact(run_id, str(out_file)) - - print(f" Uploading {err_file.name}...") - client.log_artifact(run_id, str(err_file)) - - # Move to processed - shutil.move(str(runid_file), str(processed_dir / runid_file.name)) - shutil.move(str(out_file), str(processed_dir / out_file.name)) - shutil.move(str(err_file), str(processed_dir / err_file.name)) - print(" Done.") - else: - print(f" (Dry Run) Would upload {out_file.name} and {err_file.name}") - - except Exception as e: - print(f" ! ERROR processing {job_name}: {e}") - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Upload LSF logs to MLflow") - parser.add_argument( - "--log-dir", type=str, default="logs/lsf", help="Directory to scan" - ) - parser.add_argument( - "--dry-run", action="store_true", help="Simulate without changes" - ) - args = parser.parse_args() - - setup_mlflow_tracking() - upload_logs(args.log_dir, args.dry_run) diff --git a/src/utilities/runners/__init__.py b/src/utilities/runners/__init__.py deleted file mode 100644 index 53647f0..0000000 --- a/src/utilities/runners/__init__.py +++ /dev/null @@ -1,25 +0,0 @@ -"""Script execution utilities. 
- -Provides functions for discovering and running scripts: -- discover_scripts: Find scripts by pattern in Experiments/ -- run_scripts_parallel: Run scripts concurrently -- run_scripts_sequential: Run scripts one at a time -""" - -from .scripts import ( - discover_scripts, - run_scripts_parallel, - run_scripts_sequential, - run_plot_scripts, - run_compute_scripts, - copy_to_report, -) - -__all__ = [ - "discover_scripts", - "run_scripts_parallel", - "run_scripts_sequential", - "run_plot_scripts", - "run_compute_scripts", - "copy_to_report", -] diff --git a/src/utilities/runners/scripts.py b/src/utilities/runners/scripts.py deleted file mode 100644 index 8968d22..0000000 --- a/src/utilities/runners/scripts.py +++ /dev/null @@ -1,248 +0,0 @@ -"""Script discovery and execution utilities. - -Provides parallel and sequential script execution with -configurable timeouts and progress reporting. -""" - -import shutil -import subprocess -from concurrent.futures import ThreadPoolExecutor, as_completed -from pathlib import Path -from typing import List, Tuple, Optional - -from ..config import get_repo_root - - -def discover_scripts(pattern: str, directory: str = "Experiments") -> List[Path]: - """Find scripts in a directory matching pattern. 
- - Parameters - ---------- - pattern : str - Pattern to match in script names (e.g., "plot", "compute") - directory : str, default "Experiments" - Directory to search in, relative to repo root - - Returns - ------- - list of Path - Sorted list of matching script paths - """ - repo_root = get_repo_root() - search_dir = repo_root / directory - - if not search_dir.exists(): - return [] - - scripts = [ - p - for p in search_dir.rglob("*.py") - if p.is_file() and pattern in p.name and p.name != "__init__.py" - ] - - return sorted(scripts) - - -def _run_single_script( - script: Path, - repo_root: Path, - timeout: int = 180, - interpreter: str = "uv run python", -) -> Tuple[Path, bool, Optional[str]]: - """Run a single script and return its result. - - Parameters - ---------- - script : Path - Path to the script - repo_root : Path - Repository root for relative path display - timeout : int - Timeout in seconds - interpreter : str - Command to run the script - - Returns - ------- - tuple - (display_path, success, error_message) - """ - display_path = script.relative_to(repo_root) - - try: - cmd = interpreter.split() + [str(script)] - result = subprocess.run( - cmd, - capture_output=True, - text=True, - timeout=timeout, - cwd=str(repo_root), - ) - - if result.returncode == 0: - return (display_path, True, None) - else: - error_msg = result.stderr[:200] if result.stderr else "" - return (display_path, False, f"exit {result.returncode}: {error_msg}") - - except subprocess.TimeoutExpired: - return (display_path, False, "timeout") - except Exception as e: - return (display_path, False, str(e)) - - -def run_scripts_parallel( - scripts: List[Path], - timeout: int = 180, - interpreter: str = "uv run python", - max_workers: int = None, -) -> Tuple[int, int]: - """Run scripts in parallel using ThreadPoolExecutor. 
- - Parameters - ---------- - scripts : list of Path - Scripts to run - timeout : int, default 180 - Timeout per script in seconds - interpreter : str, default "uv run python" - Command to run scripts - max_workers : int, optional - Maximum number of parallel workers - - Returns - ------- - tuple - (success_count, fail_count) - """ - if not scripts: - print(" No scripts to run") - return 0, 0 - - repo_root = get_repo_root() - print(f"\nRunning {len(scripts)} scripts in parallel...\n") - - success_count = 0 - fail_count = 0 - - with ThreadPoolExecutor(max_workers=max_workers) as executor: - future_to_script = { - executor.submit( - _run_single_script, script, repo_root, timeout, interpreter - ): script - for script in scripts - } - - for future in as_completed(future_to_script): - display_path, success, error_msg = future.result() - - if success: - print(f" ✓ {display_path}") - success_count += 1 - else: - print(f" ✗ {display_path} ({error_msg})") - fail_count += 1 - - print(f"\n Summary: {success_count} succeeded, {fail_count} failed\n") - return success_count, fail_count - - -def run_scripts_sequential( - scripts: List[Path], - timeout: int = 600, - interpreter: str = "uv run python", -) -> Tuple[int, int]: - """Run scripts sequentially. 
- - Parameters - ---------- - scripts : list of Path - Scripts to run - timeout : int, default 600 - Timeout per script in seconds - interpreter : str, default "uv run python" - Command to run scripts - - Returns - ------- - tuple - (success_count, fail_count) - """ - if not scripts: - print(" No scripts to run") - return 0, 0 - - repo_root = get_repo_root() - print(f"\nRunning {len(scripts)} scripts sequentially...\n") - - success_count = 0 - fail_count = 0 - - for script in scripts: - display_path = script.relative_to(repo_root) - print(f" → {display_path}...", end=" ", flush=True) - - _, success, error_msg = _run_single_script( - script, repo_root, timeout, interpreter - ) - - if success: - print("✓") - success_count += 1 - else: - print(f"✗ ({error_msg})") - fail_count += 1 - - print(f"\n Summary: {success_count} succeeded, {fail_count} failed\n") - return success_count, fail_count - - -def run_plot_scripts() -> Tuple[int, int]: - """Run all plot scripts in parallel.""" - scripts = discover_scripts("plot") - return run_scripts_parallel(scripts, timeout=180) - - -def run_compute_scripts() -> Tuple[int, int]: - """Run all compute scripts sequentially.""" - scripts = discover_scripts("compute") - return run_scripts_sequential(scripts, timeout=600) - - -def copy_to_report( - source_dir: str = "figures", - dest_dir: str = "docs/reports/TexReport/figures", -) -> bool: - """Copy a directory to the report location. 
- - Parameters - ---------- - source_dir : str - Source directory relative to repo root - dest_dir : str - Destination directory relative to repo root - - Returns - ------- - bool - True if successful - """ - repo_root = get_repo_root() - source = repo_root / source_dir - dest = repo_root / dest_dir - - print(f"\nCopying {source_dir}/ to {dest_dir}/...") - - if not source.exists(): - print(f" No {source_dir}/ directory found") - return False - - try: - if dest.exists(): - shutil.rmtree(dest) - shutil.copytree(source, dest) - print(f" ✓ Copied {source_dir}/ to {dest_dir}/") - return True - except Exception as e: - print(f" ✗ Failed to copy: {e}") - return False