Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions scripts/graspa/isotherm_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,4 @@ pressure_unit: bar # bar, kPa, atm, or Pa

cutoff: 12.8
cycles: 1000
max_workers: 4 # parallel threads for CIF processing (null = auto)
9 changes: 6 additions & 3 deletions scripts/graspa/setup_isotherms.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,11 @@ def main():
print(f"CIF dir: {cfg['cif_dir']}")
print(f"Adsorbates: {cfg['adsorbates']}")
print(f"Temperatures: {cfg['temperatures']}")
print(f"Pressures: {len(pressures_pa)} points "
f"({cfg['pressures'][0]}-{cfg['pressures'][-1]} "
f"{cfg.get('pressure_unit', 'Pa')})")
print(
f"Pressures: {len(pressures_pa)} points "
f"({cfg['pressures'][0]}-{cfg['pressures'][-1]} "
f"{cfg.get('pressure_unit', 'Pa')})"
)

manifest = setup_batch(
cif_dir=cfg["cif_dir"],
Expand All @@ -56,6 +58,7 @@ def main():
pressures=pressures_pa,
cutoff=cfg.get("cutoff", 12.8),
n_cycle=cfg.get("cycles", 1000),
max_workers=cfg.get("max_workers"),
)

print(f"\nSet up {len(manifest)} simulations in {cfg['outdir']}")
Expand Down
9 changes: 8 additions & 1 deletion src/matkit/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,14 @@ def graspa_setup(cif, outdir, adsorbate, temp, pressure, cutoff, cycles):
)
@click.option("--cutoff", default=12.8, help="Cutoff radius in Angstrom.")
@click.option("--cycles", default=1000, help="Number of MC cycles.")
@click.option(
"--workers",
default=None,
type=int,
help="Max parallel threads for CIF processing.",
)
def graspa_batch_setup(
cif_dir, outdir, adsorbate, temp, pressure, cutoff, cycles
cif_dir, outdir, adsorbate, temp, pressure, cutoff, cycles, workers
):
"""Set up gRASPA simulations for all CIF x T x P."""
from matkit.graspa import setup_batch
Expand All @@ -114,6 +120,7 @@ def graspa_batch_setup(
pressures=list(pressure),
cutoff=cutoff,
n_cycle=cycles,
max_workers=workers,
)
click.echo(f"Set up {len(manifest)} simulations in {outdir}")
click.echo(f"Manifest written to {outdir}/simulations.jsonl")
Expand Down
102 changes: 78 additions & 24 deletions src/matkit/graspa/graspa.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

import json
from concurrent.futures import ThreadPoolExecutor
from itertools import product
from pathlib import Path
import shutil
Expand Down Expand Up @@ -163,6 +164,7 @@ def setup_simulation(
cutoff: float = 12.8,
n_cycle: int = 1000,
template_dir: str = "template",
cell_size: list[int] | None = None,
) -> bool:
"""Set up a gRASPA GCMC simulation.

Expand All @@ -179,6 +181,8 @@ def setup_simulation(
cutoff: Van der Waals cutoff radius in Angstrom.
n_cycle: Number of Monte Carlo cycles.
template_dir: Template subdirectory name under files/.
cell_size: Pre-computed unit cell dimensions [uc_x, uc_y, uc_z].
If provided, skips reading the CIF to calculate cell size.

Returns:
True on success.
Expand All @@ -203,9 +207,12 @@ def setup_simulation(
else:
shutil.copy2(item, outdir)

# Read CIF to get cell size
atoms = ase_read(cifpath)
uc_x, uc_y, uc_z = calculate_cell_size(atoms)
# Use pre-computed cell size or read CIF
if cell_size is not None:
uc_x, uc_y, uc_z = cell_size
else:
atoms = ase_read(cifpath)
uc_x, uc_y, uc_z = calculate_cell_size(atoms)

# Read template and replace placeholders
input_path = outdir / "simulation.input"
Expand Down Expand Up @@ -236,6 +243,50 @@ def setup_simulation(
return True


def _setup_single_cif(
cif: Path,
out_path: Path,
adsorbates: list[dict],
temperatures: list[float],
pressures: list[float],
cutoff: float,
n_cycle: int,
template_dir: str,
) -> list[dict]:
"""Set up all T x P simulations for a single CIF file.

Reads the CIF once to compute cell size, then creates a simulation
directory for each (temperature, pressure) combination.
"""
atoms = ase_read(cif)
cell_size = calculate_cell_size(atoms)

entries = []
for temp, pres in product(temperatures, pressures):
sim_dir = out_path / cif.stem / f"T{temp}_P{pres:g}"
setup_simulation(
cif=str(cif),
outpath=str(sim_dir),
adsorbates=adsorbates,
temperature=temp,
pressure=pres,
cutoff=cutoff,
n_cycle=n_cycle,
template_dir=template_dir,
cell_size=cell_size,
)
entries.append(
{
"sim_dir": str(sim_dir),
"cif": cif.name,
"temperature": temp,
"pressure": pres,
"adsorbates": [ad["MoleculeName"] for ad in adsorbates],
}
)
return entries


def setup_batch(
cif_dir: str,
outpath: str,
Expand All @@ -245,11 +296,15 @@ def setup_batch(
cutoff: float = 12.8,
n_cycle: int = 1000,
template_dir: str = "template",
max_workers: int | None = None,
) -> list[dict]:
"""Set up gRASPA simulations for all CIF x T x P.

Discovers all .cif files in cif_dir and creates a simulation directory
for each (CIF, temperature, pressure) combination using setup_simulation().
for each (CIF, temperature, pressure) combination. Each CIF is read
once to compute cell size, then all T x P combinations reuse the
cached result. CIFs are processed in parallel using threads.

Writes a simulations.jsonl manifest to outpath.

Args:
Expand All @@ -261,6 +316,8 @@ def setup_batch(
cutoff: Van der Waals cutoff radius in Angstrom.
n_cycle: Number of Monte Carlo cycles.
template_dir: Template subdirectory name under files/.
max_workers: Max threads for parallel CIF processing.
Defaults to None (lets ThreadPoolExecutor choose).

Returns:
List of manifest dicts, each with keys: sim_dir, cif,
Expand All @@ -280,26 +337,23 @@ def setup_batch(
raise ValueError(f"No .cif files found in {cif_dir}")

manifest = []
for cif, temp, pres in product(cif_files, temperatures, pressures):
sim_dir = out_path / cif.stem / f"T{temp}_P{pres:g}"
setup_simulation(
cif=str(cif),
outpath=str(sim_dir),
adsorbates=adsorbates,
temperature=temp,
pressure=pres,
cutoff=cutoff,
n_cycle=n_cycle,
template_dir=template_dir,
)
entry = {
"sim_dir": str(sim_dir),
"cif": cif.name,
"temperature": temp,
"pressure": pres,
"adsorbates": [ad["MoleculeName"] for ad in adsorbates],
}
manifest.append(entry)
with ThreadPoolExecutor(max_workers=max_workers) as pool:
futures = [
pool.submit(
_setup_single_cif,
cif,
out_path,
adsorbates,
temperatures,
pressures,
cutoff,
n_cycle,
template_dir,
)
for cif in cif_files
]
for future in futures:
manifest.extend(future.result())

manifest_path = out_path / "simulations.jsonl"
with manifest_path.open("w") as f:
Expand Down
Loading