Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions src/eval/general/eval.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/bin/bash
# Wrapper around evaluate.py for data-engineering agent runs.
#
# Usage: bash eval.sh [evaluate.py arguments...]
#
# Why this exists: the bind-mounted Python env at /opt/env contains the
# `vllm` CLI binary at /opt/env/local/bin/vllm, and run_task.sh injects
# that directory into PATH via `apptainer exec --env PATH=...`. However,
# the codex CLI runs every shell command through `bash -lc "..."` (login
# shell), which sources /etc/profile + ~/.bashrc and *overwrites* PATH
# with the container's defaults — stripping out /opt/env/local/bin. As a
# result the agent sees `vllm: command not found` and inspect_ai cannot
# spawn its local vLLM server.
#
# This wrapper re-asserts the bind-mounted env on PATH and forwards all
# arguments to evaluate.py. Agents should call `bash eval.sh ...` instead
# of `python3 evaluate.py ...` for self-evals.

# Prepend the env's bin directories. `${PATH:+:${PATH}}` appends ":$PATH"
# only when PATH is non-empty; a plain "...:${PATH}" with an empty or unset
# PATH would end in ":", and a zero-length PATH entry makes the shell search
# the current working directory for commands.
export PATH="/opt/env/local/bin:/opt/env/bin${PATH:+:${PATH}}"

# exec replaces this shell with python3, so the exit status the caller sees
# is evaluate.py's own. "$@" forwards every argument as a separate word.
exec python3 /home/ben/task/evaluate.py "$@"
7 changes: 7 additions & 0 deletions src/run_task.sh
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,13 @@ if [ "$POST_TRAIN_BENCH_PROMPT" = "data_eng_prompt" ]; then
# Stage the data-engineering helper scripts in the job's task directory.
# eval.sh is staged with them: codex's `bash -lc` overwrites PATH and strips
# /opt/env/local/bin, so calling `python3 evaluate.py` directly fails to
# find the bind-mounted `vllm` CLI. The wrapper re-asserts PATH before
# exec'ing evaluate.py. Agents should `bash eval.sh ...` for self-evals.
for _data_eng_asset in train_sft.py dataset_audit.py publish_experiment.py eval.sh; do
  cp "src/eval/general/${_data_eng_asset}" "${JOB_DIR}/task/"
done
# Mark the wrapper executable after it has been copied.
chmod +x "${JOB_DIR}/task/eval.sh"
# -p: create the experiments directory without failing if it already exists.
mkdir -p "${JOB_DIR}/task/experiments"
fi

Expand Down