Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions kaievolve/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,12 @@ class PromptConfig:
# migration so it gets enough shots to matter. None => fall back to the
# database migration_interval.
research_director_interval: Optional[int] = None
# Dedicated model for the director's (meta-reasoning) call. The director is a
# strategy/diagnosis task, so it benefits from a strong reasoning model rather
# than whatever the run roster samples. None => use the run roster
# (config.llm.models). When set, the director uses ONLY this model, inheriting
# connection settings (api_base/key/temperature/...) from the run roster.
research_director_model: Optional[str] = None

# Strategy clustering + cluster bandit (Phase 3). Clusters programs by the
# embedding of their HMRD summary into emergent strategies, then biases
Expand Down
15 changes: 14 additions & 1 deletion kaievolve/research_director.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,11 +96,24 @@ def __init__(
self._last_directive = ""
self._best_at_last: Optional[float] = None

def _director_models(self):
"""Models the director calls with. If prompt.research_director_model is
set, the director uses ONLY that model (a strong reasoning model for the
meta task), cloning connection settings from the run roster; otherwise it
uses the run roster (config.llm.models)."""
models = self.config.llm.models
name = getattr(self.config.prompt, "research_director_model", None)
if name and models:
import dataclasses

return [dataclasses.replace(models[0], name=name, weight=1.0)]
return models

def _llm(self):
if self._ensemble is None:
from kaievolve.llm.ensemble import LLMEnsemble

self._ensemble = LLMEnsemble(self.config.llm.models)
self._ensemble = LLMEnsemble(self._director_models())
return self._ensemble

@staticmethod
Expand Down
27 changes: 27 additions & 0 deletions tests/test_research_director.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,5 +113,32 @@ def get_best_program(self):
self.assertIsNone(asyncio.run(d.run(Boom(), 20)))


class TestDirectorModelSelection(unittest.TestCase):
    """Checks which model configs ``ResearchDirector._director_models`` returns."""

    def _cfg(self, director_model):
        """Build a Config with a two-entry roster and the given director model."""
        from kaievolve.config import LLMModelConfig

        config = Config()
        first = LLMModelConfig(name="fast/a", api_base="https://x/v1", temperature=0.7)
        second = LLMModelConfig(name="fast/b")
        config.llm.models = [first, second]
        config.prompt.research_director_model = director_model
        return config

    def test_dedicated_model_used_and_inherits_connection(self):
        """A configured director model replaces the roster and keeps api_base."""
        with TemporaryDirectory() as workdir:
            director = ResearchDirector(self._cfg("strong/reasoner"), workdir)
            selected = director._director_models()
            selected_names = [model.name for model in selected]
            # Only the dedicated model is returned.
            self.assertEqual(selected_names, ["strong/reasoner"])
            # Connection settings are cloned from the first roster entry.
            self.assertEqual(selected[0].api_base, "https://x/v1")

    def test_default_falls_back_to_roster(self):
        """With no director model configured, the run roster is returned."""
        with TemporaryDirectory() as workdir:
            director = ResearchDirector(self._cfg(None), workdir)
            roster_names = [model.name for model in director._director_models()]
            self.assertEqual(roster_names, ["fast/a", "fast/b"])


# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
Loading