diff --git a/kaievolve/config.py b/kaievolve/config.py
index fe0f479..dd1f89c 100644
--- a/kaievolve/config.py
+++ b/kaievolve/config.py
@@ -275,6 +275,14 @@ class PromptConfig:
     # migration so it gets enough shots to matter. None => fall back to the
     # database migration_interval.
     research_director_interval: Optional[int] = None
+    # Dedicated model for the director's (meta-reasoning) call. The director is a
+    # strategy/diagnosis task, so it benefits from a strong reasoning model rather
+    # than whatever the run roster samples. None => use the run roster
+    # (config.llm.models). When set, the director uses ONLY this model, inheriting
+    # connection settings (api_base/key/temperature/...) from the FIRST run-roster
+    # entry (weight is reset to 1.0). With an empty roster there is nothing to
+    # inherit, so the setting is ignored and a warning is logged.
+    research_director_model: Optional[str] = None
 
     # Strategy clustering + cluster bandit (Phase 3). Clusters programs by the
     # embedding of their HMRD summary into emergent strategies, then biases
diff --git a/kaievolve/research_director.py b/kaievolve/research_director.py
index 7fb1b78..af61f3b 100644
--- a/kaievolve/research_director.py
+++ b/kaievolve/research_director.py
@@ -96,11 +96,42 @@ def __init__(
         self._last_directive = ""
         self._best_at_last: Optional[float] = None
 
+    def _director_models(self):
+        """Return the model configs the director's ensemble is built from.
+
+        If prompt.research_director_model is set, the director uses ONLY that
+        model (a strong reasoning model for the meta task): a copy of the first
+        run-roster entry with its name swapped and weight reset to 1.0, so
+        connection settings are inherited and the roster is left untouched.
+        Otherwise the run roster (config.llm.models) is returned as-is.
+
+        A dedicated model with an empty roster has nothing to inherit from; the
+        (empty) roster is returned and a warning is logged rather than dropping
+        the setting silently.
+        """
+        models = self.config.llm.models
+        name = getattr(self.config.prompt, "research_director_model", None)
+        if not name:
+            return models
+        if not models:
+            import logging
+
+            logging.getLogger(__name__).warning(
+                "research_director_model=%r ignored: config.llm.models is empty, "
+                "so there are no connection settings to inherit",
+                name,
+            )
+            return models
+        import dataclasses
+
+        # replace() builds a new config object; the roster entry is not mutated.
+        return [dataclasses.replace(models[0], name=name, weight=1.0)]
+
     def _llm(self):
         if self._ensemble is None:
             from kaievolve.llm.ensemble import LLMEnsemble
 
-            self._ensemble = LLMEnsemble(self.config.llm.models)
+            self._ensemble = LLMEnsemble(self._director_models())
         return self._ensemble
 
     @staticmethod
diff --git a/tests/test_research_director.py b/tests/test_research_director.py
index 4a30f5c..18dc99f 100644
--- a/tests/test_research_director.py
+++ b/tests/test_research_director.py
@@ -113,5 +113,44 @@ def get_best_program(self):
             self.assertIsNone(asyncio.run(d.run(Boom(), 20)))
 
 
+class TestDirectorModelSelection(unittest.TestCase):
+    def _cfg(self, director_model):
+        from kaievolve.config import LLMModelConfig
+
+        cfg = Config()
+        cfg.llm.models = [
+            LLMModelConfig(name="fast/a", api_base="https://x/v1", temperature=0.7),
+            LLMModelConfig(name="fast/b"),
+        ]
+        cfg.prompt.research_director_model = director_model
+        return cfg
+
+    def test_dedicated_model_used_and_inherits_connection(self):
+        with TemporaryDirectory() as tmp:
+            cfg = self._cfg("strong/reasoner")
+            d = ResearchDirector(cfg, tmp)
+            ms = d._director_models()
+            self.assertEqual([m.name for m in ms], ["strong/reasoner"])  # only the dedicated model
+            # connection/sampling settings cloned from the first roster entry
+            self.assertEqual(ms[0].api_base, "https://x/v1")
+            self.assertEqual(ms[0].temperature, 0.7)
+            self.assertEqual(ms[0].weight, 1.0)
+            # the run roster itself must be left untouched
+            self.assertEqual([m.name for m in cfg.llm.models], ["fast/a", "fast/b"])
+
+    def test_default_falls_back_to_roster(self):
+        with TemporaryDirectory() as tmp:
+            d = ResearchDirector(self._cfg(None), tmp)
+            self.assertEqual([m.name for m in d._director_models()], ["fast/a", "fast/b"])
+
+    def test_dedicated_model_with_empty_roster_warns_and_falls_back(self):
+        with TemporaryDirectory() as tmp:
+            cfg = self._cfg("strong/reasoner")
+            cfg.llm.models = []
+            d = ResearchDirector(cfg, tmp)
+            with self.assertLogs("kaievolve.research_director", level="WARNING"):
+                self.assertEqual(d._director_models(), [])
+
+
 if __name__ == "__main__":
     unittest.main()