fix: multi-model routing — each adapter uses own dspy.LM instance

- DSPyLLMAdapter now accepts a dspy.LM instead of a model string and uses dspy.context(lm=...)
- DSPyJudgeAdapter, DSPyProposerAdapter, and DSPySyntheticAdapter each accept and use their own LM
- OptimizationConfig gains per-model api_base/api_key_env override fields
- cli/app.py creates a separate dspy.LM per adapter with per-model overrides
- New unit tests verify that each adapter isolates its LM from the global config

Fixes Bug #1 (multi-model config not wired) and Bug #2 (DSPyLLMAdapter ignores model param).

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-03-29 12:31:48 +00:00
parent 837a44970f
commit f516ca4be6
8 changed files with 306 additions and 41 deletions
--- a/src/prometheus/infrastructure/judge_adapter.py
+++ b/src/prometheus/infrastructure/judge_adapter.py
@@ -5,6 +5,8 @@ Implements the JudgePort via the DSPy OutputJudge module.
 """
 from __future__ import annotations

+import dspy
+
 from prometheus.domain.ports import JudgePort
 from prometheus.infrastructure.dspy_modules import OutputJudge

@@ -15,7 +17,8 @@ class DSPyJudgeAdapter(JudgePort):
    Sequential for MVP. Future: parallelize via dspy.Parallel.
    """

-    def __init__(self) -> None:
+    def __init__(self, lm: dspy.LM) -> None:
+        self._lm = lm
        self._judge = OutputJudge()

    def judge_batch(
@@ -24,11 +27,12 @@ class DSPyJudgeAdapter(JudgePort):
        pairs: list[tuple[str, str]],
    ) -> list[tuple[float, str]]:
        results: list[tuple[float, str]] = []
-        for input_text, output_text in pairs:
-            pred = self._judge(
-                task_description=task_description,
-                input_text=input_text,
-                output_text=output_text,
-            )
-            results.append((pred.score, pred.feedback))
+        with dspy.context(lm=self._lm):
+            for input_text, output_text in pairs:
+                pred = self._judge(
+                    task_description=task_description,
+                    input_text=input_text,
+                    output_text=output_text,
+                )
+                results.append((pred.score, pred.feedback))
        return results