feat: async/parallel execution with configurable concurrency

Parallelize LLM calls across minibatches to reduce wall-clock time. All domain ports (LLMPort, JudgePort, ProposerPort) are now async. Adapter implementations wrap synchronous DSPy calls with asyncio.to_thread. Judge calls run in parallel within a batch using asyncio.gather + semaphore. Evaluator parallelizes minibatch execution with configurable concurrency. Evolution loop and use case are fully async. Proposer stays sequential. Added --max-concurrency CLI flag and max_concurrency YAML config field. Added async_retry_with_backoff for async error handling. All 139 unit tests pass. Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-03-29 13:15:34 +00:00
parent e2d111ce5b
commit c92ca4a2b8
16 changed files with 297 additions and 159 deletions
--- a/src/prometheus/infrastructure/judge_adapter.py
+++ b/src/prometheus/infrastructure/judge_adapter.py
@@ -5,13 +5,15 @@ Implements the JudgePort via the DSPy OutputJudge module.
 """
 from __future__ import annotations

+import asyncio
 import logging
+from typing import Self

 import dspy

 from prometheus.domain.ports import JudgePort
 from prometheus.infrastructure.dspy_modules import OutputJudge
-from prometheus.infrastructure.retry import retry_with_backoff
+from prometheus.infrastructure.retry import async_retry_with_backoff

 logger = logging.getLogger(__name__)

@@ -21,6 +23,8 @@ class DSPyJudgeAdapter(JudgePort):

    Per-call isolation: a failure on one item returns a zero-score sentinel
    instead of crashing the whole batch.
+
+    Judge calls run in parallel (bounded by *max_concurrency*).
    """

    def __init__(
@@ -28,40 +32,60 @@ class DSPyJudgeAdapter(JudgePort):
        lm: dspy.LM,
        max_retries: int = 3,
        retry_delay_base: float = 1.0,
+        max_concurrency: int = 5,
    ) -> None:
        self._lm = lm
        self._judge = OutputJudge()
        self._max_retries = max_retries
        self._retry_delay_base = retry_delay_base
+        self._semaphore = asyncio.Semaphore(max_concurrency)

-    def judge_batch(
+    async def judge_batch(
        self,
        task_description: str,
        pairs: list[tuple[str, str]],
    ) -> list[tuple[float, str]]:
-        results: list[tuple[float, str]] = []
-        with dspy.context(lm=self._lm):
-            for input_text, output_text in pairs:
-                results.append(self._judge_single(task_description, input_text, output_text))
-        return results
+        tasks = [
+            self._judge_single_safe(task_description, input_text, output_text)
+            for input_text, output_text in pairs
+        ]
+        return list(await asyncio.gather(*tasks))

-    def _judge_single(
+    async def _judge_single_safe(
        self,
        task_description: str,
        input_text: str,
        output_text: str,
    ) -> tuple[float, str]:
-        try:
-            pred = retry_with_backoff(
-                lambda: self._judge(
-                    task_description=task_description,
-                    input_text=input_text,
-                    output_text=output_text,
-                ),
-                max_retries=self._max_retries,
-                retry_delay_base=self._retry_delay_base,
+        async with self._semaphore:
+            try:
+                return await self._judge_single(task_description, input_text, output_text)
+            except Exception as exc:
+                logger.warning("Judge call failed for input '%s…': %s", input_text[:40], exc)
+                return (0.0, f"[judge error: {exc}]")
+
+    async def _judge_single(
+        self,
+        task_description: str,
+        input_text: str,
+        output_text: str,
+    ) -> tuple[float, str]:
+        async def _call() -> tuple[float, str]:
+            pred = await asyncio.to_thread(
+                self._sync_judge, task_description, input_text, output_text,
            )
            return (pred.score, pred.feedback)
-        except Exception as exc:
-            logger.warning("Judge call failed for input '%s…': %s", input_text[:40], exc)
-            return (0.0, f"[judge error: {exc}]")
+
+        return await async_retry_with_backoff(
+            _call,
+            max_retries=self._max_retries,
+            retry_delay_base=self._retry_delay_base,
+        )
+
+    def _sync_judge(self, task_description: str, input_text: str, output_text: str):
+        with dspy.context(lm=self._lm):
+            return self._judge(
+                task_description=task_description,
+                input_text=input_text,
+                output_text=output_text,
+            )