feat: async/parallel execution with configurable concurrency

Parallelize LLM calls across minibatches to reduce wall-clock time.
All domain ports (LLMPort, JudgePort, ProposerPort) are now async.
Adapter implementations wrap synchronous DSPy calls with asyncio.to_thread.
Judge calls within a batch run in parallel via asyncio.gather, with a semaphore limiting concurrency.
Evaluator parallelizes minibatch execution with configurable concurrency.
The evolution loop and the use case are now fully async; the proposer stays sequential.
Added --max-concurrency CLI flag and max_concurrency YAML config field.
Added async_retry_with_backoff for async error handling.
All 139 unit tests pass.

Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
FullStackDev
2026-03-29 13:15:34 +00:00
parent e2d111ce5b
commit c92ca4a2b8
16 changed files with 297 additions and 159 deletions

View File

@@ -1,7 +1,9 @@
"""Unit tests for the evolution loop — with full mocking."""
from __future__ import annotations
from unittest.mock import MagicMock, patch
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from prometheus.application.bootstrap import SyntheticBootstrap
from prometheus.application.evaluator import PromptEvaluator
@@ -10,14 +12,15 @@ from prometheus.domain.entities import EvalResult, Prompt, SyntheticExample, Tra
class TestEvolutionLoop:
def test_accepts_improvement(
@pytest.mark.asyncio
async def test_accepts_improvement(
self,
seed_prompt: Prompt,
synthetic_pool: list[SyntheticExample],
task_description: str,
mock_llm_port: MagicMock,
mock_judge_port: MagicMock,
mock_proposer_port: MagicMock,
mock_llm_port: AsyncMock,
mock_judge_port: AsyncMock,
mock_proposer_port: AsyncMock,
) -> None:
"""When the new prompt improves the score, the best candidate is updated."""
evaluator = PromptEvaluator(mock_llm_port, mock_judge_port)
@@ -45,7 +48,7 @@ class TestEvolutionLoop:
feedbacks=["good"] * 5,
trajectories=[],
)
evaluator.evaluate = MagicMock(side_effect=[initial_eval, old_eval, new_eval])
evaluator.evaluate = AsyncMock(side_effect=[initial_eval, old_eval, new_eval])
loop = EvolutionLoop(
evaluator=evaluator,
@@ -55,19 +58,20 @@ class TestEvolutionLoop:
minibatch_size=5,
)
with patch.object(loop, "_log"):
state = loop.run(seed_prompt, synthetic_pool, task_description)
state = await loop.run(seed_prompt, synthetic_pool, task_description)
assert state.best_candidate is not None
assert state.best_candidate.best_score > 0
def test_rejects_regression(
@pytest.mark.asyncio
async def test_rejects_regression(
self,
seed_prompt: Prompt,
synthetic_pool: list[SyntheticExample],
task_description: str,
mock_llm_port: MagicMock,
mock_judge_port: MagicMock,
mock_proposer_port: MagicMock,
mock_llm_port: AsyncMock,
mock_judge_port: AsyncMock,
mock_proposer_port: AsyncMock,
) -> None:
"""When the new prompt degrades the score, the best candidate stays unchanged."""
evaluator = PromptEvaluator(mock_llm_port, mock_judge_port)
@@ -95,7 +99,7 @@ class TestEvolutionLoop:
feedbacks=["bad"] * 5,
trajectories=[],
)
evaluator.evaluate = MagicMock(side_effect=[initial_eval, old_eval, new_eval])
evaluator.evaluate = AsyncMock(side_effect=[initial_eval, old_eval, new_eval])
loop = EvolutionLoop(
evaluator=evaluator,
@@ -105,19 +109,20 @@ class TestEvolutionLoop:
minibatch_size=5,
)
with patch.object(loop, "_log"):
state = loop.run(seed_prompt, synthetic_pool, task_description)
state = await loop.run(seed_prompt, synthetic_pool, task_description)
assert state.best_candidate is not None
assert state.best_candidate.prompt.text == seed_prompt.text
def test_skips_perfect_scores(
@pytest.mark.asyncio
async def test_skips_perfect_scores(
self,
seed_prompt: Prompt,
synthetic_pool: list[SyntheticExample],
task_description: str,
mock_llm_port: MagicMock,
mock_judge_port: MagicMock,
mock_proposer_port: MagicMock,
mock_llm_port: AsyncMock,
mock_judge_port: AsyncMock,
mock_proposer_port: AsyncMock,
) -> None:
"""When all scores are perfect, no proposition is made."""
evaluator = PromptEvaluator(mock_llm_port, mock_judge_port)
@@ -132,7 +137,7 @@ class TestEvolutionLoop:
for i in range(5)
],
)
evaluator.evaluate = MagicMock(return_value=perfect_eval)
evaluator.evaluate = AsyncMock(return_value=perfect_eval)
loop = EvolutionLoop(
evaluator=evaluator,
@@ -142,6 +147,6 @@ class TestEvolutionLoop:
minibatch_size=5,
)
with patch.object(loop, "_log"):
loop.run(seed_prompt, synthetic_pool, task_description)
await loop.run(seed_prompt, synthetic_pool, task_description)
mock_proposer_port.propose.assert_not_called()