feat: async/parallel execution with configurable concurrency
Parallelize LLM calls across minibatches to reduce wall-clock time. All domain ports (LLMPort, JudgePort, ProposerPort) are now async. Adapter implementations wrap synchronous DSPy calls with asyncio.to_thread. Judge calls run in parallel within a batch using asyncio.gather and a semaphore. The evaluator parallelizes minibatch execution with configurable concurrency. The evolution loop and use case are fully async. The proposer stays sequential. Added a --max-concurrency CLI flag and a max_concurrency YAML config field. Added async_retry_with_backoff for async error handling. All 139 unit tests pass.

Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
@@ -1,7 +1,9 @@
|
||||
"""Unit tests for the evolution loop — with full mocking."""
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import MagicMock, patch
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from prometheus.application.bootstrap import SyntheticBootstrap
|
||||
from prometheus.application.evaluator import PromptEvaluator
|
||||
@@ -10,14 +12,15 @@ from prometheus.domain.entities import EvalResult, Prompt, SyntheticExample, Tra
|
||||
|
||||
|
||||
class TestEvolutionLoop:
|
||||
def test_accepts_improvement(
|
||||
@pytest.mark.asyncio
|
||||
async def test_accepts_improvement(
|
||||
self,
|
||||
seed_prompt: Prompt,
|
||||
synthetic_pool: list[SyntheticExample],
|
||||
task_description: str,
|
||||
mock_llm_port: MagicMock,
|
||||
mock_judge_port: MagicMock,
|
||||
mock_proposer_port: MagicMock,
|
||||
mock_llm_port: AsyncMock,
|
||||
mock_judge_port: AsyncMock,
|
||||
mock_proposer_port: AsyncMock,
|
||||
) -> None:
|
||||
"""When the new prompt improves the score, the best candidate is updated."""
|
||||
evaluator = PromptEvaluator(mock_llm_port, mock_judge_port)
|
||||
@@ -45,7 +48,7 @@ class TestEvolutionLoop:
|
||||
feedbacks=["good"] * 5,
|
||||
trajectories=[],
|
||||
)
|
||||
evaluator.evaluate = MagicMock(side_effect=[initial_eval, old_eval, new_eval])
|
||||
evaluator.evaluate = AsyncMock(side_effect=[initial_eval, old_eval, new_eval])
|
||||
|
||||
loop = EvolutionLoop(
|
||||
evaluator=evaluator,
|
||||
@@ -55,19 +58,20 @@ class TestEvolutionLoop:
|
||||
minibatch_size=5,
|
||||
)
|
||||
with patch.object(loop, "_log"):
|
||||
state = loop.run(seed_prompt, synthetic_pool, task_description)
|
||||
state = await loop.run(seed_prompt, synthetic_pool, task_description)
|
||||
|
||||
assert state.best_candidate is not None
|
||||
assert state.best_candidate.best_score > 0
|
||||
|
||||
def test_rejects_regression(
|
||||
@pytest.mark.asyncio
|
||||
async def test_rejects_regression(
|
||||
self,
|
||||
seed_prompt: Prompt,
|
||||
synthetic_pool: list[SyntheticExample],
|
||||
task_description: str,
|
||||
mock_llm_port: MagicMock,
|
||||
mock_judge_port: MagicMock,
|
||||
mock_proposer_port: MagicMock,
|
||||
mock_llm_port: AsyncMock,
|
||||
mock_judge_port: AsyncMock,
|
||||
mock_proposer_port: AsyncMock,
|
||||
) -> None:
|
||||
"""When the new prompt degrades the score, the best candidate stays unchanged."""
|
||||
evaluator = PromptEvaluator(mock_llm_port, mock_judge_port)
|
||||
@@ -95,7 +99,7 @@ class TestEvolutionLoop:
|
||||
feedbacks=["bad"] * 5,
|
||||
trajectories=[],
|
||||
)
|
||||
evaluator.evaluate = MagicMock(side_effect=[initial_eval, old_eval, new_eval])
|
||||
evaluator.evaluate = AsyncMock(side_effect=[initial_eval, old_eval, new_eval])
|
||||
|
||||
loop = EvolutionLoop(
|
||||
evaluator=evaluator,
|
||||
@@ -105,19 +109,20 @@ class TestEvolutionLoop:
|
||||
minibatch_size=5,
|
||||
)
|
||||
with patch.object(loop, "_log"):
|
||||
state = loop.run(seed_prompt, synthetic_pool, task_description)
|
||||
state = await loop.run(seed_prompt, synthetic_pool, task_description)
|
||||
|
||||
assert state.best_candidate is not None
|
||||
assert state.best_candidate.prompt.text == seed_prompt.text
|
||||
|
||||
def test_skips_perfect_scores(
|
||||
@pytest.mark.asyncio
|
||||
async def test_skips_perfect_scores(
|
||||
self,
|
||||
seed_prompt: Prompt,
|
||||
synthetic_pool: list[SyntheticExample],
|
||||
task_description: str,
|
||||
mock_llm_port: MagicMock,
|
||||
mock_judge_port: MagicMock,
|
||||
mock_proposer_port: MagicMock,
|
||||
mock_llm_port: AsyncMock,
|
||||
mock_judge_port: AsyncMock,
|
||||
mock_proposer_port: AsyncMock,
|
||||
) -> None:
|
||||
"""When all scores are perfect, no proposition is made."""
|
||||
evaluator = PromptEvaluator(mock_llm_port, mock_judge_port)
|
||||
@@ -132,7 +137,7 @@ class TestEvolutionLoop:
|
||||
for i in range(5)
|
||||
],
|
||||
)
|
||||
evaluator.evaluate = MagicMock(return_value=perfect_eval)
|
||||
evaluator.evaluate = AsyncMock(return_value=perfect_eval)
|
||||
|
||||
loop = EvolutionLoop(
|
||||
evaluator=evaluator,
|
||||
@@ -142,6 +147,6 @@ class TestEvolutionLoop:
|
||||
minibatch_size=5,
|
||||
)
|
||||
with patch.object(loop, "_log"):
|
||||
loop.run(seed_prompt, synthetic_pool, task_description)
|
||||
await loop.run(seed_prompt, synthetic_pool, task_description)
|
||||
|
||||
mock_proposer_port.propose.assert_not_called()
|
||||
|
||||
Reference in New Issue
Block a user