Aggregates all v0.2.0 sprint work (GARAA-30 through GARAA-40) and fixes 2 integration tests that broke when the codebase went async (DSPyLLMAdapter and full pipeline tests now properly await coroutines). 277 tests pass (260 unit + 17 integration). Co-Authored-By: Paperclip <noreply@paperclip.ing>
234 lines
7.4 KiB
Python
234 lines
7.4 KiB
Python
"""Unit tests for OptimizePromptUseCase — direct orchestration tests."""
|
|
from __future__ import annotations
|
|
|
|
from unittest.mock import AsyncMock, MagicMock, patch
|
|
|
|
import pytest
|
|
|
|
from prometheus.application.bootstrap import SyntheticBootstrap
|
|
from prometheus.application.dto import OptimizationConfig, OptimizationResult
|
|
from prometheus.application.evaluator import PromptEvaluator
|
|
from prometheus.application.evolution import EvolutionLoop
|
|
from prometheus.application.use_cases import OptimizePromptUseCase
|
|
from prometheus.domain.entities import (
|
|
Candidate,
|
|
EvalResult,
|
|
OptimizationState,
|
|
Prompt,
|
|
SyntheticExample,
|
|
Trajectory,
|
|
)
|
|
|
|
|
|
def _make_eval(scores: list[float]) -> EvalResult:
    """Build an ``EvalResult`` holding one placeholder entry per score.

    Args:
        scores: Scores to embed in the result; one feedback string and one
            ``Trajectory`` are generated for each of them.

    Returns:
        An ``EvalResult`` whose ``feedbacks`` are all the literal
        ``"feedback"`` and whose trajectories are numbered ``in0/out0``,
        ``in1/out1``, ... in the order of ``scores``.
    """
    placeholder_feedbacks = ["feedback"] * len(scores)

    trajectories = []
    for index, score in enumerate(scores):
        # Positional order is kept exactly as the Trajectory constructor is
        # called elsewhere in this module.
        trajectories.append(
            Trajectory(f"in{index}", f"out{index}", score, "feedback", "prompt")
        )

    return EvalResult(
        scores=scores,
        feedbacks=placeholder_feedbacks,
        trajectories=trajectories,
    )
|
|
|
|
|
|
def _make_state(
    iterations: int = 3,
    initial_score: float = 0.3,
    final_score: float = 0.8,
    accepted: bool = True,
) -> OptimizationState:
    """Build a canned ``OptimizationState`` to use as a stubbed loop result.

    Args:
        iterations: Number of history entries to generate; also stored as the
            state's ``iteration`` and, when ``accepted``, as the best
            candidate's ``generation``.
        initial_score: ``best_score`` of the seed candidate and the
            ``old_score`` recorded in every history entry.
        final_score: ``best_score`` of the best candidate and the
            ``new_score`` recorded in every history entry.
        accepted: When true, the best candidate is a distinct "optimized"
            prompt and every history event is ``"accepted"``; otherwise the
            best candidate reuses the "seed" text at generation 0, only the
            seed is listed in ``candidates``, and every event is
            ``"rejected"``.

    Returns:
        The populated ``OptimizationState``.
    """
    seed = Candidate(prompt=Prompt(text="seed"), best_score=initial_score, generation=0)
    best = Candidate(
        prompt=Prompt(text="optimized" if accepted else "seed"),
        best_score=final_score,
        generation=iterations if accepted else 0,
    )

    # The event label does not depend on the iteration, so compute it once.
    event = "accepted" if accepted else "rejected"

    # Record the scores the caller asked for instead of fixed 0.3 / 0.8
    # literals, so the history always agrees with the candidates above.
    history = [
        {
            "iteration": i,
            "event": event,
            "old_score": initial_score,
            "new_score": final_score,
        }
        for i in range(1, iterations + 1)
    ]

    return OptimizationState(
        iteration=iterations,
        best_candidate=best,
        candidates=[seed, best] if accepted else [seed],
        # NOTE(review): the 11-per-iteration + 10 formula presumably mirrors
        # the real loop's LLM-call accounting -- confirm against EvolutionLoop.
        total_llm_calls=iterations * 11 + 10,
        history=history,
    )
|
|
|
|
|
|
class TestOptimizePromptUseCaseExecute:
    """Exercise ``OptimizePromptUseCase.execute`` with mocked collaborators.

    ``EvolutionLoop.run`` is patched in every test so that only the
    orchestration performed by ``execute`` is under test.
    """

    @pytest.fixture
    def mock_evaluator(self) -> MagicMock:
        """Provide a mock restricted to the ``PromptEvaluator`` interface."""
        evaluator = MagicMock(spec=PromptEvaluator)
        return evaluator

    @pytest.fixture
    def mock_proposer(self) -> MagicMock:
        """Provide an unconstrained mock standing in for the proposer."""
        proposer = MagicMock()
        return proposer

    @pytest.fixture
    def mock_bootstrap(self) -> MagicMock:
        """Provide a mock restricted to the ``SyntheticBootstrap`` interface."""
        bootstrap = MagicMock(spec=SyntheticBootstrap)
        return bootstrap

    @pytest.fixture
    def use_case(
        self,
        mock_evaluator: MagicMock,
        mock_proposer: MagicMock,
        mock_bootstrap: MagicMock,
    ) -> OptimizePromptUseCase:
        """Assemble the use case under test from the three mocked collaborators."""
        collaborators = {
            "evaluator": mock_evaluator,
            "proposer": mock_proposer,
            "bootstrap": mock_bootstrap,
        }
        return OptimizePromptUseCase(**collaborators)

    @pytest.fixture
    def config(self) -> OptimizationConfig:
        """Provide the fixed optimization settings shared by all tests."""
        settings = OptimizationConfig(
            seed_prompt="Answer the question.",
            task_description="Q&A task",
            max_iterations=5,
            n_synthetic_inputs=20,
            minibatch_size=5,
            seed=42,
        )
        return settings

    @pytest.mark.asyncio
    async def test_returns_optimization_result(
        self,
        use_case: OptimizePromptUseCase,
        mock_bootstrap: MagicMock,
        config: OptimizationConfig,
    ) -> None:
        """``execute`` yields an ``OptimizationResult`` reflecting the loop state."""
        pool = [SyntheticExample(input_text=f"q{i}", id=i) for i in range(20)]
        mock_bootstrap.run.return_value = pool
        canned_state = _make_state(iterations=3, initial_score=0.3, final_score=0.9)

        with patch.object(EvolutionLoop, "run", return_value=canned_state):
            outcome = await use_case.execute(config)

        assert isinstance(outcome, OptimizationResult)
        assert outcome.initial_prompt == "Answer the question."
        assert outcome.final_score == 0.9
        assert outcome.improvement == pytest.approx(0.6)

    @pytest.mark.asyncio
    async def test_bootstrap_called_with_config_params(
        self,
        use_case: OptimizePromptUseCase,
        mock_bootstrap: MagicMock,
        config: OptimizationConfig,
    ) -> None:
        """The bootstrap receives the task description and example count from config."""
        mock_bootstrap.run.return_value = []
        canned_state = _make_state()

        with patch.object(EvolutionLoop, "run", return_value=canned_state):
            await use_case.execute(config)

        expected_kwargs = {"task_description": "Q&A task", "n_examples": 20}
        mock_bootstrap.run.assert_called_once_with(**expected_kwargs)

    @pytest.mark.asyncio
    async def test_evolution_loop_configured_from_config(
        self,
        use_case: OptimizePromptUseCase,
        mock_bootstrap: MagicMock,
        config: OptimizationConfig,
    ) -> None:
        """``EvolutionLoop.run`` gets the seed prompt, the pool and the task text."""
        mock_bootstrap.run.return_value = []
        canned_state = _make_state()

        with patch.object(EvolutionLoop, "run", return_value=canned_state) as mock_run:
            await use_case.execute(config)

        # Inspect the positional arguments of the single recorded call.
        mock_run.assert_called_once()
        positional = mock_run.call_args.args

        seed_prompt = positional[0]
        assert seed_prompt.text == "Answer the question."

        synthetic_pool = positional[1]
        assert len(synthetic_pool) == 0  # the bootstrap mock returned nothing

        assert positional[2] == "Q&A task"

    @pytest.mark.asyncio
    async def test_total_llm_calls_includes_bootstrap_call(
        self,
        use_case: OptimizePromptUseCase,
        mock_bootstrap: MagicMock,
        config: OptimizationConfig,
    ) -> None:
        """The reported call count is the loop's count plus one for the bootstrap."""
        mock_bootstrap.run.return_value = []
        canned_state = _make_state(iterations=3)
        # Capture the expectation before execute() runs: loop total + 1.
        expected_calls = canned_state.total_llm_calls + 1

        with patch.object(EvolutionLoop, "run", return_value=canned_state):
            outcome = await use_case.execute(config)

        assert outcome.total_llm_calls == expected_calls

    @pytest.mark.asyncio
    async def test_no_candidates_fallback(
        self,
        use_case: OptimizePromptUseCase,
        mock_bootstrap: MagicMock,
        config: OptimizationConfig,
    ) -> None:
        """With no candidates the result falls back to the seed prompt and zero scores."""
        pool = [SyntheticExample(input_text=f"q{i}", id=i) for i in range(20)]
        mock_bootstrap.run.return_value = pool
        empty_state = OptimizationState(
            iteration=0,
            best_candidate=None,
            candidates=[],
            total_llm_calls=0,
        )

        with patch.object(EvolutionLoop, "run", return_value=empty_state):
            outcome = await use_case.execute(config)

        assert outcome.optimized_prompt == "Answer the question."
        assert outcome.initial_score == 0.0
        assert outcome.final_score == 0.0
        assert outcome.improvement == 0.0

    @pytest.mark.asyncio
    async def test_iterations_used_matches_state(
        self,
        use_case: OptimizePromptUseCase,
        mock_bootstrap: MagicMock,
        config: OptimizationConfig,
    ) -> None:
        """``iterations_used`` mirrors the iteration count of the loop state."""
        mock_bootstrap.run.return_value = []
        canned_state = _make_state(iterations=7)

        with patch.object(EvolutionLoop, "run", return_value=canned_state):
            outcome = await use_case.execute(config)

        assert outcome.iterations_used == 7

    @pytest.mark.asyncio
    async def test_history_passed_through(
        self,
        use_case: OptimizePromptUseCase,
        mock_bootstrap: MagicMock,
        config: OptimizationConfig,
    ) -> None:
        """The loop state's history is surfaced unchanged on the result."""
        mock_bootstrap.run.return_value = []
        expected_history = [
            {"iteration": 1, "event": "accepted"},
            {"iteration": 2, "event": "rejected"},
        ]
        canned_state = _make_state()
        # Replace the generated history with a recognisable custom one.
        canned_state.history = expected_history

        with patch.object(EvolutionLoop, "run", return_value=canned_state):
            outcome = await use_case.execute(config)

        assert outcome.history == expected_history
|