Files
Prompt-optimizer/tests/integration/test_full_pipeline.py
FullStackDev a5bf2ad59c feat: v0.2.0 sprint — ground truth eval, crossover/mutation, checkpointing, similarity guards, dataset loader, CLI commands, extended test coverage
Aggregates all v0.2.0 sprint work (GARAA-30 through GARAA-40) and fixes
2 integration tests that broke when the codebase went async (DSPyLLMAdapter
and full pipeline tests now properly await coroutines).

277 tests pass (260 unit + 17 integration).

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-03-29 19:13:50 +00:00

78 lines
2.8 KiB
Python

"""End-to-end pipeline test with mocked LLM calls."""
from __future__ import annotations
from unittest.mock import AsyncMock, MagicMock
import pytest
from prometheus.application.bootstrap import SyntheticBootstrap
from prometheus.application.dto import OptimizationConfig
from prometheus.application.evaluator import PromptEvaluator
from prometheus.application.use_cases import OptimizePromptUseCase
from prometheus.domain.entities import EvalResult, Prompt, SyntheticExample, Trajectory
from prometheus.domain.ports import JudgePort, LLMPort, ProposerPort
def _make_eval(scores: list[float]) -> EvalResult:
    """Build an ``EvalResult`` fixture from a list of scores.

    Each score is paired with the placeholder feedback ``"feedback"`` and a
    ``Trajectory`` constructed positionally from ``"in<i>"``, ``"out<i>"``,
    the score itself, ``"feedback"`` and ``"prompt"``, where ``<i>`` is the
    score's index in ``scores``.
    """
    placeholder_feedbacks = ["feedback" for _ in scores]

    per_score_trajectories = []
    for index, score in enumerate(scores):
        per_score_trajectories.append(
            Trajectory(f"in{index}", f"out{index}", score, "feedback", "prompt")
        )

    return EvalResult(
        scores=scores,
        feedbacks=placeholder_feedbacks,
        trajectories=per_score_trajectories,
    )
class TestFullPipeline:
    """Integration test of ``OptimizePromptUseCase`` wired with mocked ports."""

    @pytest.mark.asyncio
    async def test_pipeline_produces_result(self) -> None:
        """Run the use case end to end on mocks and check the reported result."""
        iterations = 5
        minibatch = 5
        synthetic_count = 20

        # --- Mocked ports -------------------------------------------------
        llm_port = AsyncMock(spec=LLMPort)
        llm_port.execute.return_value = "mock response"

        judge_port = MagicMock(spec=JudgePort)
        judge_port.judge_batch.return_value = [(0.5, "ok")] * minibatch

        proposer_port = AsyncMock(spec=ProposerPort)
        proposer_port.propose.return_value = Prompt(text="Improved prompt")

        # --- Scripted evaluation results ----------------------------------
        # First the (low) initial seed eval, then for every iteration a
        # "current" eval followed by a higher "new" eval (accepted).
        scripted_evals = [_make_eval([0.3] * minibatch)]
        for _ in range(iterations):
            scripted_evals.extend(
                (
                    _make_eval([0.4] * minibatch),  # current eval
                    _make_eval([0.6] * minibatch),  # new eval (accepted)
                )
            )

        # The real evaluator is constructed, but its ``evaluate`` coroutine is
        # swapped for a mock that replays the scripted results in order.
        evaluator = PromptEvaluator(llm_port, judge_port)
        evaluator.evaluate = AsyncMock(side_effect=scripted_evals)

        # --- Synthetic bootstrap ------------------------------------------
        input_generator = MagicMock()
        synthetic_examples = []
        for i in range(synthetic_count):
            synthetic_examples.append(
                SyntheticExample(input_text=f"synth input {i}", id=i)
            )
        input_generator.generate_inputs.return_value = synthetic_examples
        bootstrap = SyntheticBootstrap(generator=input_generator, seed=42)

        # --- Exercise -----------------------------------------------------
        config = OptimizationConfig(
            seed_prompt="Answer questions.",
            task_description="Answer questions accurately.",
            max_iterations=iterations,
            n_synthetic_inputs=synthetic_count,
            minibatch_size=minibatch,
            seed=42,
        )
        use_case = OptimizePromptUseCase(
            evaluator=evaluator,
            proposer=proposer_port,
            bootstrap=bootstrap,
        )
        outcome = await use_case.execute(config)

        # --- Verify -------------------------------------------------------
        assert outcome.initial_prompt == "Answer questions."
        assert outcome.optimized_prompt == "Improved prompt"
        assert outcome.iterations_used == 5
        assert outcome.total_llm_calls > 0
        assert outcome.final_score > outcome.initial_score