feat: v0.2.0 sprint — ground truth eval, crossover/mutation, checkpointing, similarity guards, dataset loader, CLI commands, extended test coverage
Aggregates all v0.2.0 sprint work (GARAA-30 through GARAA-40) and fixes two integration tests that broke when the codebase went async: the DSPyLLMAdapter and full-pipeline tests now properly await coroutines. 277 tests pass (260 unit + 17 integration).

Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
@@ -1,7 +1,9 @@
|
||||
"""End-to-end pipeline test with mocked LLM calls."""
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import MagicMock
|
||||
from unittest.mock import AsyncMock, MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from prometheus.application.bootstrap import SyntheticBootstrap
|
||||
from prometheus.application.dto import OptimizationConfig
|
||||
@@ -23,9 +25,10 @@ def _make_eval(scores: list[float]) -> EvalResult:
|
||||
|
||||
|
||||
class TestFullPipeline:
|
||||
def test_pipeline_produces_result(self) -> None:
|
||||
@pytest.mark.asyncio
|
||||
async def test_pipeline_produces_result(self) -> None:
|
||||
"""Full pipeline with mocked ports produces an OptimizationResult."""
|
||||
mock_llm = MagicMock(spec=LLMPort)
|
||||
mock_llm = AsyncMock(spec=LLMPort)
|
||||
mock_llm.execute.return_value = "mock response"
|
||||
|
||||
mock_judge = MagicMock(spec=JudgePort)
|
||||
@@ -38,11 +41,11 @@ class TestFullPipeline:
|
||||
eval_sequence.append(_make_eval([0.6, 0.6, 0.6, 0.6, 0.6])) # new eval (accepted)
|
||||
mock_judge.judge_batch.return_value = [(0.5, "ok")] * 5
|
||||
|
||||
mock_proposer = MagicMock(spec=ProposerPort)
|
||||
mock_proposer = AsyncMock(spec=ProposerPort)
|
||||
mock_proposer.propose.return_value = Prompt(text="Improved prompt")
|
||||
|
||||
evaluator = PromptEvaluator(mock_llm, mock_judge)
|
||||
evaluator.evaluate = MagicMock(side_effect=eval_sequence)
|
||||
evaluator.evaluate = AsyncMock(side_effect=eval_sequence)
|
||||
|
||||
mock_gen = MagicMock()
|
||||
mock_gen.generate_inputs.return_value = [
|
||||
@@ -65,7 +68,7 @@ class TestFullPipeline:
|
||||
seed=42,
|
||||
)
|
||||
|
||||
result = use_case.execute(config)
|
||||
result = await use_case.execute(config)
|
||||
|
||||
assert result.initial_prompt == "Answer questions."
|
||||
assert result.optimized_prompt == "Improved prompt"
|
||||
|
||||
Reference in New Issue
Block a user