Files
Prompt-optimizer/tests/unit/test_adapters.py
FullStackDev a5bf2ad59c feat: v0.2.0 sprint — ground truth eval, crossover/mutation, checkpointing, similarity guards, dataset loader, CLI commands, extended test coverage
Aggregates all v0.2.0 sprint work (GARAA-30 through GARAA-40) and fixes
2 integration tests that broke when the codebase went async (DSPyLLMAdapter
and full pipeline tests now properly await coroutines).

277 tests pass (260 unit + 17 integration).

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-03-29 19:13:50 +00:00

295 lines
9.7 KiB
Python

"""Unit tests for infrastructure adapters — LLM, Judge, Proposer, Synthetic.
Uses mocked DSPy modules to isolate adapter logic from LLM calls.
"""
from __future__ import annotations
from unittest.mock import AsyncMock, MagicMock, patch
import dspy
import pytest
from prometheus.domain.entities import Prompt, SyntheticExample, Trajectory
from prometheus.infrastructure.judge_adapter import DSPyJudgeAdapter
from prometheus.infrastructure.llm_adapter import DSPyLLMAdapter
from prometheus.infrastructure.proposer_adapter import DSPyProposerAdapter
from prometheus.infrastructure.synth_adapter import DSPySyntheticAdapter
# --- LLM Adapter ---
class TestDSPyLLMAdapter:
    """Exercise DSPyLLMAdapter.execute() with the internal predictor stubbed out."""

    @pytest.fixture
    def mock_lm(self) -> MagicMock:
        """Provide a MagicMock restricted to the ``dspy.LM`` interface."""
        lm_double = MagicMock(spec=dspy.LM)
        return lm_double

    @pytest.fixture
    def adapter(self, mock_lm: MagicMock) -> DSPyLLMAdapter:
        """Build the adapter under test around the mocked LM."""
        return DSPyLLMAdapter(lm=mock_lm)

    @pytest.mark.asyncio
    async def test_execute_returns_output_string(
        self, adapter: DSPyLLMAdapter, mock_lm: MagicMock
    ) -> None:
        """The stubbed prediction's ``output`` is what execute() returns."""
        prediction = MagicMock(output="Hello response")
        adapter._predictor = MagicMock(return_value=prediction)

        answer = await adapter.execute(Prompt(text="Say hello."), "Hi there")

        assert answer == "Hello response"

    @pytest.mark.asyncio
    async def test_execute_passes_prompt_text_and_input(
        self, adapter: DSPyLLMAdapter, mock_lm: MagicMock
    ) -> None:
        """The predictor is called once with the prompt text and the raw input."""
        predictor_stub = MagicMock(return_value=MagicMock(output="response"))
        adapter._predictor = predictor_stub

        await adapter.execute(Prompt(text="Translate this."), "Hello world")

        predictor_stub.assert_called_once_with(
            instruction="Translate this.", input_text="Hello world"
        )

    @pytest.mark.asyncio
    async def test_execute_uses_dspy_context(
        self, adapter: DSPyLLMAdapter, mock_lm: MagicMock
    ) -> None:
        """execute() opens ``dspy.context`` exactly once with the adapter's LM."""
        adapter._predictor = MagicMock(return_value=MagicMock(output="ok"))

        # Patch the name as seen from the adapter module so the call is observable.
        with patch("prometheus.infrastructure.llm_adapter.dspy.context") as mock_ctx:
            await adapter.execute(Prompt(text="test"), "input")
            mock_ctx.assert_called_once_with(lm=mock_lm)

    @pytest.mark.asyncio
    async def test_execute_converts_output_to_str(
        self, adapter: DSPyLLMAdapter, mock_lm: MagicMock
    ) -> None:
        """A non-string ``output`` (here an int) comes back as its str form."""
        adapter._predictor = MagicMock(return_value=MagicMock(output=42))

        rendered = await adapter.execute(Prompt(text="test"), "input")

        assert isinstance(rendered, str)
        assert rendered == "42"
# --- Judge Adapter ---
class TestDSPyJudgeAdapter:
    """Exercise DSPyJudgeAdapter.judge_batch() with the internal judge stubbed out."""

    @pytest.fixture
    def mock_lm(self) -> MagicMock:
        """Provide a MagicMock restricted to the ``dspy.LM`` interface."""
        lm_double = MagicMock(spec=dspy.LM)
        return lm_double

    @pytest.fixture
    def adapter(self, mock_lm: MagicMock) -> DSPyJudgeAdapter:
        """Build the adapter under test around the mocked LM."""
        return DSPyJudgeAdapter(lm=mock_lm)

    @pytest.mark.asyncio
    async def test_judge_batch_returns_scores_and_feedback(
        self, adapter: DSPyJudgeAdapter, mock_lm: MagicMock
    ) -> None:
        """Each (input, output) pair maps to a ``(score, feedback)`` tuple."""
        verdicts = [
            MagicMock(score=0.9, feedback="Excellent."),
            MagicMock(score=0.4, feedback="Incomplete."),
        ]
        adapter._judge = MagicMock(side_effect=verdicts)
        pairs = [("What is 2+2?", "4"), ("Capital of France?", "London")]

        graded = await adapter.judge_batch("math and geography", pairs)

        assert len(graded) == 2
        assert graded[0] == (0.9, "Excellent.")
        assert graded[1] == (0.4, "Incomplete.")

    @pytest.mark.asyncio
    async def test_judge_batch_empty_pairs(
        self, adapter: DSPyJudgeAdapter, mock_lm: MagicMock
    ) -> None:
        """An empty batch produces an empty result list."""
        graded = await adapter.judge_batch("task", [])

        assert graded == []

    @pytest.mark.asyncio
    async def test_judge_batch_uses_dspy_context(
        self, adapter: DSPyJudgeAdapter, mock_lm: MagicMock
    ) -> None:
        """A single-pair batch opens ``dspy.context`` exactly once with the LM."""
        adapter._judge = MagicMock(return_value=MagicMock(score=0.5, feedback="ok"))

        # Patch the name as seen from the adapter module so the call is observable.
        with patch("prometheus.infrastructure.judge_adapter.dspy.context") as mock_ctx:
            await adapter.judge_batch("task", [("in", "out")])
            mock_ctx.assert_called_once_with(lm=mock_lm)

    @pytest.mark.asyncio
    async def test_judge_batch_returns_all_results(
        self, adapter: DSPyJudgeAdapter, mock_lm: MagicMock
    ) -> None:
        """Every pair yields a result; result order is intentionally not asserted.

        NOTE(review): the order-agnostic checks presumably exist because the
        adapter runs judge calls in parallel — confirm against the adapter.
        """
        adapter._judge = MagicMock(
            side_effect=[
                MagicMock(score=0.5, feedback="ok"),
                MagicMock(score=0.7, feedback="better"),
                MagicMock(score=0.3, feedback="worse"),
            ]
        )
        pairs = [("first", "out1"), ("second", "out2"), ("third", "out3")]

        results = await adapter.judge_batch("task", pairs)

        assert len(results) == 3
        returned_scores = [entry[0] for entry in results]
        for expected_score in (0.5, 0.7, 0.3):
            assert expected_score in returned_scores
# --- Proposer Adapter ---
class TestDSPyProposerAdapter:
    """Exercise DSPyProposerAdapter.propose() and its failure formatter."""

    @pytest.fixture
    def mock_lm(self) -> MagicMock:
        """Provide a MagicMock restricted to the ``dspy.LM`` interface."""
        lm_double = MagicMock(spec=dspy.LM)
        return lm_double

    @pytest.fixture
    def adapter(self, mock_lm: MagicMock) -> DSPyProposerAdapter:
        """Build the adapter under test around the mocked LM."""
        return DSPyProposerAdapter(lm=mock_lm)

    @pytest.mark.asyncio
    async def test_propose_returns_new_prompt(
        self, adapter: DSPyProposerAdapter, mock_lm: MagicMock
    ) -> None:
        """The stubbed ``new_instruction`` comes back wrapped in a Prompt."""
        adapter._proposer = MagicMock(
            return_value=MagicMock(new_instruction="Be concise and accurate.")
        )
        baseline = Prompt(text="Answer questions.")
        failures = [Trajectory("in", "out", 0.3, "too verbose", "Answer questions.")]

        proposal = await adapter.propose(baseline, failures, "Q&A task")

        assert isinstance(proposal, Prompt)
        assert proposal.text == "Be concise and accurate."

    @pytest.mark.asyncio
    async def test_propose_uses_dspy_context(
        self, adapter: DSPyProposerAdapter, mock_lm: MagicMock
    ) -> None:
        """propose() opens ``dspy.context`` exactly once with the adapter's LM."""
        adapter._proposer = MagicMock(return_value=MagicMock(new_instruction="improved"))

        # Patch the name as seen from the adapter module so the call is observable.
        with patch("prometheus.infrastructure.proposer_adapter.dspy.context") as mock_ctx:
            await adapter.propose(Prompt(text="t"), [], "task")
            mock_ctx.assert_called_once_with(lm=mock_lm)

    def test_format_failures_single_trajectory(self) -> None:
        """One trajectory renders its input, output, score, feedback and header."""
        sample = Trajectory(
            "What is AI?", "A type of robot.", 0.3, "Incomplete definition.", "prompt"
        )

        rendered = DSPyProposerAdapter._format_failures([sample])

        expected_fragments = (
            "What is AI?",
            "A type of robot.",
            "0.30",
            "Incomplete definition.",
            "# Example 1",
        )
        for fragment in expected_fragments:
            assert fragment in rendered

    def test_format_failures_multiple_trajectories(self) -> None:
        """Two trajectories render numbered headers, a separator and both inputs."""
        samples = [
            Trajectory("input1", "output1", 0.4, "bad", "prompt"),
            Trajectory("input2", "output2", 0.2, "worse", "prompt"),
        ]

        rendered = DSPyProposerAdapter._format_failures(samples)

        for fragment in ("# Example 1", "# Example 2", "---", "input1", "input2"):
            assert fragment in rendered

    def test_format_failures_empty_list(self) -> None:
        """No trajectories renders the empty string."""
        rendered = DSPyProposerAdapter._format_failures([])

        assert rendered == ""
# --- Synthetic Adapter ---
class TestDSPySyntheticAdapter:
    """Exercise DSPySyntheticAdapter.generate_inputs() with the generator stubbed out."""

    @pytest.fixture
    def mock_lm(self) -> MagicMock:
        """Provide a MagicMock restricted to the ``dspy.LM`` interface."""
        lm_double = MagicMock(spec=dspy.LM)
        return lm_double

    @pytest.fixture
    def adapter(self, mock_lm: MagicMock) -> DSPySyntheticAdapter:
        """Build the adapter under test around the mocked LM."""
        return DSPySyntheticAdapter(lm=mock_lm)

    def test_generate_inputs_returns_examples(
        self, adapter: DSPySyntheticAdapter, mock_lm: MagicMock
    ) -> None:
        """Generated strings come back as SyntheticExample objects with ids from 0."""
        questions = ["What is AI?", "Explain ML.", "What is NLP?"]
        adapter._generator = MagicMock(return_value=MagicMock(examples=questions))

        produced = adapter.generate_inputs("AI task", 3)

        assert len(produced) == 3
        for item in produced:
            assert isinstance(item, SyntheticExample)
        assert produced[0].input_text == "What is AI?"
        assert produced[0].id == 0
        assert produced[1].id == 1

    def test_generate_inputs_truncates_to_n(
        self, adapter: DSPySyntheticAdapter, mock_lm: MagicMock
    ) -> None:
        """When the generator returns five items and three are requested, three remain."""
        surplus = ["q1", "q2", "q3", "q4", "q5"]
        adapter._generator = MagicMock(return_value=MagicMock(examples=surplus))

        produced = adapter.generate_inputs("task", 3)

        assert len(produced) == 3

    def test_generate_inputs_passes_correct_args(
        self, adapter: DSPySyntheticAdapter, mock_lm: MagicMock
    ) -> None:
        """The generator is called once with the task description and requested count."""
        adapter._generator = MagicMock(return_value=MagicMock(examples=["q1"]))

        adapter.generate_inputs("my task", 5)

        adapter._generator.assert_called_once_with(
            task_description="my task", n_examples=5
        )

    def test_generate_inputs_empty_list(
        self, adapter: DSPySyntheticAdapter, mock_lm: MagicMock
    ) -> None:
        """An empty generator result with n=0 produces an empty list."""
        adapter._generator = MagicMock(return_value=MagicMock(examples=[]))

        produced = adapter.generate_inputs("task", 0)

        assert produced == []