"""Unit tests for infrastructure adapters — LLM, Judge, Proposer, Synthetic. Uses mocked DSPy modules to isolate adapter logic from LLM calls. """ from __future__ import annotations from unittest.mock import AsyncMock, MagicMock, patch import dspy import pytest from prometheus.domain.entities import Prompt, SyntheticExample, Trajectory from prometheus.infrastructure.judge_adapter import DSPyJudgeAdapter from prometheus.infrastructure.llm_adapter import DSPyLLMAdapter from prometheus.infrastructure.proposer_adapter import DSPyProposerAdapter from prometheus.infrastructure.synth_adapter import DSPySyntheticAdapter # --- LLM Adapter --- class TestDSPyLLMAdapter: """Tests for DSPyLLMAdapter.execute().""" @pytest.fixture def mock_lm(self) -> MagicMock: return MagicMock(spec=dspy.LM) @pytest.fixture def adapter(self, mock_lm: MagicMock) -> DSPyLLMAdapter: return DSPyLLMAdapter(lm=mock_lm) @pytest.mark.asyncio async def test_execute_returns_output_string( self, adapter: DSPyLLMAdapter, mock_lm: MagicMock ) -> None: mock_predictor = MagicMock() mock_predictor.return_value = MagicMock(output="Hello response") adapter._predictor = mock_predictor prompt = Prompt(text="Say hello.") result = await adapter.execute(prompt, "Hi there") assert result == "Hello response" @pytest.mark.asyncio async def test_execute_passes_prompt_text_and_input( self, adapter: DSPyLLMAdapter, mock_lm: MagicMock ) -> None: mock_predictor = MagicMock() mock_predictor.return_value = MagicMock(output="response") adapter._predictor = mock_predictor prompt = Prompt(text="Translate this.") await adapter.execute(prompt, "Hello world") mock_predictor.assert_called_once_with( instruction="Translate this.", input_text="Hello world", ) @pytest.mark.asyncio async def test_execute_uses_dspy_context( self, adapter: DSPyLLMAdapter, mock_lm: MagicMock ) -> None: mock_predictor = MagicMock() mock_predictor.return_value = MagicMock(output="ok") adapter._predictor = mock_predictor with patch("prometheus.infrastructure.llm_adapter.dspy.context") as mock_ctx: await adapter.execute(Prompt(text="test"), "input") mock_ctx.assert_called_once_with(lm=mock_lm) @pytest.mark.asyncio async def test_execute_converts_output_to_str( self, adapter: DSPyLLMAdapter, mock_lm: MagicMock ) -> None: mock_predictor = MagicMock() mock_predictor.return_value = MagicMock(output=42) adapter._predictor = mock_predictor result = await adapter.execute(Prompt(text="test"), "input") assert isinstance(result, str) assert result == "42" # --- Judge Adapter --- class TestDSPyJudgeAdapter: """Tests for DSPyJudgeAdapter.judge_batch().""" @pytest.fixture def mock_lm(self) -> MagicMock: return MagicMock(spec=dspy.LM) @pytest.fixture def adapter(self, mock_lm: MagicMock) -> DSPyJudgeAdapter: return DSPyJudgeAdapter(lm=mock_lm) @pytest.mark.asyncio async def test_judge_batch_returns_scores_and_feedback( self, adapter: DSPyJudgeAdapter, mock_lm: MagicMock ) -> None: adapter._judge = MagicMock() adapter._judge.side_effect = [ MagicMock(score=0.9, feedback="Excellent."), MagicMock(score=0.4, feedback="Incomplete."), ] pairs = [("What is 2+2?", "4"), ("Capital of France?", "London")] result = await adapter.judge_batch("math and geography", pairs) assert len(result) == 2 assert result[0] == (0.9, "Excellent.") assert result[1] == (0.4, "Incomplete.") @pytest.mark.asyncio async def test_judge_batch_empty_pairs( self, adapter: DSPyJudgeAdapter, mock_lm: MagicMock ) -> None: result = await adapter.judge_batch("task", []) assert result == [] @pytest.mark.asyncio async def test_judge_batch_uses_dspy_context( self, adapter: DSPyJudgeAdapter, mock_lm: MagicMock ) -> None: adapter._judge = MagicMock() adapter._judge.return_value = MagicMock(score=0.5, feedback="ok") with patch("prometheus.infrastructure.judge_adapter.dspy.context") as mock_ctx: await adapter.judge_batch("task", [("in", "out")]) mock_ctx.assert_called_once_with(lm=mock_lm) @pytest.mark.asyncio async def test_judge_batch_returns_all_results( self, adapter: DSPyJudgeAdapter, mock_lm: MagicMock ) -> None: """Judge calls run in parallel but all results are returned.""" adapter._judge = MagicMock() adapter._judge.side_effect = [ MagicMock(score=0.5, feedback="ok"), MagicMock(score=0.7, feedback="better"), MagicMock(score=0.3, feedback="worse"), ] pairs = [("first", "out1"), ("second", "out2"), ("third", "out3")] results = await adapter.judge_batch("task", pairs) assert len(results) == 3 scores = [r[0] for r in results] assert 0.5 in scores assert 0.7 in scores assert 0.3 in scores # --- Proposer Adapter --- class TestDSPyProposerAdapter: """Tests for DSPyProposerAdapter.propose().""" @pytest.fixture def mock_lm(self) -> MagicMock: return MagicMock(spec=dspy.LM) @pytest.fixture def adapter(self, mock_lm: MagicMock) -> DSPyProposerAdapter: return DSPyProposerAdapter(lm=mock_lm) @pytest.mark.asyncio async def test_propose_returns_new_prompt( self, adapter: DSPyProposerAdapter, mock_lm: MagicMock ) -> None: adapter._proposer = MagicMock() adapter._proposer.return_value = MagicMock( new_instruction="Be concise and accurate." ) current = Prompt(text="Answer questions.") trajectories = [ Trajectory("in", "out", 0.3, "too verbose", "Answer questions.") ] result = await adapter.propose(current, trajectories, "Q&A task") assert isinstance(result, Prompt) assert result.text == "Be concise and accurate." @pytest.mark.asyncio async def test_propose_uses_dspy_context( self, adapter: DSPyProposerAdapter, mock_lm: MagicMock ) -> None: adapter._proposer = MagicMock() adapter._proposer.return_value = MagicMock(new_instruction="improved") with patch("prometheus.infrastructure.proposer_adapter.dspy.context") as mock_ctx: await adapter.propose(Prompt(text="t"), [], "task") mock_ctx.assert_called_once_with(lm=mock_lm) def test_format_failures_single_trajectory(self) -> None: trajectories = [ Trajectory("What is AI?", "A type of robot.", 0.3, "Incomplete definition.", "prompt") ] result = DSPyProposerAdapter._format_failures(trajectories) assert "What is AI?" in result assert "A type of robot." in result assert "0.30" in result assert "Incomplete definition." in result assert "# Example 1" in result def test_format_failures_multiple_trajectories(self) -> None: trajectories = [ Trajectory("input1", "output1", 0.4, "bad", "prompt"), Trajectory("input2", "output2", 0.2, "worse", "prompt"), ] result = DSPyProposerAdapter._format_failures(trajectories) assert "# Example 1" in result assert "# Example 2" in result assert "---" in result assert "input1" in result assert "input2" in result def test_format_failures_empty_list(self) -> None: result = DSPyProposerAdapter._format_failures([]) assert result == "" # --- Synthetic Adapter --- class TestDSPySyntheticAdapter: """Tests for DSPySyntheticAdapter.generate_inputs().""" @pytest.fixture def mock_lm(self) -> MagicMock: return MagicMock(spec=dspy.LM) @pytest.fixture def adapter(self, mock_lm: MagicMock) -> DSPySyntheticAdapter: return DSPySyntheticAdapter(lm=mock_lm) def test_generate_inputs_returns_examples( self, adapter: DSPySyntheticAdapter, mock_lm: MagicMock ) -> None: adapter._generator = MagicMock() adapter._generator.return_value = MagicMock( examples=["What is AI?", "Explain ML.", "What is NLP?"] ) result = adapter.generate_inputs("AI task", 3) assert len(result) == 3 assert all(isinstance(ex, SyntheticExample) for ex in result) assert result[0].input_text == "What is AI?" assert result[0].id == 0 assert result[1].id == 1 def test_generate_inputs_truncates_to_n( self, adapter: DSPySyntheticAdapter, mock_lm: MagicMock ) -> None: adapter._generator = MagicMock() adapter._generator.return_value = MagicMock( examples=["q1", "q2", "q3", "q4", "q5"] ) result = adapter.generate_inputs("task", 3) assert len(result) == 3 def test_generate_inputs_passes_correct_args( self, adapter: DSPySyntheticAdapter, mock_lm: MagicMock ) -> None: adapter._generator = MagicMock() adapter._generator.return_value = MagicMock(examples=["q1"]) adapter.generate_inputs("my task", 5) adapter._generator.assert_called_once_with( task_description="my task", n_examples=5, ) def test_generate_inputs_empty_list( self, adapter: DSPySyntheticAdapter, mock_lm: MagicMock ) -> None: adapter._generator = MagicMock() adapter._generator.return_value = MagicMock(examples=[]) result = adapter.generate_inputs("task", 0) assert result == []