"""Unit tests for DSPy module parsing logic.""" from __future__ import annotations import json from unittest.mock import MagicMock, patch import dspy import pytest from prometheus.infrastructure.dspy_modules import ( InstructionProposer, OutputJudge, SyntheticInputGenerator, ) class TestSyntheticInputGeneratorParseFallback: """Tests for _parse_fallback — regex-based JSON recovery.""" def test_extracts_quoted_strings(self) -> None: text = 'Here are some: "first example" and "second example" done.' result = SyntheticInputGenerator._parse_fallback(text) assert result == ["first example", "second example"] def test_single_quoted_string(self) -> None: text = 'Just one: "hello world"' result = SyntheticInputGenerator._parse_fallback(text) assert result == ["hello world"] def test_no_quotes_returns_raw_text(self) -> None: text = "no quotes at all here" result = SyntheticInputGenerator._parse_fallback(text) assert result == ["no quotes at all here"] def test_empty_string_returns_itself(self) -> None: result = SyntheticInputGenerator._parse_fallback("") assert result == [""] def test_mixed_json_with_extra_text(self) -> None: text = 'Results: "alpha", "beta", "gamma" — take your pick.' result = SyntheticInputGenerator._parse_fallback(text) assert result == ["alpha", "beta", "gamma"] class TestOutputJudgeForward: """Tests for OutputJudge score parsing and clamping. Mocks the internal ChainOfThought module to isolate parsing logic. """ @pytest.fixture def judge(self) -> OutputJudge: return OutputJudge() def test_valid_numeric_score(self, judge: OutputJudge) -> None: judge.judge = MagicMock( return_value=dspy.Prediction(score="0.8", feedback="Good output.") ) result = judge.forward("task", "input", "output") assert result.score == 0.8 assert result.feedback == "Good output." def test_non_numeric_score_falls_back_to_half( self, judge: OutputJudge ) -> None: judge.judge = MagicMock( return_value=dspy.Prediction( score="not-a-number", feedback="N/A" ) ) result = judge.forward("task", "input", "output") assert result.score == 0.5 def test_score_clamped_to_upper_bound(self, judge: OutputJudge) -> None: judge.judge = MagicMock( return_value=dspy.Prediction(score="1.5", feedback="Great!") ) result = judge.forward("task", "input", "output") assert result.score == 1.0 def test_score_clamped_to_lower_bound(self, judge: OutputJudge) -> None: judge.judge = MagicMock( return_value=dspy.Prediction(score="-0.3", feedback="Terrible.") ) result = judge.forward("task", "input", "output") assert result.score == 0.0 def test_empty_score_string_falls_back(self, judge: OutputJudge) -> None: judge.judge = MagicMock( return_value=dspy.Prediction(score="", feedback="No score.") ) result = judge.forward("task", "input", "output") assert result.score == 0.5 def test_boundary_score_one(self, judge: OutputJudge) -> None: judge.judge = MagicMock( return_value=dspy.Prediction(score="1.0", feedback="Perfect.") ) result = judge.forward("task", "input", "output") assert result.score == 1.0 def test_boundary_score_zero(self, judge: OutputJudge) -> None: judge.judge = MagicMock( return_value=dspy.Prediction(score="0.0", feedback="Wrong.") ) result = judge.forward("task", "input", "output") assert result.score == 0.0 def test_none_score_falls_back(self, judge: OutputJudge) -> None: judge.judge = MagicMock( return_value=dspy.Prediction(score=None, feedback="Missing.") ) result = judge.forward("task", "input", "output") assert result.score == 0.5 class TestSyntheticInputGeneratorForward: """Tests for SyntheticInputGenerator.forward JSON/fallback parsing. Mocks the internal ChainOfThought module to isolate parsing logic. """ @pytest.fixture def generator(self) -> SyntheticInputGenerator: return SyntheticInputGenerator() def test_valid_json_parsed_correctly( self, generator: SyntheticInputGenerator ) -> None: examples_json = json.dumps(["q1", "q2", "q3"]) generator.generate = MagicMock( return_value=dspy.Prediction(examples=examples_json) ) result = generator.forward("task desc", 3) assert result.examples == ["q1", "q2", "q3"] def test_malformed_json_triggers_fallback( self, generator: SyntheticInputGenerator ) -> None: generator.generate = MagicMock( return_value=dspy.Prediction( examples='Here: "fallback item" and "another one"' ) ) result = generator.forward("task desc", 2) assert result.examples == ["fallback item", "another one"] def test_empty_json_array(self, generator: SyntheticInputGenerator) -> None: generator.generate = MagicMock( return_value=dspy.Prediction(examples="[]") ) result = generator.forward("task desc", 0) assert result.examples == [] class TestInstructionProposerForward: """Tests for InstructionProposer.forward.""" @pytest.fixture def proposer(self) -> InstructionProposer: return InstructionProposer() def test_returns_new_instruction(self, proposer: InstructionProposer) -> None: proposer.propose = MagicMock( return_value=dspy.Prediction( new_instruction="Be concise and accurate." ) ) result = proposer.forward( "Be helpful.", "Answer questions.", "Failed: too verbose" ) assert result.new_instruction == "Be concise and accurate." def test_passes_correct_arguments( self, proposer: InstructionProposer ) -> None: proposer.propose = MagicMock( return_value=dspy.Prediction(new_instruction="improved") ) proposer.forward("current", "task desc", "failures") proposer.propose.assert_called_once_with( current_instruction="current", task_description="task desc", failure_examples="failures", )