Files
Prompt-optimizer/tests/unit/test_dspy_modules.py
Gartoid 837a44970f Initial commit: PROMETHEUS v0.1.0 - Prompt optimizer
- Clean architecture (domain/application/infrastructure)
- DSPy-based evolution engine with scoring
- CLI via pyproject.toml entry point
- Unit + integration tests (~300 tests)
- Configs for glm-5.1 and glm-4.5-air models
- Z.AI endpoint integration
2026-03-29 11:44:03 +00:00

199 lines
6.4 KiB
Python

"""Unit tests for DSPy module parsing logic."""
from __future__ import annotations
import json
from unittest.mock import MagicMock, patch
import dspy
import pytest
from prometheus.infrastructure.dspy_modules import (
InstructionProposer,
OutputJudge,
SyntheticInputGenerator,
)
class TestSyntheticInputGeneratorParseFallback:
    """Exercise ``SyntheticInputGenerator._parse_fallback`` directly.

    Every case hands raw text to the fallback parser and checks the list of
    strings it is expected to return: the contents of the double-quoted
    spans when the text has any, otherwise the raw text as the only element.
    """

    def test_extracts_quoted_strings(self) -> None:
        """Two quoted spans inside a sentence are returned in order."""
        recovered = SyntheticInputGenerator._parse_fallback(
            'Here are some: "first example" and "second example" done.'
        )
        assert recovered == ["first example", "second example"]

    def test_single_quoted_string(self) -> None:
        """A lone double-quoted span yields a one-element list."""
        recovered = SyntheticInputGenerator._parse_fallback(
            'Just one: "hello world"'
        )
        assert recovered == ["hello world"]

    def test_no_quotes_returns_raw_text(self) -> None:
        """Text without any quotes is expected back unchanged, wrapped in a list."""
        raw = "no quotes at all here"
        expected = ["no quotes at all here"]
        assert SyntheticInputGenerator._parse_fallback(raw) == expected

    def test_empty_string_returns_itself(self) -> None:
        """The empty string is expected back as ``[""]``, not as ``[]``."""
        recovered = SyntheticInputGenerator._parse_fallback("")
        assert recovered == [""]

    def test_mixed_json_with_extra_text(self) -> None:
        """Quoted items are picked out of surrounding prose and punctuation."""
        recovered = SyntheticInputGenerator._parse_fallback(
            'Results: "alpha", "beta", "gamma" — take your pick.'
        )
        expected = ["alpha", "beta", "gamma"]
        assert recovered == expected
class TestOutputJudgeForward:
    """Check how ``OutputJudge.forward`` parses and clamps the score.

    The judge's internal ChainOfThought module (its ``judge`` attribute) is
    swapped for a ``MagicMock`` that returns a canned ``dspy.Prediction``, so
    only the score-parsing logic in ``forward`` is exercised.
    """

    @pytest.fixture
    def judge(self) -> OutputJudge:
        """Provide a fresh ``OutputJudge`` for each test."""
        return OutputJudge()

    @staticmethod
    def _forward_with(
        judge: OutputJudge, score: object, feedback: str
    ) -> dspy.Prediction:
        """Stub the inner module with a fixed prediction, then run ``forward``.

        ``score`` and ``feedback`` are what the mocked inner module reports;
        ``forward`` is always called with the same three placeholder arguments.
        """
        canned = dspy.Prediction(score=score, feedback=feedback)
        judge.judge = MagicMock(return_value=canned)
        return judge.forward("task", "input", "output")

    def test_valid_numeric_score(self, judge: OutputJudge) -> None:
        """A numeric string is converted to a float; feedback passes through."""
        verdict = self._forward_with(judge, "0.8", "Good output.")
        assert verdict.score == 0.8
        assert verdict.feedback == "Good output."

    def test_non_numeric_score_falls_back_to_half(
        self, judge: OutputJudge
    ) -> None:
        """A score that is not a number is expected to become 0.5."""
        verdict = self._forward_with(judge, "not-a-number", "N/A")
        assert verdict.score == 0.5

    def test_score_clamped_to_upper_bound(self, judge: OutputJudge) -> None:
        """A score above 1 is expected to be clamped to 1.0."""
        verdict = self._forward_with(judge, "1.5", "Great!")
        assert verdict.score == 1.0

    def test_score_clamped_to_lower_bound(self, judge: OutputJudge) -> None:
        """A negative score is expected to be clamped to 0.0."""
        verdict = self._forward_with(judge, "-0.3", "Terrible.")
        assert verdict.score == 0.0

    def test_empty_score_string_falls_back(self, judge: OutputJudge) -> None:
        """An empty score string is expected to become 0.5."""
        verdict = self._forward_with(judge, "", "No score.")
        assert verdict.score == 0.5

    def test_boundary_score_one(self, judge: OutputJudge) -> None:
        """The upper boundary value 1.0 is kept as is."""
        verdict = self._forward_with(judge, "1.0", "Perfect.")
        assert verdict.score == 1.0

    def test_boundary_score_zero(self, judge: OutputJudge) -> None:
        """The lower boundary value 0.0 is kept as is."""
        verdict = self._forward_with(judge, "0.0", "Wrong.")
        assert verdict.score == 0.0

    def test_none_score_falls_back(self, judge: OutputJudge) -> None:
        """A ``None`` score is expected to become 0.5."""
        verdict = self._forward_with(judge, None, "Missing.")
        assert verdict.score == 0.5
class TestSyntheticInputGeneratorForward:
    """Check how ``SyntheticInputGenerator.forward`` parses its examples.

    The generator's internal ChainOfThought module (its ``generate``
    attribute) is swapped for a ``MagicMock`` returning a canned
    ``dspy.Prediction``, so only the JSON/fallback parsing is exercised.
    """

    @pytest.fixture
    def generator(self) -> SyntheticInputGenerator:
        """Provide a fresh ``SyntheticInputGenerator`` for each test."""
        return SyntheticInputGenerator()

    @staticmethod
    def _stub_examples(generator: SyntheticInputGenerator, raw: str) -> None:
        """Make the mocked inner module report ``raw`` as its ``examples`` text."""
        canned = dspy.Prediction(examples=raw)
        generator.generate = MagicMock(return_value=canned)

    def test_valid_json_parsed_correctly(
        self, generator: SyntheticInputGenerator
    ) -> None:
        """A well-formed JSON array is decoded into a list of strings."""
        self._stub_examples(generator, json.dumps(["q1", "q2", "q3"]))
        outcome = generator.forward("task desc", 3)
        assert outcome.examples == ["q1", "q2", "q3"]

    def test_malformed_json_triggers_fallback(
        self, generator: SyntheticInputGenerator
    ) -> None:
        """Text that is not JSON is expected to yield its quoted substrings."""
        self._stub_examples(generator, 'Here: "fallback item" and "another one"')
        outcome = generator.forward("task desc", 2)
        assert outcome.examples == ["fallback item", "another one"]

    def test_empty_json_array(self, generator: SyntheticInputGenerator) -> None:
        """An empty JSON array is expected to produce an empty list."""
        self._stub_examples(generator, "[]")
        outcome = generator.forward("task desc", 0)
        assert outcome.examples == []
class TestInstructionProposerForward:
    """Check ``InstructionProposer.forward`` against a mocked ``propose``.

    The proposer's ``propose`` attribute is replaced by a ``MagicMock`` that
    returns a canned ``dspy.Prediction``.
    """

    @pytest.fixture
    def proposer(self) -> InstructionProposer:
        """Provide a fresh ``InstructionProposer`` for each test."""
        return InstructionProposer()

    def test_returns_new_instruction(self, proposer: InstructionProposer) -> None:
        """The ``new_instruction`` from the inner module reaches the caller."""
        canned = dspy.Prediction(new_instruction="Be concise and accurate.")
        proposer.propose = MagicMock(return_value=canned)

        outcome = proposer.forward(
            "Be helpful.", "Answer questions.", "Failed: too verbose"
        )

        assert outcome.new_instruction == "Be concise and accurate."

    def test_passes_correct_arguments(
        self, proposer: InstructionProposer
    ) -> None:
        """Positional arguments to ``forward`` are forwarded as named keywords."""
        canned = dspy.Prediction(new_instruction="improved")
        proposer.propose = MagicMock(return_value=canned)

        proposer.forward("current", "task desc", "failures")

        # The inner module must be called exactly once, with keywords only.
        expected_kwargs = {
            "current_instruction": "current",
            "task_description": "task desc",
            "failure_examples": "failures",
        }
        proposer.propose.assert_called_once_with(**expected_kwargs)