Initial commit: PROMETHEUS v0.1.0 - Prompt optimizer
- Clean architecture (domain/application/infrastructure)
- DSPy-based evolution engine with scoring
- CLI via pyproject.toml entry point
- Unit + integration tests (~300 tests)
- Configs for glm-5.1 and glm-4.5-air models
- Z.AI endpoint integration
This commit is contained in:
198
tests/unit/test_dspy_modules.py
Normal file
198
tests/unit/test_dspy_modules.py
Normal file
@@ -0,0 +1,198 @@
|
||||
"""Unit tests for DSPy module parsing logic."""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import dspy
|
||||
import pytest
|
||||
|
||||
from prometheus.infrastructure.dspy_modules import (
|
||||
InstructionProposer,
|
||||
OutputJudge,
|
||||
SyntheticInputGenerator,
|
||||
)
|
||||
|
||||
|
||||
class TestSyntheticInputGeneratorParseFallback:
    """Exercise ``SyntheticInputGenerator._parse_fallback`` directly.

    The expectations below pin the observable contract: each double-quoted
    span in the text is returned, in order, and text without any quoted span
    comes back as a one-element list holding the text unchanged.
    """

    def test_extracts_quoted_strings(self) -> None:
        """Two quoted spans embedded in prose are both recovered, in order."""
        raw = 'Here are some: "first example" and "second example" done.'
        expected = ["first example", "second example"]

        recovered = SyntheticInputGenerator._parse_fallback(raw)

        assert recovered == expected

    def test_single_quoted_string(self) -> None:
        """A lone quoted span yields a one-element list."""
        raw = 'Just one: "hello world"'
        expected = ["hello world"]

        recovered = SyntheticInputGenerator._parse_fallback(raw)

        assert recovered == expected

    def test_no_quotes_returns_raw_text(self) -> None:
        """Without any quoted span the whole text is the only item."""
        raw = "no quotes at all here"
        expected = ["no quotes at all here"]

        recovered = SyntheticInputGenerator._parse_fallback(raw)

        assert recovered == expected

    def test_empty_string_returns_itself(self) -> None:
        """An empty input is wrapped as-is rather than dropped."""
        recovered = SyntheticInputGenerator._parse_fallback("")

        assert recovered == [""]

    def test_mixed_json_with_extra_text(self) -> None:
        """Comma-separated quoted items are picked out of surrounding prose."""
        raw = 'Results: "alpha", "beta", "gamma" — take your pick.'
        expected = ["alpha", "beta", "gamma"]

        recovered = SyntheticInputGenerator._parse_fallback(raw)

        assert recovered == expected
|
||||
|
||||
|
||||
class TestOutputJudgeForward:
    """Check how ``OutputJudge.forward`` turns a raw score into a number.

    Each test replaces the instance's ``judge`` attribute (presumably the
    underlying DSPy predictor) with a ``MagicMock`` returning a canned
    ``dspy.Prediction``, so only the score handling in ``forward`` is
    exercised. The expectations are: numeric strings are converted,
    values outside 0.0-1.0 are clamped, and unparsable or missing scores
    become 0.5.
    """

    @pytest.fixture
    def judge(self) -> OutputJudge:
        """Provide a fresh ``OutputJudge`` for every test."""
        return OutputJudge()

    @staticmethod
    def _forward_with(judge: OutputJudge, score: str | None, feedback: str):
        """Stub ``judge.judge`` with a canned prediction and run ``forward``."""
        canned = dspy.Prediction(score=score, feedback=feedback)
        judge.judge = MagicMock(return_value=canned)
        return judge.forward("task", "input", "output")

    def test_valid_numeric_score(self, judge: OutputJudge) -> None:
        """An in-range numeric string is converted; feedback passes through."""
        outcome = self._forward_with(judge, "0.8", "Good output.")

        assert outcome.score == 0.8
        assert outcome.feedback == "Good output."

    def test_non_numeric_score_falls_back_to_half(
        self, judge: OutputJudge
    ) -> None:
        """A score that is not a number is expected to become 0.5."""
        outcome = self._forward_with(judge, "not-a-number", "N/A")

        assert outcome.score == 0.5

    def test_score_clamped_to_upper_bound(self, judge: OutputJudge) -> None:
        """A score above 1.0 is expected to be capped at 1.0."""
        outcome = self._forward_with(judge, "1.5", "Great!")

        assert outcome.score == 1.0

    def test_score_clamped_to_lower_bound(self, judge: OutputJudge) -> None:
        """A negative score is expected to be raised to 0.0."""
        outcome = self._forward_with(judge, "-0.3", "Terrible.")

        assert outcome.score == 0.0

    def test_empty_score_string_falls_back(self, judge: OutputJudge) -> None:
        """An empty score string is expected to become 0.5."""
        outcome = self._forward_with(judge, "", "No score.")

        assert outcome.score == 0.5

    def test_boundary_score_one(self, judge: OutputJudge) -> None:
        """The upper boundary value 1.0 is kept as-is."""
        outcome = self._forward_with(judge, "1.0", "Perfect.")

        assert outcome.score == 1.0

    def test_boundary_score_zero(self, judge: OutputJudge) -> None:
        """The lower boundary value 0.0 is kept as-is."""
        outcome = self._forward_with(judge, "0.0", "Wrong.")

        assert outcome.score == 0.0

    def test_none_score_falls_back(self, judge: OutputJudge) -> None:
        """A ``None`` score is expected to become 0.5."""
        outcome = self._forward_with(judge, None, "Missing.")

        assert outcome.score == 0.5
|
||||
|
||||
|
||||
class TestSyntheticInputGeneratorForward:
    """Check how ``SyntheticInputGenerator.forward`` decodes its examples.

    The instance's ``generate`` attribute (presumably the underlying DSPy
    predictor) is swapped for a ``MagicMock`` returning a canned
    ``dspy.Prediction``, so only the decoding done by ``forward`` is
    exercised.
    """

    @pytest.fixture
    def generator(self) -> SyntheticInputGenerator:
        """Provide a fresh ``SyntheticInputGenerator`` for every test."""
        return SyntheticInputGenerator()

    def test_valid_json_parsed_correctly(
        self, generator: SyntheticInputGenerator
    ) -> None:
        """A well-formed JSON array of strings is returned as a list."""
        payload = json.dumps(["q1", "q2", "q3"])
        canned = dspy.Prediction(examples=payload)
        generator.generate = MagicMock(return_value=canned)

        outcome = generator.forward("task desc", 3)

        assert outcome.examples == ["q1", "q2", "q3"]

    def test_malformed_json_triggers_fallback(
        self, generator: SyntheticInputGenerator
    ) -> None:
        """Non-JSON text is expected to yield its quoted spans instead."""
        canned = dspy.Prediction(
            examples='Here: "fallback item" and "another one"'
        )
        generator.generate = MagicMock(return_value=canned)

        outcome = generator.forward("task desc", 2)

        assert outcome.examples == ["fallback item", "another one"]

    def test_empty_json_array(self, generator: SyntheticInputGenerator) -> None:
        """An empty JSON array stays an empty list."""
        canned = dspy.Prediction(examples="[]")
        generator.generate = MagicMock(return_value=canned)

        outcome = generator.forward("task desc", 0)

        assert outcome.examples == []
|
||||
|
||||
|
||||
class TestInstructionProposerForward:
    """Check ``InstructionProposer.forward`` against a stubbed ``propose``.

    The instance's ``propose`` attribute is replaced with a ``MagicMock``
    so the tests can inspect both the value handed back and the keyword
    arguments it was called with.
    """

    @pytest.fixture
    def proposer(self) -> InstructionProposer:
        """Provide a fresh ``InstructionProposer`` for every test."""
        return InstructionProposer()

    def test_returns_new_instruction(self, proposer: InstructionProposer) -> None:
        """The stubbed ``new_instruction`` is exposed on the result."""
        canned = dspy.Prediction(new_instruction="Be concise and accurate.")
        proposer.propose = MagicMock(return_value=canned)

        outcome = proposer.forward(
            "Be helpful.",
            "Answer questions.",
            "Failed: too verbose",
        )

        assert outcome.new_instruction == "Be concise and accurate."

    def test_passes_correct_arguments(
        self, proposer: InstructionProposer
    ) -> None:
        """Positional inputs to ``forward`` reach ``propose`` as keywords."""
        canned = dspy.Prediction(new_instruction="improved")
        proposer.propose = MagicMock(return_value=canned)

        proposer.forward("current", "task desc", "failures")

        # Exactly one call, with each positional mapped to its keyword name.
        proposer.propose.assert_called_once_with(
            current_instruction="current",
            task_description="task desc",
            failure_examples="failures",
        )
|
||||
Reference in New Issue
Block a user