- Clean architecture (domain/application/infrastructure) - DSPy-based evolution engine with scoring - CLI via pyproject.toml entry point - Unit + integration tests (~300 tests) - Configs for glm-5.1 and glm-4.5-air models - Z.AI endpoint integration
100 lines
2.6 KiB
Python
100 lines
2.6 KiB
Python
"""Unit tests for domain entities."""
|
|
from __future__ import annotations
|
|
|
|
from prometheus.domain.entities import (
|
|
Candidate,
|
|
EvalResult,
|
|
OptimizationState,
|
|
Prompt,
|
|
SyntheticExample,
|
|
Trajectory,
|
|
)
|
|
|
|
|
|
class TestPrompt:
    """Tests for the ``Prompt`` entity: text, length, immutability, metadata."""

    def test_prompt_text(self) -> None:
        """The text given at construction is exposed through ``text``."""
        prompt = Prompt(text="Hello")
        assert prompt.text == "Hello"

    def test_prompt_len(self) -> None:
        """``len()`` of a prompt built from ``"Hello"`` is 5."""
        assert len(Prompt(text="Hello")) == 5

    def test_prompt_frozen(self) -> None:
        """Assigning to ``text`` on an existing prompt must raise ``AttributeError``."""
        prompt = Prompt(text="Hello")
        try:
            prompt.text = "World"  # type: ignore[misc]
        except AttributeError:
            # Expected path: the assignment was rejected.
            return
        # Reaching this point means the assignment silently succeeded.
        raise AssertionError("Should have raised FrozenInstanceError")

    def test_prompt_default_metadata(self) -> None:
        """Without an explicit ``metadata`` argument, metadata equals ``{}``."""
        assert Prompt(text="Hello").metadata == {}

    def test_prompt_custom_metadata(self) -> None:
        """Metadata passed to the constructor is retrievable by key."""
        supplied = {"key": "value"}
        prompt = Prompt(text="Hello", metadata=supplied)
        assert prompt.metadata["key"] == "value"
|
|
|
|
|
|
class TestSyntheticExample:
    """Tests for the default field values of ``SyntheticExample``."""

    def test_default_category(self) -> None:
        """An example built with only ``input_text`` has category ``"default"``."""
        example = SyntheticExample(input_text="test")
        assert example.category == "default"

    def test_default_id(self) -> None:
        """An example built with only ``input_text`` has ``id`` equal to 0."""
        example = SyntheticExample(input_text="test")
        assert example.id == 0
|
|
|
|
|
|
class TestEvalResult:
    """Tests for the aggregate score properties of ``EvalResult``."""

    def test_total_score(self) -> None:
        """``total_score`` of scores 0.3, 0.5 and 0.4 is 1.2 within tolerance."""
        result = EvalResult(
            scores=[0.3, 0.5, 0.4],
            feedbacks=["a", "b", "c"],
            trajectories=[],
        )
        # Compare with a tolerance rather than ``==``: adding 0.3, 0.5 and 0.4
        # left to right in binary floating point yields 1.2000000000000002,
        # so exact equality would depend on how the total is accumulated.
        assert abs(result.total_score - 1.2) < 1e-9

    def test_mean_score(self) -> None:
        """``mean_score`` of scores 0.3, 0.5 and 0.4 is 0.4 within tolerance."""
        result = EvalResult(
            scores=[0.3, 0.5, 0.4],
            feedbacks=["a", "b", "c"],
            trajectories=[],
        )
        assert abs(result.mean_score - 0.4) < 1e-9

    def test_mean_score_empty(self) -> None:
        """``mean_score`` of a result with no scores is exactly 0.0."""
        result = EvalResult(scores=[], feedbacks=[], trajectories=[])
        assert result.mean_score == 0.0
|
|
|
|
|
|
class TestTrajectory:
    """Tests that ``Trajectory`` keeps the values it is constructed with."""

    def test_trajectory_fields(self) -> None:
        """``input_text`` and ``score`` read back as the values passed in."""
        trajectory = Trajectory(
            input_text="in",
            output_text="out",
            score=0.8,
            feedback="good",
            prompt_used="test",
        )
        assert trajectory.input_text == "in"
        assert trajectory.score == 0.8
|
|
|
|
|
|
class TestCandidate:
    """Tests for the default field values of ``Candidate``."""

    def test_candidate_defaults(self) -> None:
        """A candidate built from only a prompt has zeroed score/generation and no parent."""
        seed_prompt = Prompt(text="test")
        candidate = Candidate(prompt=seed_prompt)
        assert candidate.best_score == 0.0
        assert candidate.generation == 0
        assert candidate.parent_id is None
|
|
|
|
|
|
class TestOptimizationState:
    """Tests for the initial values of a freshly created ``OptimizationState``."""

    def test_default_state(self) -> None:
        """A state built with no arguments starts at iteration 0 with nothing recorded."""
        fresh = OptimizationState()
        assert fresh.iteration == 0
        assert fresh.best_candidate is None
        assert fresh.candidates == []
        assert fresh.total_llm_calls == 0
|