Initial commit: PROMETHEUS v0.1.0 - Prompt optimizer
- Clean architecture (domain/application/infrastructure)
- DSPy-based evolution engine with scoring
- CLI via pyproject.toml entry point
- Unit + integration tests (~300 tests)
- Configs for glm-5.1 and glm-4.5-air models
- Z.AI endpoint integration
This commit is contained in:
54
tests/unit/test_scoring.py
Normal file
54
tests/unit/test_scoring.py
Normal file
@@ -0,0 +1,54 @@
|
||||
"""Unit tests for scoring logic."""
|
||||
from __future__ import annotations
|
||||
|
||||
from prometheus.domain.entities import EvalResult, Trajectory
|
||||
from prometheus.domain.scoring import normalize_score, should_accept
|
||||
|
||||
|
||||
def _make_eval(scores: list[float]) -> EvalResult:
    """Build an ``EvalResult`` fixture from a plain list of scores.

    The result carries the given ``scores`` unchanged, one empty feedback
    string per score, and one ``Trajectory`` per score.

    Args:
        scores: Per-example scores to embed in the fixture.

    Returns:
        An ``EvalResult`` whose ``scores``, ``feedbacks`` and
        ``trajectories`` all have ``len(scores)`` entries.
    """
    empty_feedbacks = [""] * len(scores)

    # Trajectory is built positionally; the meaning of each slot is defined
    # in prometheus.domain.entities (not visible from this module).
    trajectory_list = []
    for index, score in enumerate(scores):
        trajectory_list.append(
            Trajectory(f"in{index}", f"out{index}", score, "", "p")
        )

    return EvalResult(
        scores=scores,
        feedbacks=empty_feedbacks,
        trajectories=trajectory_list,
    )
|
||||
|
||||
|
||||
class TestShouldAccept:
    """Checks for ``should_accept(old, new, min_improvement=...)``."""

    def test_accepts_improvement(self) -> None:
        """A candidate with higher scores than the baseline is accepted."""
        baseline = _make_eval([0.3, 0.4])
        candidate = _make_eval([0.8, 0.9])

        verdict = should_accept(baseline, candidate)

        assert verdict is True

    def test_rejects_regression(self) -> None:
        """A candidate with lower scores than the baseline is rejected."""
        baseline = _make_eval([0.8, 0.9])
        candidate = _make_eval([0.3, 0.4])

        verdict = should_accept(baseline, candidate)

        assert verdict is False

    def test_rejects_equal(self) -> None:
        """A candidate with the same scores as the baseline is rejected."""
        baseline = _make_eval([0.5, 0.5])
        candidate = _make_eval([0.5, 0.5])

        verdict = should_accept(baseline, candidate)

        assert verdict is False

    def test_min_improvement_threshold(self) -> None:
        """``min_improvement`` gates acceptance of a 0.5 -> 0.6 change."""
        baseline = _make_eval([0.5])
        candidate = _make_eval([0.6])

        # Threshold of 0.2 is expected to reject this pair ...
        strict_verdict = should_accept(baseline, candidate, min_improvement=0.2)
        assert strict_verdict is False

        # ... while a threshold of 0.05 is expected to accept it.
        lenient_verdict = should_accept(baseline, candidate, min_improvement=0.05)
        assert lenient_verdict is True
|
||||
|
||||
|
||||
class TestNormalizeScore:
    """Checks for ``normalize_score(value, min_val=..., max_val=...)``."""

    def test_clamps_high(self) -> None:
        """With default arguments, 1.5 is expected to normalize to 1.0."""
        result = normalize_score(1.5)
        assert result == 1.0

    def test_clamps_low(self) -> None:
        """With default arguments, -0.5 is expected to normalize to 0.0."""
        result = normalize_score(-0.5)
        assert result == 0.0

    def test_passes_within_range(self) -> None:
        """With default arguments, 0.7 is expected to come back unchanged."""
        result = normalize_score(0.7)
        assert result == 0.7

    def test_custom_range(self) -> None:
        """With an explicit 0.0-10.0 range, out-of-range inputs hit the bounds."""
        bounds = {"min_val": 0.0, "max_val": 10.0}

        above = normalize_score(15.0, **bounds)
        assert above == 10.0

        below = normalize_score(-5.0, **bounds)
        assert below == 0.0
|
||||
Reference in New Issue
Block a user