"""Unit tests for scoring logic.""" from __future__ import annotations from prometheus.domain.entities import EvalResult, Trajectory from prometheus.domain.scoring import normalize_score, should_accept def _make_eval(scores: list[float]) -> EvalResult: return EvalResult( scores=scores, feedbacks=[""] * len(scores), trajectories=[ Trajectory(f"in{i}", f"out{i}", s, "", "p") for i, s in enumerate(scores) ], ) class TestShouldAccept: def test_accepts_improvement(self) -> None: old = _make_eval([0.3, 0.4]) new = _make_eval([0.8, 0.9]) assert should_accept(old, new) is True def test_rejects_regression(self) -> None: old = _make_eval([0.8, 0.9]) new = _make_eval([0.3, 0.4]) assert should_accept(old, new) is False def test_rejects_equal(self) -> None: old = _make_eval([0.5, 0.5]) new = _make_eval([0.5, 0.5]) assert should_accept(old, new) is False def test_min_improvement_threshold(self) -> None: old = _make_eval([0.5]) new = _make_eval([0.6]) assert should_accept(old, new, min_improvement=0.2) is False assert should_accept(old, new, min_improvement=0.05) is True class TestNormalizeScore: def test_clamps_high(self) -> None: assert normalize_score(1.5) == 1.0 def test_clamps_low(self) -> None: assert normalize_score(-0.5) == 0.0 def test_passes_within_range(self) -> None: assert normalize_score(0.7) == 0.7 def test_custom_range(self) -> None: assert normalize_score(15.0, min_val=0.0, max_val=10.0) == 10.0 assert normalize_score(-5.0, min_val=0.0, max_val=10.0) == 0.0