Aggregates all v0.2.0 sprint work (GARAA-30 through GARAA-40) and fixes 2 integration tests that broke when the codebase went async (DSPyLLMAdapter and full pipeline tests now properly await coroutines). 277 tests pass (260 unit + 17 integration). Co-Authored-By: Paperclip <noreply@paperclip.ing>
134 lines
4.0 KiB
Python
134 lines
4.0 KiB
Python
"""Tests for similarity adapters — exact, BLEU, ROUGE-L, cosine."""
|
|
from __future__ import annotations
|
|
|
|
import pytest
|
|
|
|
from prometheus.infrastructure.similarity import (
|
|
BleuSimilarity,
|
|
CosineSimilarity,
|
|
ExactMatchSimilarity,
|
|
RougeLSimilarity,
|
|
create_similarity_adapter,
|
|
)
|
|
|
|
|
|
class TestExactMatchSimilarity:
    """Exercise ``ExactMatchSimilarity.compute`` on matching and non-matching pairs."""

    def test_exact_match(self):
        """Identical strings score 1.0."""
        matcher = ExactMatchSimilarity()
        score = matcher.compute("Hello World", "Hello World")
        assert score == 1.0

    def test_case_insensitive(self):
        """Strings differing only in letter case score 1.0."""
        matcher = ExactMatchSimilarity()
        score = matcher.compute("hello world", "HELLO WORLD")
        assert score == 1.0

    def test_whitespace_trimmed(self):
        """Surrounding whitespace on one side does not prevent a match."""
        matcher = ExactMatchSimilarity()
        score = matcher.compute(" hello ", "hello")
        assert score == 1.0

    def test_no_match(self):
        """Completely different strings score 0.0."""
        matcher = ExactMatchSimilarity()
        score = matcher.compute("hello", "world")
        assert score == 0.0

    def test_partial_no_match(self):
        """A shared prefix is not enough: the score is still 0.0."""
        matcher = ExactMatchSimilarity()
        score = matcher.compute("hello world", "hello")
        assert score == 0.0
|
|
|
|
|
|
class TestBleuSimilarity:
    """Exercise ``BleuSimilarity.compute`` on full, partial, empty and short inputs."""

    def test_perfect_match(self):
        """Identical sentences score exactly 1.0."""
        bleu = BleuSimilarity()
        sentence = "the cat sat on the mat"
        assert bleu.compute(sentence, "the cat sat on the mat") == 1.0

    def test_no_overlap(self):
        """Sentences sharing no tokens score 0.0."""
        bleu = BleuSimilarity()
        result = bleu.compute("aaa bbb ccc", "ddd eee fff")
        assert result == 0.0

    def test_partial_overlap(self):
        """Partially overlapping sentences score strictly between 0 and 1."""
        bleu = BleuSimilarity()
        result = bleu.compute("the cat sat", "the cat")
        assert 0.0 < result < 1.0

    def test_empty_prediction(self):
        """An empty first argument yields 0.0."""
        bleu = BleuSimilarity()
        result = bleu.compute("", "hello world")
        assert result == 0.0

    def test_empty_expected(self):
        """An empty second argument yields 0.0."""
        bleu = BleuSimilarity()
        result = bleu.compute("hello world", "")
        assert result == 0.0

    def test_both_empty(self):
        """Two empty strings yield 0.0 rather than a perfect score."""
        bleu = BleuSimilarity()
        result = bleu.compute("", "")
        assert result == 0.0

    def test_shorter_prediction_gets_brevity_penalty(self):
        """A one-word prediction scores lower than the full matching sentence.

        The test name attributes the gap to BLEU's brevity penalty; the
        assertion itself only checks the ordering of the two scores.
        """
        bleu = BleuSimilarity()
        reference = "the cat sat on the mat"
        truncated_score = bleu.compute("cat", reference)
        complete_score = bleu.compute("the cat sat on the mat", reference)
        assert truncated_score < complete_score
|
|
|
|
|
|
class TestRougeLSimilarity:
    """Exercise ``RougeLSimilarity.compute`` on full, partial and empty inputs."""

    def test_perfect_match(self):
        """Identical sentences score exactly 1.0."""
        rouge = RougeLSimilarity()
        result = rouge.compute("the cat sat", "the cat sat")
        assert result == 1.0

    def test_no_overlap(self):
        """Sentences sharing no tokens score 0.0."""
        rouge = RougeLSimilarity()
        result = rouge.compute("aaa bbb", "ccc ddd")
        assert result == 0.0

    def test_partial_overlap(self):
        """Partially overlapping sentences score strictly between 0 and 1."""
        rouge = RougeLSimilarity()
        result = rouge.compute("the cat sat on the mat", "the cat on the rug")
        assert 0.0 < result < 1.0

    def test_empty_prediction(self):
        """An empty first argument yields 0.0."""
        rouge = RougeLSimilarity()
        result = rouge.compute("", "hello")
        assert result == 0.0

    def test_subsequence(self):
        """A non-contiguous subsequence of the other text gets a positive score."""
        rouge = RougeLSimilarity()
        # "cat mat" appears in order, though not adjacently, inside
        # "the cat sat on the mat".
        result = rouge.compute("cat mat", "the cat sat on the mat")
        assert result > 0.0
|
|
|
|
|
|
class TestCosineSimilarity:
    """Exercise ``CosineSimilarity.compute`` on identical, disjoint and empty inputs."""

    def test_identical_texts(self):
        """Identical texts score approximately 1.0 (float tolerance applied)."""
        cosine = CosineSimilarity()
        result = cosine.compute("hello world", "hello world")
        assert result == pytest.approx(1.0)

    def test_no_overlap(self):
        """Texts sharing no tokens score 0.0."""
        cosine = CosineSimilarity()
        result = cosine.compute("aaa bbb", "ccc ddd")
        assert result == 0.0

    def test_partial_overlap(self):
        """Texts sharing some tokens score strictly between 0 and 1."""
        cosine = CosineSimilarity()
        result = cosine.compute("hello world foo", "hello world bar")
        assert 0.0 < result < 1.0

    def test_empty_prediction(self):
        """An empty first argument yields 0.0."""
        cosine = CosineSimilarity()
        result = cosine.compute("", "hello")
        assert result == 0.0
|
|
|
|
|
|
class TestCreateSimilarityAdapter:
    """Check that ``create_similarity_adapter`` maps metric names to adapter classes."""

    def test_create_exact(self):
        """The name "exact" produces an ``ExactMatchSimilarity``."""
        assert isinstance(create_similarity_adapter("exact"), ExactMatchSimilarity)

    def test_create_bleu(self):
        """The name "bleu" produces a ``BleuSimilarity``."""
        assert isinstance(create_similarity_adapter("bleu"), BleuSimilarity)

    def test_create_rouge_l(self):
        """The name "rouge_l" produces a ``RougeLSimilarity``."""
        assert isinstance(create_similarity_adapter("rouge_l"), RougeLSimilarity)

    def test_create_cosine(self):
        """The name "cosine" produces a ``CosineSimilarity``."""
        assert isinstance(create_similarity_adapter("cosine"), CosineSimilarity)

    def test_unknown_metric_raises(self):
        """An unrecognised name raises ``ValueError`` with the expected message."""
        unknown_name = "nonexistent"
        with pytest.raises(ValueError, match="Unknown eval metric"):
            create_similarity_adapter(unknown_name)
|