"""Tests for similarity adapters — exact, BLEU, ROUGE-L, cosine.""" from __future__ import annotations import pytest from prometheus.infrastructure.similarity import ( BleuSimilarity, CosineSimilarity, ExactMatchSimilarity, RougeLSimilarity, create_similarity_adapter, ) class TestExactMatchSimilarity: def test_exact_match(self): s = ExactMatchSimilarity() assert s.compute("Hello World", "Hello World") == 1.0 def test_case_insensitive(self): s = ExactMatchSimilarity() assert s.compute("hello world", "HELLO WORLD") == 1.0 def test_whitespace_trimmed(self): s = ExactMatchSimilarity() assert s.compute(" hello ", "hello") == 1.0 def test_no_match(self): s = ExactMatchSimilarity() assert s.compute("hello", "world") == 0.0 def test_partial_no_match(self): s = ExactMatchSimilarity() assert s.compute("hello world", "hello") == 0.0 class TestBleuSimilarity: def test_perfect_match(self): s = BleuSimilarity() assert s.compute("the cat sat on the mat", "the cat sat on the mat") == 1.0 def test_no_overlap(self): s = BleuSimilarity() assert s.compute("aaa bbb ccc", "ddd eee fff") == 0.0 def test_partial_overlap(self): s = BleuSimilarity() score = s.compute("the cat sat", "the cat") assert 0.0 < score < 1.0 def test_empty_prediction(self): s = BleuSimilarity() assert s.compute("", "hello world") == 0.0 def test_empty_expected(self): s = BleuSimilarity() assert s.compute("hello world", "") == 0.0 def test_both_empty(self): s = BleuSimilarity() assert s.compute("", "") == 0.0 def test_shorter_prediction_gets_brevity_penalty(self): s = BleuSimilarity() short = s.compute("cat", "the cat sat on the mat") full = s.compute("the cat sat on the mat", "the cat sat on the mat") assert short < full class TestRougeLSimilarity: def test_perfect_match(self): s = RougeLSimilarity() assert s.compute("the cat sat", "the cat sat") == 1.0 def test_no_overlap(self): s = RougeLSimilarity() assert s.compute("aaa bbb", "ccc ddd") == 0.0 def test_partial_overlap(self): s = RougeLSimilarity() score = s.compute("the cat sat on the mat", "the cat on the rug") assert 0.0 < score < 1.0 def test_empty_prediction(self): s = RougeLSimilarity() assert s.compute("", "hello") == 0.0 def test_subsequence(self): s = RougeLSimilarity() # "cat mat" is a subsequence of "the cat sat on the mat" score = s.compute("cat mat", "the cat sat on the mat") assert score > 0.0 class TestCosineSimilarity: def test_identical_texts(self): s = CosineSimilarity() assert s.compute("hello world", "hello world") == pytest.approx(1.0) def test_no_overlap(self): s = CosineSimilarity() assert s.compute("aaa bbb", "ccc ddd") == 0.0 def test_partial_overlap(self): s = CosineSimilarity() score = s.compute("hello world foo", "hello world bar") assert 0.0 < score < 1.0 def test_empty_prediction(self): s = CosineSimilarity() assert s.compute("", "hello") == 0.0 class TestCreateSimilarityAdapter: def test_create_exact(self): adapter = create_similarity_adapter("exact") assert isinstance(adapter, ExactMatchSimilarity) def test_create_bleu(self): adapter = create_similarity_adapter("bleu") assert isinstance(adapter, BleuSimilarity) def test_create_rouge_l(self): adapter = create_similarity_adapter("rouge_l") assert isinstance(adapter, RougeLSimilarity) def test_create_cosine(self): adapter = create_similarity_adapter("cosine") assert isinstance(adapter, CosineSimilarity) def test_unknown_metric_raises(self): with pytest.raises(ValueError, match="Unknown eval metric"): create_similarity_adapter("nonexistent")