# Prompt-optimizer/tests/unit/test_similarity.py

"""Tests for similarity adapters — exact, BLEU, ROUGE-L, cosine."""
from __future__ import annotations

import pytest

from prometheus.infrastructure.similarity import (
    BleuSimilarity,
    CosineSimilarity,
    ExactMatchSimilarity,
    RougeLSimilarity,
    create_similarity_adapter,
)


class TestExactMatchSimilarity:
    """Checks for the exact-match adapter's all-or-nothing scoring."""

    def test_exact_match(self):
        """Two identical strings are expected to score 1.0."""
        score = ExactMatchSimilarity().compute("Hello World", "Hello World")
        assert score == 1.0

    def test_case_insensitive(self):
        """Strings differing only in letter case are expected to score 1.0."""
        score = ExactMatchSimilarity().compute("hello world", "HELLO WORLD")
        assert score == 1.0

    def test_whitespace_trimmed(self):
        """Surrounding whitespace on the prediction must not prevent a match."""
        score = ExactMatchSimilarity().compute("  hello  ", "hello")
        assert score == 1.0

    def test_no_match(self):
        """Completely different strings are expected to score 0.0."""
        score = ExactMatchSimilarity().compute("hello", "world")
        assert score == 0.0

    def test_partial_no_match(self):
        """A prediction that merely contains the expected text scores 0.0."""
        score = ExactMatchSimilarity().compute("hello world", "hello")
        assert score == 0.0


class TestBleuSimilarity:
    """Checks for the BLEU adapter: bounds, empty inputs and length effects."""

    def test_perfect_match(self):
        """An identical prediction and reference are expected to score 1.0."""
        sentence = "the cat sat on the mat"
        bleu = BleuSimilarity()
        assert bleu.compute(sentence, sentence) == 1.0

    def test_no_overlap(self):
        """Texts sharing no tokens are expected to score 0.0."""
        bleu = BleuSimilarity()
        result = bleu.compute("aaa bbb ccc", "ddd eee fff")
        assert result == 0.0

    def test_partial_overlap(self):
        """Partially overlapping texts must score strictly between 0 and 1."""
        bleu = BleuSimilarity()
        result = bleu.compute("the cat sat", "the cat")
        assert result > 0.0
        assert result < 1.0

    def test_empty_prediction(self):
        """An empty prediction is expected to score 0.0."""
        bleu = BleuSimilarity()
        result = bleu.compute("", "hello world")
        assert result == 0.0

    def test_empty_expected(self):
        """An empty reference is expected to score 0.0."""
        bleu = BleuSimilarity()
        result = bleu.compute("hello world", "")
        assert result == 0.0

    def test_both_empty(self):
        """Two empty strings are expected to score 0.0, not 1.0."""
        bleu = BleuSimilarity()
        result = bleu.compute("", "")
        assert result == 0.0

    def test_shorter_prediction_gets_brevity_penalty(self):
        """A one-word prediction must score below the full matching sentence."""
        reference = "the cat sat on the mat"
        bleu = BleuSimilarity()
        truncated_score = bleu.compute("cat", reference)
        complete_score = bleu.compute(reference, reference)
        assert truncated_score < complete_score


class TestRougeLSimilarity:
    """Checks for the ROUGE-L adapter on full, partial and missing overlap."""

    def test_perfect_match(self):
        """An identical prediction and reference are expected to score 1.0."""
        rouge = RougeLSimilarity()
        result = rouge.compute("the cat sat", "the cat sat")
        assert result == 1.0

    def test_no_overlap(self):
        """Texts sharing no tokens are expected to score 0.0."""
        rouge = RougeLSimilarity()
        result = rouge.compute("aaa bbb", "ccc ddd")
        assert result == 0.0

    def test_partial_overlap(self):
        """Partially overlapping texts must score strictly between 0 and 1."""
        rouge = RougeLSimilarity()
        result = rouge.compute("the cat sat on the mat", "the cat on the rug")
        assert result > 0.0
        assert result < 1.0

    def test_empty_prediction(self):
        """An empty prediction is expected to score 0.0."""
        rouge = RougeLSimilarity()
        result = rouge.compute("", "hello")
        assert result == 0.0

    def test_subsequence(self):
        """A non-contiguous subsequence of the reference earns a positive score."""
        rouge = RougeLSimilarity()
        # The words "cat mat" appear in order, though not adjacently, within
        # "the cat sat on the mat".
        result = rouge.compute("cat mat", "the cat sat on the mat")
        assert result > 0.0


class TestCosineSimilarity:
    """Checks for the cosine adapter on identical, disjoint and empty texts."""

    def test_identical_texts(self):
        """Identical texts score 1.0, compared with a floating-point tolerance."""
        cosine = CosineSimilarity()
        result = cosine.compute("hello world", "hello world")
        assert result == pytest.approx(1.0)

    def test_no_overlap(self):
        """Texts sharing no tokens are expected to score 0.0."""
        cosine = CosineSimilarity()
        result = cosine.compute("aaa bbb", "ccc ddd")
        assert result == 0.0

    def test_partial_overlap(self):
        """Texts sharing some tokens must score strictly between 0 and 1."""
        cosine = CosineSimilarity()
        result = cosine.compute("hello world foo", "hello world bar")
        assert result > 0.0
        assert result < 1.0

    def test_empty_prediction(self):
        """An empty prediction is expected to score 0.0."""
        cosine = CosineSimilarity()
        result = cosine.compute("", "hello")
        assert result == 0.0


class TestCreateSimilarityAdapter:
    """Checks that the factory maps each metric name to its adapter class."""

    def test_create_exact(self):
        """The name "exact" yields an ExactMatchSimilarity instance."""
        assert isinstance(create_similarity_adapter("exact"), ExactMatchSimilarity)

    def test_create_bleu(self):
        """The name "bleu" yields a BleuSimilarity instance."""
        assert isinstance(create_similarity_adapter("bleu"), BleuSimilarity)

    def test_create_rouge_l(self):
        """The name "rouge_l" yields a RougeLSimilarity instance."""
        assert isinstance(create_similarity_adapter("rouge_l"), RougeLSimilarity)

    def test_create_cosine(self):
        """The name "cosine" yields a CosineSimilarity instance."""
        assert isinstance(create_similarity_adapter("cosine"), CosineSimilarity)

    def test_unknown_metric_raises(self):
        """An unrecognised metric name raises ValueError with a clear message."""
        expected_error = pytest.raises(ValueError, match="Unknown eval metric")
        with expected_error:
            create_similarity_adapter("nonexistent")