# Prompt-optimizer/tests/unit/test_adapter_config.py

"""Unit tests for multi-model adapter configuration.

Verifies that each adapter uses its own dspy.LM instance and
that per-model api_base/api_key_env overrides are wired correctly.
"""
from __future__ import annotations

import json
from unittest.mock import MagicMock, patch

import dspy
import pytest

from prometheus.domain.entities import Prompt, SyntheticExample, Trajectory
from prometheus.infrastructure.judge_adapter import DSPyJudgeAdapter
from prometheus.infrastructure.llm_adapter import DSPyLLMAdapter
from prometheus.infrastructure.proposer_adapter import DSPyProposerAdapter
from prometheus.infrastructure.synth_adapter import DSPySyntheticAdapter


@pytest.fixture
def task_lm() -> dspy.LM:
    """Provide a scripted dummy LM standing in for the task-execution model."""
    scripted_answers = [{"output": "task model output"}]
    return dspy.utils.DummyLM(scripted_answers)


@pytest.fixture
def judge_lm() -> dspy.LM:
    """Provide a scripted dummy LM standing in for the judge model.

    The single canned answer includes a ``reasoning`` entry because the
    judge is driven through ChainOfThought, which expects that field.
    """
    verdict = {
        "reasoning": "Evaluating output.",
        "score": "0.8",
        "feedback": "Good response.",
        "dimension_scores": "{}",
    }
    return dspy.utils.DummyLM([verdict])


@pytest.fixture
def proposer_lm() -> dspy.LM:
    """Provide a scripted dummy LM standing in for the proposer model.

    The single canned answer includes a ``reasoning`` entry because the
    proposer is driven through ChainOfThought, which expects that field.
    """
    proposal = {
        "reasoning": "Analyzing failures.",
        "new_instruction": "Improved prompt: be more specific.",
    }
    return dspy.utils.DummyLM([proposal])


@pytest.fixture
def synth_lm() -> dspy.LM:
    """Provide a scripted dummy LM standing in for the synthetic-data model.

    The single canned answer includes a ``reasoning`` entry because the
    generator is driven through ChainOfThought, which expects that field.
    The ``examples`` entry is a JSON-encoded list of three input strings.
    """
    encoded_examples = json.dumps(["input 1", "input 2", "input 3"])
    generation = {"reasoning": "Generating examples.", "examples": encoded_examples}
    return dspy.utils.DummyLM([generation])


class TestDSPyLLMAdapterOwnLM:
    """Bug #2 fix: DSPyLLMAdapter must use the LM it receives, not the global one.

    The competing "global" LM is installed with ``dspy.context`` so that it
    only exists for the duration of each test body.  ``dspy.configure`` would
    rewrite process-wide settings without restoring them, leaking the dummy
    LM into whichever tests run afterwards; ``dspy.context`` is also the
    mechanism DSPy provides for overriding settings inside async code.
    """

    @pytest.mark.asyncio
    async def test_uses_provided_lm_not_global(self) -> None:
        """The adapter answers with its own LM even when another LM is ambient."""
        local_lm = dspy.utils.DummyLM([{"output": "local response"}])
        global_lm = dspy.utils.DummyLM([{"output": "global response"}])

        # Scoped override: restored automatically when the block exits.
        with dspy.context(lm=global_lm):
            adapter = DSPyLLMAdapter(lm=local_lm)
            result = await adapter.execute(Prompt(text="test"), "input")

        assert result == "local response"

    @pytest.mark.asyncio
    async def test_does_not_affect_global_lm(self) -> None:
        """Executing through the adapter leaves the ambient/global LM untouched."""
        local_lm = dspy.utils.DummyLM([{"output": "local response"}])
        global_lm = dspy.utils.DummyLM([{"output": "global response"}])
        # Snapshot of the process-wide LM before anything in this test runs.
        lm_before = dspy.settings.lm

        with dspy.context(lm=global_lm):
            adapter = DSPyLLMAdapter(lm=local_lm)
            await adapter.execute(Prompt(text="test"), "input")

            # Global LM should still be the same
            assert dspy.settings.lm is global_lm

        # Once the scoped override is gone, the process-wide LM must be the
        # one observed at the start, i.e. the adapter did not reconfigure it.
        assert dspy.settings.lm is lm_before


class TestDSPyJudgeAdapterOwnLM:
    """DSPyJudgeAdapter must use its own LM instance.

    Where a competing "global" LM is needed it is installed with
    ``dspy.context`` so the override is limited to the test body instead of
    being written into process-wide settings by ``dspy.configure``.
    """

    @pytest.mark.asyncio
    async def test_uses_provided_lm(self, judge_lm: dspy.LM) -> None:
        """The score and feedback scripted in ``judge_lm`` come back from the adapter."""
        adapter = DSPyJudgeAdapter(lm=judge_lm)
        results = await adapter.judge_batch(
            task_description="Test task",
            pairs=[("input 1", "output 1")],
        )
        assert len(results) == 1
        score, feedback = results[0]
        assert score == 0.8
        assert feedback == "Good response."

    @pytest.mark.asyncio
    async def test_does_not_use_global_lm(self) -> None:
        """With a different ambient LM present, the judge's own score (0.9) wins."""
        judge_lm = dspy.utils.DummyLM(
            [{"reasoning": "ok", "score": "0.9", "feedback": "Judge-specific response", "dimension_scores": "{}"}]
        )
        global_lm = dspy.utils.DummyLM(
            [{"reasoning": "no", "score": "0.1", "feedback": "Wrong LM!", "dimension_scores": "{}"}]
        )

        # Scoped override: restored automatically when the block exits.
        with dspy.context(lm=global_lm):
            adapter = DSPyJudgeAdapter(lm=judge_lm)
            results = await adapter.judge_batch("task", [("in", "out")])

        assert results[0][0] == 0.9


class TestDSPyProposerAdapterOwnLM:
    """DSPyProposerAdapter must use its own LM instance.

    Where a competing "global" LM is needed it is installed with
    ``dspy.context`` so the override is limited to the test body instead of
    being written into process-wide settings by ``dspy.configure``.
    """

    @pytest.mark.asyncio
    async def test_uses_provided_lm(self, proposer_lm: dspy.LM) -> None:
        """The instruction scripted in ``proposer_lm`` shows up in the proposed prompt."""
        adapter = DSPyProposerAdapter(lm=proposer_lm)
        trajectories = [
            Trajectory(
                input_text="test input",
                output_text="test output",
                score=0.3,
                feedback="bad",
                prompt_used="old prompt",
            )
        ]
        result = await adapter.propose(
            current_prompt=Prompt(text="old prompt"),
            trajectories=trajectories,
            task_description="Test task",
        )
        assert "Improved prompt" in result.text

    @pytest.mark.asyncio
    async def test_does_not_use_global_lm(self) -> None:
        """With a different ambient LM present, the proposer's own answer wins."""
        proposer_lm = dspy.utils.DummyLM(
            [{"reasoning": "ok", "new_instruction": "proposer-specific"}]
        )
        global_lm = dspy.utils.DummyLM(
            [{"reasoning": "no", "new_instruction": "wrong-global"}]
        )

        # Scoped override: restored automatically when the block exits.
        with dspy.context(lm=global_lm):
            adapter = DSPyProposerAdapter(lm=proposer_lm)
            result = await adapter.propose(
                current_prompt=Prompt(text="test"),
                trajectories=[],
                task_description="task",
            )

        assert result.text == "proposer-specific"


class TestDSPySyntheticAdapterOwnLM:
    """DSPySyntheticAdapter must use its own LM instance.

    Where a competing "global" LM is needed it is installed with
    ``dspy.context`` so the override is limited to the test body instead of
    being written into process-wide settings by ``dspy.configure``.
    """

    def test_uses_provided_lm(self, synth_lm: dspy.LM) -> None:
        """Three scripted inputs come back as three SyntheticExample objects."""
        adapter = DSPySyntheticAdapter(lm=synth_lm)
        results = adapter.generate_inputs("Test task", 3)
        assert len(results) == 3
        assert all(isinstance(ex, SyntheticExample) for ex in results)

    def test_does_not_use_global_lm(self) -> None:
        """With a different ambient LM present, the generator's own example wins."""
        synth_lm = dspy.utils.DummyLM(
            [{"reasoning": "ok", "examples": json.dumps(["synth-specific"])}]
        )
        global_lm = dspy.utils.DummyLM(
            [{"reasoning": "no", "examples": json.dumps(["wrong-global"])}]
        )

        # Scoped override: restored automatically when the block exits.
        with dspy.context(lm=global_lm):
            adapter = DSPySyntheticAdapter(lm=synth_lm)
            results = adapter.generate_inputs("task", 1)

        assert results[0].input_text == "synth-specific"


class TestPerModelOverrides:
    """Checks on the per-model ``*_api_base`` / ``*_api_key_env`` config fields.

    NOTE(review): both tests only inspect the ``OptimizationConfig`` object.
    Nothing here asserts that the values are actually handed to ``dspy.LM``;
    confirm whether that wiring is covered elsewhere.
    """

    @patch("prometheus.cli.commands.optimize.dspy.LM")
    def test_per_model_api_base_override(self, mock_lm_cls: MagicMock) -> None:
        """Judge-specific overrides are stored on the config; task override stays unset."""
        # NOTE(review): the patched dspy.LM gets a return value but is never
        # asserted on in this test.
        mock_lm_cls.return_value = MagicMock()

        from prometheus.application.dto import OptimizationConfig

        judge_overrides = {
            "judge_api_base": "https://judge.example.com/v1",
            "judge_api_key_env": "JUDGE_API_KEY",
        }
        config = OptimizationConfig(
            seed_prompt="test",
            task_description="test",
            task_model="openai/gpt-4o-mini",
            judge_model="openai/gpt-4o",
            proposer_model="openai/gpt-4o",
            synth_model="openai/gpt-4o",
            **judge_overrides,
        )

        # Verify config carries the overrides
        for field_name, expected in judge_overrides.items():
            assert getattr(config, field_name) == expected
        assert config.task_api_base is None

    def test_config_defaults_to_none(self) -> None:
        """Every per-model override is ``None`` when only the required fields are given."""
        from prometheus.application.dto import OptimizationConfig

        config = OptimizationConfig(seed_prompt="test", task_description="test")
        for role in ("task", "judge", "proposer", "synth"):
            assert getattr(config, f"{role}_api_base") is None
            assert getattr(config, f"{role}_api_key_env") is None