Aggregates all v0.2.0 sprint work (GARAA-30 through GARAA-40) and fixes 2 integration tests that broke when the codebase went async (DSPyLLMAdapter and full pipeline tests now properly await coroutines). 277 tests pass (260 unit + 17 integration). Co-Authored-By: Paperclip <noreply@paperclip.ing>
118 lines
3.1 KiB
Python
118 lines
3.1 KiB
Python
"""Shared test fixtures."""
|
|
from __future__ import annotations
|
|
|
|
from unittest.mock import AsyncMock, MagicMock
|
|
|
|
import pytest
|
|
|
|
from prometheus.domain.entities import (
|
|
EvalResult,
|
|
Prompt,
|
|
SyntheticExample,
|
|
Trajectory,
|
|
)
|
|
|
|
|
|
@pytest.fixture
def seed_prompt() -> Prompt:
    """Provide a ``Prompt`` wrapping a generic assistant instruction."""
    seed_text = "You are a helpful assistant. Answer the question."
    return Prompt(text=seed_text)
|
|
|
|
|
|
@pytest.fixture
def task_description() -> str:
    """Provide a fixed one-sentence task description string."""
    description = "Answer factual questions accurately and concisely."
    return description
|
|
|
|
|
|
@pytest.fixture
def synthetic_pool() -> list[SyntheticExample]:
    """Provide 20 synthetic examples whose ids run from 0 to 19.

    Each example's ``input_text`` embeds its own id ("Test input <id>").
    """
    pool = []
    for idx in range(20):
        pool.append(SyntheticExample(input_text=f"Test input {idx}", id=idx))
    return pool
|
|
|
|
|
|
@pytest.fixture
def mock_eval_result() -> EvalResult:
    """Provide an ``EvalResult`` with five scores, feedbacks and trajectories.

    Trajectory ``i`` carries the i-th score and feedback, with input/output
    text "Input i" / "Output i" and a constant ``prompt_used``.
    """
    scores = [0.3, 0.5, 0.4, 0.6, 0.2]
    feedbacks = [
        "Incomplete answer",
        "Missing key detail",
        "Wrong format",
        "Partially correct",
        "Completely off topic",
    ]

    # Pair each score with its feedback positionally to build the trajectories.
    trajectories = []
    for idx, (score, feedback) in enumerate(zip(scores, feedbacks)):
        trajectories.append(
            Trajectory(
                input_text=f"Input {idx}",
                output_text=f"Output {idx}",
                score=score,
                feedback=feedback,
                prompt_used="test prompt",
            )
        )

    return EvalResult(
        scores=scores,
        feedbacks=feedbacks,
        trajectories=trajectories,
    )
|
|
|
|
|
|
@pytest.fixture
def mock_llm_port() -> AsyncMock:
    """Mock LLMPort whose ``execute`` returns a canned response string."""
    llm = AsyncMock()
    # Dotted key configures the return value of the child ``execute`` mock.
    llm.configure_mock(**{"execute.return_value": "This is a mock response."})
    return llm
|
|
|
|
|
|
@pytest.fixture
def mock_judge_port() -> AsyncMock:
    """Mock JudgePort whose ``judge_batch`` returns five moderate verdicts.

    Each verdict is a ``(score, feedback)`` tuple with score 0.5.
    """
    verdict = (0.5, "Moderate quality, needs improvement.")
    judge = AsyncMock()
    judge.judge_batch.return_value = [verdict for _ in range(5)]
    return judge
|
|
|
|
|
|
@pytest.fixture
def mock_proposer_port() -> AsyncMock:
    """Mock ProposerPort whose ``propose`` returns a fixed, reworded prompt."""
    proposed = Prompt(
        text="You are a very helpful assistant. Answer the question precisely."
    )
    proposer = AsyncMock()
    proposer.propose.return_value = proposed
    return proposer
|
|
|
|
|
|
@pytest.fixture
def mock_crossover_port() -> AsyncMock:
    """Mock CrossoverPort whose ``crossover`` joins two parent prompts.

    The result text is parent A's text, then " Also, ", then parent B's
    text lower-cased.
    """

    async def _join_parents(
        parent_a: Prompt, parent_b: Prompt, task_description: str
    ) -> Prompt:
        # task_description is accepted for signature compatibility but unused.
        combined = f"{parent_a.text} Also, {parent_b.text.lower()}"
        return Prompt(text=combined)

    crossover_port = AsyncMock()
    crossover_port.crossover = AsyncMock(side_effect=_join_parents)
    return crossover_port
|
|
|
|
|
|
@pytest.fixture
def mock_mutation_port() -> AsyncMock:
    """Mock MutationPort whose ``mutate`` tags the prompt with its mutation type.

    The result text is "[<mutation_type>] " followed by the original prompt
    text; ``mutation_type`` defaults to "paraphrase".
    """

    async def _tag_prompt(
        prompt: Prompt, task_description: str, mutation_type: str = "paraphrase"
    ) -> Prompt:
        # task_description is accepted for signature compatibility but unused.
        tagged = f"[{mutation_type}] {prompt.text}"
        return Prompt(text=tagged)

    mutation_port = AsyncMock()
    mutation_port.mutate = AsyncMock(side_effect=_tag_prompt)
    return mutation_port
|