Initial commit: PROMETHEUS v0.1.0 - Prompt optimizer
- Clean architecture (domain/application/infrastructure)
- DSPy-based evolution engine with scoring
- CLI via pyproject.toml entry point
- Unit + integration tests (~300 tests)
- Configs for glm-5.1 and glm-4.5-air models
- Z.AI endpoint integration
This commit is contained in:
0
tests/integration/__init__.py
Normal file
0
tests/integration/__init__.py
Normal file
29
tests/integration/test_dspy_adapters.py
Normal file
29
tests/integration/test_dspy_adapters.py
Normal file
@@ -0,0 +1,29 @@
|
||||
"""Integration tests for DSPy adapters using DSPy mock LM."""
|
||||
from __future__ import annotations
|
||||
|
||||
import dspy
|
||||
import pytest
|
||||
|
||||
from prometheus.domain.entities import Prompt
|
||||
from prometheus.infrastructure.llm_adapter import DSPyLLMAdapter
|
||||
|
||||
|
||||
@pytest.fixture
def mock_lm() -> dspy.LM:
    """Provide a DSPy dummy LM preloaded with a single canned response.

    The dummy LM is installed through ``dspy.configure`` and then handed to
    the requesting test.
    """
    canned_responses = [
        {"output": "Mock output response"},
    ]
    dummy = dspy.utils.DummyLM(canned_responses)
    # NOTE(review): nothing here undoes dspy.configure, so the configured LM
    # presumably stays active after the test finishes — confirm.
    dspy.configure(lm=dummy)
    return dummy
|
||||
|
||||
|
||||
class TestDSPyLLMAdapter:
    """Integration checks for ``DSPyLLMAdapter`` with the ``mock_lm`` fixture requested."""

    def test_execute_returns_response(self, mock_lm: dspy.LM) -> None:
        """``execute`` returns a non-empty string for a prompt and an input."""
        question = "What is 2+2?"
        llm_adapter = DSPyLLMAdapter(model="openai/gpt-4o-mini")
        instruction = Prompt(text="Answer the question.")

        response = llm_adapter.execute(instruction, question)

        assert isinstance(response, str)
        assert len(response) > 0
|
||||
74
tests/integration/test_full_pipeline.py
Normal file
74
tests/integration/test_full_pipeline.py
Normal file
@@ -0,0 +1,74 @@
|
||||
"""End-to-end pipeline test with mocked LLM calls."""
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
from prometheus.application.bootstrap import SyntheticBootstrap
|
||||
from prometheus.application.dto import OptimizationConfig
|
||||
from prometheus.application.evaluator import PromptEvaluator
|
||||
from prometheus.application.use_cases import OptimizePromptUseCase
|
||||
from prometheus.domain.entities import EvalResult, Prompt, SyntheticExample, Trajectory
|
||||
from prometheus.domain.ports import JudgePort, LLMPort, ProposerPort
|
||||
|
||||
|
||||
def _make_eval(scores: list[float]) -> EvalResult:
    """Build an ``EvalResult`` carrying one placeholder entry per score.

    Each score gets the feedback string ``"feedback"`` and a ``Trajectory``
    whose first two fields are ``in<i>`` / ``out<i>`` for its position ``i``.
    """
    trajectories = [
        Trajectory(f"in{idx}", f"out{idx}", score, "feedback", "prompt")
        for idx, score in enumerate(scores)
    ]
    feedbacks = ["feedback"] * len(scores)
    return EvalResult(
        scores=scores,
        feedbacks=feedbacks,
        trajectories=trajectories,
    )
|
||||
|
||||
|
||||
class TestFullPipeline:
    """End-to-end run of ``OptimizePromptUseCase`` against mocked collaborators."""

    def test_pipeline_produces_result(self) -> None:
        """Full pipeline with mocked ports produces an OptimizationResult."""
        n_iterations = 5
        batch_size = 5
        n_inputs = 20

        llm_stub = MagicMock(spec=LLMPort)
        llm_stub.execute.return_value = "mock response"

        judge_stub = MagicMock(spec=JudgePort)
        judge_stub.judge_batch.return_value = [(0.5, "ok")] * batch_size

        # Scripted evaluations: one low-scoring seed evaluation, then for each
        # iteration a "current" evaluation followed by a higher-scoring "new"
        # evaluation (intended to be accepted).
        scripted_evals = [_make_eval([0.3] * batch_size)]
        for _ in range(n_iterations):
            scripted_evals.extend(
                (
                    _make_eval([0.4] * batch_size),
                    _make_eval([0.6] * batch_size),
                )
            )

        proposer_stub = MagicMock(spec=ProposerPort)
        proposer_stub.propose.return_value = Prompt(text="Improved prompt")

        evaluator = PromptEvaluator(llm_stub, judge_stub)
        # Swap the evaluator's evaluate method for a mock that replays the script.
        evaluator.evaluate = MagicMock(side_effect=scripted_evals)

        generator_stub = MagicMock()
        synthetic_inputs = [
            SyntheticExample(input_text=f"synth input {i}", id=i)
            for i in range(n_inputs)
        ]
        generator_stub.generate_inputs.return_value = synthetic_inputs
        bootstrap = SyntheticBootstrap(generator=generator_stub, seed=42)

        use_case = OptimizePromptUseCase(
            evaluator=evaluator,
            proposer=proposer_stub,
            bootstrap=bootstrap,
        )
        config = OptimizationConfig(
            seed_prompt="Answer questions.",
            task_description="Answer questions accurately.",
            max_iterations=n_iterations,
            n_synthetic_inputs=n_inputs,
            minibatch_size=batch_size,
            seed=42,
        )

        outcome = use_case.execute(config)

        assert outcome.initial_prompt == "Answer questions."
        assert outcome.optimized_prompt == "Improved prompt"
        assert outcome.iterations_used == 5
        assert outcome.total_llm_calls > 0
        assert outcome.final_score > outcome.initial_score
|
||||
Reference in New Issue
Block a user