Aggregates all v0.2.0 sprint work (GARAA-30 through GARAA-40) and fixes 2 integration tests that broke when the codebase went async (DSPyLLMAdapter and full pipeline tests now properly await coroutines). 277 tests pass (260 unit + 17 integration). Co-Authored-By: Paperclip <noreply@paperclip.ing>
279 lines
11 KiB
Python
279 lines
11 KiB
Python
"""Tests for the CLI interface — prometheus optimize, version, etc.
|
|
|
|
Uses Typer's CliRunner for isolated command testing.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
from unittest.mock import AsyncMock, MagicMock, patch
|
|
|
|
import pytest
|
|
import yaml
|
|
from typer.testing import CliRunner
|
|
|
|
from prometheus.application.dto import OptimizationResult
|
|
from prometheus.cli.app import app
|
|
|
|
# Shared Typer test runner; every test in this module invokes `app` through it.
runner = CliRunner()
|
|
|
|
|
|
class TestCLIOptimize:
    """Tests for the `prometheus optimize` command.

    The use case, the DSPy adapters and the ``dspy`` module referenced by
    ``prometheus.cli.commands.optimize`` are replaced with mocks, so no test
    in this class reaches a real LLM backend.
    """

    # Module whose collaborators are patched out for each optimize invocation.
    _OPTIMIZE_MODULE = "prometheus.cli.commands.optimize"

    def _write_config(self, tmp_path: Path, **overrides: object) -> Path:
        """Write a minimal valid config YAML and return its path.

        Args:
            tmp_path: Directory in which ``config.yaml`` is created.
            **overrides: Extra or replacement top-level keys merged over the
                default ``seed_prompt`` / ``task_description`` entries.

        Returns:
            Path to the written ``config.yaml``.
        """
        data: dict[str, object] = {
            "seed_prompt": "You are a helpful assistant.",
            "task_description": "Answer factual questions accurately.",
        }
        data.update(overrides)
        config_file = tmp_path / "config.yaml"
        # Explicit encoding keeps the fixture independent of the platform locale.
        with open(config_file, "w", encoding="utf-8") as f:
            yaml.dump(data, f)
        return config_file

    def _invoke_optimize(
        self,
        tmp_path: Path,
        mock_result: OptimizationResult,
        *extra_args: str,
    ):
        """Run ``optimize`` through the CLI runner with all collaborators mocked.

        A default config is written into ``tmp_path`` and the command is
        invoked with ``-i <config> -o <tmp_path>/output.yaml`` followed by
        ``extra_args``.

        Args:
            tmp_path: Scratch directory holding the config and output files.
            mock_result: Value returned by awaiting the mocked use case's
                ``execute``.
            *extra_args: Additional command-line arguments appended after the
                input/output options.

        Returns:
            The result object produced by ``runner.invoke``.
        """
        config_file = self._write_config(tmp_path)
        output_file = tmp_path / "output.yaml"

        # AsyncMock makes `execute` awaitable, matching the async use case.
        mock_uc = AsyncMock()
        mock_uc.execute.return_value = mock_result

        # One flat patch.multiple replaces the former six nested `with patch`
        # blocks; each name is swapped for a MagicMock for the duration of
        # the invocation and restored afterwards.
        with patch.multiple(
            self._OPTIMIZE_MODULE,
            OptimizePromptUseCase=MagicMock(return_value=mock_uc),
            DSPySyntheticAdapter=MagicMock(),
            DSPyLLMAdapter=MagicMock(),
            DSPyJudgeAdapter=MagicMock(),
            DSPyProposerAdapter=MagicMock(),
            dspy=MagicMock(),
        ):
            return runner.invoke(
                app,
                [
                    "optimize",
                    "-i",
                    str(config_file),
                    "-o",
                    str(output_file),
                    *extra_args,
                ],
            )

    def test_optimize_with_valid_config(self, tmp_path: Path) -> None:
        """A valid config exits 0 and prints the "Optimized Prompt" heading."""
        mock_result = OptimizationResult(
            optimized_prompt="Improved prompt",
            initial_prompt="You are a helpful assistant.",
            iterations_used=5,
            total_llm_calls=50,
            initial_score=0.3,
            final_score=0.9,
            improvement=0.6,
            history=[],
        )

        result = self._invoke_optimize(tmp_path, mock_result)

        assert result.exit_code == 0
        assert "Optimized Prompt" in result.output

    def test_optimize_missing_input_file(self) -> None:
        """A non-existent input path yields a non-zero exit code."""
        result = runner.invoke(
            app,
            ["optimize", "-i", "/nonexistent/config.yaml"],
        )
        assert result.exit_code != 0

    def test_optimize_with_verbose_flag(self, tmp_path: Path) -> None:
        """The `-v` flag is accepted and the command exits 0."""
        mock_result = OptimizationResult(
            optimized_prompt="Improved",
            initial_prompt="test",
            iterations_used=1,
            total_llm_calls=10,
            initial_score=0.3,
            final_score=0.8,
            improvement=0.5,
            history=[],
        )

        result = self._invoke_optimize(tmp_path, mock_result, "-v")

        assert result.exit_code == 0

    def test_optimize_displays_metrics(self, tmp_path: Path) -> None:
        """Initial score, final score and signed improvement appear in output."""
        mock_result = OptimizationResult(
            optimized_prompt="Better prompt",
            initial_prompt="test",
            iterations_used=3,
            total_llm_calls=30,
            initial_score=0.40,
            final_score=0.85,
            improvement=0.45,
            history=[],
        )

        result = self._invoke_optimize(tmp_path, mock_result)

        assert result.exit_code == 0
        assert "0.40" in result.output
        assert "0.85" in result.output
        assert "+0.45" in result.output

    def test_optimize_with_max_concurrency_flag(self, tmp_path: Path) -> None:
        """The `--max-concurrency` option is accepted and the command exits 0."""
        mock_result = OptimizationResult(
            optimized_prompt="Better prompt",
            initial_prompt="test",
            iterations_used=1,
            total_llm_calls=10,
            initial_score=0.3,
            final_score=0.8,
            improvement=0.5,
            history=[],
        )

        result = self._invoke_optimize(
            tmp_path, mock_result, "--max-concurrency", "10"
        )

        assert result.exit_code == 0
|
|
|
|
|
|
class TestCLIHelp:
    """Checks the help output and the behaviour when no arguments are given."""

    def test_no_args_shows_help(self) -> None:
        """Invoking the app bare prints the banner or a usage message."""
        outcome = runner.invoke(app, [])
        # Typer uses exit code 2 when no_args_is_help=True
        assert outcome.exit_code in (0, 2)
        text = outcome.output
        assert "PROMETHEUS" in text or "Usage" in text

    def test_optimize_help(self) -> None:
        """`optimize --help` exits 0 and mentions the input option."""
        outcome = runner.invoke(app, ["optimize", "--help"])
        assert outcome.exit_code == 0
        text = outcome.output
        assert "input" in text.lower() or "INPUT" in text

    def test_version_help(self) -> None:
        """`version --help` exits 0."""
        outcome = runner.invoke(app, ["version", "--help"])
        assert outcome.exit_code == 0

    def test_init_help(self) -> None:
        """`init --help` exits 0."""
        outcome = runner.invoke(app, ["init", "--help"])
        assert outcome.exit_code == 0

    def test_list_help(self) -> None:
        """`list --help` exits 0."""
        outcome = runner.invoke(app, ["list", "--help"])
        assert outcome.exit_code == 0
|
|
|
|
|
|
class TestCLIVersion:
    """Checks the output of the `prometheus version` command."""

    def test_version_prints_version(self) -> None:
        """`version` exits 0 and prints the product name and version string."""
        outcome = runner.invoke(app, ["version"])
        text = outcome.output
        assert outcome.exit_code == 0
        assert "PROMETHEUS" in text
        # NOTE(review): the expected version is hard-coded here — confirm it
        # is kept in step with the package version on each release.
        assert "0.1.0" in text
|
|
|
|
|
|
class TestCLIList:
    """Tests for the `prometheus list` command."""

    def test_list_no_runs(self, tmp_path: Path) -> None:
        """An empty directory exits 0 and reports that no runs were found."""
        result = runner.invoke(app, ["list", "-d", str(tmp_path)])
        assert result.exit_code == 0
        assert "No optimization runs found" in result.output

    def test_list_with_result(self, tmp_path: Path) -> None:
        """A saved result file has its initial and final scores shown."""
        result_data = {
            "optimized_prompt": "Better prompt for testing",
            "initial_prompt": "test",
            "iterations_used": 5,
            "total_llm_calls": 50,
            "initial_score": 0.30,
            "final_score": 0.90,
            "improvement": 0.60,
            "history": [],
        }
        result_file = tmp_path / "output.yaml"
        # Uses the module-level `yaml` import (as `_write_config` does) rather
        # than re-importing it locally under an alias; explicit encoding keeps
        # the fixture independent of the platform locale.
        with open(result_file, "w", encoding="utf-8") as f:
            yaml.dump(result_data, f)

        result = runner.invoke(app, ["list", "-d", str(tmp_path)])
        assert result.exit_code == 0
        assert "0.30" in result.output
        assert "0.90" in result.output

    def test_list_nonexistent_directory(self) -> None:
        """A directory that does not exist makes the command exit with code 1."""
        result = runner.invoke(app, ["list", "-d", "/nonexistent/dir"])
        assert result.exit_code == 1
|