feat: v0.2.0 sprint — ground truth eval, crossover/mutation, checkpointing, similarity guards, dataset loader, CLI commands, extended test coverage
Aggregates all v0.2.0 sprint work (GARAA-30 through GARAA-40) and fixes 2 integration tests that broke when the codebase went async (DSPyLLMAdapter and full pipeline tests now properly await coroutines). 277 tests pass (260 unit + 17 integration). Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
278
tests/unit/test_cli.py
Normal file
278
tests/unit/test_cli.py
Normal file
@@ -0,0 +1,278 @@
|
||||
"""Tests for the CLI interface — prometheus optimize, version, etc.
|
||||
|
||||
Uses Typer's CliRunner for isolated command testing.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
import yaml
|
||||
from typer.testing import CliRunner
|
||||
|
||||
from prometheus.application.dto import OptimizationResult
|
||||
from prometheus.cli.app import app
|
||||
|
||||
# Shared Typer test runner; the tests in this module invoke `app` through it.
runner = CliRunner()
|
||||
|
||||
|
||||
class TestCLIOptimize:
    """Tests for the `prometheus optimize` command.

    Tests that get past argument validation run against mocks: the use-case
    class, the DSPy adapters and the ``dspy`` name looked up in
    ``prometheus.cli.commands.optimize`` are all replaced before the command
    is invoked.
    """

    # Module whose collaborators are swapped for mocks during a run.
    _OPTIMIZE_MODULE = "prometheus.cli.commands.optimize"

    def _write_config(self, tmp_path: Path, **overrides: object) -> Path:
        """Write a minimal valid config YAML and return its path.

        Args:
            tmp_path: Directory in which ``config.yaml`` is created.
            **overrides: Extra or replacement top-level keys for the config.

        Returns:
            Path to the written ``config.yaml``.
        """
        data = {
            "seed_prompt": "You are a helpful assistant.",
            "task_description": "Answer factual questions accurately.",
        }
        data.update(overrides)
        config_file = tmp_path / "config.yaml"
        # Explicit encoding keeps the fixture independent of the platform default.
        config_file.write_text(yaml.dump(data), encoding="utf-8")
        return config_file

    def _invoke_optimize(
        self,
        tmp_path: Path,
        mock_result: OptimizationResult,
        *extra_args: str,
    ):
        """Run ``optimize`` with all collaborators mocked and return the result.

        A config file is written into ``tmp_path`` and the command is invoked
        as ``optimize -i <config> -o <tmp_path>/output.yaml`` followed by
        ``extra_args``.

        Args:
            tmp_path: Scratch directory for the config and output files.
            mock_result: Value returned by the mocked use case's ``execute``.
            *extra_args: Additional CLI arguments appended to the invocation.

        Returns:
            The object returned by ``runner.invoke``.
        """
        config_file = self._write_config(tmp_path)
        output_file = tmp_path / "output.yaml"

        # AsyncMock makes ``execute`` awaitable, resolving to ``mock_result``.
        mock_uc = AsyncMock()
        mock_uc.execute.return_value = mock_result

        # One flat patch.multiple replaces the former six-level ``with`` pyramid.
        with patch.multiple(
            self._OPTIMIZE_MODULE,
            OptimizePromptUseCase=MagicMock(return_value=mock_uc),
            DSPySyntheticAdapter=MagicMock(),
            DSPyLLMAdapter=MagicMock(),
            DSPyJudgeAdapter=MagicMock(),
            DSPyProposerAdapter=MagicMock(),
            dspy=MagicMock(),
        ):
            return runner.invoke(
                app,
                [
                    "optimize",
                    "-i",
                    str(config_file),
                    "-o",
                    str(output_file),
                    *extra_args,
                ],
            )

    def test_optimize_with_valid_config(self, tmp_path: Path) -> None:
        """A valid config exits 0 and the output mentions "Optimized Prompt"."""
        mock_result = OptimizationResult(
            optimized_prompt="Improved prompt",
            initial_prompt="You are a helpful assistant.",
            iterations_used=5,
            total_llm_calls=50,
            initial_score=0.3,
            final_score=0.9,
            improvement=0.6,
            history=[],
        )

        result = self._invoke_optimize(tmp_path, mock_result)

        assert result.exit_code == 0
        assert "Optimized Prompt" in result.output

    def test_optimize_missing_input_file(self) -> None:
        """A non-existent input path makes the command exit non-zero."""
        result = runner.invoke(
            app,
            ["optimize", "-i", "/nonexistent/config.yaml"],
        )
        assert result.exit_code != 0

    def test_optimize_with_verbose_flag(self, tmp_path: Path) -> None:
        """The ``-v`` flag is accepted and the command still exits 0."""
        mock_result = OptimizationResult(
            optimized_prompt="Improved",
            initial_prompt="test",
            iterations_used=1,
            total_llm_calls=10,
            initial_score=0.3,
            final_score=0.8,
            improvement=0.5,
            history=[],
        )

        result = self._invoke_optimize(tmp_path, mock_result, "-v")

        assert result.exit_code == 0

    def test_optimize_displays_metrics(self, tmp_path: Path) -> None:
        """Initial score, final score and signed improvement appear in the output."""
        mock_result = OptimizationResult(
            optimized_prompt="Better prompt",
            initial_prompt="test",
            iterations_used=3,
            total_llm_calls=30,
            initial_score=0.40,
            final_score=0.85,
            improvement=0.45,
            history=[],
        )

        result = self._invoke_optimize(tmp_path, mock_result)

        assert result.exit_code == 0
        assert "0.40" in result.output
        assert "0.85" in result.output
        assert "+0.45" in result.output

    def test_optimize_with_max_concurrency_flag(self, tmp_path: Path) -> None:
        """The ``--max-concurrency`` option is accepted and the command exits 0."""
        mock_result = OptimizationResult(
            optimized_prompt="Better prompt",
            initial_prompt="test",
            iterations_used=1,
            total_llm_calls=10,
            initial_score=0.3,
            final_score=0.8,
            improvement=0.5,
            history=[],
        )

        result = self._invoke_optimize(
            tmp_path, mock_result, "--max-concurrency", "10"
        )

        assert result.exit_code == 0
|
||||
|
||||
|
||||
class TestCLIHelp:
    """Tests for CLI help and no-args behavior."""

    def test_no_args_shows_help(self) -> None:
        """Running the app with no arguments prints the banner or usage text."""
        result = runner.invoke(app, [])
        # Typer uses exit code 2 when no_args_is_help=True
        assert result.exit_code in (0, 2)
        assert "PROMETHEUS" in result.output or "Usage" in result.output

    def test_optimize_help(self) -> None:
        """``optimize --help`` exits 0 and mentions the input option."""
        result = runner.invoke(app, ["optimize", "--help"])
        assert result.exit_code == 0
        # The case-insensitive match already covers "INPUT"; a second
        # upper-case check could never change the outcome.
        assert "input" in result.output.lower()

    def test_version_help(self) -> None:
        """``version --help`` exits 0."""
        result = runner.invoke(app, ["version", "--help"])
        assert result.exit_code == 0

    def test_init_help(self) -> None:
        """``init --help`` exits 0."""
        result = runner.invoke(app, ["init", "--help"])
        assert result.exit_code == 0

    def test_list_help(self) -> None:
        """``list --help`` exits 0."""
        result = runner.invoke(app, ["list", "--help"])
        assert result.exit_code == 0
|
||||
|
||||
|
||||
class TestCLIVersion:
    """Tests for the `prometheus version` command."""

    def test_version_prints_version(self) -> None:
        """``version`` exits 0 and prints the product name and version string."""
        outcome = runner.invoke(app, ["version"])
        assert outcome.exit_code == 0

        # Both fragments must appear somewhere in the command output.
        printed = outcome.output
        for fragment in ("PROMETHEUS", "0.1.0"):
            assert fragment in printed
|
||||
|
||||
|
||||
class TestCLIList:
    """Tests for the `prometheus list` command."""

    def test_list_no_runs(self, tmp_path: Path) -> None:
        """An empty directory yields exit code 0 and a "no runs" message."""
        result = runner.invoke(app, ["list", "-d", str(tmp_path)])
        assert result.exit_code == 0
        assert "No optimization runs found" in result.output

    def test_list_with_result(self, tmp_path: Path) -> None:
        """A result YAML in the directory has its scores shown in the listing."""
        result_data = {
            "optimized_prompt": "Better prompt for testing",
            "initial_prompt": "test",
            "iterations_used": 5,
            "total_llm_calls": 50,
            "initial_score": 0.30,
            "final_score": 0.90,
            "improvement": 0.60,
            "history": [],
        }
        result_file = tmp_path / "output.yaml"
        # Uses the module-level ``yaml`` import; explicit encoding keeps the
        # fixture independent of the platform default.
        with open(result_file, "w", encoding="utf-8") as f:
            yaml.dump(result_data, f)

        result = runner.invoke(app, ["list", "-d", str(tmp_path)])
        assert result.exit_code == 0
        # Scores are expected to be rendered with two decimal places.
        assert "0.30" in result.output
        assert "0.90" in result.output

    def test_list_nonexistent_directory(self) -> None:
        """Pointing ``-d`` at a missing directory exits with code 1."""
        result = runner.invoke(app, ["list", "-d", "/nonexistent/dir"])
        assert result.exit_code == 1
|
||||
Reference in New Issue
Block a user