Initial commit: PROMETHEUS v0.1.0 - Prompt optimizer
- Clean architecture (domain/application/infrastructure)
- DSPy-based evolution engine with scoring
- CLI via pyproject.toml entry point
- Unit + integration tests (~300 tests)
- Configs for glm-5.1 and glm-4.5-air models
- Z.AI endpoint integration
This commit is contained in:
34
src/prometheus/infrastructure/judge_adapter.py
Normal file
34
src/prometheus/infrastructure/judge_adapter.py
Normal file
@@ -0,0 +1,34 @@
|
||||
"""
|
||||
Adapter: LLM-as-Judge.
|
||||
|
||||
Implements the JudgePort via the DSPy OutputJudge module.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from prometheus.domain.ports import JudgePort
|
||||
from prometheus.infrastructure.dspy_modules import OutputJudge
|
||||
|
||||
|
||||
class DSPyJudgeAdapter(JudgePort):
    """JudgePort implementation backed by the DSPy ``OutputJudge`` module.

    Pairs are judged one after another, in the order given (MVP approach).
    Future: run the judge calls in parallel via dspy.Parallel.
    """

    def __init__(self) -> None:
        # A single judge module is created once and reused for every pair.
        self._judge = OutputJudge()

    def judge_batch(
        self,
        task_description: str,
        pairs: list[tuple[str, str]],
    ) -> list[tuple[float, str]]:
        """Judge each (input, output) pair and collect (score, feedback).

        Args:
            task_description: Task text passed unchanged to every judge call.
            pairs: ``(input_text, output_text)`` tuples to evaluate.

        Returns:
            One ``(score, feedback)`` tuple per pair, in the same order as
            ``pairs``; an empty list when ``pairs`` is empty.
        """

        def _evaluate(source: str, candidate: str) -> tuple[float, str]:
            # One judge call per pair; the prediction exposes score and feedback.
            verdict = self._judge(
                task_description=task_description,
                input_text=source,
                output_text=candidate,
            )
            return (verdict.score, verdict.feedback)

        return [_evaluate(source, candidate) for source, candidate in pairs]
|
||||
Reference in New Issue
Block a user