Initial commit: PROMETHEUS v0.1.0 - Prompt optimizer

- Clean architecture (domain/application/infrastructure)
- DSPy-based evolution engine with scoring
- CLI via pyproject.toml entry point
- Unit + integration tests (~300 tests)
- Configs for glm-5.1 and glm-4.5-air models
- Z.AI endpoint integration
2026-03-29 11:44:03 +00:00
commit 837a44970f
49 changed files with 6599 additions and 0 deletions
--- a/src/prometheus/infrastructure/dspy_signatures.py
+++ b/src/prometheus/infrastructure/dspy_signatures.py
@@ -0,0 +1,79 @@
+"""
+DSPy Signatures — declarative LLM contracts.
+
+Defines WHAT each LLM call does, not HOW.
+DSPy Signature = input_fields → output_fields + instruction.
+DSPy handles prompting, parsing, and structuring.
+"""
+from __future__ import annotations
+
+import dspy
+
+
+class GenerateSyntheticInputs(dspy.Signature):  # docstring presumably doubles as the LLM instruction — verify
+    """Generate diverse, realistic input examples for a given task."""
+    # Declares two inputs (task text, requested count) and one string output.
+    task_description: str = dspy.InputField(
+        desc="Description of the task the prompt should accomplish."
+    )
+    n_examples: int = dspy.InputField(  # requested count; nothing in this class enforces it
+        desc="Number of examples to generate."
+    )
+    examples: str = dspy.OutputField(  # annotated str: the desc asks for JSON-array text
+        desc=(
+            "A JSON array of strings, each being a realistic input "
+            "for the task. Cover: normal cases, edge cases, long inputs, "
+            "short inputs, ambiguous cases, and tricky scenarios."
+        ),
+    )  # NOTE(review): presumably the caller parses this JSON string — confirm
+
+
+class JudgeOutput(dspy.Signature):  # docstring presumably doubles as the LLM instruction — verify
+    """Evaluate the quality of an LLM output for a given task and input.
+
+    Score: 0.0 (completely wrong) to 1.0 (perfect).
+    Feedback: specific, actionable criticism.
+    """
+    # Three inputs describe the case being judged; score and feedback are the outputs.
+    task_description: str = dspy.InputField(
+        desc="What the assistant is supposed to do."
+    )
+    input_text: str = dspy.InputField(
+        desc="The input provided to the assistant."
+    )
+    output_text: str = dspy.InputField(
+        desc="The assistant's response to evaluate."
+    )
+    score: float = dspy.OutputField(  # the 0.0-1.0 range is stated in text only, not checked here
+        desc="Quality score from 0.0 (wrong) to 1.0 (perfect)."
+    )  # NOTE(review): callers may need to clamp or validate the value — confirm
+    feedback: str = dspy.OutputField(
+        desc=(
+            "Specific, actionable feedback explaining what's wrong "
+            "with the output and how to improve it. Be critical."
+        ),
+    )
+
+
+class ProposeInstruction(dspy.Signature):  # docstring presumably doubles as the LLM instruction — verify
+    """Given a current prompt and examples of where it fails with feedback,
+    propose an improved version of the prompt.
+
+    The new prompt should address all the issues identified in the feedback.
+    """
+    # Inputs: current prompt, task text, failure evidence; output: the rewritten prompt.
+    current_instruction: str = dspy.InputField(
+        desc="The current prompt/instruction to improve."
+    )
+    task_description: str = dspy.InputField(
+        desc="Description of the task."
+    )
+    failure_examples: str = dspy.InputField(  # a single string, not a structured list
+        desc=(
+            "Examples of inputs, outputs, scores, and feedback "
+            "showing where the current instruction fails."
+        ),
+    )  # NOTE(review): presumably pre-formatted by the caller — confirm the layout
+    new_instruction: str = dspy.OutputField(
+        desc="An improved version of the instruction."
+    )