feat: error handling, retry with backoff, and circuit breaker

Add robust error handling to the evolution loop and LLM adapters:
- Retry utility with exponential backoff for transient errors (429, 5xx, timeouts)
- Per-call error isolation in evaluator and judge adapter
- Circuit breaker in EvolutionLoop (trips after N consecutive failures)
- CLI flags: --max-retries, --error-strategy (skip|retry|abort)
- Config fields: max_retries, retry_delay_base, circuit_breaker_threshold, error_strategy
- 16 new unit tests covering all error handling paths

Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
FullStackDev
2026-03-29 12:47:55 +00:00
parent f516ca4be6
commit e2d111ce5b
10 changed files with 646 additions and 103 deletions

View File

@@ -11,14 +11,22 @@ import dspy
from prometheus.domain.entities import Prompt, Trajectory
from prometheus.domain.ports import ProposerPort
from prometheus.infrastructure.dspy_modules import InstructionProposer
from prometheus.infrastructure.retry import retry_with_backoff
class DSPyProposerAdapter(ProposerPort):
"""Uses evaluation trajectories to build a failure report and propose a new prompt."""
# Constructor: stores the LM and the retry settings and builds the proposer module.
# NOTE(review): this page renders both sides of the diff without +/- markers.
# The single-line signature directly below appears to be the pre-change version
# and the multi-line signature after it its replacement (the hunk header reports
# 14 old lines vs 22 new lines) -- confirm against the raw diff.
def __init__(self, lm: dspy.LM) -> None:
def __init__(
self,
lm: dspy.LM,
# Retry settings; stored on the instance and forwarded to retry_with_backoff in propose().
max_retries: int = 3,
retry_delay_base: float = 1.0,
) -> None:
self._lm = lm
# Built with no arguments; the LM is supplied per call through dspy.context in propose().
self._proposer = InstructionProposer()
self._max_retries = max_retries
self._retry_delay_base = retry_delay_base
# Builds a failure report from the trajectories, asks the InstructionProposer for a
# new instruction, and returns it wrapped in a Prompt.
# NOTE(review): the hunk header inside the signature skips some lines of it;
# `current_prompt` and `trajectories`, both used in the body, are presumably
# declared in the skipped lines -- confirm against the full file.
def propose(
self,
@@ -27,13 +35,21 @@ class DSPyProposerAdapter(ProposerPort):
task_description: str,
) -> Prompt:
# Render the trajectories as text for the proposer's failure_examples input.
failure_examples = self._format_failures(trajectories)
# Direct, un-retried call. NOTE(review): this appears to be the removed side of
# the diff, superseded by the _call/retry_with_backoff version further down.
with dspy.context(lm=self._lm):
pred = self._proposer(
current_instruction=current_prompt.text,
task_description=task_description,
failure_examples=failure_examples,
)
return Prompt(text=pred.new_instruction)
# Same proposer call, wrapped in a zero-argument closure so it can be passed to
# retry_with_backoff as the operation to attempt.
def _call() -> Prompt:
# The proposer runs inside dspy.context(lm=self._lm), i.e. with this adapter's LM.
with dspy.context(lm=self._lm):
pred = self._proposer(
current_instruction=current_prompt.text,
task_description=task_description,
failure_examples=failure_examples,
)
return Prompt(text=pred.new_instruction)
# Delegate to retry_with_backoff using the limits stored in __init__. Which errors
# are retried and what is raised once retries run out is decided in
# prometheus.infrastructure.retry, which is not visible here.
return retry_with_backoff(
_call,
max_retries=self._max_retries,
retry_delay_base=self._retry_delay_base,
)
@staticmethod
def _format_failures(trajectories: list[Trajectory]) -> str: