feat: async/parallel execution with configurable concurrency

Parallelize LLM calls across minibatches to reduce wall-clock time.
All domain ports (LLMPort, JudgePort, ProposerPort) are now async.
Adapter implementations wrap synchronous DSPy calls with asyncio.to_thread.
Judge calls within a batch run in parallel via asyncio.gather, bounded by a semaphore.
Evaluator parallelizes minibatch execution with configurable concurrency.
The evolution loop and use case are fully async. Proposer calls stay sequential (they are not parallelized).
Added --max-concurrency CLI flag and max_concurrency YAML config field.
Added async_retry_with_backoff for async error handling.
All 139 unit tests pass.

Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
FullStackDev
2026-03-29 13:15:34 +00:00
parent e2d111ce5b
commit c92ca4a2b8
16 changed files with 297 additions and 159 deletions

View File

@@ -5,6 +5,7 @@ Typer interface with -i (input) and -o (output) options.
"""
from __future__ import annotations
import asyncio
import logging
import os
from dataclasses import asdict
@@ -66,12 +67,30 @@ def optimize(
"--error-strategy",
help="How to handle errors: skip | retry | abort.",
),
max_concurrency: int = typer.Option(
5,
"--max-concurrency",
help="Max parallel LLM calls for minibatch execution and judging.",
),
) -> None:
"""Optimize a prompt without any reference data.
Usage:
prometheus optimize -i config.yaml -o result.yaml
"""
asyncio.run(
_async_optimize(input, output, verbose, max_retries, error_strategy, max_concurrency)
)
async def _async_optimize(
input: str,
output: str,
verbose: bool,
max_retries: int,
error_strategy: str,
max_concurrency: int,
) -> None:
# Configure verbose logging
if verbose:
logging.basicConfig(level=logging.INFO, format="[PROMETHEUS] %(message)s")
@@ -129,6 +148,7 @@ def optimize(
retry_delay_base=raw_config.get("retry_delay_base", 1.0),
circuit_breaker_threshold=raw_config.get("circuit_breaker_threshold", 5),
error_strategy=raw_config.get("error_strategy", error_strategy),
max_concurrency=raw_config.get("max_concurrency", max_concurrency),
output_path=output,
verbose=verbose,
)
@@ -164,6 +184,7 @@ def optimize(
lm=judge_lm,
max_retries=config.max_retries,
retry_delay_base=config.retry_delay_base,
max_concurrency=config.max_concurrency,
)
proposer_adapter = DSPyProposerAdapter(
lm=proposer_lm,
@@ -171,7 +192,11 @@ def optimize(
retry_delay_base=config.retry_delay_base,
)
bootstrap = SyntheticBootstrap(generator=synth_adapter, seed=config.seed)
evaluator = PromptEvaluator(executor=llm_adapter, judge=judge_adapter)
evaluator = PromptEvaluator(
executor=llm_adapter,
judge=judge_adapter,
max_concurrency=config.max_concurrency,
)
use_case = OptimizePromptUseCase(
evaluator=evaluator,
proposer=proposer_adapter,
@@ -180,7 +205,7 @@ def optimize(
# 4. Execute
with console.status("[bold green]Evolving prompt..."):
result = use_case.execute(config)
result = await use_case.execute(config)
# 5. Display results
_display_result(result)