feat: async/parallel execution with configurable concurrency

Parallelize LLM calls across minibatches to reduce wall-clock time. All domain ports (LLMPort, JudgePort, ProposerPort) are now async. Adapter implementations wrap synchronous DSPy calls with asyncio.to_thread. Judge calls within a batch run in parallel using asyncio.gather, bounded by a semaphore. The evaluator parallelizes minibatch execution with configurable concurrency. The evolution loop and use case are fully async. The proposer stays sequential. Added the --max-concurrency CLI flag and the max_concurrency YAML config field. Added async_retry_with_backoff for async error handling. All 139 unit tests pass. Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-03-29 13:15:34 +00:00
parent e2d111ce5b
commit c92ca4a2b8
16 changed files with 297 additions and 159 deletions
--- a/src/prometheus/cli/app.py
+++ b/src/prometheus/cli/app.py
@@ -5,6 +5,7 @@ Typer interface with -i (input) and -o (output) options.
 """
 from __future__ import annotations

+import asyncio
 import logging
 import os
 from dataclasses import asdict
@@ -66,12 +67,30 @@ def optimize(
        "--error-strategy",
        help="How to handle errors: skip | retry | abort.",
    ),
+    max_concurrency: int = typer.Option(
+        5,
+        "--max-concurrency",
+        help="Max parallel LLM calls for minibatch execution and judging.",
+    ),
 ) -> None:
    """Optimize a prompt without any reference data.

    Usage:
        prometheus optimize -i config.yaml -o result.yaml
    """
+    asyncio.run(
+        _async_optimize(input, output, verbose, max_retries, error_strategy, max_concurrency)
+    )
+
+
+async def _async_optimize(
+    input: str,
+    output: str,
+    verbose: bool,
+    max_retries: int,
+    error_strategy: str,
+    max_concurrency: int,
+) -> None:
    # Configure verbose logging
    if verbose:
        logging.basicConfig(level=logging.INFO, format="[PROMETHEUS] %(message)s")
@@ -129,6 +148,7 @@ def optimize(
        retry_delay_base=raw_config.get("retry_delay_base", 1.0),
        circuit_breaker_threshold=raw_config.get("circuit_breaker_threshold", 5),
        error_strategy=raw_config.get("error_strategy", error_strategy),
+        max_concurrency=raw_config.get("max_concurrency", max_concurrency),
        output_path=output,
        verbose=verbose,
    )
@@ -164,6 +184,7 @@ def optimize(
        lm=judge_lm,
        max_retries=config.max_retries,
        retry_delay_base=config.retry_delay_base,
+        max_concurrency=config.max_concurrency,
    )
    proposer_adapter = DSPyProposerAdapter(
        lm=proposer_lm,
@@ -171,7 +192,11 @@ def optimize(
        retry_delay_base=config.retry_delay_base,
    )
    bootstrap = SyntheticBootstrap(generator=synth_adapter, seed=config.seed)
-    evaluator = PromptEvaluator(executor=llm_adapter, judge=judge_adapter)
+    evaluator = PromptEvaluator(
+        executor=llm_adapter,
+        judge=judge_adapter,
+        max_concurrency=config.max_concurrency,
+    )
    use_case = OptimizePromptUseCase(
        evaluator=evaluator,
        proposer=proposer_adapter,
@@ -180,7 +205,7 @@ def optimize(

    # 4. Execute
    with console.status("[bold green]Evolving prompt..."):
-        result = use_case.execute(config)
+        result = await use_case.execute(config)

    # 5. Display results
    _display_result(result)