# Prompt-optimizer/src/prometheus/application/bootstrap.py

"""
Bootstrap — synthetic input generation.

Creates a pool of test inputs from the task description.
This replaces the need for a labelled dataset.
"""
from __future__ import annotations

import random

from prometheus.domain.entities import SyntheticExample
from prometheus.domain.ports import SyntheticGeneratorPort


class SyntheticBootstrap:
    """Orchestrate synthetic input generation and minibatch sampling.

    Depends only on the abstract port, not on DSPy directly.

    One seeded ``random.Random`` instance drives both the pool shuffle and
    minibatch sampling, so results are reproducible for a given seed and a
    given sequence of calls on the same instance.
    """

    def __init__(self, generator: SyntheticGeneratorPort, seed: int = 42):
        """Store the generator port and create the private seeded RNG.

        Args:
            generator: Port used to produce the synthetic examples.
            seed: Seed for the instance-private random number generator.
        """
        self._generator = generator
        self._rng = random.Random(seed)

    def run(self, task_description: str, n_examples: int) -> list[SyntheticExample]:
        """Generate the synthetic pool in a single call and shuffle it.

        Single call minimizes LLM cost (1 call instead of N),
        and the LLM can ensure diversity in a single generation.

        Args:
            task_description: Task description forwarded to the generator.
            n_examples: Number of examples requested from the generator.

        Returns:
            A new, shuffled list holding the generated examples.
        """
        # Copy before shuffling: the shuffle is in place, and the object
        # returned by the port may still be referenced by the generator
        # (or may be an immutable sequence such as a tuple).
        examples = list(self._generator.generate_inputs(task_description, n_examples))
        self._rng.shuffle(examples)
        return examples

    def sample_minibatch(
        self,
        pool: list[SyntheticExample],
        size: int,
    ) -> list[SyntheticExample]:
        """Sample a minibatch from the synthetic pool without replacement.

        Args:
            pool: Examples to draw from; it is not modified.
            size: Requested minibatch size. Values larger than the pool are
                clamped to ``len(pool)``.

        Returns:
            A new list of ``min(size, len(pool))`` examples drawn from *pool*.

        Raises:
            ValueError: If *size* is negative.
        """
        if size < 0:
            raise ValueError(f"size must be non-negative, got {size}")
        return self._rng.sample(pool, min(size, len(pool)))