- Clean architecture (domain/application/infrastructure) - DSPy-based evolution engine with scoring - CLI via pyproject.toml entry point - Unit + integration tests (~300 tests) - Configs for glm-5.1 and glm-4.5-air models - Z.AI endpoint integration
43 lines
1.3 KiB
Python
43 lines
1.3 KiB
Python
"""
|
|
Bootstrap — synthetic input generation.
|
|
|
|
Creates a pool of test inputs from the task description.
|
|
This replaces the need for a labelled dataset.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import random
|
|
|
|
from prometheus.domain.entities import SyntheticExample
|
|
from prometheus.domain.ports import SyntheticGeneratorPort
|
|
|
|
|
|
class SyntheticBootstrap:
    """Orchestrates synthetic input generation.

    Depends only on the abstract port, not on DSPy directly.

    Pool shuffling and minibatch sampling both draw from one private
    ``random.Random`` seeded at construction, so the shuffle order and the
    sampled positions are reproducible for a given seed and call sequence.
    """

    def __init__(self, generator: SyntheticGeneratorPort, seed: int = 42):
        """Store the generator port and create the seeded RNG.

        Args:
            generator: Port whose ``generate_inputs`` produces the examples.
            seed: Seed for the private random number generator.
        """
        self._generator = generator
        self._rng = random.Random(seed)

    def run(self, task_description: str, n_examples: int) -> list[SyntheticExample]:
        """Generate the synthetic pool in a single call.

        Single call minimizes LLM cost (1 call instead of N),
        and the LLM can ensure diversity in a single generation.

        Args:
            task_description: Passed through unchanged to the generator port.
            n_examples: Passed through unchanged to the generator port.

        Returns:
            A new list holding the generated examples in shuffled order.
        """
        generated = self._generator.generate_inputs(task_description, n_examples)
        # Shuffle a private copy: the port may keep a reference to the object
        # it returned (or return an immutable sequence), and reordering it in
        # place would leak this class's randomness into the port's state.
        examples = list(generated)
        self._rng.shuffle(examples)
        return examples

    def sample_minibatch(
        self,
        pool: list[SyntheticExample],
        size: int,
    ) -> list[SyntheticExample]:
        """Sample a minibatch from the synthetic pool.

        Args:
            pool: Examples to draw from; it is not modified.
            size: Requested number of examples. Values larger than the pool
                are clamped to ``len(pool)``.

        Returns:
            A new list of ``min(size, len(pool))`` examples drawn without
            replacement.

        Raises:
            ValueError: If ``size`` is negative (raised by ``random.sample``).
        """
        # Clamp so that asking for more than the pool holds yields the whole
        # pool (in random order) instead of an error from ``sample``.
        size = min(size, len(pool))
        return self._rng.sample(pool, size)
|