Outlines

Outlines provides structured text generation. sieves works with Outlines models created via outlines.models.

Usage

import outlines
from transformers import AutoModelForCausalLM, AutoTokenizer
from sieves import tasks

# Initialize an Outlines model
model_name = "HuggingFaceTB/SmolLM2-135M-Instruct"
model = outlines.models.from_transformers(
    AutoModelForCausalLM.from_pretrained(model_name),
    AutoTokenizer.from_pretrained(model_name)
)

# Pass it to a task
task = tasks.SentimentAnalysis(model=model)
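
The task can then be run inside a pipeline. A minimal sketch, assuming sieves' Pipeline and Doc API and that per-task results are stored on each document:

from sieves import Doc, Pipeline

# Run the sentiment task over a document (Pipeline/Doc usage assumed).
docs = [Doc(text="I love this product!")]
for doc in Pipeline([task])(docs):
    print(doc.results)  # per-task results; exact keying is an assumption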

Bases: PydanticModelWrapper[PromptSignature, Result, Model, InferenceMode]

ModelWrapper for Outlines with multiple structured inference modes.

Source code in sieves/model_wrappers/outlines_.py
class Outlines(PydanticModelWrapper[PromptSignature, Result, Model, InferenceMode]):
    """ModelWrapper for Outlines with multiple structured inference modes."""

    @override
    @property
    def inference_modes(self) -> type[InferenceMode]:
        return InferenceMode

    async def _generate_async(
        self,
        generator: (
            outlines.generator.SteerableGenerator
            | outlines.generator.BlackBoxGenerator
            | outlines.generator.AsyncBlackBoxGenerator
        ),
        prompt: str,
    ) -> Result | None:
        """Generate result async.

        :param generator: Generator instance to use for generation.
        :param prompt: Prompt to generate result for.
        :return: Result for prompt. Results are None if corresponding prompt failed.
        """
        result = generator(prompt, **self._inference_kwargs)
        assert isinstance(result, Result) or result is None
        return result

    @override
    def build_executable(
        self,
        inference_mode: InferenceMode,
        prompt_template: str | None,  # noqa: UP007
        prompt_signature: type[PromptSignature] | PromptSignature,
        fewshot_examples: Sequence[pydantic.BaseModel] = (),
    ) -> Executable[Result | None]:
        # Set a moderate default for max_new_tokens for `transformers` models, as they otherwise run into truncation
        # issues.
        inference_kwargs = self._inference_kwargs.copy()
        if isinstance(self._model, outlines.models.transformers.Transformers):
            inference_kwargs = {"max_new_tokens": 1024} | inference_kwargs

        template = self._create_template(prompt_template)

        # Normalize the prompt signature: a list of choices becomes a Literal type.
        if isinstance(prompt_signature, list):
            prompt_signature = Literal[*prompt_signature]  # type: ignore[invalid-type-form]

        # In regex mode, wrap the pattern so Outlines constrains output to it.
        if inference_mode == InferenceMode.regex:
            prompt_signature = outlines.types.Regex(prompt_signature)

        # Create the Generator instance responsible for producing raw, unparsed text.
        generator = outlines.Generator(self._model, output_type=prompt_signature, **self._init_kwargs)

        def execute(values: Sequence[dict[str, Any]]) -> Sequence[tuple[Result | None, Any, TokenUsage]]:
            """Execute prompts with model wrapper for given values.

            :param values: Values to inject into prompts.
            :return: Sequence of tuples containing results, raw outputs, and token usage. Results are None if
                corresponding prompt failed.
            """

            def generate(prompts: list[str]) -> Iterable[tuple[Result, Any, TokenUsage]]:
                try:
                    results = generator.batch(prompts, **inference_kwargs)
                # Batch mode is not implemented for all Outlines wrappers. Fall back to single-prompt mode in
                # that case.
                except NotImplementedError:
                    calls = [self._generate_async(generator, prompt) for prompt in prompts]
                    results = asyncio.run(self._execute_async_calls(calls))

                # Estimate token usage if tokenizer is available.
                tokenizer = self._get_tokenizer()

                if inference_mode == InferenceMode.json:
                    assert len(results) == len(prompts)
                    assert isinstance(prompt_signature, type) and issubclass(prompt_signature, pydantic.BaseModel)

                    for prompt, result in zip(prompts, results):
                        usage = TokenUsage(
                            input_tokens=self._count_tokens(prompt, tokenizer),
                            output_tokens=self._count_tokens(result, tokenizer),
                        )

                        try:
                            parsed = prompt_signature.model_validate_json(result)
                            yield parsed, result, usage
                        # If naive parsing fails: JSON is potentially invalid. Attempt to repair it, then try again.
                        except pydantic.ValidationError:
                            repaired = json_repair.repair_json(result)
                            parsed = prompt_signature.model_validate_json(repaired)
                            yield parsed, result, usage

                else:
                    for prompt, result in zip(prompts, results):
                        usage = TokenUsage(
                            input_tokens=self._count_tokens(prompt, tokenizer),
                            output_tokens=self._count_tokens(str(result), tokenizer),
                        )

                        yield result, result, usage

            return self._infer(generate, template, values)

        return execute

    @override
    def _get_tokenizer(self) -> Any | None:
        # Outlines models usually have a tokenizer, but some wrappers (like OpenAI) might hide it.
        tokenizer = getattr(self._model, "tokenizer", None)
        if not tokenizer and hasattr(self._model, "model"):
            tokenizer = getattr(self._model.model, "tokenizer", None)

        # Fall back to tiktoken for remote OpenAI-compatible models if no tokenizer found.
        if not tokenizer:
            try:
                # Default to o200k_base for newer models if specific model encoding not found.
                tokenizer = tiktoken.get_encoding("o200k_base")
            except ImportError:
                tokenizer = None

        return tokenizer
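
Putting build_executable to use: a hedged sketch, assuming wrapper is an instantiated Outlines model wrapper (see __init__ below) and SentimentResult is a hypothetical pydantic prompt signature. The json inference mode and the (result, raw output, TokenUsage) return shape are taken from the source above; the template syntax is an assumption:

execute = wrapper.build_executable(
    inference_mode=wrapper.inference_modes.json,
    prompt_template="Classify the sentiment of: {{ text }}",  # template syntax assumed
    prompt_signature=SentimentResult,  # hypothetical pydantic.BaseModel subclass
)
for parsed, raw, usage in execute([{"text": "I love this."}]):
    print(parsed, usage.input_tokens, usage.output_tokens)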

model property

Return model instance.

Returns:
    ModelWrapperModel: Model instance.

model_settings property

Return model settings.

Returns:
    ModelSettings: Model settings.

__init__(model, model_settings)

Initialize model wrapper with model and model settings.

Parameters:
    model (ModelWrapperModel): Instantiated model instance. Required.
    model_settings (ModelSettings): Model settings. Required.
Source code in sieves/model_wrappers/core.py
def __init__(self, model: ModelWrapperModel, model_settings: ModelSettings):
    """Initialize model wrapper with model and model settings.

    :param model: Instantiated model instance.
    :param model_settings: Model settings.
    """
    self._model = model
    self._model_settings = model_settings
    self._inference_kwargs = model_settings.inference_kwargs or {}
    self._init_kwargs = model_settings.init_kwargs or {}
    self._strict = model_settings.strict
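
For illustration, a hedged construction sketch. The field names inference_kwargs, init_kwargs, and strict come from the constructor above; the import paths for ModelSettings and Outlines are assumptions, and model is the Outlines model from the usage example:

from sieves.model_wrappers import ModelSettings, Outlines  # import paths assumed

settings = ModelSettings(
    inference_kwargs={"max_new_tokens": 512},  # forwarded to generator calls
    init_kwargs={},                            # forwarded to outlines.Generator
    strict=True,                               # strictness flag stored by the wrapper
)
wrapper = Outlines(model=model, model_settings=settings)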

convert_fewshot_examples(fewshot_examples) staticmethod

Convert few-shot examples to dicts.

Parameters:
    fewshot_examples (Sequence[BaseModel]): Few-shot examples to convert. Required.

Returns:
    list[dict[str, Any]]: Few-shot examples as dicts.

Source code in sieves/model_wrappers/core.py
@staticmethod
def convert_fewshot_examples(fewshot_examples: Sequence[pydantic.BaseModel]) -> list[dict[str, Any]]:
    """Convert few‑shot examples to dicts.

    :param fewshot_examples: Fewshot examples to convert.
    :return: Fewshot examples as dicts.
    """
    return [fs_example.model_dump(serialize_as_any=True) for fs_example in fewshot_examples]
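
A brief usage sketch of the helper; the example schema is hypothetical:

import pydantic

class SentimentExample(pydantic.BaseModel):  # hypothetical few-shot schema
    text: str
    sentiment: str

# Yields [{"text": "Great!", "sentiment": "positive"}].
dicts = Outlines.convert_fewshot_examples([SentimentExample(text="Great!", sentiment="positive")])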