Skip to content

Outlines

Bases: PydanticEngine[PromptSignature, Result, Model, InferenceMode]

Source code in sieves/engines/outlines_.py
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
class Outlines(PydanticEngine[PromptSignature, Result, Model, InferenceMode]):
    """Engine building its generators from the factory stored in the selected ``InferenceMode`` member."""

    @property
    def inference_modes(self) -> type[InferenceMode]:
        """Return the enum type enumerating the inference modes of this engine.
        :return: ``InferenceMode`` enum type.
        """
        return InferenceMode

    def build_executable(
        self,
        inference_mode: InferenceMode,
        prompt_template: str | None,  # noqa: UP007
        prompt_signature: type[PromptSignature] | PromptSignature,
        fewshot_examples: Iterable[pydantic.BaseModel] = (),
    ) -> Executable[Result | None]:
        """Build a callable that renders prompts for given values and runs them in the chosen inference mode.
        :param inference_mode: Inference mode; the first element of its value is used as generator factory.
        :param prompt_template: Template string for prompts. Has to be provided.
        :param prompt_signature: Mode-dependent signature: a string in regex mode, a list in choice mode, a
            pydantic.BaseModel subclass in json mode. Not inspected in text mode.
        :param fewshot_examples: Few-shot examples passed on to prompt rendering.
        :return: Executable yielding one result (or None) per set of values.
        """
        engine_name = self.__class__.__name__
        compiled_template = self._create_template(prompt_template)

        def execute(values: Iterable[dict[str, Any]]) -> Iterable[Result | None]:
            """Run prompts rendered from the given values through the engine.
            :param values: Values to inject into prompts.
            :return Iterable[Result | None]: Results for prompts. Results are None if corresponding prompt failed.
            """
            make_generator: Callable[..., Any] = inference_mode.value[0]

            # Collect the mode-specific keyword arguments first, then build the generator in a single call.
            # NOTE(review): the checks below are plain asserts and are therefore skipped when Python runs with -O.
            mode_kwargs: dict[str, Any]
            if inference_mode == InferenceMode.text:
                mode_kwargs = {}
            elif inference_mode == InferenceMode.regex:
                assert isinstance(prompt_signature, str), ValueError(
                    "PromptSignature has to be supplied as string in outlines regex mode."
                )
                mode_kwargs = {"regex_str": prompt_signature}
            elif inference_mode == InferenceMode.choice:
                assert isinstance(prompt_signature, list), ValueError(
                    f"PromptSignature has to be supplied as list of strings or enum values in {engine_name} choice "
                    f"mode."
                )
                mode_kwargs = {"choices": prompt_signature}
            elif inference_mode == InferenceMode.json:
                assert isinstance(prompt_signature, type) and issubclass(prompt_signature, pydantic.BaseModel)
                mode_kwargs = {"schema_object": prompt_signature}
            else:
                raise ValueError(f"Inference mode {inference_mode} not supported by {engine_name} engine.")

            seq_generator = make_generator(self._model, **mode_kwargs, **self._init_kwargs)

            def run_batch(prompts: list[str]) -> Iterable[Result]:
                """Feed one batch of rendered prompts to the generator, forwarding the inference kwargs."""
                yield from seq_generator(prompts, **self._inference_kwargs)

            yield from self._infer(run_batch, compiled_template, values, fewshot_examples)

        return execute

_attributes property

Returns attributes to serialize.

Returns:

Type Description
dict[str, Attribute]

Dict of attributes to serialize.

model property

Return model instance.

Returns:

Type Description
EngineModel

Model instance.

__init__(model, init_kwargs=None, inference_kwargs=None, strict_mode=False, batch_size=-1)

Parameters:

Name Type Description Default
model EngineModel

Instantiated model instance.

required
init_kwargs dict[str, Any] | None

Optional kwargs to supply to engine executable at init time.

None
inference_kwargs dict[str, Any] | None

Optional kwargs to supply to engine executable at inference time.

None
strict_mode bool

If True, exception is raised if prompt response can't be parsed correctly.

False
batch_size int

Batch size used when processing prompts. -1 will batch all documents in one go. Not all engines support batching.

-1
Source code in sieves/engines/core.py
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
def __init__(
    self,
    model: EngineModel,
    init_kwargs: dict[str, Any] | None = None,
    inference_kwargs: dict[str, Any] | None = None,
    strict_mode: bool = False,
    batch_size: int = -1,
):
    """Store model and engine settings on the instance.
    :param model: Instantiated model instance.
    :param init_kwargs: Optional kwargs to supply to engine executable at init time.
    :param inference_kwargs: Optional kwargs to supply to engine executable at inference time.
    :param strict_mode: If True, exception is raised if prompt response can't be parsed correctly.
    :param batch_size: Batch size in processing prompts. -1 will batch all documents in one go. Not all engines
        support batching.
    """
    self._model = model
    # Falsy kwargs (None or an empty dict) are replaced by a new empty dict.
    self._inference_kwargs = inference_kwargs if inference_kwargs else {}
    self._init_kwargs = init_kwargs if init_kwargs else {}
    self._strict_mode = strict_mode
    # The batch size is stored as returned by the validation hook.
    validated_batch_size = self._validate_batch_size(batch_size)
    self._batch_size = validated_batch_size

_convert_fewshot_examples(fewshot_examples) staticmethod

Convert fewshot examples from pydantic.BaseModel instance to dicts.

Parameters:

Name Type Description Default
fewshot_examples Iterable[BaseModel]

Fewshot examples to convert.

required

Returns:

Type Description
list[dict[str, Any]]

Fewshot examples as dicts.

Source code in sieves/engines/core.py
 96
 97
 98
 99
100
101
102
103
@staticmethod
def _convert_fewshot_examples(fewshot_examples: Iterable[pydantic.BaseModel]) -> list[dict[str, Any]]:
    """
    Dump every few-shot example into a plain dict.
    :param fewshot_examples: Few-shot examples (pydantic.BaseModel instances) to convert.
    :return: One dict per example, in input order, produced by ``model_dump(serialize_as_any=True)``.
    """
    dumped_examples = [example.model_dump(serialize_as_any=True) for example in fewshot_examples]
    return dumped_examples

_create_template(template) classmethod

Creates Jinja2 template from template string.

Parameters:

Name Type Description Default
template str | None

Template string.

required

Returns:

Type Description
Template

Jinja2 template.

Source code in sieves/engines/core.py
154
155
156
157
158
159
160
161
@classmethod
def _create_template(cls, template: str | None) -> jinja2.Template:
    """Compile a template string into a Jinja2 template.
    :param template: Template string. Has to be truthy, i.e. neither None nor empty.
    :return: Jinja2 template.
    :raises AssertionError: If no template string is given (only while assertions are enabled).
    """
    missing_template_msg = f"prompt_template has to be provided to {cls.__name__}."
    assert template, missing_template_msg
    return jinja2.Template(template)

_execute_async_calls(calls) async staticmethod

Executes batch of async functions.

Parameters:

Name Type Description Default
calls list[Coroutine[Any, Any, Any]] | list[Awaitable[Any]]

Async calls to execute.

required

Returns:

Type Description
Any

Parsed response objects.

Source code in sieves/engines/core.py
135
136
137
138
139
140
141
@staticmethod
async def _execute_async_calls(calls: list[Coroutine[Any, Any, Any]] | list[Awaitable[Any]]) -> Any:
    """Executes batch of async functions.
    :param calls: Async calls to execute.
    :return: Parsed response objects.
    """
    return await asyncio.gather(*calls)

_infer(generator, template, values, fewshot_examples)

Runs inference record by record with exception handling for template- and Pydantic-based engines.

Parameters:

Name Type Description Default
generator Callable[[list[str]], Iterable[EngineResult]]

Callable generating responses.

required
template Template

Prompt template.

required
values Iterable[dict[str, Any]]

Doc values to inject.

required
fewshot_examples Iterable[BaseModel]

Fewshot examples.

required

Returns:

Type Description
Iterable[EngineResult | None]

Results parsed from responses.

Source code in sieves/engines/core.py
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
def _infer(
    self,
    generator: Callable[[list[str]], Iterable[EngineResult]],
    template: jinja2.Template,
    values: Iterable[dict[str, Any]],
    fewshot_examples: Iterable[pydantic.BaseModel],
) -> Iterable[EngineResult | None]:
    """
    Runs inference batch by batch with exception handling for template- and Pydantic-based engines.

    Values are consumed lazily in batches of ``self._batch_size`` (all at once if the batch size is -1). For each
    batch the prompts are rendered from the template and handed to ``generator``.
    :param generator: Callable generating responses for a list of rendered prompts.
    :param template: Prompt template.
    :param values: Doc values to inject.
    :param fewshot_examples: Fewshot examples. If any are given, they are passed to the template as ``examples``.
    :return: Results parsed from responses. In non-strict mode, results missing due to a failure within a batch
        are reported as None, so that each batch yields as many results as it has inputs.
    :raises ValueError: In strict mode, if rendering or generation raises TypeError or pydantic.ValidationError.
    """
    fewshot_examples_dict = Engine._convert_fewshot_examples(fewshot_examples)
    examples = {"examples": fewshot_examples_dict} if fewshot_examples_dict else {}
    batch_size = self._batch_size if self._batch_size != -1 else sys.maxsize
    # Read values through a single iterator for standardized batch handling (otherwise we'd have to use different
    # batch handling depending on whether lists/tuples or generators are used).
    values_iter = iter(values)

    # An empty batch is falsy and terminates the loop, so no separate emptiness check is needed.
    while batch := list(itertools.islice(values_iter, batch_size)):
        # Number of results already handed to the caller for this batch.
        n_yielded = 0

        try:
            prompts = [template.render(**doc_values, **examples) for doc_values in batch]
            for result in generator(prompts):
                n_yielded += 1
                yield result

        except (TypeError, pydantic.ValidationError) as err:
            if self._strict_mode:
                raise ValueError(
                    "Encountered problem when executing prompt. Ensure your few-shot examples and document "
                    "chunks contain sensible information."
                ) from err

            # Pad only the results that are still missing. Yielding len(batch) placeholders after a partially
            # processed batch would produce more results than inputs and misalign results with documents.
            # Assumes the generator yields results in prompt order - TODO confirm for all engines.
            yield from (None for _ in range(len(batch) - n_yielded))

_validate_batch_size(batch_size)

Validates batch_size. No-op by default.

Parameters:

Name Type Description Default
batch_size int

Specified batch size.

required

Returns:

Type Description
int

Validated batch size.

Source code in sieves/engines/core.py
50
51
52
53
54
55
def _validate_batch_size(self, batch_size: int) -> int:
    """Validation hook for the batch size; this implementation returns the value unchanged.
    :param batch_size: Specified batch size.
    :returns int: The batch size exactly as passed in.
    """
    return batch_size

deserialize(config, **kwargs) classmethod

Generate Engine instance from config.

Parameters:

Name Type Description Default
config Config

Config to generate instance from.

required
kwargs dict[str, Any]

Values to inject into loaded config.

{}

Returns:

Type Description
Engine[EnginePromptSignature, EngineResult, EngineModel, EngineInferenceMode]

Deserialized Engine instance.

Source code in sieves/engines/core.py
124
125
126
127
128
129
130
131
132
133
@classmethod
def deserialize(
    cls, config: Config, **kwargs: dict[str, Any]
) -> Engine[EnginePromptSignature, EngineResult, EngineModel, EngineInferenceMode]:
    """Instantiate the engine class from a config.
    :param config: Config to generate instance from.
    :param kwargs: Values to inject into loaded config; forwarded to ``config.to_init_dict``.
    :return: Engine instance created by calling ``cls`` with the init dict obtained from the config.
    """
    init_args = config.to_init_dict(cls, **kwargs)
    return cls(**init_args)

serialize()

Serializes engine.

Returns:

Type Description
Config

Config instance.

Source code in sieves/engines/core.py
118
119
120
121
122
def serialize(self) -> Config:
    """Create a config from this engine's class and its ``_attributes``.
    :return: Config instance.
    """
    engine_cls = self.__class__
    return Config.create(engine_cls, self._attributes)