Skip to content

Bridge

A Bridge acts as an intermediary between a Task and a ModelWrapper. It is responsible for translating task-specific requirements (like prompts and schemas) into a format the model wrapper understands, and conversely, integrating model outputs back into the document's results.


Bases: ABC

Bridge base class.

Source code in sieves/tasks/predictive/bridges.py
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
class Bridge[TaskPromptSignature, TaskResult, ModelWrapperInferenceMode](abc.ABC):
    """Bridge base class."""

    def __init__(
        self,
        task_id: str,
        prompt_instructions: str | None,
        overwrite: bool,
        model_settings: ModelSettings,
        prompt_signature: type[pydantic.BaseModel],
        model_type: ModelType,
        fewshot_examples: Sequence[pydantic.BaseModel] = (),
    ):
        """Initialize new bridge.

        :param task_id: Task ID.
        :param prompt_instructions: Custom prompt instructions. If None, default instructions are used.
        :param overwrite: Whether to overwrite text with produced text. Considered only by bridges for tasks producing
            fluent text - like translation, summarization, PII masking, etc.
        :param model_settings: Model settings including inference_mode.
        :param prompt_signature: Pydantic model class representing the task's output schema.
        :param model_type: Model type.
        :param fewshot_examples: Few-shot examples.
        """
        self._task_id = task_id
        self._custom_prompt_instructions = prompt_instructions
        self._overwrite = overwrite
        self._model_settings = model_settings
        self._pydantic_signature = prompt_signature
        self._model_type = model_type
        self._fewshot_examples = fewshot_examples

        self._validate()

    def _validate(self) -> None:
        """Validate configuration.

        No-op by default. Executed at the end of __init__().
        """

    @property
    @abc.abstractmethod
    def _default_prompt_instructions(self) -> str:
        """Return default prompt instructions.

        Instructions are injected at the beginning of each prompt.

        :return: Default prompt instructions.
        """

    @property
    def _prompt_instructions(self) -> str:
        """Returns prompt instructions.

        :returns: If `_custom_prompt_instructions` is set, this is used. Otherwise, `_default_prompt_instructions` is
            used.
        """
        return self._custom_prompt_instructions or self._default_prompt_instructions

    @property
    def _prompt_example_xml(self) -> str | None:
        """Return prompt template for example injection.

        Examples are injected between instructions and conclusions.

        :return: Default prompt example template.
        """
        if not self._fewshot_examples:
            return None

        # format_as_xml handles escaping and structured formatting.
        # Passing a list of models usually results in an <examples> root tag.
        return format_as_xml(self._fewshot_examples).strip()

    @property
    def _prompt_conclusion(self) -> str | None:
        """Return prompt conclusion.

        Prompt conclusions are injected at the end of each prompt.

        :return: Default prompt conclusion.
        """
        return None

    @property
    def model_settings(self) -> ModelSettings:
        """Return model settings.

        :return: Model settings.
        """
        return self._model_settings

    @property
    def model_type(self) -> ModelType:
        """Return model type.

        :return: Model type.
        """
        return self._model_type

    @property
    def prompt_template(self) -> str:
        """Return prompt template.

        Chains `_prompt_instructions`, `_prompt_example_xml` and `_prompt_conclusion`.

        Note: different model have different expectations as to how a prompt should look like. E.g. outlines supports
        the Jinja 2 templating format for insertion of values and few-shot examples, whereas DSPy integrates these
        things in a different value in the workflow and hence expects the prompt not to include these things. Mind
        model-specific expectations when creating a prompt template.
        :return str | None: Prompt template as string. None if not used by model wrapper.
        """
        instructions = inspect.cleandoc(self._custom_prompt_instructions or self._prompt_instructions)
        examples = (self._prompt_example_xml or "").strip()
        conclusion = inspect.cleandoc(self._prompt_conclusion or "")

        prompt_parts = [instructions]
        if examples:
            prompt_parts.append(examples)
        if conclusion:
            prompt_parts.append(conclusion)

        return "\n\n".join(prompt_parts).strip()

    @property
    def prompt_signature(self) -> type[TaskPromptSignature] | TaskPromptSignature:
        """Create output signature.

        E.g.: `Signature` in DSPy, Pydantic objects in outlines, JSON schema in jsonformers.
        This is model type-specific.

        :return type[_TaskPromptSignature] | _TaskPromptSignature: Output signature object. This can be an instance
            (e.g. a regex string) or a class (e.g. a Pydantic class).
        """
        # Extract framework-specific kwargs if needed.
        kwargs: dict[str, Any] = {}
        if self.model_settings.inference_mode:
            kwargs["inference_mode"] = self.model_settings.inference_mode

        return convert_to_signature(  # type: ignore[invalid-return-type]
            model_cls=self._pydantic_signature,
            model_type=self.model_type,
            **kwargs,
        )

    @property
    @abc.abstractmethod
    def inference_mode(self) -> ModelWrapperInferenceMode:
        """Return inference mode.

        :return ModelWrapperInferenceMode: Inference mode.
        """

    def extract(self, docs: Sequence[Doc]) -> Sequence[dict[str, Any]]:
        """Extract all values from doc instances that are to be injected into the prompts.

        :param docs: Docs to extract values from.
        :return: All values from doc instances that are to be injected into the prompts as a sequence.
        """
        return [{"text": doc.text if doc.text else None} for doc in docs]

    @abc.abstractmethod
    def integrate(self, results: Sequence[TaskResult], docs: list[Doc]) -> list[Doc]:
        """Integrate results into Doc instances.

        :param results: Results from prompt executable.
        :param docs: Doc instances to update.
        :return: Updated doc instances as a list.
        """

    @abc.abstractmethod
    def consolidate(self, results: Sequence[TaskResult], docs_offsets: list[tuple[int, int]]) -> Sequence[TaskResult]:
        """Consolidate results for document chunks into document results.

        :param results: Results per document chunk.
        :param docs_offsets: Chunk offsets per document. Chunks per document can be obtained with
            `results[docs_chunk_offsets[i][0]:docs_chunk_offsets[i][1]]`.
        :return: Results per document as a sequence.
        """

inference_mode abstractmethod property

Return inference mode.

Returns:

Type Description
ModelWrapperInferenceMode

Inference mode.

model_settings property

Return model settings.

Returns:

Type Description
ModelSettings

Model settings.

model_type property

Return model type.

Returns:

Type Description
ModelType

Model type.

prompt_signature property

Create output signature.

E.g.: Signature in DSPy, Pydantic objects in outlines, JSON schema in jsonformers. This is model type-specific.

Returns:

Type Description
type[TaskPromptSignature] | TaskPromptSignature

Output signature object. This can be an instance (e.g. a regex string) or a class (e.g. a Pydantic class).

prompt_template property

Return prompt template.

Chains _prompt_instructions, _prompt_example_xml and _prompt_conclusion.

Note: different models have different expectations as to what a prompt should look like. E.g. outlines supports the Jinja 2 templating format for insertion of values and few-shot examples, whereas DSPy integrates these things elsewhere in the workflow and hence expects the prompt not to include them. Mind model-specific expectations when creating a prompt template.

Returns:

Type Description
str

Prompt template as string.

__init__(task_id, prompt_instructions, overwrite, model_settings, prompt_signature, model_type, fewshot_examples=())

Initialize new bridge.

Parameters:

Name Type Description Default
task_id str

Task ID.

required
prompt_instructions str | None

Custom prompt instructions. If None, default instructions are used.

required
overwrite bool

Whether to overwrite text with produced text. Considered only by bridges for tasks producing fluent text - like translation, summarization, PII masking, etc.

required
model_settings ModelSettings

Model settings including inference_mode.

required
prompt_signature type[BaseModel]

Pydantic model class representing the task's output schema.

required
model_type ModelType

Model type.

required
fewshot_examples Sequence[BaseModel]

Few-shot examples.

()
Source code in sieves/tasks/predictive/bridges.py
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
def __init__(
    self,
    task_id: str,
    prompt_instructions: str | None,
    overwrite: bool,
    model_settings: ModelSettings,
    prompt_signature: type[pydantic.BaseModel],
    model_type: ModelType,
    fewshot_examples: Sequence[pydantic.BaseModel] = (),
):
    """Initialize new bridge.

    :param task_id: Task ID.
    :param prompt_instructions: Custom prompt instructions. If None, default instructions are used.
    :param overwrite: Whether to overwrite text with produced text. Considered only by bridges for tasks producing
        fluent text - like translation, summarization, PII masking, etc.
    :param model_settings: Model settings including inference_mode.
    :param prompt_signature: Pydantic model class representing the task's output schema.
    :param model_type: Model type.
    :param fewshot_examples: Few-shot examples.
    """
    # Persist constructor arguments under their internal attribute names.
    attrs = {
        "_task_id": task_id,
        "_custom_prompt_instructions": prompt_instructions,
        "_overwrite": overwrite,
        "_model_settings": model_settings,
        "_pydantic_signature": prompt_signature,
        "_model_type": model_type,
        "_fewshot_examples": fewshot_examples,
    }
    for attr_name, attr_value in attrs.items():
        setattr(self, attr_name, attr_value)

    # Hook for subclasses to reject invalid configurations early.
    self._validate()

consolidate(results, docs_offsets) abstractmethod

Consolidate results for document chunks into document results.

Parameters:

Name Type Description Default
results Sequence[TaskResult]

Results per document chunk.

required
docs_offsets list[tuple[int, int]]

Chunk offsets per document. Chunks per document can be obtained with results[docs_offsets[i][0]:docs_offsets[i][1]].

required

Returns:

Type Description
Sequence[TaskResult]

Results per document as a sequence.

Source code in sieves/tasks/predictive/bridges.py
195
196
197
198
199
200
201
202
203
@abc.abstractmethod
def consolidate(self, results: Sequence[TaskResult], docs_offsets: list[tuple[int, int]]) -> Sequence[TaskResult]:
    """Consolidate results for document chunks into document results.

    Abstract: subclasses define how per-chunk results are merged into one result per document.

    :param results: Results per document chunk.
    :param docs_offsets: Chunk offsets per document. Chunks per document can be obtained with
        `results[docs_offsets[i][0]:docs_offsets[i][1]]`.
    :return: Results per document as a sequence.
    """

extract(docs)

Extract all values from doc instances that are to be injected into the prompts.

Parameters:

Name Type Description Default
docs Sequence[Doc]

Docs to extract values from.

required

Returns:

Type Description
Sequence[dict[str, Any]]

All values from doc instances that are to be injected into the prompts as a sequence.

Source code in sieves/tasks/predictive/bridges.py
178
179
180
181
182
183
184
def extract(self, docs: Sequence[Doc]) -> Sequence[dict[str, Any]]:
    """Extract all values from doc instances that are to be injected into the prompts.

    :param docs: Docs to extract values from.
    :return: All values from doc instances that are to be injected into the prompts as a sequence.
    """
    # Empty or missing text is normalized to None so prompt templates receive an explicit null.
    values: list[dict[str, Any]] = []
    for doc in docs:
        values.append({"text": doc.text or None})
    return values

integrate(results, docs) abstractmethod

Integrate results into Doc instances.

Parameters:

Name Type Description Default
results Sequence[TaskResult]

Results from prompt executable.

required
docs list[Doc]

Doc instances to update.

required

Returns:

Type Description
list[Doc]

Updated doc instances as a list.

Source code in sieves/tasks/predictive/bridges.py
186
187
188
189
190
191
192
193
@abc.abstractmethod
def integrate(self, results: Sequence[TaskResult], docs: list[Doc]) -> list[Doc]:
    """Integrate results into Doc instances.

    Abstract: subclasses define how model results are written back onto the given docs.

    :param results: Results from prompt executable.
    :param docs: Doc instances to update.
    :return: Updated doc instances as a list.
    """