Bridge

Bases: Generic[TaskPromptSignature, TaskResult, EngineInferenceMode], ABC

Bridge base class.

Source code in sieves/tasks/predictive/bridges.py
class Bridge(Generic[TaskPromptSignature, TaskResult, EngineInferenceMode], abc.ABC):
    """Bridge base class."""

    def __init__(self, task_id: str, prompt_instructions: str | None, overwrite: bool):
        """Initialize new bridge.

        :param task_id: Task ID.
        :param prompt_instructions: Custom prompt instructions. If None, default instructions are used.
        :param overwrite: Whether to overwrite text with produced text. Considered only by bridges for tasks producing
            fluent text - like translation, summarization, PII masking, etc.
        """
        self._task_id = task_id
        self._custom_prompt_instructions = prompt_instructions
        self._overwrite = overwrite

    @property
    @abc.abstractmethod
    def _default_prompt_instructions(self) -> str:
        """Return default prompt instructions.

        Instructions are injected at the beginning of each prompt.

        :return: Default prompt instructions.
        """

    @property
    def _prompt_instructions(self) -> str:
        """Returns prompt instructions.

        :returns: If `_custom_prompt_instructions` is set, this is used. Otherwise, `_default_prompt_instructions` is
            used.
        """
        return self._custom_prompt_instructions or self._default_prompt_instructions

    @property
    @abc.abstractmethod
    def _prompt_example_template(self) -> str | None:
        """Return default prompt template for example injection.

        Examples are injected between instructions and conclusions.

        :return: Default prompt example template.
        """

    @property
    @abc.abstractmethod
    def _prompt_conclusion(self) -> str | None:
        """Return prompt conclusion.

        Prompt conclusions are injected at the end of each prompt.

        :return: Default prompt conclusion.
        """

    @property
    def prompt_template(self) -> str:
        """Return prompt template.

        Chains `_prompt_instructions`, `_prompt_example_template` and `_prompt_conclusion`.

        Note: different engines have different expectations as to what a prompt should look like. E.g. Outlines supports
        the Jinja2 templating format for inserting values and few-shot examples, whereas DSPy handles these in a
        different part of its workflow and hence expects the prompt not to include them. Mind engine-specific
        expectations when creating a prompt template.
        :return str: Prompt template as string.
        """
        return f"""
        {self._custom_prompt_instructions or self._prompt_instructions}
        {self._prompt_example_template or ""}
        {self._prompt_conclusion or ""}
        """

    @property
    @abc.abstractmethod
    def prompt_signature(self) -> type[TaskPromptSignature] | TaskPromptSignature:
        """Create output signature.

        E.g.: `Signature` in DSPy, Pydantic objects in outlines, JSON schema in jsonformers.
        This is engine-specific.

        :return type[_TaskPromptSignature] | _TaskPromptSignature: Output signature object. This can be an instance
            (e.g. a regex string) or a class (e.g. a Pydantic class).
        """

    @property
    @abc.abstractmethod
    def inference_mode(self) -> EngineInferenceMode:
        """Return inference mode.

        :return EngineInferenceMode: Inference mode.
        """

    def extract(self, docs: Iterable[Doc]) -> Iterable[dict[str, Any]]:
        """Extract all values from doc instances that are to be injected into the prompts.

        :param docs: Docs to extract values from.
        :return Iterable[dict[str, Any]]: All values from doc instances that are to be injected into the prompts
        """
        return ({"text": doc.text if doc.text else None} for doc in docs)

    @abc.abstractmethod
    def integrate(self, results: Iterable[TaskResult], docs: Iterable[Doc]) -> Iterable[Doc]:
        """Integrate results into Doc instances.

        :param results: Results from prompt executable.
        :param docs: Doc instances to update.
        :return Iterable[Doc]: Updated doc instances.
        """

    @abc.abstractmethod
    def consolidate(self, results: Iterable[TaskResult], docs_offsets: list[tuple[int, int]]) -> Iterable[TaskResult]:
        """Consolidate results for document chunks into document results.

        :param results: Results per document chunk.
        :param docs_offsets: Chunk offsets per document. Chunks per document can be obtained with
            `results[docs_chunk_offsets[i][0]:docs_chunk_offsets[i][1]]`.
        :return Iterable[_TaskResult]: Results per document.
        """

inference_mode abstractmethod property

Return inference mode.

Returns:

Type Description
EngineInferenceMode

Inference mode.

prompt_signature abstractmethod property

Create output signature.

E.g.: Signature in DSPy, Pydantic objects in outlines, JSON schema in jsonformers. This is engine-specific.

Returns:

Type Description
type[TaskPromptSignature] | TaskPromptSignature

Output signature object. This can be an instance (e.g. a regex string) or a class (e.g. a Pydantic class).

prompt_template property

Return prompt template.

Chains _prompt_instructions, _prompt_example_template and _prompt_conclusion.

Note: different engines have different expectations as to what a prompt should look like. E.g. Outlines supports the Jinja2 templating format for inserting values and few-shot examples, whereas DSPy handles these in a different part of its workflow and hence expects the prompt not to include them. Mind engine-specific expectations when creating a prompt template.

Returns:
    str: Prompt template as string.
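
A rough, standalone sketch of how these three parts compose into prompt_template. This is not the sieves API - the class, prompt strings and labels below are made up purely for illustration:

class ToySentimentBridge:
    """Hypothetical bridge-like class with hard-coded prompt parts."""

    _default_prompt_instructions = "Classify the sentiment of the given text."
    _prompt_example_template = "Text: {{ example.text }}\nSentiment: {{ example.label }}"
    _prompt_conclusion = "Text: {{ text }}\nSentiment:"

    def __init__(self, custom_instructions: str | None = None):
        self._custom_prompt_instructions = custom_instructions

    @property
    def prompt_template(self) -> str:
        # Custom instructions take precedence; missing parts collapse to empty strings.
        instructions = self._custom_prompt_instructions or self._default_prompt_instructions
        return "\n".join(
            part for part in (instructions, self._prompt_example_template, self._prompt_conclusion) if part
        )

print(ToySentimentBridge().prompt_template)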

__init__(task_id, prompt_instructions, overwrite)

Initialize new bridge.

Parameters:
    task_id (str): Task ID. [required]
    prompt_instructions (str | None): Custom prompt instructions. If None, default instructions are used. [required]
    overwrite (bool): Whether to overwrite text with produced text. Considered only by bridges for tasks producing fluent text - like translation, summarization, PII masking, etc. [required]
Source code in sieves/tasks/predictive/bridges.py
def __init__(self, task_id: str, prompt_instructions: str | None, overwrite: bool):
    """Initialize new bridge.

    :param task_id: Task ID.
    :param prompt_instructions: Custom prompt instructions. If None, default instructions are used.
    :param overwrite: Whether to overwrite text with produced text. Considered only by bridges for tasks producing
        fluent text - like translation, summarization, PII masking, etc.
    """
    self._task_id = task_id
    self._custom_prompt_instructions = prompt_instructions
    self._overwrite = overwrite

consolidate(results, docs_offsets) abstractmethod

Consolidate results for document chunks into document results.

Parameters:
    results (Iterable[TaskResult]): Results per document chunk. [required]
    docs_offsets (list[tuple[int, int]]): Chunk offsets per document. Chunks per document can be obtained with results[docs_chunk_offsets[i][0]:docs_chunk_offsets[i][1]]. [required]

Returns:
    Iterable[TaskResult]: Results per document.

Source code in sieves/tasks/predictive/bridges.py
@abc.abstractmethod
def consolidate(self, results: Iterable[TaskResult], docs_offsets: list[tuple[int, int]]) -> Iterable[TaskResult]:
    """Consolidate results for document chunks into document results.

    :param results: Results per document chunk.
    :param docs_offsets: Chunk offsets per document. Chunks per document can be obtained with
        `results[docs_chunk_offsets[i][0]:docs_chunk_offsets[i][1]]`.
    :return Iterable[_TaskResult]: Results per document.
    """

extract(docs)

Extract all values from doc instances that are to be injected into the prompts.

Parameters:
    docs (Iterable[Doc]): Docs to extract values from. [required]

Returns:
    Iterable[dict[str, Any]]: All values from doc instances that are to be injected into the prompts.

Source code in sieves/tasks/predictive/bridges.py
def extract(self, docs: Iterable[Doc]) -> Iterable[dict[str, Any]]:
    """Extract all values from doc instances that are to be injected into the prompts.

    :param docs: Docs to extract values from.
    :return Iterable[dict[str, Any]]: All values from doc instances that are to be injected into the prompts
    """
    return ({"text": doc.text if doc.text else None} for doc in docs)

integrate(results, docs) abstractmethod

Integrate results into Doc instances.

Parameters:
    results (Iterable[TaskResult]): Results from prompt executable. [required]
    docs (Iterable[Doc]): Doc instances to update. [required]

Returns:
    Iterable[Doc]: Updated doc instances.

Source code in sieves/tasks/predictive/bridges.py
@abc.abstractmethod
def integrate(self, results: Iterable[TaskResult], docs: Iterable[Doc]) -> Iterable[Doc]:
    """Integrate results into Doc instances.

    :param results: Results from prompt executable.
    :param docs: Doc instances to update.
    :return Iterable[Doc]: Updated doc instances.
    """

GliXBridge

Bases: Bridge[list[str], Result, InferenceMode]

Bridge for GliX models.

Source code in sieves/tasks/predictive/bridges.py
class GliXBridge(Bridge[list[str], glix_.Result, glix_.InferenceMode]):
    """Bridge for GliX models."""

    def __init__(
        self,
        task_id: str,
        prompt_instructions: str | None,
        prompt_signature: tuple[str, ...] | list[str],
        inference_mode: glix_.InferenceMode,
        label_whitelist: tuple[str, ...] | None = None,
        only_keep_best: bool = False,
    ):
        """Initialize GliX bridge.

        :param task_id: Task ID.
        :param prompt_instructions: Custom prompt instructions. If None, default instructions are used.
        :param prompt_signature: Prompt signature.
        :param inference_mode: Inference mode.
        :param label_whitelist: Labels to record predictions for. If None, predictions for all labels are recorded.
        :param only_keep_best: Whether to only return the result with the highest score.
        """
        super().__init__(
            task_id=task_id,
            prompt_instructions=prompt_instructions,
            overwrite=False,
        )
        self._prompt_signature = prompt_signature
        self._inference_mode = inference_mode
        self._label_whitelist = label_whitelist
        self._has_scores = inference_mode in (
            glix_.InferenceMode.classification,
            glix_.InferenceMode.question_answering,
        )
        self._only_keep_best = only_keep_best
        self._pred_attr: str | None = None

    @override
    @property
    def _default_prompt_instructions(self) -> str:
        # GliNER doesn't support custom instructions.
        return ""

    @override
    @property
    def _prompt_example_template(self) -> str | None:
        return None

    @override
    @property
    def _prompt_conclusion(self) -> str | None:
        return None

    @override
    @property
    def prompt_signature(self) -> list[str]:
        return list(self._prompt_signature)

    @override
    @property
    def inference_mode(self) -> glix_.InferenceMode:
        return self._inference_mode

    @override
    def integrate(self, results: Iterable[glix_.Result], docs: Iterable[Doc]) -> Iterable[Doc]:
        for doc, result in zip(docs, results):
            if self._has_scores:
                doc.results[self._task_id] = []
                for res in sorted(result, key=lambda x: x["score"], reverse=True):
                    assert isinstance(res, dict)
                    doc.results[self._task_id].append((res[self._pred_attr], res["score"]))

                if self._only_keep_best:
                    doc.results[self._task_id] = doc.results[self._task_id][0]
            else:
                doc.results[self._task_id] = result
        return docs

    @override
    def consolidate(
        self, results: Iterable[glix_.Result], docs_offsets: list[tuple[int, int]]
    ) -> Iterable[glix_.Result]:
        results = list(results)

        # Determine label scores for chunks per document.
        for doc_offset in docs_offsets:
            # Prediction key exists: this is label-score situation. Extract scores and average.
            if self._has_scores:
                scores: dict[str, float] = defaultdict(lambda: 0)

                for res in results[doc_offset[0] : doc_offset[1]]:
                    seen_attrs: set[str] = set()

                    for entry in res:
                        assert isinstance(entry, dict)
                        # Fetch attribute name for predicted dict. Note that this assumes a (ATTR, score) structure.
                        if self._pred_attr is None:
                            self._pred_attr = [k for k in entry.keys() if k != "score"][0]
                        assert isinstance(self._pred_attr, str)
                        assert isinstance(entry[self._pred_attr], str)
                        assert isinstance(entry["score"], float)
                        label = entry[self._pred_attr]
                        assert isinstance(label, str)

                        # GliNER may return multiple results with the same attribute value (e.g. in classification:
                        # multiple scores for the same label). We ignore those.
                        if label in seen_attrs:
                            continue
                        seen_attrs.add(label)

                        # Ignore if whitelist set and predicted label isn't in whitelist.
                        if self._label_whitelist and label not in self._label_whitelist:
                            continue
                        scores[label] += entry["score"]

                # No predictions, yield empty list.
                if self._pred_attr is None:
                    yield []

                else:
                    # Average score, sort by it in descending order.
                    assert self._pred_attr is not None
                    sorted_scores: list[dict[str, str | float]] = sorted(
                        (
                            {self._pred_attr: attr, "score": score / (doc_offset[1] - doc_offset[0])}
                            for attr, score in scores.items()
                        ),
                        key=lambda x: x["score"],
                        reverse=True,
                    )
                    yield sorted_scores

            else:
                for res in results[doc_offset[0] : doc_offset[1]]:
                    yield res

prompt_template property

Return prompt template.

Chains _prompt_instructions, _prompt_example_template and _prompt_conclusion.

Note: different engines have different expectations as to what a prompt should look like. E.g. Outlines supports the Jinja2 templating format for inserting values and few-shot examples, whereas DSPy handles these in a different part of its workflow and hence expects the prompt not to include them. Mind engine-specific expectations when creating a prompt template.

Returns:
    str: Prompt template as string.

__init__(task_id, prompt_instructions, prompt_signature, inference_mode, label_whitelist=None, only_keep_best=False)

Initialize GliX bridge.

Parameters:
    task_id (str): Task ID. [required]
    prompt_instructions (str | None): Custom prompt instructions. If None, default instructions are used. [required]
    prompt_signature (tuple[str, ...] | list[str]): Prompt signature. [required]
    inference_mode (InferenceMode): Inference mode. [required]
    label_whitelist (tuple[str, ...] | None): Labels to record predictions for. If None, predictions for all labels are recorded. [default: None]
    only_keep_best (bool): Whether to only return the result with the highest score. [default: False]
Source code in sieves/tasks/predictive/bridges.py
def __init__(
    self,
    task_id: str,
    prompt_instructions: str | None,
    prompt_signature: tuple[str, ...] | list[str],
    inference_mode: glix_.InferenceMode,
    label_whitelist: tuple[str, ...] | None = None,
    only_keep_best: bool = False,
):
    """Initialize GliX bridge.

    :param task_id: Task ID.
    :param prompt_instructions: Custom prompt instructions. If None, default instructions are used.
    :param prompt_signature: Prompt signature.
    :param inference_mode: Inference mode.
    :param label_whitelist: Labels to record predictions for. If None, predictions for all labels are recorded.
    :param only_keep_best: Whether to only return the result with the highest score.
    """
    super().__init__(
        task_id=task_id,
        prompt_instructions=prompt_instructions,
        overwrite=False,
    )
    self._prompt_signature = prompt_signature
    self._inference_mode = inference_mode
    self._label_whitelist = label_whitelist
    self._has_scores = inference_mode in (
        glix_.InferenceMode.classification,
        glix_.InferenceMode.question_answering,
    )
    self._only_keep_best = only_keep_best
    self._pred_attr: str | None = None
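
A hypothetical construction sketch for a classification task. The constructor arguments mirror the signature above, but the import path for the engine module (aliased glix_ in the source) is an assumption - check your sieves version for the actual location:

from sieves.engines import glix_  # Assumed import path, may differ.

bridge = GliXBridge(
    task_id="topic_classification",
    prompt_instructions=None,  # GliNER ignores custom instructions anyway.
    prompt_signature=("sports", "politics", "science"),  # Candidate labels.
    inference_mode=glix_.InferenceMode.classification,
    only_keep_best=True,
)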

extract(docs)

Extract all values from doc instances that are to be injected into the prompts.

Parameters:
    docs (Iterable[Doc]): Docs to extract values from. [required]

Returns:
    Iterable[dict[str, Any]]: All values from doc instances that are to be injected into the prompts.

Source code in sieves/tasks/predictive/bridges.py
def extract(self, docs: Iterable[Doc]) -> Iterable[dict[str, Any]]:
    """Extract all values from doc instances that are to be injected into the prompts.

    :param docs: Docs to extract values from.
    :return Iterable[dict[str, Any]]: All values from doc instances that are to be injected into the prompts
    """
    return ({"text": doc.text if doc.text else None} for doc in docs)