
Classification

Classification predictive task and few‑shot example schemas.

Classification

Bases: PredictiveTask[_TaskPromptSignature, _TaskResult, _TaskBridge]

Predictive task for text classification across multiple engine backends.

Source code in sieves/tasks/predictive/classification/core.py
class Classification(PredictiveTask[_TaskPromptSignature, _TaskResult, _TaskBridge]):
    """Predictive task for text classification across multiple engine backends."""

    def __init__(
        self,
        labels: list[str],
        model: _TaskModel,
        task_id: str | None = None,
        include_meta: bool = True,
        batch_size: int = -1,
        prompt_instructions: str | None = None,
        fewshot_examples: Sequence[FewshotExample] = (),
        label_descriptions: dict[str, str] | None = None,
        multi_label: bool = True,
        generation_settings: GenerationSettings = GenerationSettings(),
    ) -> None:
        """Initialize new PredictiveTask.

        :param labels: Labels to predict.
        :param model: Model to use.
        :param task_id: Task ID.
        :param include_meta: Whether to include meta information generated by the task.
        :param batch_size: Batch size to use for inference. Use -1 to process all documents at once.
        :param prompt_instructions: Custom prompt instructions. If None, default instructions are used.
        :param fewshot_examples: Few-shot examples.
        :param label_descriptions: Optional descriptions for each label. If provided, the keys must match the labels.
        :param multi_label: If True, task returns confidence scores for all specified labels. If False, task returns
            most likely class label. In the latter case label forcing mechanisms are utilized, which can lead to higher
            accuracy.
        :param generation_settings: Generation settings.
        """
        self._labels = labels
        self._label_descriptions = label_descriptions or {}
        self._validate_label_descriptions()
        self._multi_label = multi_label

        super().__init__(
            model=model,
            task_id=task_id,
            include_meta=include_meta,
            batch_size=batch_size,
            overwrite=False,
            prompt_instructions=prompt_instructions,
            fewshot_examples=fewshot_examples,
            generation_settings=generation_settings,
        )
        self._fewshot_examples: Sequence[FewshotExample]

    def _validate_label_descriptions(self) -> None:
        """Validate that all label descriptions correspond to valid labels.

        :raises ValueError: If any label description key is not present in the labels list.
        """
        if not self._label_descriptions:
            return

        invalid_labels = set(self._label_descriptions.keys()) - set(self._labels)
        if invalid_labels:
            raise ValueError(f"Label descriptions contain invalid labels: {invalid_labels}")

    def _init_bridge(self, engine_type: EngineType) -> _TaskBridge:
        """Initialize bridge.

        :return: Engine task.
        :raises ValueError: If engine type is not supported.
        """
        if engine_type == EngineType.glix:
            # GliXBridge needs different arguments than other bridges, hence we instantiate it differently.
            return GliXBridge(
                task_id=self._task_id,
                prompt_instructions=self._custom_prompt_instructions,
                prompt_signature=self._labels,
                inference_mode=glix_.InferenceMode.classification,
                label_whitelist=tuple(self._labels),
                only_keep_best=not self._multi_label,
            )

        bridge_types: dict[EngineType, type[_TaskBridge]] = {
            EngineType.dspy: DSPyClassification,
            EngineType.huggingface: HuggingFaceClassification,
            EngineType.outlines: OutlinesClassification,
            EngineType.langchain: LangChainClassification,
        }

        try:
            bridge_type = bridge_types[engine_type]
            assert not issubclass(bridge_type, GliXBridge)

            return bridge_type(
                task_id=self._task_id,
                prompt_instructions=self._custom_prompt_instructions,
                labels=self._labels,
                label_descriptions=self._label_descriptions,
                multi_label=self._multi_label,
            )
        except KeyError as err:
            raise KeyError(f"Engine type {engine_type} is not supported by {self.__class__.__name__}.") from err

    @override
    @property
    def supports(self) -> set[EngineType]:
        return {
            EngineType.dspy,
            EngineType.glix,
            EngineType.huggingface,
            EngineType.langchain,
            EngineType.outlines,
        }

    def _validate_fewshot_examples(self) -> None:
        label_error_text = (
            "Label mismatch: {task_id} has labels {labels}. Few-shot examples have labels {example_labels}."
        )
        example_type_error_text = "Fewshot example type mismatch: multi_label = {multi_label} requires {example_type}."

        for fs_example in self._fewshot_examples or []:
            if self._multi_label:
                assert isinstance(fs_example, FewshotExampleMultiLabel), TypeError(
                    example_type_error_text.format(example_type=FewshotExampleMultiLabel, multi_label=self._multi_label)
                )
                if any([label not in self._labels for label in fs_example.confidence_per_label]) or not all(
                    [label in fs_example.confidence_per_label for label in self._labels]
                ):
                    raise ValueError(
                        label_error_text.format(
                            task_id=self.id, labels=self._labels, example_labels=fs_example.confidence_per_label.keys()
                        )
                    )
            else:
                assert isinstance(fs_example, FewshotExampleSingleLabel), TypeError(
                    example_type_error_text.format(
                        example_type=FewshotExampleSingleLabel, multi_label=self._multi_label
                    )
                )
                if fs_example.label not in self._labels:
                    raise ValueError(
                        label_error_text.format(task_id=self.id, labels=self._labels, example_labels=(fs_example.label))
                    )

    @property
    def _state(self) -> dict[str, Any]:
        return {
            **super()._state,
            "labels": self._labels,
            "label_descriptions": self._label_descriptions,
        }

    @staticmethod
    def _result_to_scores(result: Any) -> dict[str, float]:
        """Normalize a single result to a mapping of label → score.

        Supports lists of pairs, a single (label, score) pair, a plain
        string label (assumes score 1.0), or a Pydantic model with
        attributes ``label`` and optional ``score``.

        :param result: One result value from ``doc.results``.

        :return: Mapping from label to score.

        :raises TypeError: If the result has an unsupported type or shape.

        """
        if isinstance(result, list) and all(isinstance(item, list | tuple) and len(item) == 2 for item in result):
            return {str(label): float(score) for label, score in result}

        if isinstance(result, tuple) and len(result) == 2:
            label, score = result
            return {str(label): float(score)}

        if isinstance(result, str):
            return {result: 1.0}

        if isinstance(result, pydantic.BaseModel) or hasattr(result, "model_dump"):
            try:
                label = getattr(result, "label")
                score = getattr(result, "score", 1.0)
                return {str(label): float(score)}
            except Exception as exc:
                raise TypeError(f"Unsupported pydantic result shape: {type(result)}") from exc

        raise TypeError(f"Unsupported result type in to_hf_dataset: {type(result)}")

    @override
    def distill(
        self,
        base_model_id: str,
        framework: DistillationFramework,
        data: datasets.Dataset | Sequence[Doc],
        output_path: Path | str,
        val_frac: float,
        init_kwargs: dict[str, Any] | None = None,
        train_kwargs: dict[str, Any] | None = None,
        seed: int | None = None,
    ) -> None:
        init_kwargs = init_kwargs or {}
        train_kwargs = train_kwargs or {}
        output_path = Path(output_path)
        output_path.mkdir(parents=True, exist_ok=True)

        data = self.to_hf_dataset(data) if isinstance(data, Sequence) else data

        required_columns = {"text", "labels"}
        if not required_columns.issubset(data.column_names):
            raise ValueError(f"Dataset must contain columns: {required_columns}. Found: {data.column_names}")

        dataset_splits = self._split_dataset(data, 1 - val_frac, val_frac, seed)
        dataset_splits.save_to_disk(output_path / "data")

        match framework:
            case DistillationFramework.setfit:
                default_init_kwargs: dict[str, Any] = {}
                metric_kwargs: dict[str, Any] = {}

                if self._multi_label:
                    default_init_kwargs["multi_target_strategy"] = "multi-output"
                    metric_kwargs = {"average": "macro"}

                model = setfit.SetFitModel.from_pretrained(base_model_id, **(default_init_kwargs | init_kwargs))

                args = setfit.TrainingArguments(
                    output_dir=str(output_path),
                    eval_strategy="epoch",
                    save_strategy="epoch",
                    load_best_model_at_end=True,
                    **train_kwargs,
                )

                trainer = setfit.Trainer(
                    model=model,
                    args=args,
                    train_dataset=dataset_splits["train"],
                    eval_dataset=dataset_splits.get("val"),
                    metric="f1",
                    column_mapping={"text": "text", "labels": "label"},
                    metric_kwargs=metric_kwargs,
                )
                trainer.train()
                trainer.model.save_pretrained(output_path)

                metrics = trainer.evaluate()
                with open(output_path / "metrics.json", "w") as f:
                    json.dump(metrics, f, indent=4)

            case DistillationFramework.model2vec:

                def one_hot_to_label(label_indices: list[int]) -> list[str]:
                    """Convert list of label indices into list of labels.

                    :param label_indices: List of label indices.
                    :return: List of labels.
                    """
                    return [self._labels[i] for i, is_label in enumerate(label_indices) if is_label]

                classifier = model2vec.train.StaticModelForClassification.from_pretrained(
                    model_name=base_model_id, **init_kwargs
                )
                classifier.fit(
                    dataset_splits["train"]["text"],
                    [one_hot_to_label(encoded_labels) for encoded_labels in dataset_splits["train"]["labels"]],
                    **train_kwargs,
                )
                classifier.to_pipeline().save_pretrained(output_path)

                metrics = classifier.evaluate(
                    dataset_splits["val"]["text"],
                    [one_hot_to_label(encoded_labels) for encoded_labels in dataset_splits["val"]["labels"]],
                )
                with open(output_path / "metrics.json", "w") as f:
                    json.dump(metrics, f, indent=4)

            case _:
                raise NotImplementedError(
                    f"Unsupported distillation framework for this task: {framework}. "
                    f"Please choose one of {DistillationFramework.setfit, DistillationFramework.model2vec}"
                )

    def to_hf_dataset(self, docs: Iterable[Doc], threshold: float = 0.5) -> datasets.Dataset:
        """Convert results to a Hugging Face dataset with multi-hot labels.

        The emitted dataset contains a ``text`` column and a ``labels`` column which is a multi-hot list aligned to
        ``self._labels``. This method is robust to different result shapes produced by various engines and bridges in
        both single-label and multi-label configurations:
        - ``list[tuple[str, float]]`` for multi-label results
        - ``tuple[str, float]`` for single-label results
        - ``str`` for single-label results (assumes score ``1.0``)
        - ``pydantic.BaseModel`` exposing ``label`` and optional ``score``

        :param docs: Documents whose ``results`` contain outputs for this task id.
        :param threshold: Threshold to convert scores into multi-hot indicators.

        :return: A ``datasets.Dataset`` with ``text`` and multi-hot ``labels``.

        :raises KeyError: If any document is missing this task's results.
        :raises TypeError: If a result cannot be interpreted.

        """
        data: list[dict[str, str | list[bool]]] = []

        # Define metadata and features (multi-hot across declared labels for multi-label).
        if self._multi_label:
            features = datasets.Features(
                {"text": datasets.Value("string"), "labels": datasets.Sequence(datasets.Value("bool"))}
            )
        else:
            features = datasets.Features(
                {"text": datasets.Value("string"), "labels": datasets.ClassLabel(names=self._labels)}
            )

        info = datasets.DatasetInfo(
            description=(
                f"{'Multi-label' if self._multi_label else 'Single-label'} classification dataset with labels "
                f"{self._labels}. Generated with sieves v{Config.get_version()}."
            ),
            features=features,
        )

        try:
            for doc in docs:
                scores = Classification._result_to_scores(doc.results[self._task_id])

                # If multi-label: store multi-hot representation.
                if self._multi_label:
                    result_normalized = [int(scores.get(label, 0.0) >= threshold) for label in self._labels]
                # If single-label: get single-label result as is
                else:
                    keys = list(scores.keys())
                    assert len(keys) == 1
                    result_normalized = keys[0]

                data.append({"text": doc.text, "labels": result_normalized})

        except KeyError as err:
            raise KeyError(f"Not all documents have results for this task with ID {self._task_id}") from err

        return datasets.Dataset.from_list(data, features=features, info=info)

    @override
    def _evaluate_optimization_example(
        self, truth: dspy.Example, pred: dspy.Prediction, model: dspy.LM, trace: Any | None = None
    ) -> float:
        if not self._multi_label:
            return 1 - abs(truth["confidence"] - pred["confidence"]) if truth["label"] == pred["label"] else 0

        # For multi-label: compute label-wise accuracy as
        # 1 - abs(true confidence for label - predicted confidence for label)
        # and normalize the sum of label-wise accuracies over all labels.
        accuracy = 0
        for label, confidence in truth["confidence_per_label"].items():
            if label in pred["confidence_per_label"]:
                pred_confidence = max(min(pred["confidence_per_label"][label], 1), 0)
                accuracy += 1 - abs(confidence - pred_confidence)

        return accuracy / len(truth["confidence_per_label"])
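
A minimal usage sketch. The import paths (sieves.Doc, sieves.tasks.Classification) and the model placeholder are assumptions about the public API; substitute whatever your installation and engine backend provide.

# Hedged usage sketch: import paths and model construction are assumptions, not the documented API.
from sieves import Doc
from sieves.tasks import Classification

model = ...  # any model accepted by one of the supported backends (DSPy, outlines, LangChain, Hugging Face, GliX)

task = Classification(
    labels=["science", "politics", "sports"],
    model=model,
    multi_label=True,  # return confidence scores for all labels
)

docs = [Doc(text="The team clinched the championship in the final seconds.")]
processed = list(task(docs))

# Results are stored per task ID; multi-label runs yield (label, score) pairs sorted by score.
print(processed[0].results[task.id])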

fewshot_examples property

Return few-shot examples.

Returns:

Type Description
Sequence[FewshotExample]

Few-shot examples.

id property

Return task ID.

Used by pipeline for results and dependency management.

Returns:

Type Description
str

Task ID.

prompt_signature_description property

Return prompt signature description.

Returns:

Type Description
str | None

Prompt signature description.

prompt_template property

Return prompt template.

Returns:

Type Description
str

Prompt template.

__add__(other)

Chain this task with another task or pipeline using the + operator.

This returns a new Pipeline that executes this task first, followed by the task(s) in other. The original task(s)/pipeline are not mutated.

Cache semantics:

- If other is a Pipeline, the resulting pipeline adopts other's use_cache setting (because the left-hand side is a single task).
- If other is a Task, the resulting pipeline defaults to use_cache=True.

Parameters:

Name Type Description Default
other Task | Pipeline

A Task or Pipeline to execute after this task.

required

Returns:

Type Description
Pipeline

A new Pipeline representing the chained execution.

Raises:

Type Description
TypeError

If other is not a Task or Pipeline.

Source code in sieves/tasks/core.py
def __add__(self, other: Task | Pipeline) -> Pipeline:
    """Chain this task with another task or pipeline using the ``+`` operator.

    This returns a new ``Pipeline`` that executes this task first, followed by the
    task(s) in ``other``. The original task(s)/pipeline are not mutated.

    Cache semantics:
    - If ``other`` is a ``Pipeline``, the resulting pipeline adopts ``other``'s
      ``use_cache`` setting (because the left-hand side is a single task).
    - If ``other`` is a ``Task``, the resulting pipeline defaults to ``use_cache=True``.

    :param other: A ``Task`` or ``Pipeline`` to execute after this task.
    :return: A new ``Pipeline`` representing the chained execution.
    :raises TypeError: If ``other`` is not a ``Task`` or ``Pipeline``.
    """
    # Lazy import to avoid circular dependency at module import time.
    from sieves.pipeline import Pipeline

    if isinstance(other, Pipeline):
        return Pipeline(tasks=[self, *other.tasks], use_cache=other.use_cache)

    if isinstance(other, Task):
        return Pipeline(tasks=[self, other])

    raise TypeError(f"Cannot chain Task with {type(other).__name__}")

__call__(docs)

Execute the task on a set of documents.

Parameters:

Name Type Description Default
docs Iterable[Doc]

Documents to process.

required

Returns:

Type Description
Iterable[Doc]

Processed documents.

Source code in sieves/tasks/predictive/core.py
def __call__(self, docs: Iterable[Doc]) -> Iterable[Doc]:
    """Execute the task on a set of documents.

    :param docs: Documents to process.
    :return Iterable[Doc]: Processed documents.
    """
    # 1. Compile expected prompt signatures.
    signature = self._bridge.prompt_signature

    # 2. Build executable.
    executable = self._engine.build_executable(
        inference_mode=self._bridge.inference_mode,
        prompt_template=self.prompt_template,
        prompt_signature=signature,
        fewshot_examples=self._fewshot_examples,
    )

    # Compute batch-wise results.
    batch_size = self._batch_size if self._batch_size > 0 else sys.maxsize
    while docs_batch := [doc for doc in itertools.islice(docs, batch_size)]:
        if len(docs_batch) == 0:
            break

        # 3. Extract values from docs to inject/render those into prompt templates.
        docs_values = list(self._bridge.extract(docs_batch))
        assert len(docs_values) == len(docs_batch)

        # 4. Map extracted docs values onto chunks.
        docs_chunks_offsets: list[tuple[int, int]] = []
        docs_chunks: list[dict[str, Any]] = []
        for doc, doc_values in zip(docs_batch, docs_values):
            assert doc.text
            doc_chunks_values = [doc_values | {"text": chunk} for chunk in (doc.chunks or [doc.text])]
            docs_chunks_offsets.append((len(docs_chunks), len(docs_chunks) + len(doc_chunks_values)))
            docs_chunks.extend(doc_chunks_values)

        # 5. Execute prompts per chunk.
        results = list(executable(docs_chunks))
        assert len(results) == len(docs_chunks)

        # 6. Consolidate chunk results.
        results = list(self._bridge.consolidate(results, docs_chunks_offsets))
        assert len(results) == len(docs_batch)

        # 7. Integrate results into docs.
        docs_batch = self._bridge.integrate(results, docs_batch)

        yield from docs_batch
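
The offset bookkeeping in steps 4-6 can be illustrated with plain lists; this standalone sketch is independent of any engine or bridge.

# Two documents: the first split into two chunks, the second into one.
docs_chunks: list[str] = []
docs_chunks_offsets: list[tuple[int, int]] = []

for chunks in [["doc0-chunk0", "doc0-chunk1"], ["doc1-chunk0"]]:
    docs_chunks_offsets.append((len(docs_chunks), len(docs_chunks) + len(chunks)))
    docs_chunks.extend(chunks)

# docs_chunks_offsets == [(0, 2), (2, 3)]: results[0:2] belong to doc 0 and results[2:3] to doc 1,
# which is exactly the slicing contract that consolidate() receives.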

__init__(labels, model, task_id=None, include_meta=True, batch_size=-1, prompt_instructions=None, fewshot_examples=(), label_descriptions=None, multi_label=True, generation_settings=GenerationSettings())

Initialize new PredictiveTask.

Parameters:

Name Type Description Default
labels list[str]

Labels to predict.

required
model _TaskModel

Model to use.

required
task_id str | None

Task ID.

None
include_meta bool

Whether to include meta information generated by the task.

True
batch_size int

Batch size to use for inference. Use -1 to process all documents at once.

-1
prompt_instructions str | None

Custom prompt instructions. If None, default instructions are used.

None
fewshot_examples Sequence[FewshotExample]

Few-shot examples.

()
label_descriptions dict[str, str] | None

Optional descriptions for each label. If provided, the keys must match the labels.

None
multi_label bool

If True, task returns confidence scores for all specified labels. If False, task returns most likely class label. In the latter case label forcing mechanisms are utilized, which can lead to higher accuracy.

True
generation_settings GenerationSettings

Generation settings.

GenerationSettings()
Source code in sieves/tasks/predictive/classification/core.py
def __init__(
    self,
    labels: list[str],
    model: _TaskModel,
    task_id: str | None = None,
    include_meta: bool = True,
    batch_size: int = -1,
    prompt_instructions: str | None = None,
    fewshot_examples: Sequence[FewshotExample] = (),
    label_descriptions: dict[str, str] | None = None,
    multi_label: bool = True,
    generation_settings: GenerationSettings = GenerationSettings(),
) -> None:
    """Initialize new PredictiveTask.

    :param labels: Labels to predict.
    :param model: Model to use.
    :param task_id: Task ID.
    :param include_meta: Whether to include meta information generated by the task.
    :param batch_size: Batch size to use for inference. Use -1 to process all documents at once.
    :param prompt_instructions: Custom prompt instructions. If None, default instructions are used.
    :param fewshot_examples: Few-shot examples.
    :param label_descriptions: Optional descriptions for each label. If provided, the keys must match the labels.
    :param multi_label: If True, task returns confidence scores for all specified labels. If False, task returns
        most likely class label. In the latter case label forcing mechanisms are utilized, which can lead to higher
        accuracy.
    :param generation_settings: Generation settings.
    """
    self._labels = labels
    self._label_descriptions = label_descriptions or {}
    self._validate_label_descriptions()
    self._multi_label = multi_label

    super().__init__(
        model=model,
        task_id=task_id,
        include_meta=include_meta,
        batch_size=batch_size,
        overwrite=False,
        prompt_instructions=prompt_instructions,
        fewshot_examples=fewshot_examples,
        generation_settings=generation_settings,
    )
    self._fewshot_examples: Sequence[FewshotExample]
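
A hedged configuration sketch for single-label classification with label descriptions; the model placeholder stands in for any supported engine model.

# Single-label setup: the task returns the most likely label and can use label forcing where available.
model = ...  # placeholder

task = Classification(
    labels=["bug", "feature_request", "question"],
    label_descriptions={
        "bug": "Report of broken or incorrect behavior.",
        "feature_request": "Request for new functionality.",
        "question": "Request for help or clarification.",
    },
    model=model,
    multi_label=False,
    batch_size=32,
)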

deserialize(config, **kwargs) classmethod

Generate PredictiveTask instance from config.

Parameters:

Name Type Description Default
config Config

Config to generate instance from.

required
kwargs dict[str, Any]

Values to inject into loaded config.

{}

Returns:

Type Description
PredictiveTask[TaskPromptSignature, TaskResult, TaskBridge]

Deserialized PredictiveTask instance.

Source code in sieves/tasks/predictive/core.py
@classmethod
def deserialize(
    cls, config: Config, **kwargs: dict[str, Any]
) -> PredictiveTask[TaskPromptSignature, TaskResult, TaskBridge]:
    """Generate PredictiveTask instance from config.

    :param config: Config to generate instance from.
    :param kwargs: Values to inject into loaded config.
    :return PredictiveTask[_TaskPromptSignature, _TaskResult, _TaskBridge]: Deserialized PredictiveTask instance.
    """
    init_dict = config.to_init_dict(cls, **kwargs)
    init_dict["generation_settings"] = GenerationSettings.model_validate(init_dict["generation_settings"])

    return cls(**init_dict)
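
A hedged round-trip sketch. How non-serializable values such as the model are re-injected via kwargs is an assumption here; check the Config produced by serialize() for the exact keys expected.

task = ...   # a configured Classification task (see the usage sketch above)
model = ...  # placeholder

config = task.serialize()

# Re-injecting the model by keyword is an assumption about the kwargs format.
restored = Classification.deserialize(config, model=model)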

optimize(optimizer, verbose=True)

Optimize task prompt and few-shot examples with the available optimization config.

Updates task to use best prompt and few-shot examples found by the optimizer.

Parameters:

Name Type Description Default
optimizer Optimizer

Optimizer to run.

required
verbose bool

Whether to show verbose output. DSPy produces a good amount of logs, so disabling this can be useful to avoid polluting your terminal. When disabled, only warnings and errors are printed.

True

Returns:

Type Description
tuple[str, Sequence[FewshotExample]]

Best found prompt and few-shot examples.

Source code in sieves/tasks/predictive/core.py
def optimize(self, optimizer: optimization.Optimizer, verbose: bool = True) -> tuple[str, Sequence[FewshotExample]]:
    """Optimize task prompt and few-shot examples with the available optimization config.

    Updates task to use best prompt and few-shot examples found by the optimizer.

    :param optimizer: Optimizer to run.
    :param verbose: Whether to show verbose output. DSPy produces a good amount of logs, so disabling this can be
        useful to avoid polluting your terminal. When disabled, only warnings and errors are printed.

    :return tuple[str, Sequence[FewshotExample]]: Best found prompt and few-shot examples.
    """
    assert len(self._fewshot_examples) > 1, "At least two few-shot examples need to be provided to optimize."

    # Run optimizer to get best prompt and few-shot examples.
    signature = self._get_task_signature()
    dspy_examples = [ex.to_dspy() for ex in self._fewshot_examples]
    pred_eval = functools.partial(self._evaluate_optimization_example, model=optimizer.model)

    if verbose:
        best_prompt, best_examples = optimizer(signature, dspy_examples, pred_eval, verbose=verbose)
    else:
        # Temporarily suppress DSPy logs.
        dspy_logger = logging.getLogger("dspy")
        optuna_logger = logging.getLogger("optuna")
        original_dspy_level = dspy_logger.level
        original_optuna_level = optuna_logger.level

        try:
            dspy_logger.setLevel(logging.ERROR)
            optuna_logger.setLevel(logging.ERROR)
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                best_prompt, best_examples = optimizer(signature, dspy_examples, pred_eval, verbose=verbose)
        finally:
            dspy_logger.setLevel(original_dspy_level)
            optuna_logger.setLevel(original_optuna_level)

    # Update few-shot examples and prompt instructions.
    fewshot_example_cls = self._fewshot_examples[0].__class__
    self._fewshot_examples = [fewshot_example_cls.from_dspy(ex) for ex in best_examples]
    self._validate_fewshot_examples()
    self._custom_prompt_instructions = best_prompt

    # Reinitialize bridge to use new prompt and few-shot examples.
    self._bridge = self._init_bridge(EngineType.get_engine_type(self._engine))

    return best_prompt, self._fewshot_examples
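
A hedged sketch of running optimization; constructing the Optimizer is elided, and the task must already hold at least two few-shot examples.

task = ...       # a configured Classification task with few-shot examples set
optimizer = ...  # an optimization.Optimizer configured with a dspy.LM; construction elided here

# Updates the task's prompt and few-shot examples in place and returns the best candidates found.
best_prompt, best_examples = task.optimize(optimizer, verbose=False)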

serialize()

Serialize task.

Returns:

Type Description
Config

Config instance.

Source code in sieves/tasks/core.py
def serialize(self) -> Config:
    """Serialize task.

    :return: Config instance.
    """
    return Config.create(self.__class__, {k: Attribute(value=v) for k, v in self._state.items()})

to_hf_dataset(docs, threshold=0.5)

Convert results to a Hugging Face dataset with multi-hot labels.

The emitted dataset contains a text column and a labels column which is a multi-hot list aligned to self._labels. This method is robust to different result shapes produced by various engines and bridges in both single-label and multi-label configurations:

- list[tuple[str, float]] for multi-label results
- tuple[str, float] for single-label results
- str for single-label results (assumes score 1.0)
- pydantic.BaseModel exposing label and optional score

Parameters:

Name Type Description Default
docs Iterable[Doc]

Documents whose results contain outputs for this task id.

required
threshold float

Threshold to convert scores into multi-hot indicators.

0.5

Returns:

Type Description
Dataset

A datasets.Dataset with text and multi-hot labels.

Raises:

Type Description
KeyError

If any document is missing this task's results.

TypeError

If a result cannot be interpreted.

Source code in sieves/tasks/predictive/classification/core.py
def to_hf_dataset(self, docs: Iterable[Doc], threshold: float = 0.5) -> datasets.Dataset:
    """Convert results to a Hugging Face dataset with multi-hot labels.

    The emitted dataset contains a ``text`` column and a ``labels`` column which is a multi-hot list aligned to
    ``self._labels``. This method is robust to different result shapes produced by various engines and bridges in
    both single-label and multi-label configurations:
    - ``list[tuple[str, float]]`` for multi-label results
    - ``tuple[str, float]`` for single-label results
    - ``str`` for single-label results (assumes score ``1.0``)
    - ``pydantic.BaseModel`` exposing ``label`` and optional ``score``

    :param docs: Documents whose ``results`` contain outputs for this task id.
    :param threshold: Threshold to convert scores into multi-hot indicators.

    :return: A ``datasets.Dataset`` with ``text`` and multi-hot ``labels``.

    :raises KeyError: If any document is missing this task's results.
    :raises TypeError: If a result cannot be interpreted.

    """
    data: list[dict[str, str | list[bool]]] = []

    # Define metadata and features (multi-hot across declared labels for multi-label).
    if self._multi_label:
        features = datasets.Features(
            {"text": datasets.Value("string"), "labels": datasets.Sequence(datasets.Value("bool"))}
        )
    else:
        features = datasets.Features(
            {"text": datasets.Value("string"), "labels": datasets.ClassLabel(names=self._labels)}
        )

    info = datasets.DatasetInfo(
        description=(
            f"{'Multi-label' if self._multi_label else 'Single-label'} classification dataset with labels "
            f"{self._labels}. Generated with sieves v{Config.get_version()}."
        ),
        features=features,
    )

    try:
        for doc in docs:
            scores = Classification._result_to_scores(doc.results[self._task_id])

            # If multi-label: store multi-hot representation.
            if self._multi_label:
                result_normalized = [int(scores.get(label, 0.0) >= threshold) for label in self._labels]
            # If single-label: get single-label result as is
            else:
                keys = list(scores.keys())
                assert len(keys) == 1
                result_normalized = keys[0]

            data.append({"text": doc.text, "labels": result_normalized})

    except KeyError as err:
        raise KeyError(f"Not all documents have results for this task with ID {self._task_id}") from err

    return datasets.Dataset.from_list(data, features=features, info=info)
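
A hedged sketch of exporting results and distilling a student model. The DistillationFramework import path and the base model ID are assumptions; processed_docs stands for documents already run through this task.

task = ...            # a configured Classification task (see the usage sketch above)
processed_docs = ...  # documents already processed by the task

# Export predictions for this task into a Hugging Face dataset (multi-hot labels for multi-label tasks).
dataset = task.to_hf_dataset(processed_docs, threshold=0.5)

# Optionally distill into a compact SetFit classifier; the import path below is an assumption.
from sieves.tasks import DistillationFramework

task.distill(
    base_model_id="sentence-transformers/paraphrase-MiniLM-L3-v2",  # placeholder base encoder
    framework=DistillationFramework.setfit,
    data=processed_docs,
    output_path="distilled_classifier",
    val_frac=0.2,
)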

FewshotExampleMultiLabel

Bases: FewshotExample

Few‑shot example for multi‑label classification with per‑label confidences.

Source code in sieves/tasks/predictive/classification/core.py
class FewshotExampleMultiLabel(BaseFewshotExample):
    """Few‑shot example for multi‑label classification with per‑label confidences."""

    reasoning: str
    confidence_per_label: dict[str, float]

    @override
    @property
    def target_fields(self) -> Sequence[str]:
        return ("confidence_per_label",)

    @pydantic.model_validator(mode="after")
    def check_confidence(self) -> FewshotExampleMultiLabel:
        """Validate that confidences lie within [0, 1]."""
        if any(not 0 <= conf <= 1 for conf in self.confidence_per_label.values()):
            raise ValueError("Confidence has to be between 0 and 1.")
        return self
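
A hedged construction sketch. The text field is assumed to be inherited from the few-shot example base class, since the prompt templates reference example.text; such examples are passed to Classification via fewshot_examples when multi_label=True.

example = FewshotExampleMultiLabel(
    text="The rover transmitted new images from the crater.",  # inherited input field (assumption)
    reasoning="The text reports on a space mission, which is scientific in nature.",
    confidence_per_label={"science": 0.95, "politics": 0.05, "sports": 0.0},
)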

input_fields property

Defines which fields are inputs.

Returns:

Type Description
Sequence[str]

Sequence of field names.

check_confidence()

Validate that confidences lie within [0, 1].

Source code in sieves/tasks/predictive/classification/core.py
@pydantic.model_validator(mode="after")
def check_confidence(self) -> FewshotExampleMultiLabel:
    """Validate that confidences lie within [0, 1]."""
    if any(not 0 <= conf <= 1 for conf in self.confidence_per_label.values()):
        raise ValueError("Confidence has to be between 0 and 1.")
    return self

from_dspy(example) classmethod

Convert from dspy.Example.

Parameters:

Name Type Description Default
example Example

Example as dspy.Example.

required

Returns:

Type Description
Self

Example as FewshotExample.

Source code in sieves/tasks/predictive/core.py
@classmethod
def from_dspy(cls, example: dspy.Example) -> Self:
    """Convert from `dspy.Example`.

    :param example: Example as `dspy.Example`.
    :returns: Example as `FewshotExample`.
    """
    return cls(**example)

to_dspy()

Convert to dspy.Example.

Returns:

Type Description
Example

Example as dspy.Example.

Source code in sieves/tasks/predictive/core.py
def to_dspy(self) -> dspy.Example:
    """Convert to `dspy.Example`.

    :returns: Example as `dspy.Example`.
    """
    return dspy.Example(**Engine.convert_fewshot_examples([self])[0]).with_inputs(self.input_fields)

FewshotExampleSingleLabel

Bases: FewshotExample

Few‑shot example for single‑label classification with a global confidence.

Source code in sieves/tasks/predictive/classification/core.py
class FewshotExampleSingleLabel(BaseFewshotExample):
    """Few‑shot example for single‑label classification with a global confidence."""

    reasoning: str
    label: str
    confidence: float

    @override
    @property
    def target_fields(self) -> Sequence[str]:
        return ("label", "confidence")

    @pydantic.model_validator(mode="after")
    def check_confidence(self) -> FewshotExampleSingleLabel:
        """Check confidence value.

        Return:
            FewshotExampleSingleLabel instance.

        """
        if not (0 <= self.confidence <= 1):
            raise ValueError("Confidence has to be between 0 and 1.")
        return self
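
A hedged construction sketch for the single-label variant, again assuming an inherited text field; these examples are required when multi_label=False.

example = FewshotExampleSingleLabel(
    text="The senate passed the new budget bill.",  # inherited input field (assumption)
    reasoning="The text covers legislative activity.",
    label="politics",
    confidence=0.9,
)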

input_fields property

Defines which fields are inputs.

Returns:

Type Description
Sequence[str]

Sequence of field names.

check_confidence()

Validate that confidence lies within [0, 1].

Return: FewshotExampleSingleLabel instance.

Source code in sieves/tasks/predictive/classification/core.py
@pydantic.model_validator(mode="after")
def check_confidence(self) -> FewshotExampleSingleLabel:
    """Check confidence value.

    Return:
        FewshotExampleSingleLabel instance.

    """
    if not (0 <= self.confidence <= 1):
        raise ValueError("Confidence has to be between 0 and 1.")
    return self

from_dspy(example) classmethod

Convert from dspy.Example.

Parameters:

Name Type Description Default
example Example

Example as dspy.Example.

required

Returns:

Type Description
Self

Example as FewshotExample.

Source code in sieves/tasks/predictive/core.py
@classmethod
def from_dspy(cls, example: dspy.Example) -> Self:
    """Convert from `dspy.Example`.

    :param example: Example as `dspy.Example`.
    :returns: Example as `FewshotExample`.
    """
    return cls(**example)

to_dspy()

Convert to dspy.Example.

Returns:

Type Description
Example

Example as dspy.Example.

Source code in sieves/tasks/predictive/core.py
def to_dspy(self) -> dspy.Example:
    """Convert to `dspy.Example`.

    :returns: Example as `dspy.Example`.
    """
    return dspy.Example(**Engine.convert_fewshot_examples([self])[0]).with_inputs(self.input_fields)

Bridges for classification task.

ClassificationBridge

Bases: Bridge[_BridgePromptSignature, _BridgeResult, EngineInferenceMode], ABC

Abstract base class for classification bridges.

Source code in sieves/tasks/predictive/classification/bridges.py
class ClassificationBridge(Bridge[_BridgePromptSignature, _BridgeResult, EngineInferenceMode], abc.ABC):
    """Abstract base class for classification bridges."""

    def __init__(
        self,
        task_id: str,
        prompt_instructions: str | None,
        labels: list[str],
        multi_label: bool,
        label_descriptions: dict[str, str] | None = None,
    ):
        """Initialize InformationExtractionBridge.

        :param task_id: Task ID.
        :param prompt_instructions: Custom prompt instructions. If None, default instructions are used.
        :param labels: Labels to classify.
        :param multi_label: If True, task returns confidence scores for all specified labels. If False, task returns
            most likely class label. In the latter case label forcing mechanisms are utilized, which can lead to higher
            accuracy.
        :param label_descriptions: Optional descriptions for each label.
        """
        super().__init__(
            task_id=task_id,
            prompt_instructions=prompt_instructions,
            overwrite=False,
        )
        self._labels = labels
        self._multi_label = multi_label
        self._label_descriptions = label_descriptions or {}

    def _get_label_descriptions(self) -> str:
        """Return a string with the label descriptions.

        :return: A string with the label descriptions.
        """
        labels_with_descriptions: list[str] = []
        for label in self._labels:
            if label in self._label_descriptions:
                labels_with_descriptions.append(
                    f"<label_description><label>{label}</label><description>"
                    f"{self._label_descriptions[label]}</description></label_description>"
                )
            else:
                labels_with_descriptions.append(label)

        crlf = "\n\t\t\t"
        label_desc_string = crlf + "\t" + (crlf + "\t").join(labels_with_descriptions)
        return f"{crlf}<label_descriptions>{label_desc_string}{crlf}</label_descriptions>\n\t\t"

inference_mode abstractmethod property

Return inference mode.

Returns:

Type Description
EngineInferenceMode

Inference mode.

prompt_signature abstractmethod property

Create output signature.

E.g.: Signature in DSPy, Pydantic objects in outlines, JSON schema in jsonformers. This is engine-specific.

Returns:

Type Description
type[TaskPromptSignature] | TaskPromptSignature

Output signature object. This can be an instance (e.g. a regex string) or a class (e.g. a Pydantic class).

prompt_template property

Return prompt template.

Chains _prompt_instructions, _prompt_example_template and _prompt_conclusion.

Note: different engines have different expectations as to how a prompt should look. E.g. outlines supports the Jinja 2 templating format for inserting values and few-shot examples, whereas DSPy handles these in a different part of its workflow and hence expects the prompt not to include them. Mind engine-specific expectations when creating a prompt template.

Returns:

Type Description
str

Prompt template as string. None if not used by engine.

__init__(task_id, prompt_instructions, labels, multi_label, label_descriptions=None)

Initialize ClassificationBridge.

Parameters:

Name Type Description Default
task_id str

Task ID.

required
prompt_instructions str | None

Custom prompt instructions. If None, default instructions are used.

required
labels list[str]

Labels to classify.

required
multi_label bool

If True, task returns confidence scores for all specified labels. If False, task returns most likely class label. In the latter case label forcing mechanisms are utilized, which can lead to higher accuracy.

required
label_descriptions dict[str, str] | None

Optional descriptions for each label.

None
Source code in sieves/tasks/predictive/classification/bridges.py
def __init__(
    self,
    task_id: str,
    prompt_instructions: str | None,
    labels: list[str],
    multi_label: bool,
    label_descriptions: dict[str, str] | None = None,
):
    """Initialize InformationExtractionBridge.

    :param task_id: Task ID.
    :param prompt_instructions: Custom prompt instructions. If None, default instructions are used.
    :param labels: Labels to classify.
    :param multi_label: If True, task returns confidence scores for all specified labels. If False, task returns
        most likely class label. In the latter case label forcing mechanisms are utilized, which can lead to higher
        accuracy.
    :param label_descriptions: Optional descriptions for each label.
    """
    super().__init__(
        task_id=task_id,
        prompt_instructions=prompt_instructions,
        overwrite=False,
    )
    self._labels = labels
    self._multi_label = multi_label
    self._label_descriptions = label_descriptions or {}

consolidate(results, docs_offsets) abstractmethod

Consolidate results for document chunks into document results.

Parameters:

Name Type Description Default
results Iterable[TaskResult]

Results per document chunk.

required
docs_offsets list[tuple[int, int]]

Chunk offsets per document. Chunks per document can be obtained with results[docs_offsets[i][0]:docs_offsets[i][1]].

required

Returns:

Type Description
Iterable[TaskResult]

Results per document.

Source code in sieves/tasks/predictive/bridges.py
@abc.abstractmethod
def consolidate(self, results: Iterable[TaskResult], docs_offsets: list[tuple[int, int]]) -> Iterable[TaskResult]:
    """Consolidate results for document chunks into document results.

    :param results: Results per document chunk.
    :param docs_offsets: Chunk offsets per document. Chunks per document can be obtained with
        `results[docs_offsets[i][0]:docs_offsets[i][1]]`.
    :return Iterable[_TaskResult]: Results per document.
    """

extract(docs)

Extract all values from doc instances that are to be injected into the prompts.

Parameters:

Name Type Description Default
docs Iterable[Doc]

Docs to extract values from.

required

Returns:

Type Description
Iterable[dict[str, Any]]

All values from doc instances that are to be injected into the prompts

Source code in sieves/tasks/predictive/bridges.py
def extract(self, docs: Iterable[Doc]) -> Iterable[dict[str, Any]]:
    """Extract all values from doc instances that are to be injected into the prompts.

    :param docs: Docs to extract values from.
    :return Iterable[dict[str, Any]]: All values from doc instances that are to be injected into the prompts
    """
    return ({"text": doc.text if doc.text else None} for doc in docs)

integrate(results, docs) abstractmethod

Integrate results into Doc instances.

Parameters:

Name Type Description Default
results Iterable[TaskResult]

Results from prompt executable.

required
docs Iterable[Doc]

Doc instances to update.

required

Returns:

Type Description
Iterable[Doc]

Updated doc instances.

Source code in sieves/tasks/predictive/bridges.py
@abc.abstractmethod
def integrate(self, results: Iterable[TaskResult], docs: Iterable[Doc]) -> Iterable[Doc]:
    """Integrate results into Doc instances.

    :param results: Results from prompt executable.
    :param docs: Doc instances to update.
    :return Iterable[Doc]: Updated doc instances.
    """

DSPyClassification

Bases: ClassificationBridge[PromptSignature, Result, InferenceMode]

DSPy bridge for classification.

Source code in sieves/tasks/predictive/classification/bridges.py
class DSPyClassification(ClassificationBridge[dspy_.PromptSignature, dspy_.Result, dspy_.InferenceMode]):
    """DSPy bridge for classification."""

    @override
    @property
    def _default_prompt_instructions(self) -> str:
        if self._multi_label:
            return f"""
            Multi-label classification of the provided text given the labels {self._labels}.
            For each label, provide the confidence with which you believe that the provided text should be assigned
            this label. A confidence of 1.0 means that this text should absolutely be assigned this label. 0 means the
            opposite. Confidence per label should always be between 0 and 1. Confidence across labels does not have to
            add up to 1.

            {self._get_label_descriptions()}
            """

        return f"""
        Single-label classification of the provided text given the labels {self._labels}.
        Return the label that is the best fit for the provided text with the corresponding confidence.
        Exactly one label must be returned. Provide label as simple string, not as list.
        {self._get_label_descriptions()}
        """

    @override
    @property
    def _prompt_example_template(self) -> str | None:
        return None

    @override
    @property
    def _prompt_conclusion(self) -> str | None:
        return None

    @override
    @cached_property
    def prompt_signature(self) -> type[dspy_.PromptSignature]:
        labels = self._labels
        LabelType = Literal[*labels]  # type: ignore[valid-type]

        if self._multi_label:

            class MultiLabelTextClassification(dspy.Signature):  # type: ignore[misc]
                text: str = dspy.InputField(description="Text to classify.")
                confidence_per_label: dict[LabelType, float] = dspy.OutputField(
                    description="Confidence per label that text should be classified with this label."
                )

            cls = MultiLabelTextClassification

        else:

            class SingleLabelTextClassification(dspy.Signature):  # type: ignore[misc]
                text: str = dspy.InputField(description="Text to classify.")
                label: LabelType = dspy.OutputField(
                    description="Correct label for the provided text. You MUST NOT provide a list for this attribute. "
                    "This a single label. Do not wrap this label in []."
                )
                confidence: float = dspy.OutputField(
                    description="Confidence that this label is correct as a float between 0 and 1."
                )

            cls = SingleLabelTextClassification

        cls.__doc__ = jinja2.Template(self._prompt_instructions).render()

        return cls

    @override
    @property
    def inference_mode(self) -> dspy_.InferenceMode:
        return dspy_.InferenceMode.chain_of_thought

    @override
    def integrate(self, results: Iterable[dspy_.Result], docs: Iterable[Doc]) -> Iterable[Doc]:
        for doc, result in zip(docs, results):
            assert len(result.completions.confidence_per_label) == 1
            sorted_preds = sorted(
                ((label, score) for label, score in result.completions.confidence_per_label[0].items()),
                key=lambda x: x[1],
                reverse=True,
            )
            doc.results[self._task_id] = sorted_preds

            if not self._multi_label:
                if isinstance(sorted_preds, list) and len(sorted_preds) > 0:
                    doc.results[self._task_id] = sorted_preds[0]

        return docs

    @override
    def consolidate(
        self, results: Iterable[dspy_.Result], docs_offsets: list[tuple[int, int]]
    ) -> Iterable[dspy_.Result]:
        results = list(results)

        # Determine label scores for chunks per document.
        for doc_offset in docs_offsets:
            label_scores: dict[str, float] = {label: 0.0 for label in self._labels}
            doc_results = results[doc_offset[0] : doc_offset[1]]

            for res in doc_results:
                # Clamp score to range between 0 and 1. Alternatively we could force this in the prompt signature,
                # but this fails occasionally with some models and feels too strict.
                if self._multi_label:
                    for label, score in res.confidence_per_label.items():
                        label_scores[label] += max(0, min(score, 1))
                else:
                    label_scores[res.label] += max(0, min(res.confidence, 1))

            sorted_label_scores: list[dict[str, str | float]] = sorted(
                (
                    {"label": label, "score": score / (doc_offset[1] - doc_offset[0])}
                    for label, score in label_scores.items()
                ),
                key=lambda x: x["score"],
                reverse=True,
            )

            yield dspy.Prediction.from_completions(
                {
                    "confidence_per_label": [{sls["label"]: sls["score"] for sls in sorted_label_scores}],
                    "reasoning": [str([res.reasoning for res in doc_results])],
                },
                signature=self.prompt_signature,
            )
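
The clamp-and-average consolidation above can be illustrated with plain numbers; this standalone sketch involves no dspy objects.

# Per-label confidences for two chunks of one document, as the bridge receives them.
chunk_results = [
    {"science": 0.9, "sports": 0.2},
    {"science": 1.3, "sports": 0.1},  # out-of-range score gets clamped to 1.0
]

label_scores = {"science": 0.0, "sports": 0.0}
for res in chunk_results:
    for label, score in res.items():
        label_scores[label] += max(0, min(score, 1))

averaged = {label: total / len(chunk_results) for label, total in label_scores.items()}
# averaged ≈ {"science": 0.95, "sports": 0.15}: science ranks first for the document.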

prompt_template property

Return prompt template.

Chains _prompt_instructions, _prompt_example_template and _prompt_conclusion.

Note: different engines have different expectations as to how a prompt should look. E.g. outlines supports the Jinja 2 templating format for inserting values and few-shot examples, whereas DSPy handles these in a different part of its workflow and hence expects the prompt not to include them. Mind engine-specific expectations when creating a prompt template.

Returns:

Type Description
str

Prompt template as string. None if not used by engine.

__init__(task_id, prompt_instructions, labels, multi_label, label_descriptions=None)

Initialize ClassificationBridge.

Parameters:

Name Type Description Default
task_id str

Task ID.

required
prompt_instructions str | None

Custom prompt instructions. If None, default instructions are used.

required
labels list[str]

Labels to classify.

required
multi_label bool

If True, task returns confidence scores for all specified labels. If False, task returns most likely class label. In the latter case label forcing mechanisms are utilized, which can lead to higher accuracy.

required
label_descriptions dict[str, str] | None

Optional descriptions for each label.

None
Source code in sieves/tasks/predictive/classification/bridges.py
def __init__(
    self,
    task_id: str,
    prompt_instructions: str | None,
    labels: list[str],
    multi_label: bool,
    label_descriptions: dict[str, str] | None = None,
):
    """Initialize InformationExtractionBridge.

    :param task_id: Task ID.
    :param prompt_instructions: Custom prompt instructions. If None, default instructions are used.
    :param labels: Labels to classify.
    :param multi_label: If True, task returns confidence scores for all specified labels. If False, task returns
        most likely class label. In the latter case label forcing mechanisms are utilized, which can lead to higher
        accuracy.
    :param label_descriptions: Optional descriptions for each label.
    """
    super().__init__(
        task_id=task_id,
        prompt_instructions=prompt_instructions,
        overwrite=False,
    )
    self._labels = labels
    self._multi_label = multi_label
    self._label_descriptions = label_descriptions or {}

extract(docs)

Extract all values from doc instances that are to be injected into the prompts.

Parameters:

Name Type Description Default
docs Iterable[Doc]

Docs to extract values from.

required

Returns:

Type Description
Iterable[dict[str, Any]]

All values from doc instances that are to be injected into the prompts

Source code in sieves/tasks/predictive/bridges.py
110
111
112
113
114
115
116
def extract(self, docs: Iterable[Doc]) -> Iterable[dict[str, Any]]:
    """Extract all values from doc instances that are to be injected into the prompts.

    :param docs: Docs to extract values from.
    :return Iterable[dict[str, Any]]: All values from doc instances that are to be injected into the prompts
    """
    return ({"text": doc.text if doc.text else None} for doc in docs)

HuggingFaceClassification

Bases: ClassificationBridge[list[str], Result, InferenceMode]

HuggingFace bridge for classification.

Source code in sieves/tasks/predictive/classification/bridges.py
class HuggingFaceClassification(ClassificationBridge[list[str], huggingface_.Result, huggingface_.InferenceMode]):
    """HuggingFace bridge for classification."""

    @override
    @property
    def _default_prompt_instructions(self) -> str:
        return f"""
        This text is about {{}}.
        {self._get_label_descriptions()}
        """

    @override
    @property
    def _prompt_example_template(self) -> str | None:
        if self._multi_label:
            return """
            {% if examples|length > 0 -%}

                Examples:
                <examples>
                {%- for example in examples %}
                    <example>
                        <text>{{ example.text }}</text>
                        <output>
                            <reasoning>{{ example.reasoning }}</reasoning>
                            {%- for l, s in example.confidence_per_label.items() %}
                            <label_score>
                                <label>{{ l }}</label><score>{{ s }}</score>
                            </label_score>{% endfor %}
                        </output>
                    </example>
                {% endfor %}</examples>
            {% endif %}
            """

        return """
        {% if examples|length > 0 -%}

        Examples:
        <examples>
        {%- for example in examples %}
            <example>
                <text>{{ example.text }}</text>
                <reasoning>{{ example.reasoning }}</reasoning>
                <output>
                    <label>{{ example.label }}</label><score>{{ example.confidence }}</score>
                </output>
            </example>
        {% endfor -%}
        </examples>
        {% endif -%}
        """

    @override
    @property
    def _prompt_conclusion(self) -> str | None:
        return None

    @override
    @property
    def prompt_signature(self) -> list[str]:
        return self._labels

    @override
    @property
    def inference_mode(self) -> huggingface_.InferenceMode:
        return huggingface_.InferenceMode.zeroshot_cls

    @override
    def integrate(self, results: Iterable[huggingface_.Result], docs: Iterable[Doc]) -> Iterable[Doc]:
        for doc, result in zip(docs, results):
            doc.results[self._task_id] = [(label, score) for label, score in zip(result["labels"], result["scores"])]

            if not self._multi_label:
                if isinstance(doc.results[self._task_id], list) and len(doc.results[self._task_id]) > 0:
                    doc.results[self._task_id] = doc.results[self._task_id][0]
        return docs

    @override
    def consolidate(
        self, results: Iterable[huggingface_.Result], docs_offsets: list[tuple[int, int]]
    ) -> Iterable[huggingface_.Result]:
        results = list(results)

        # Determine label scores for chunks per document.
        for doc_offset in docs_offsets:
            label_scores: dict[str, float] = {label: 0.0 for label in self._labels}

            for res in results[doc_offset[0] : doc_offset[1]]:
                for label, score in zip(res["labels"], res["scores"]):
                    assert isinstance(label, str)
                    assert isinstance(score, float)
                    label_scores[label] += score

            # Average score, sort by it in descending order.
            sorted_label_scores: list[dict[str, str | float]] = sorted(
                (
                    {"label": label, "score": score / (doc_offset[1] - doc_offset[0])}
                    for label, score in label_scores.items()
                ),
                key=lambda x: x["score"],
                reverse=True,
            )
            yield {
                "labels": [rec["label"] for rec in sorted_label_scores],  # type: ignore[dict-item]
                "scores": [rec["score"] for rec in sorted_label_scores],  # type: ignore[dict-item]
            }
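
A hedged sketch of what consolidate() computes for zero-shot HuggingFace results, assuming two chunk-level results belonging to a single document:

# Two chunk-level results for one document, i.e. docs_offsets=[(0, 2)].
chunk_results = [
    {"labels": ["sports", "science"], "scores": [0.9, 0.1]},
    {"labels": ["sports", "science"], "scores": [0.7, 0.3]},
]
merged = list(bridge.consolidate(chunk_results, docs_offsets=[(0, 2)]))
# -> [{"labels": ["sports", "science"], "scores": [0.8, 0.2]}]  (per-label mean, sorted descending)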

prompt_template property

Return prompt template.

Chains _prompt_instructions, _prompt_example_template and _prompt_conclusion.

Note: different engines have different expectations of what a prompt should look like. E.g. outlines supports the Jinja2 templating format for inserting values and few-shot examples, whereas DSPy handles these in a separate part of its workflow and hence expects the prompt not to include them. Mind engine-specific expectations when creating a prompt template.

Returns:

Type Description
str

Prompt template as string. None if not used by engine.

__init__(task_id, prompt_instructions, labels, multi_label, label_descriptions=None)

Initialize ClassificationBridge.

Parameters:

Name Type Description Default
task_id str

Task ID.

required
prompt_instructions str | None

Custom prompt instructions. If None, default instructions are used.

required
labels list[str]

Labels to classify.

required
multi_label bool

If True, task returns confidence scores for all specified labels. If False, task returns most likely class label. In the latter case label forcing mechanisms are utilized, which can lead to higher accuracy.

required
label_descriptions dict[str, str] | None

Optional descriptions for each label.

None
Source code in sieves/tasks/predictive/classification/bridges.py, lines 30-55
def __init__(
    self,
    task_id: str,
    prompt_instructions: str | None,
    labels: list[str],
    multi_label: bool,
    label_descriptions: dict[str, str] | None = None,
):
    """Initialize InformationExtractionBridge.

    :param task_id: Task ID.
    :param prompt_instructions: Custom prompt instructions. If None, default instructions are used.
    :param labels: Labels to classify.
    :param multi_label: If True, task returns confidence scores for all specified labels. If False, task returns
        most likely class label. In the latter case label forcing mechanisms are utilized, which can lead to higher
        accuracy.
    :param label_descriptions: Optional descriptions for each label.
    """
    super().__init__(
        task_id=task_id,
        prompt_instructions=prompt_instructions,
        overwrite=False,
    )
    self._labels = labels
    self._multi_label = multi_label
    self._label_descriptions = label_descriptions or {}

extract(docs)

Extract all values from doc instances that are to be injected into the prompts.

Parameters:

Name Type Description Default
docs Iterable[Doc]

Docs to extract values from.

required

Returns:

Type Description
Iterable[dict[str, Any]]

All values from doc instances that are to be injected into the prompts

Source code in sieves/tasks/predictive/bridges.py, lines 110-116
def extract(self, docs: Iterable[Doc]) -> Iterable[dict[str, Any]]:
    """Extract all values from doc instances that are to be injected into the prompts.

    :param docs: Docs to extract values from.
    :return Iterable[dict[str, Any]]: All values from doc instances that are to be injected into the prompts
    """
    return ({"text": doc.text if doc.text else None} for doc in docs)

LangChainClassification

Bases: PydanticBasedClassification[InferenceMode]

LangChain bridge for classification.

Source code in sieves/tasks/predictive/classification/bridges.py, lines 488-494
class LangChainClassification(PydanticBasedClassification[langchain_.InferenceMode]):
    """LangChain bridge for classification."""

    @override
    @property
    def inference_mode(self) -> langchain_.InferenceMode:
        return langchain_.InferenceMode.structured

prompt_template property

Return prompt template.

Chains _prompt_instructions, _prompt_example_template and _prompt_conclusion.

Note: different engines have different expectations of what a prompt should look like. E.g. outlines supports the Jinja2 templating format for inserting values and few-shot examples, whereas DSPy handles these in a separate part of its workflow and hence expects the prompt not to include them. Mind engine-specific expectations when creating a prompt template.

Returns:

Type Description
str

Prompt template as string. None if not used by engine.

__init__(task_id, prompt_instructions, labels, multi_label, label_descriptions=None)

Initialize ClassificationBridge.

Parameters:

Name Type Description Default
task_id str

Task ID.

required
prompt_instructions str | None

Custom prompt instructions. If None, default instructions are used.

required
labels list[str]

Labels to classify.

required
multi_label bool

If True, task returns confidence scores for all specified labels. If False, task returns most likely class label. In the latter case label forcing mechanisms are utilized, which can lead to higher accuracy.

required
label_descriptions dict[str, str] | None

Optional descriptions for each label.

None
Source code in sieves/tasks/predictive/classification/bridges.py, lines 30-55
def __init__(
    self,
    task_id: str,
    prompt_instructions: str | None,
    labels: list[str],
    multi_label: bool,
    label_descriptions: dict[str, str] | None = None,
):
    """Initialize InformationExtractionBridge.

    :param task_id: Task ID.
    :param prompt_instructions: Custom prompt instructions. If None, default instructions are used.
    :param labels: Labels to classify.
    :param multi_label: If True, task returns confidence scores for all specified labels. If False, task returns
        most likely class label. In the latter case label forcing mechanisms are utilized, which can lead to higher
        accuracy.
    :param label_descriptions: Optional descriptions for each label.
    """
    super().__init__(
        task_id=task_id,
        prompt_instructions=prompt_instructions,
        overwrite=False,
    )
    self._labels = labels
    self._multi_label = multi_label
    self._label_descriptions = label_descriptions or {}

extract(docs)

Extract all values from doc instances that are to be injected into the prompts.

Parameters:

Name Type Description Default
docs Iterable[Doc]

Docs to extract values from.

required

Returns:

Type Description
Iterable[dict[str, Any]]

All values from doc instances that are to be injected into the prompts

Source code in sieves/tasks/predictive/bridges.py, lines 110-116
def extract(self, docs: Iterable[Doc]) -> Iterable[dict[str, Any]]:
    """Extract all values from doc instances that are to be injected into the prompts.

    :param docs: Docs to extract values from.
    :return Iterable[dict[str, Any]]: All values from doc instances that are to be injected into the prompts
    """
    return ({"text": doc.text if doc.text else None} for doc in docs)

OutlinesClassification

Bases: PydanticBasedClassificationWithLabelForcing[InferenceMode]

Outlines bridge for classification.

Source code in sieves/tasks/predictive/classification/bridges.py, lines 571-577
class OutlinesClassification(PydanticBasedClassificationWithLabelForcing[outlines_.InferenceMode]):
    """Outlines bridge for classification."""

    @override
    @property
    def inference_mode(self) -> outlines_.InferenceMode:
        return outlines_.InferenceMode.json if self._multi_label else outlines_.InferenceMode.choice
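
An illustrative, hedged sketch of the mode switch above; the constructor arguments follow the inherited __init__ documented below, and outlines_ refers to the same engine module aliased in the source:

multi = OutlinesClassification("topics", None, ["science", "sports"], multi_label=True)
single = OutlinesClassification("topics", None, ["science", "sports"], multi_label=False)
assert multi.inference_mode == outlines_.InferenceMode.json     # JSON constrained to the Pydantic signature
assert single.inference_mode == outlines_.InferenceMode.choice  # label forcing: choose among the raw labels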

prompt_template property

Return prompt template.

Chains _prompt_instructions, _prompt_example_template and _prompt_conclusion.

Note: different engines have different expectations of what a prompt should look like. E.g. outlines supports the Jinja2 templating format for inserting values and few-shot examples, whereas DSPy handles these in a separate part of its workflow and hence expects the prompt not to include them. Mind engine-specific expectations when creating a prompt template.

Returns:

Type Description
str

Prompt template as string. None if not used by engine.

__init__(task_id, prompt_instructions, labels, multi_label, label_descriptions=None)

Initialize ClassificationBridge.

Parameters:

Name Type Description Default
task_id str

Task ID.

required
prompt_instructions str | None

Custom prompt instructions. If None, default instructions are used.

required
labels list[str]

Labels to classify.

required
multi_label bool

If True, task returns confidence scores for all specified labels. If False, task returns most likely class label. In the latter case label forcing mechanisms are utilized, which can lead to higher accuracy.

required
label_descriptions dict[str, str] | None

Optional descriptions for each label.

None
Source code in sieves/tasks/predictive/classification/bridges.py, lines 30-55
def __init__(
    self,
    task_id: str,
    prompt_instructions: str | None,
    labels: list[str],
    multi_label: bool,
    label_descriptions: dict[str, str] | None = None,
):
    """Initialize InformationExtractionBridge.

    :param task_id: Task ID.
    :param prompt_instructions: Custom prompt instructions. If None, default instructions are used.
    :param labels: Labels to classify.
    :param multi_label: If True, task returns confidence scores for all specified labels. If False, task returns
        most likely class label. In the latter case label forcing mechanisms are utilized, which can lead to higher
        accuracy.
    :param label_descriptions: Optional descriptions for each label.
    """
    super().__init__(
        task_id=task_id,
        prompt_instructions=prompt_instructions,
        overwrite=False,
    )
    self._labels = labels
    self._multi_label = multi_label
    self._label_descriptions = label_descriptions or {}

extract(docs)

Extract all values from doc instances that are to be injected into the prompts.

Parameters:

Name Type Description Default
docs Iterable[Doc]

Docs to extract values from.

required

Returns:

Type Description
Iterable[dict[str, Any]]

All values from doc instances that are to be injected into the prompts

Source code in sieves/tasks/predictive/bridges.py, lines 110-116
def extract(self, docs: Iterable[Doc]) -> Iterable[dict[str, Any]]:
    """Extract all values from doc instances that are to be injected into the prompts.

    :param docs: Docs to extract values from.
    :return Iterable[dict[str, Any]]: All values from doc instances that are to be injected into the prompts
    """
    return ({"text": doc.text if doc.text else None} for doc in docs)

PydanticBasedClassification

Bases: ClassificationBridge[BaseModel | list[str], BaseModel | str, EngineInferenceMode], ABC

Base class for Pydantic-based classification bridges.

Source code in sieves/tasks/predictive/classification/bridges.py, lines 315-485
class PydanticBasedClassification(
    ClassificationBridge[pydantic.BaseModel | list[str], pydantic.BaseModel | str, EngineInferenceMode], abc.ABC
):
    """Base class for Pydantic-based classification bridges."""

    @override
    @property
    def _default_prompt_instructions(self) -> str:
        if self._multi_label:
            return (
                f"""
            Perform multi-label classification of the provided text given the provided labels: {",".join(self._labels)}.
            {self._get_label_descriptions()}"""
                + """
            For each label, provide the confidence with which you believe that the provided text should be assigned
            this label. A confidence of 1.0 means that this text should absolutely be assigned this label. 0 means the
            opposite. Confidence per label should ALWAYS be between 0 and 1. Provide the reasoning for your decision.

            The output for two labels LABEL_1 and LABEL_2 should look like this:
            <output>
                <reasoning>REASONING</reasoning>
                <label_score><label>LABEL_1</label><score>CONFIDENCE_SCORE_1</score></label_score>
                <label_score><label>LABEL_2</label><score>CONFIDENCE_SCORE_2</score></label_score>
            </output>
            """
            )

        return f"""
        Classify the provided text. Your classification must match one of these labels: {",".join(self._labels)}.
        {self._get_label_descriptions()}
        Also provide a confidence score reflecting how likely it is that your chosen label is the correct
        fit for the text.

        The output for two labels LABEL_1 and LABEL_2 should look like this:
        <output>
            <reasoning>REASONING</reasoning>
            <label>LABEL_1</label>
            <score>CONFIDENCE_SCORE_1</score>
        </output>
        """

    @override
    @property
    def _prompt_example_template(self) -> str | None:
        if self._multi_label:
            return """
            {% if examples|length > 0 -%}
                Examples:
                <examples>
                {%- for example in examples %}
                    <example>
                        <text>{{ example.text }}</text>
                        <output>
                            <reasoning>{{ example.reasoning }}</reasoning>
                            {%- for l, s in example.confidence_per_label.items() %}
                            <label_score><label>{{ l }}</label><score>{{ s }}</score></label_score>{% endfor %}
                        </output>
                    </example>
                {% endfor %}</examples>
            {% endif %}
            """

        return """
        {% if examples|length > 0 -%}
            Examples:
            <examples>
            {%- for example in examples %}
                <example>
                    <text>{{ example.text }}</text>
                    <output>
                        <reasoning>{{ example.reasoning }}</reasoning>
                        <label>{{ example.label }}</label>
                        <score>{{ example.confidence }}</score>
                    </output>
                </example>
            {% endfor %}</examples>
        {% endif %}
        """

    @override
    @property
    def _prompt_conclusion(self) -> str | None:
        return """
        ========

        <text>{{ text }}</text>
        <output>
        """

    @override
    @cached_property
    def prompt_signature(self) -> type[pydantic.BaseModel] | list[str]:
        if self._multi_label:
            prompt_sig = pydantic.create_model(  # type: ignore[call-overload]
                "MultilabelClassification",
                __base__=pydantic.BaseModel,
                __doc__="Result of multi-label classification.",
                reasoning=(str, ...),
                **{label: (float, ...) for label in self._labels},
            )
        else:
            labels = self._labels
            LabelType = Literal[*labels]  # type: ignore[valid-type]

            class SingleLabelClassification(pydantic.BaseModel):
                """Result of single-label classification."""

                reasoning: str
                label: LabelType
                score: float

            prompt_sig = SingleLabelClassification

        assert isinstance(prompt_sig, type) and issubclass(prompt_sig, pydantic.BaseModel)
        return prompt_sig

    @override
    def integrate(self, results: Iterable[pydantic.BaseModel | str], docs: Iterable[Doc]) -> Iterable[Doc]:
        for doc, result in zip(docs, results):
            if self._multi_label:
                assert isinstance(result, pydantic.BaseModel)
                label_scores = {k: v for k, v in result.model_dump().items() if k != "reasoning"}
                doc.results[self._task_id] = sorted(
                    ((label, score) for label, score in label_scores.items()), key=lambda x: x[1], reverse=True
                )
            else:
                assert hasattr(result, "label") and hasattr(result, "score")
                doc.results[self._task_id] = (result.label, result.score)

        return docs

    @override
    def consolidate(
        self, results: Iterable[pydantic.BaseModel | str], docs_offsets: list[tuple[int, int]]
    ) -> Iterable[pydantic.BaseModel | str]:
        results = list(results)

        # Determine label scores for chunks per document.
        reasonings: list[str] = []
        for doc_offset in docs_offsets:
            label_scores: dict[str, float] = {label: 0.0 for label in self._labels}
            doc_results = results[doc_offset[0] : doc_offset[1]]

            for res in doc_results:
                if res is None:
                    continue  # type: ignore[unreachable]

                assert hasattr(res, "reasoning")
                reasonings.append(res.reasoning)
                # We clamp the score to 0 <= x <= 1. Alternatively we could force this in the prompt signature, but
                # this fails occasionally with some models and feels too strict.
                if self._multi_label:
                    for label in self._labels:
                        label_scores[label] += max(0, min(getattr(res, label), 1))
                else:
                    label_scores[getattr(res, "label")] += max(0, min(getattr(res, "score"), 1))

            avg_label_scores = {label: score / (doc_offset[1] - doc_offset[0]) for label, score in label_scores.items()}
            prompt_signature = self.prompt_signature
            assert issubclass(prompt_signature, pydantic.BaseModel)  # type: ignore[arg-type]
            assert callable(prompt_signature)

            if self._multi_label:
                yield prompt_signature(reasoning=str(reasonings), **avg_label_scores)
            else:
                max_score_label = max(avg_label_scores, key=avg_label_scores.__getitem__)
                yield prompt_signature(
                    reasoning=str(reasonings),
                    label=max_score_label,
                    score=avg_label_scores[max_score_label],
                )
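
A hedged sketch of the multi-label prompt signature that prompt_signature builds dynamically via pydantic.create_model, assuming labels ["science", "sports"]; the hand-written model below is roughly equivalent to the generated one:

import pydantic

class MultilabelClassification(pydantic.BaseModel):
    """Result of multi-label classification."""

    reasoning: str
    science: float
    sports: float

result = MultilabelClassification(reasoning="Mentions a telescope launch.", science=0.92, sports=0.03)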

inference_mode abstractmethod property

Return inference mode.

Returns:

Type Description
EngineInferenceMode

Inference mode.

prompt_template property

Return prompt template.

Chains _prompt_instructions, _prompt_example_template and _prompt_conclusion.

Note: different engines have different expectations of what a prompt should look like. E.g. outlines supports the Jinja2 templating format for inserting values and few-shot examples, whereas DSPy handles these in a separate part of its workflow and hence expects the prompt not to include them. Mind engine-specific expectations when creating a prompt template.

Returns:

Type Description
str

Prompt template as string. None if not used by engine.
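
Since the Pydantic-based bridges assemble their prompts from Jinja2 templates (instructions, example template, conclusion), here is a hedged sketch of how such a template could be rendered. The engines perform this step internally; the variable names examples and text mirror those used in the templates above, and an installed jinja2 package is assumed.

import jinja2

template = jinja2.Template(bridge.prompt_template)
prompt = template.render(
    examples=[],  # few-shot examples, if any
    text="A new telescope was launched into orbit.",
)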

__init__(task_id, prompt_instructions, labels, multi_label, label_descriptions=None)

Initialize ClassificationBridge.

Parameters:

Name Type Description Default
task_id str

Task ID.

required
prompt_instructions str | None

Custom prompt instructions. If None, default instructions are used.

required
labels list[str]

Labels to classify.

required
multi_label bool

If True, task returns confidence scores for all specified labels. If False, task returns most likely class label. In the latter case label forcing mechanisms are utilized, which can lead to higher accuracy.

required
label_descriptions dict[str, str] | None

Optional descriptions for each label.

None
Source code in sieves/tasks/predictive/classification/bridges.py, lines 30-55
def __init__(
    self,
    task_id: str,
    prompt_instructions: str | None,
    labels: list[str],
    multi_label: bool,
    label_descriptions: dict[str, str] | None = None,
):
    """Initialize InformationExtractionBridge.

    :param task_id: Task ID.
    :param prompt_instructions: Custom prompt instructions. If None, default instructions are used.
    :param labels: Labels to classify.
    :param multi_label: If True, task returns confidence scores for all specified labels. If False, task returns
        most likely class label. In the latter case label forcing mechanisms are utilized, which can lead to higher
        accuracy.
    :param label_descriptions: Optional descriptions for each label.
    """
    super().__init__(
        task_id=task_id,
        prompt_instructions=prompt_instructions,
        overwrite=False,
    )
    self._labels = labels
    self._multi_label = multi_label
    self._label_descriptions = label_descriptions or {}

extract(docs)

Extract all values from doc instances that are to be injected into the prompts.

Parameters:

Name Type Description Default
docs Iterable[Doc]

Docs to extract values from.

required

Returns:

Type Description
Iterable[dict[str, Any]]

All values from doc instances that are to be injected into the prompts

Source code in sieves/tasks/predictive/bridges.py, lines 110-116
def extract(self, docs: Iterable[Doc]) -> Iterable[dict[str, Any]]:
    """Extract all values from doc instances that are to be injected into the prompts.

    :param docs: Docs to extract values from.
    :return Iterable[dict[str, Any]]: All values from doc instances that are to be injected into the prompts
    """
    return ({"text": doc.text if doc.text else None} for doc in docs)

PydanticBasedClassificationWithLabelForcing

Bases: PydanticBasedClassification[EngineInferenceMode], ABC

Base class for Pydantic-based classification bridges with label forcing.

Source code in sieves/tasks/predictive/classification/bridges.py, lines 497-568
class PydanticBasedClassificationWithLabelForcing(PydanticBasedClassification[EngineInferenceMode], abc.ABC):
    """Base class for Pydantic-based classification bridges with label forcing."""

    @override
    @cached_property
    def prompt_signature(self) -> type[pydantic.BaseModel] | list[str]:
        return super().prompt_signature if self._multi_label else self._labels

    @override
    @property
    def _default_prompt_instructions(self) -> str:
        if self._multi_label:
            return super()._default_prompt_instructions

        return f"""
        Perform single-label classification of the provided text given the provided labels: {",".join(self._labels)}.
        {self._get_label_descriptions()}

        Provide the best-fitting label for given text.

        The output for two labels LABEL_1 and LABEL_2 should look like this:
        <output>
            <reasoning>REASONING</reasoning>
            <label>LABEL_1</label>
        </output>
        """

    @override
    @property
    def _prompt_example_template(self) -> str | None:
        if self._multi_label:
            return super()._prompt_example_template

        return """
        {% if examples|length > 0 -%}
            Examples:
            <examples>
            {%- for example in examples %}
                <example>
                    <text>{{ example.text }}</text>
                    <output>
                        <reasoning>{{ example.reasoning }}</reasoning>
                        <label>{{ example.label }}</label>
                    </output>
                </example>
            {% endfor %}</examples>
        {% endif %}
        """

    @override
    def integrate(self, results: Iterable[pydantic.BaseModel | str], docs: Iterable[Doc]) -> Iterable[Doc]:
        if self._multi_label:
            return super().integrate(results, docs)

        for doc, result in zip(docs, results):
            doc.results[self._task_id] = result
        return docs

    @override
    def consolidate(
        self, results: Iterable[pydantic.BaseModel | str], docs_offsets: list[tuple[int, int]]
    ) -> Iterable[pydantic.BaseModel | str]:
        if self._multi_label:
            yield from super().consolidate(results, docs_offsets)

        else:
            # Determine label scores for chunks per document.
            results = list(results)
            for doc_offset in docs_offsets:
                doc_results = results[doc_offset[0] : doc_offset[1]]
                label_counts = Counter(doc_results)
                yield label_counts.most_common()[0][0]
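
In the single-label case, consolidation thus reduces to a majority vote over chunk-level labels, as in this hedged sketch:

from collections import Counter

# Three chunk-level predictions for one document, i.e. docs_offsets=[(0, 3)].
chunk_labels = ["sports", "sports", "science"]
winner = Counter(chunk_labels).most_common()[0][0]
# -> "sports", which consolidate() would yield for this document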

inference_mode abstractmethod property

Return inference mode.

Returns:

Type Description
EngineInferenceMode

Inference mode.

prompt_template property

Return prompt template.

Chains _prompt_instructions, _prompt_example_template and _prompt_conclusion.

Note: different engines have different expectations of what a prompt should look like. E.g. outlines supports the Jinja2 templating format for inserting values and few-shot examples, whereas DSPy handles these in a separate part of its workflow and hence expects the prompt not to include them. Mind engine-specific expectations when creating a prompt template.

Returns:

Type Description
str

Prompt template as string. None if not used by engine.

__init__(task_id, prompt_instructions, labels, multi_label, label_descriptions=None)

Initialize ClassificationBridge.

Parameters:

Name Type Description Default
task_id str

Task ID.

required
prompt_instructions str | None

Custom prompt instructions. If None, default instructions are used.

required
labels list[str]

Labels to classify.

required
multi_label bool

If True, task returns confidence scores for all specified labels. If False, task returns most likely class label. In the latter case label forcing mechanisms are utilized, which can lead to higher accuracy.

required
label_descriptions dict[str, str] | None

Optional descriptions for each label.

None
Source code in sieves/tasks/predictive/classification/bridges.py, lines 30-55
def __init__(
    self,
    task_id: str,
    prompt_instructions: str | None,
    labels: list[str],
    multi_label: bool,
    label_descriptions: dict[str, str] | None = None,
):
    """Initialize InformationExtractionBridge.

    :param task_id: Task ID.
    :param prompt_instructions: Custom prompt instructions. If None, default instructions are used.
    :param labels: Labels to classify.
    :param multi_label: If True, task returns confidence scores for all specified labels. If False, task returns
        most likely class label. In the latter case label forcing mechanisms are utilized, which can lead to higher
        accuracy.
    :param label_descriptions: Optional descriptions for each label.
    """
    super().__init__(
        task_id=task_id,
        prompt_instructions=prompt_instructions,
        overwrite=False,
    )
    self._labels = labels
    self._multi_label = multi_label
    self._label_descriptions = label_descriptions or {}

extract(docs)

Extract all values from doc instances that are to be injected into the prompts.

Parameters:

Name Type Description Default
docs Iterable[Doc]

Docs to extract values from.

required

Returns:

Type Description
Iterable[dict[str, Any]]

All values from doc instances that are to be injected into the prompts

Source code in sieves/tasks/predictive/bridges.py, lines 110-116
def extract(self, docs: Iterable[Doc]) -> Iterable[dict[str, Any]]:
    """Extract all values from doc instances that are to be injected into the prompts.

    :param docs: Docs to extract values from.
    :return Iterable[dict[str, Any]]: All values from doc instances that are to be injected into the prompts
    """
    return ({"text": doc.text if doc.text else None} for doc in docs)