Hugging Face

sieves supports Hugging Face pipelines for zero-shot classification.

Usage

import transformers
from sieves import tasks

# Initialize a Hugging Face pipeline
model = transformers.pipeline(
    "zero-shot-classification",
    model="MoritzLaurer/xtremedistil-l6-h256-zeroshot-v1.1-all-33"
)

# Pass it to a task
task = tasks.Classification(
    labels=["positive", "negative"],
    model=model
)
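
The configured task can then be run as part of a pipeline. A minimal sketch, assuming the Pipeline and Doc classes exposed by the sieves top-level namespace (the exact layout of per-doc results may differ by version):

from sieves import Doc, Pipeline

docs = [Doc(text="This movie was a delight from start to finish.")]
pipeline = Pipeline([task])
docs = list(pipeline(docs))

# Each processed doc carries the classification output in its results.
print(docs[0].results)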

HuggingFace

Bases: ModelWrapper[PromptSignature, Result, Model, InferenceMode]

ModelWrapper adapter around transformers.Pipeline for zero‑shot tasks.

Source code in sieves/model_wrappers/huggingface_.py
class HuggingFace(ModelWrapper[PromptSignature, Result, Model, InferenceMode]):
    """ModelWrapper adapter around ``transformers.Pipeline`` for zero‑shot tasks."""

    @override
    @property
    def inference_modes(self) -> type[InferenceMode]:
        return InferenceMode

    @override
    @property
    def supports_few_shotting(self) -> bool:
        return True

    @override
    def build_executable(
        self,
        inference_mode: InferenceMode,
        prompt_template: str | None,
        prompt_signature: type[PromptSignature] | PromptSignature,
        fewshot_examples: Sequence[pydantic.BaseModel] = (),
    ) -> Executable[Result | None]:
        cls_name = self.__class__.__name__
        assert prompt_template, ValueError(f"prompt_template has to be provided to {cls_name} model wrapper by task.")
        assert isinstance(prompt_signature, list)

        # Render template with few-shot examples. Note that we don't use extracted document values here, as HF zero-shot
        # pipelines only support one hypothesis template per call, and we want to batch, so our hypothesis template
        # will be document-invariant.
        fewshot_examples_dict = HuggingFace.convert_fewshot_examples(fewshot_examples)
        # Render hypothesis template with everything but text.
        template = jinja2.Template(prompt_template).render(**({"examples": fewshot_examples_dict}))

        def execute(values: Sequence[dict[str, Any]]) -> Sequence[tuple[Result | None, Any, TokenUsage]]:
            """Execute prompts with model wrapper for given values.

            :param values: Values to inject into prompts.
            :return: Sequence of tuples containing results, raw outputs, and token usage.
            """
            match inference_mode:
                case InferenceMode.zeroshot_cls:
                    results = self._model(
                        sequences=[doc_values["text"] for doc_values in values],
                        candidate_labels=prompt_signature,
                        hypothesis_template=template,
                        mode="multi",
                        **self._inference_kwargs,
                    )

                    # Estimate token usage if tokenizer is available.
                    tokenizer = self._get_tokenizer()

                    final_results: list[tuple[Result, Any, TokenUsage]] = []
                    for doc_values, res in zip(values, results):
                        usage = TokenUsage(
                            input_tokens=self._count_tokens(doc_values["text"], tokenizer),
                            # For classification, we estimate output tokens based on the labels.
                            output_tokens=self._count_tokens(" ".join(res["labels"]), tokenizer),
                        )

                        final_results.append((res, res, usage))
                    return final_results

                case _:
                    raise ValueError(f"Inference mode {inference_mode} not supported by {cls_name} model wrapper.")

        return execute

    @override
    def _get_tokenizer(self) -> Any | None:
        return getattr(self._model, "tokenizer", None)
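
Note how build_executable renders the prompt template exactly once: few-shot examples are baked in, but no document text, because a Hugging Face zero-shot pipeline accepts only a single hypothesis_template per call. A minimal sketch of that rendering step, using an illustrative template string rather than the one sieves ships:

import jinja2

prompt_template = (
    "This text is about {}."
    "{% for example in examples %} Example: {{ example.text }} -> {{ example.label }}.{% endfor %}"
)
examples = [{"text": "Great acting!", "label": "positive"}]
rendered = jinja2.Template(prompt_template).render(examples=examples)

# The "{}" placeholder survives rendering; transformers fills it with each
# candidate label when scoring a document against the hypothesis template.
print(rendered)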

model property

Return model instance.

Returns:

ModelWrapperModel: Model instance.

model_settings property

Return model settings.

Returns:

ModelSettings: Model settings.

__init__(model, model_settings)

Initialize model wrapper with model and model settings.

Parameters:

model (ModelWrapperModel, required): Instantiated model instance.
model_settings (ModelSettings, required): Model settings.
Source code in sieves/model_wrappers/core.py
def __init__(self, model: ModelWrapperModel, model_settings: ModelSettings):
    """Initialize model wrapper with model and model settings.

    :param model: Instantiated model instance.
    :param model_settings: Model settings.
    """
    self._model = model
    self._model_settings = model_settings
    self._inference_kwargs = model_settings.inference_kwargs or {}
    self._init_kwargs = model_settings.init_kwargs or {}
    self._strict = model_settings.strict
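
For direct construction, the initializer reads three fields off the settings object: inference_kwargs (forwarded to each pipeline call), init_kwargs, and strict. A hedged sketch, with the ModelSettings import path and constructor signature assumed from the attributes used above:

from sieves.model_wrappers.core import ModelSettings  # import path assumed
from sieves.model_wrappers.huggingface_ import HuggingFace

settings = ModelSettings(
    inference_kwargs={"batch_size": 16},  # passed through to every pipeline call
    init_kwargs={},
    strict=True,
)
wrapper = HuggingFace(model=model, model_settings=settings)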

convert_fewshot_examples(fewshot_examples) staticmethod

Convert few‑shot examples to dicts.

Parameters:

fewshot_examples (Sequence[BaseModel], required): Fewshot examples to convert.

Returns:

list[dict[str, Any]]: Fewshot examples as dicts.

Source code in sieves/model_wrappers/core.py
@staticmethod
def convert_fewshot_examples(fewshot_examples: Sequence[pydantic.BaseModel]) -> list[dict[str, Any]]:
    """Convert few‑shot examples to dicts.

    :param fewshot_examples: Fewshot examples to convert.
    :return: Fewshot examples as dicts.
    """
    return [fs_example.model_dump(serialize_as_any=True) for fs_example in fewshot_examples]
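
For example, with a hypothetical pydantic schema for classification examples:

import pydantic

class FewshotExample(pydantic.BaseModel):  # hypothetical example schema
    text: str
    label: str

examples = [FewshotExample(text="Loved it.", label="positive")]
print(HuggingFace.convert_fewshot_examples(examples))
# [{'text': 'Loved it.', 'label': 'positive'}]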