Skip to content

Task

Bases: ABC

Abstract base class for tasks that can be executed on documents.

Source code in sieves/tasks/core.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
class Task(abc.ABC):
    """Abstract base class for tasks that can be executed on documents.

    Concrete tasks implement ``__call__``. The base class stores the task ID and two flags and offers
    config-based (de-)serialization driven by ``_state``.
    """

    def __init__(self, task_id: str | None, show_progress: bool, include_meta: bool):
        """
        Initializes a new Task.
        :param task_id: Task ID. If None or empty, the name of the instance's class is used instead.
        :param show_progress: Whether to show progress bar for processed documents.
        :param include_meta: Whether to include meta information generated by the task.
        """
        self._show_progress = show_progress
        # Fall back to the class name so that the task always has a non-empty ID.
        self._task_id = task_id if task_id else self.__class__.__name__
        self._include_meta = include_meta

    @property
    def id(self) -> str:
        """Returns task ID. Used by pipeline for results and dependency management.
        :return: Task ID.
        """
        return self._task_id

    @abc.abstractmethod
    def __call__(self, docs: Iterable[Doc]) -> Iterable[Doc]:
        """Execute task.
        :param docs: Docs to process.
        :return: Processed docs.
        """

    @property
    def _state(self) -> dict[str, Any]:
        """Returns attributes to serialize.

        The keys mirror the parameter names of ``__init__``.
        :return: Dict of attributes to serialize.
        """
        return {
            "task_id": self._task_id,
            "show_progress": self._show_progress,
            "include_meta": self._include_meta,
        }

    def serialize(self) -> Config:
        """Serializes task.

        Every entry of ``_state`` is wrapped in an ``Attribute`` and passed, together with the task's class,
        to ``Config.create``.
        :return: Config instance.
        """
        return Config.create(self.__class__, {k: Attribute(value=v) for k, v in self._state.items()})

    @classmethod
    def deserialize(cls, config: Config, **kwargs: Any) -> Task:
        """Generate Task instance from config.
        :param config: Config to generate instance from.
        :param kwargs: Values to inject into loaded config.
        :return: Deserialized Task instance.
        """
        # Turn the config (plus any injected values) into constructor arguments and instantiate the class.
        return cls(**config.to_init_dict(cls, **kwargs))

_state property

Returns attributes to serialize.

Returns:

Type Description
dict[str, Any]

Dict of attributes to serialize.

id property

Returns task ID. Used by pipeline for results and dependency management.

Returns:

Type Description
str

Task ID.

__call__(docs) abstractmethod

Execute task.

Parameters:

Name Type Description Default
docs Iterable[Doc]

Docs to process.

required

Returns:

Type Description
Iterable[Doc]

Processed docs.

Source code in sieves/tasks/core.py
32
33
34
35
36
37
@abc.abstractmethod
def __call__(self, docs: Iterable[Doc]) -> Iterable[Doc]:
    """Run this task on the given docs.

    Abstract: this declaration has no body of its own, concrete tasks supply the implementation.
    :param docs: Docs to process.
    :return: Processed docs.
    """

__init__(task_id, show_progress, include_meta)

Initializes a new Task.

Parameters:

Name Type Description Default
task_id str | None

Task ID.

required
show_progress bool

Whether to show progress bar for processed documents.

required
include_meta bool

Whether to include meta information generated by the task.

required
Source code in sieves/tasks/core.py
14
15
16
17
18
19
20
21
22
23
def __init__(self, task_id: str | None, show_progress: bool, include_meta: bool):
    """
    Set up the task's ID and flags.
    :param task_id: Task ID. A falsy value (None or an empty string) is replaced by the name of the
        instance's class.
    :param show_progress: Whether to show progress bar for processed documents.
    :param include_meta: Whether to include meta information generated by the task.
    """
    # `or` picks the class name whenever no usable ID was passed in.
    self._task_id = task_id or self.__class__.__name__
    self._include_meta = include_meta
    self._show_progress = show_progress

deserialize(config, **kwargs) classmethod

Generate Task instance from config.

Parameters:

Name Type Description Default
config Config

Config to generate instance from.

required
kwargs dict[str, Any]

Values to inject into loaded config.

{}

Returns:

Type Description
Task

Deserialized Task instance.

Source code in sieves/tasks/core.py
56
57
58
59
60
61
62
63
64
@classmethod
def deserialize(cls, config: Config, **kwargs: Any) -> Task:
    """Generate Task instance from config.
    :param config: Config to generate instance from.
    :param kwargs: Values to inject into loaded config.
    :return: Deserialized Task instance.
    """
    # Turn the config (plus any injected values) into constructor arguments and instantiate the class.
    return cls(**config.to_init_dict(cls, **kwargs))

serialize()

Serializes task.

Returns:

Type Description
Config

Config instance.

Source code in sieves/tasks/core.py
50
51
52
53
54
def serialize(self) -> Config:
    """Build a config describing this task.

    Every entry of ``self._state`` is wrapped in an ``Attribute`` and passed, together with the task's
    class, to ``Config.create``.
    :return: Config instance.
    """
    attributes = {name: Attribute(value=value) for name, value in self._state.items()}
    return Config.create(self.__class__, attributes)