Skip to content

Artifacts

zenml.artifacts special

Artifacts are the data that power your experimentation and model training.

It is actually steps that produce artifacts, which are then stored in the artifact store. Artifacts are written in the signature of a step like so:

    def my_step(first_artifact: int, second_artifact: torch.nn.Module) -> int:
        """Example step whose signature declares its artifacts."""
        # first_artifact is an integer
        # second_artifact is a torch.nn.Module
        return 1

Artifacts can be serialized and deserialized (i.e. written to and read from the Artifact Store) in various ways like TFRecords or saved model pickles, depending on what the step produces. The serialization and deserialization logic of artifacts is defined by the appropriate Materializer.

base_artifact

Base class for ZenML artifacts.

The below code is copied from the TFX source repo with minor changes. All credits go to the TFX team for the core implementation.

BaseArtifact (Artifact)

Base class for all ZenML artifacts.

Every implementation of an artifact needs to inherit this class.

While inheriting from this class there are a few things to consider:

  • Upon creation, each artifact class needs to be given a unique TYPE_NAME.
  • Your artifact can feature different properties under the parameter PROPERTIES which will be tracked throughout your pipeline runs.
Source code in zenml/artifacts/base_artifact.py
class BaseArtifact(Artifact):
    """Common parent class of every ZenML artifact.

    All artifact implementations are expected to derive from this class.

    Keep the following in mind when subclassing:

    - Each artifact class has to declare its own, unique TYPE_NAME.
    - Entries listed under PROPERTIES are registered on the artifact type
        and will be tracked throughout your pipeline runs.
    """

    # Name under which the ML Metadata artifact type is created.
    TYPE_NAME: str = "BaseArtifact"  # type: ignore[assignment]
    # Properties attached to the artifact type, keyed by property name.
    PROPERTIES: Dict[str, Property] = {  # type: ignore[assignment]
        MATERIALIZER_PROPERTY_KEY: MATERIALIZER_PROPERTY,
        DATATYPE_PROPERTY_KEY: DATATYPE_PROPERTY,
    }
    # Populated by `set_zenml_artifact_type`.
    _MLMD_ARTIFACT_TYPE: Any = None

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """Create the artifact after (re)building its artifact type.

        Args:
            *args: Positional arguments passed on to the parent class.
            **kwargs: Keyword arguments passed on to the parent class.
        """
        # The artifact type must be in place before the parent initializer
        # runs, so this call comes first.
        self.set_zenml_artifact_type()
        super().__init__(*args, **kwargs)

    @classmethod
    def set_zenml_artifact_type(cls) -> None:
        """Build the ML Metadata artifact type and store it on the class.

        The resulting type is named after TYPE_NAME and carries one
        property per entry of PROPERTIES. It is assigned to
        `_MLMD_ARTIFACT_TYPE` only after all validation has passed.

        Raises:
            ValueError: If TYPE_NAME is empty or not a string, if a non-empty
                PROPERTIES is not a dictionary, or if PROPERTIES contains a
                key that is neither `str` nor `bytes` or a value that is not
                a `Property`.
        """
        name = cls.TYPE_NAME
        if not name or not isinstance(name, str):
            raise ValueError(
                (
                    "The Artifact subclass %s must override the TYPE_NAME attribute "
                    "with a string type name identifier (got %r instead)."
                )
                % (cls, name)
            )

        mlmd_type = metadata_store_pb2.ArtifactType()
        mlmd_type.name = name

        declared = cls.PROPERTIES
        if declared:
            if not isinstance(declared, dict):
                raise ValueError(
                    "Artifact subclass %s.PROPERTIES is not a dictionary." % cls
                )

            # Validate every entry before any property is converted.
            entries_ok = all(
                isinstance(prop_name, (str, bytes))
                and isinstance(prop, Property)
                for prop_name, prop in declared.items()
            )
            if not entries_ok:
                raise ValueError(
                    (
                        "Artifact subclass %s.PROPERTIES dictionary must have keys of "
                        "type string and values of type artifact.Property."
                    )
                    % cls
                )

            # Copy the validated properties onto the ML Metadata type.
            for prop_name, prop in declared.items():
                mlmd_type.properties[
                    prop_name
                ] = prop.mlmd_type()  # type: ignore[no-untyped-call]

        cls._MLMD_ARTIFACT_TYPE = mlmd_type
__init__(self, *args, **kwargs) special

Init method for BaseArtifact.

Parameters:

Name Type Description Default
*args Any

Positional arguments.

()
**kwargs Any

Keyword arguments.

{}
Source code in zenml/artifacts/base_artifact.py
def __init__(self, *args: Any, **kwargs: Any) -> None:
    """Init method for BaseArtifact.

    Rebuilds the class-level artifact type via `set_zenml_artifact_type`
    and then hands all arguments to the parent constructor.

    Args:
        *args: Positional arguments, forwarded unchanged to the parent class.
        **kwargs: Keyword arguments, forwarded unchanged to the parent class.
    """
    # Runs on every instantiation, before the parent initializer.
    self.set_zenml_artifact_type()
    super(BaseArtifact, self).__init__(*args, **kwargs)
set_zenml_artifact_type() classmethod

Set the type of the artifact.

Exceptions:

Type Description
ValueError

If TYPE_NAME is not a non-empty string, or if PROPERTIES is not a dictionary with string keys and Property values.

Source code in zenml/artifacts/base_artifact.py
@classmethod
def set_zenml_artifact_type(cls) -> None:
    """Build the ML Metadata artifact type and store it on the class.

    The type is named after `cls.TYPE_NAME` and receives one property per
    entry in `cls.PROPERTIES`. `cls._MLMD_ARTIFACT_TYPE` is only assigned
    once every check has passed.

    Raises:
        ValueError: If TYPE_NAME is empty or not a string, if a non-empty
            PROPERTIES is not a dictionary, or if PROPERTIES contains a key
            that is neither `str` nor `bytes` or a value that is not a
            `Property`.
    """
    name = cls.TYPE_NAME
    if not name or not isinstance(name, str):
        raise ValueError(
            (
                "The Artifact subclass %s must override the TYPE_NAME attribute "
                "with a string type name identifier (got %r instead)."
            )
            % (cls, name)
        )

    mlmd_type = metadata_store_pb2.ArtifactType()
    mlmd_type.name = name

    declared = cls.PROPERTIES
    if declared:
        if not isinstance(declared, dict):
            raise ValueError(
                "Artifact subclass %s.PROPERTIES is not a dictionary." % cls
            )

        # First pass: reject the whole dictionary on the first bad entry.
        for prop_name, prop in declared.items():
            if not isinstance(prop_name, (str, bytes)) or not isinstance(
                prop, Property
            ):
                raise ValueError(
                    (
                        "Artifact subclass %s.PROPERTIES dictionary must have keys of "
                        "type string and values of type artifact.Property."
                    )
                    % cls
                )

        # Second pass: copy the validated properties onto the type.
        for prop_name, prop in declared.items():
            mlmd_type.properties[
                prop_name
            ] = prop.mlmd_type()  # type: ignore[no-untyped-call]

    cls._MLMD_ARTIFACT_TYPE = mlmd_type

constants

Constants for ZenML artifacts.

data_analysis_artifact

Class for all ZenML data analysis artifacts.

DataAnalysisArtifact (BaseArtifact)

Class for all ZenML data analysis artifacts.

This should act as a base class for all artifacts generated from processes such as data profiling, data drift analyses, model drift detection etc.

Source code in zenml/artifacts/data_analysis_artifact.py
class DataAnalysisArtifact(BaseArtifact):
    """Class for all ZenML data analysis artifacts.

    This should act as a base class for all artifacts generated from
    processes such as data profiling, data drift analyses, model drift
    detection etc.

    The class body only overrides `TYPE_NAME`; everything else is inherited
    from `BaseArtifact`.
    """

    # Type name for this artifact class.
    TYPE_NAME = ArtifactType.DATAANALYSIS

data_artifact

Class for all ZenML data artifacts.

DataArtifact (BaseArtifact)

Class for all ZenML data artifacts.

Source code in zenml/artifacts/data_artifact.py
class DataArtifact(BaseArtifact):
    """Class for all ZenML data artifacts.

    The class body only overrides `TYPE_NAME`; everything else is inherited
    from `BaseArtifact`.
    """

    # Type name for this artifact class.
    TYPE_NAME = ArtifactType.DATA

model_artifact

Class for all ZenML model artifacts.

ModelArtifact (BaseArtifact)

Class for all ZenML model artifacts.

Source code in zenml/artifacts/model_artifact.py
class ModelArtifact(BaseArtifact):
    """Class for all ZenML model artifacts.

    The class body only overrides `TYPE_NAME`; everything else is inherited
    from `BaseArtifact`.
    """

    # Type name for this artifact class.
    TYPE_NAME = ArtifactType.MODEL

schema_artifact

Schema artifact class.

SchemaArtifact (BaseArtifact)

Class for all ZenML schema artifacts.

Source code in zenml/artifacts/schema_artifact.py
class SchemaArtifact(BaseArtifact):
    """Class for all ZenML schema artifacts.

    The class body only overrides `TYPE_NAME`; everything else is inherited
    from `BaseArtifact`.
    """

    # Type name for this artifact class.
    TYPE_NAME = ArtifactType.SCHEMA

service_artifact

Class for all ZenML service artifacts.

ServiceArtifact (BaseArtifact)

Class for all ZenML service artifacts.

Source code in zenml/artifacts/service_artifact.py
class ServiceArtifact(BaseArtifact):
    """Class for all ZenML service artifacts.

    The class body only overrides `TYPE_NAME`; everything else is inherited
    from `BaseArtifact`.
    """

    # Type name for this artifact class.
    TYPE_NAME = ArtifactType.SERVICE

statistics_artifact

Class for a statistics artifact.

StatisticsArtifact (BaseArtifact)

Class for all ZenML statistics artifacts.

Source code in zenml/artifacts/statistics_artifact.py
class StatisticsArtifact(BaseArtifact):
    """Class for all ZenML statistics artifacts.

    The class body only overrides `TYPE_NAME`; everything else is inherited
    from `BaseArtifact`.
    """

    # Type name for this artifact class.
    TYPE_NAME = ArtifactType.STATISTICS

type_registry

Class for artifact type registry.

ArtifactTypeRegistry

A registry to keep track of which datatypes map to which artifact types.

Source code in zenml/artifacts/type_registry.py
class ArtifactTypeRegistry:
    """Registry that records which artifact types belong to which datatype."""

    def __init__(self) -> None:
        """Create the registry with no registered datatypes."""
        # Maps a datatype to the tuple of artifact types registered for it.
        self._artifact_types: Dict[
            Type[Any], Tuple[Type["BaseArtifact"], ...]
        ] = {}

    def register_integration(
        self, key: Type[Any], type_: Iterable[Type["BaseArtifact"]]
    ) -> None:
        """Associate a datatype with a collection of artifact types.

        Registering the same datatype again replaces its previous entry.

        Args:
            key: The datatype to register.
            type_: The artifact types to associate with the datatype. The
                iterable is stored as a tuple.
        """
        artifact_types = tuple(type_)
        self._artifact_types[key] = artifact_types

    def get_artifact_type(
        self, key: Type[Any]
    ) -> Tuple[Type["BaseArtifact"], ...]:
        """Look up the artifact types registered for a datatype.

        An exact registration of `key` takes precedence. Otherwise the
        registrations of all types that `key` is a subclass of are
        collected, and the lookup succeeds only if they all agree on the
        same tuple of artifact types.

        Args:
            key: The datatype to look up.

        Returns:
            The tuple of artifact types registered for `key` or for its
            registered superclasses.

        Raises:
            StepInterfaceError: If neither `key` nor any of its superclasses
                is registered, or if its registered superclasses map to
                different tuples of artifact types.
        """
        # Fast path: the datatype itself is registered.
        if key in self._artifact_types:
            return self._artifact_types[key]

        # Otherwise gather the distinct tuples of all registered superclasses.
        artifact_types_for_compatible_superclasses = set()
        for known_type, known_artifact_types in self._artifact_types.items():
            if issubclass(key, known_type):
                artifact_types_for_compatible_superclasses.add(
                    known_artifact_types
                )

        if not artifact_types_for_compatible_superclasses:
            raise StepInterfaceError(
                f"Type {key} does not have a default `Materializer` thus it does "
                f"not have any associated `ArtifactType`s! Please specify your "
                f"own `Materializer`."
            )

        # More than one distinct tuple means the lookup is ambiguous.
        if len(artifact_types_for_compatible_superclasses) > 1:
            raise StepInterfaceError(
                f"Type {key} is subclassing more than one type and these "
                f"types map to different materializers. These "
                f"materializers feature a different list associated "
                f"artifact types within the registry: "
                f"{artifact_types_for_compatible_superclasses}. Please "
                f"specify which of these artifact types you would like to "
                f"use explicitly in your step."
            )

        return artifact_types_for_compatible_superclasses.pop()
__init__(self) special

Initialization with an empty registry.

Source code in zenml/artifacts/type_registry.py
def __init__(self) -> None:
    """Initialization with an empty registry."""
    # Maps a datatype to the tuple of artifact types registered for it.
    self._artifact_types: Dict[
        Type[Any], Tuple[Type["BaseArtifact"], ...]
    ] = {}
get_artifact_type(self, key)

Method to extract the list of artifact types given the data type.

Parameters:

Name Type Description Default
key Type[Any]

Indicates the type of object.

required

Returns:

Type Description
Tuple[Type[BaseArtifact], ...]

The tuple of Artifact types that were registered for this key (or for one of its superclasses).

Exceptions:

Type Description
StepInterfaceError

If the key (or any of its superclasses) is not registered or the key has more than one superclass with different default materializers/artifact types

Source code in zenml/artifacts/type_registry.py
def get_artifact_type(
    self, key: Type[Any]
) -> Tuple[Type["BaseArtifact"], ...]:
    """Look up the artifact types registered for a datatype.

    An exact registration of `key` takes precedence. Otherwise the
    registrations of all types that `key` is a subclass of are collected,
    and the lookup succeeds only if they all agree on the same tuple of
    artifact types.

    Args:
        key: The datatype to look up.

    Returns:
        The tuple of artifact types registered for `key` or for its
        registered superclasses.

    Raises:
        StepInterfaceError: If neither `key` nor any of its superclasses is
            registered, or if its registered superclasses map to different
            tuples of artifact types.
    """
    registry = self._artifact_types

    # Fast path: the datatype itself is registered.
    if key in registry:
        return registry[key]

    # Otherwise gather the distinct tuples of all registered superclasses.
    artifact_types_for_compatible_superclasses = set()
    for known_type, known_artifact_types in registry.items():
        if issubclass(key, known_type):
            artifact_types_for_compatible_superclasses.add(known_artifact_types)

    match_count = len(artifact_types_for_compatible_superclasses)
    if match_count == 0:
        raise StepInterfaceError(
            f"Type {key} does not have a default `Materializer` thus it does "
            f"not have any associated `ArtifactType`s! Please specify your "
            f"own `Materializer`."
        )

    # More than one distinct tuple means the lookup is ambiguous.
    if match_count > 1:
        raise StepInterfaceError(
            f"Type {key} is subclassing more than one type and these "
            f"types map to different materializers. These "
            f"materializers feature a different list associated "
            f"artifact types within the registry: "
            f"{artifact_types_for_compatible_superclasses}. Please "
            f"specify which of these artifact types you would like to "
            f"use explicitly in your step."
        )

    return artifact_types_for_compatible_superclasses.pop()
register_integration(self, key, type_)

Method to register an integration within the registry.

Parameters:

Name Type Description Default
key Type[Any]

any datatype

required
type_ Iterable[Type[BaseArtifact]]

the artifact types that the given datatype is associated with

required
Source code in zenml/artifacts/type_registry.py
def register_integration(
    self, key: Type[Any], type_: Iterable[Type["BaseArtifact"]]
) -> None:
    """Associate a datatype with a collection of artifact types.

    Registering the same datatype again replaces its previous entry.

    Args:
        key: The datatype to register.
        type_: The artifact types to associate with the datatype. The
            iterable is stored as a tuple.
    """
    artifact_types = tuple(type_)
    self._artifact_types[key] = artifact_types