Skip to content

Deepchecks

zenml.integrations.deepchecks special

Deepchecks integration for ZenML.

The Deepchecks integration provides a way to validate the data flowing through your pipelines. It lets you detect data anomalies and define checks that ensure data quality.

The integration includes custom materializers to store and visualize Deepchecks SuiteResults.

DeepchecksIntegration (Integration)

Definition of Deepchecks integration for ZenML.

Source code in zenml/integrations/deepchecks/__init__.py
class DeepchecksIntegration(Integration):
    """ZenML integration definition for [Deepchecks](https://github.com/deepchecks/deepchecks).

    Declares the Python requirements and APT packages needed by the
    integration and exposes the stack component flavors it contributes.
    """

    NAME = DEEPCHECKS
    REQUIREMENTS = [
        "deepchecks[vision]==0.8.0",
        "torchvision==0.14.0",
        "pandas<2.0.0",
    ]
    APT_PACKAGES = ["ffmpeg", "libsm6", "libxext6"]

    @staticmethod
    def activate() -> None:
        """Activate the Deepchecks integration.

        Activation consists solely of importing the integration's
        materializers module.
        """
        import zenml.integrations.deepchecks.materializers  # noqa

    @classmethod
    def flavors(cls) -> List[Type[Flavor]]:
        """Declare the stack component flavors for the Deepchecks integration.

        The flavors package is imported lazily, inside the method, rather
        than at module import time.

        Returns:
            List of stack component flavors for this integration.
        """
        from zenml.integrations.deepchecks import flavors as flavor_module

        component_flavors: List[Type[Flavor]] = [
            flavor_module.DeepchecksDataValidatorFlavor
        ]
        return component_flavors

activate() staticmethod

Activate the Deepchecks integration.

Source code in zenml/integrations/deepchecks/__init__.py
@staticmethod
def activate() -> None:
    """Activate the Deepchecks integration.

    Activation consists solely of importing the integration's materializers
    module.
    """
    import zenml.integrations.deepchecks.materializers  # noqa

flavors() classmethod

Declare the stack component flavors for the Deepchecks integration.

Returns:

Type Description
List[Type[zenml.stack.flavor.Flavor]]

List of stack component flavors for this integration.

Source code in zenml/integrations/deepchecks/__init__.py
@classmethod
def flavors(cls) -> List[Type[Flavor]]:
    """Declare the stack component flavors for the Deepchecks integration.

    The flavors package is imported lazily, inside the method, rather than
    at module import time.

    Returns:
        List of stack component flavors for this integration.
    """
    from zenml.integrations.deepchecks import flavors as flavor_module

    component_flavors: List[Type[Flavor]] = [
        flavor_module.DeepchecksDataValidatorFlavor
    ]
    return component_flavors

data_validators special

Initialization of the Deepchecks data validator for ZenML.

deepchecks_data_validator

Implementation of the Deepchecks data validator.

DeepchecksDataValidator (BaseDataValidator)

Deepchecks data validator stack component.

Source code in zenml/integrations/deepchecks/data_validators/deepchecks_data_validator.py
class DeepchecksDataValidator(BaseDataValidator):
    """Deepchecks data validator stack component."""

    NAME: ClassVar[str] = "Deepchecks"
    FLAVOR: ClassVar[
        Type[BaseDataValidatorFlavor]
    ] = DeepchecksDataValidatorFlavor

    @staticmethod
    def _split_checks(
        check_list: Sequence[str],
    ) -> Tuple[Sequence[str], Sequence[str]]:
        """Partition check identifiers into tabular and computer vision groups.

        Each identifier is classified independently with
        `DeepchecksValidationCheck.is_tabular_check` and
        `DeepchecksValidationCheck.is_vision_check`; the input order is
        preserved within each group.

        Args:
            check_list: A list of check identifiers.

        Returns:
            List of tabular check identifiers and list of computer vision
            check identifiers.
        """
        tabular_checks = [
            identifier
            for identifier in check_list
            if DeepchecksValidationCheck.is_tabular_check(identifier)
        ]
        vision_checks = [
            identifier
            for identifier in check_list
            if DeepchecksValidationCheck.is_vision_check(identifier)
        ]
        return tabular_checks, vision_checks

    @classmethod
    def _create_and_run_check_suite(
        cls,
        check_enum: Type[DeepchecksValidationCheck],
        reference_dataset: Union[pd.DataFrame, DataLoader[Any]],
        comparison_dataset: Optional[
            Union[pd.DataFrame, DataLoader[Any]]
        ] = None,
        model: Optional[Union[ClassifierMixin, Module]] = None,
        check_list: Optional[Sequence[str]] = None,
        dataset_kwargs: Dict[str, Any] = {},
        check_kwargs: Dict[str, Dict[str, Any]] = {},
        run_kwargs: Dict[str, Any] = {},
    ) -> SuiteResult:
        """Create and run a Deepchecks check suite corresponding to the input parameters.

        This method contains generic logic common to all Deepchecks data
        validator methods that validates the input arguments and uses them to
        generate and run a Deepchecks check suite.

        Args:
            check_enum: ZenML enum type grouping together Deepchecks checks with
                the same characteristics. This is used to generate a default
                list of checks, if a custom list isn't provided via the
                `check_list` argument.
            reference_dataset: Primary (reference) dataset argument used during
                validation.
            comparison_dataset: Optional secondary (comparison) dataset argument
                used during comparison checks.
            model: Optional model argument used during validation.
            check_list: Optional list of ZenML Deepchecks check identifiers
                specifying the list of Deepchecks checks to be performed.
            dataset_kwargs: Additional keyword arguments to be passed to the
                Deepchecks tabular.Dataset or vision.VisionData constructor.
            check_kwargs: Additional keyword arguments to be passed to the
                Deepchecks check object constructors. Arguments are grouped for
                each check and indexed using the full check class name or
                check enum value as dictionary keys. Entries whose name starts
                with `condition_` and whose value is a dictionary are not
                passed to the constructor; they are applied by calling the
                check's `add_<name>` method with the dictionary as keyword
                arguments.
            run_kwargs: Additional keyword arguments to be passed to the
                Deepchecks Suite `run` method.

        Returns:
            Deepchecks SuiteResult object with the Suite run results.

        Raises:
            TypeError: If the datasets, model and check list arguments combine
                data types and/or checks from different categories (tabular and
                computer vision).
        """
        # Detect what type of check to perform (tabular or computer vision) from
        # the dataset/model datatypes and the check list. At the same time,
        # validate the combination of data types used for dataset and model
        # arguments and the check list.
        is_tabular = False
        is_vision = False
        for dataset in [reference_dataset, comparison_dataset]:
            if dataset is None:
                continue
            if isinstance(dataset, pd.DataFrame):
                is_tabular = True
            elif isinstance(dataset, DataLoader):
                is_vision = True
            else:
                raise TypeError(
                    f"Unsupported dataset data type found: {type(dataset)}. "
                    f"Supported data types are {str(pd.DataFrame)} for tabular "
                    f"data and {str(DataLoader)} for computer vision data."
                )

        # Compare against None explicitly: a supplied model object that
        # defines `__len__` (e.g. an empty container-style model) is falsy,
        # and a truthiness test would silently skip its type validation.
        if model is not None:
            if isinstance(model, ClassifierMixin):
                is_tabular = True
            elif isinstance(model, Module):
                is_vision = True
            else:
                raise TypeError(
                    f"Unsupported model data type found: {type(model)}. "
                    f"Supported data types are {str(ClassifierMixin)} for "
                    f"tabular data and {str(Module)} for computer vision "
                    f"data."
                )

        if is_tabular and is_vision:
            raise TypeError(
                f"Tabular and computer vision data types used for datasets and "
                f"models cannot be mixed. They must all belong to the same "
                f"category. Supported data types for tabular data are "
                f"{str(pd.DataFrame)} for datasets and {str(ClassifierMixin)} "
                f"for models. Supported data types for computer vision data "
                f"are {str(DataLoader)} for datasets and {str(Module)} "
                f"for models."
            )

        if not check_list:
            # default to executing all the checks listed in the supplied
            # checks enum type if a custom check list is not supplied
            tabular_checks, vision_checks = cls._split_checks(
                check_enum.values()
            )
            if is_tabular:
                check_list = tabular_checks
                vision_checks = []
            else:
                check_list = vision_checks
                tabular_checks = []
        else:
            tabular_checks, vision_checks = cls._split_checks(check_list)

        if tabular_checks and vision_checks:
            raise TypeError(
                f"The check list cannot mix tabular checks "
                f"({tabular_checks}) and computer vision checks ("
                f"{vision_checks})."
            )

        if is_tabular and vision_checks:
            raise TypeError(
                f"Tabular data types used for datasets and models can only "
                f"be used with tabular validation checks. The following "
                f"computer vision checks included in the check list are "
                f"not valid: {vision_checks}."
            )

        if is_vision and tabular_checks:
            raise TypeError(
                f"Computer vision data types used for datasets and models "
                f"can only be used with computer vision validation checks. "
                f"The following tabular checks included in the check list "
                f"are not valid: {tabular_checks}."
            )

        # use the pipeline name and the step name to generate a unique suite
        # name
        try:
            # get pipeline name and step name
            step_env = cast(
                StepEnvironment, Environment()[STEP_ENVIRONMENT_NAME]
            )
            suite_name = f"{step_env.pipeline_name}_{step_env.step_name}"
        except KeyError:
            # if not running inside a pipeline step, use random values
            suite_name = f"suite_{random_str(5)}"

        if is_tabular:
            dataset_class = TabularData
            suite_class = TabularSuite
            full_suite = full_tabular_suite()
        else:
            dataset_class = VisionData
            suite_class = VisionSuite
            full_suite = full_vision_suite()

        train_dataset = dataset_class(reference_dataset, **dataset_kwargs)
        test_dataset = None
        if comparison_dataset is not None:
            test_dataset = dataset_class(comparison_dataset, **dataset_kwargs)
        suite = suite_class(name=suite_name)

        # Some Deepchecks checks require a minimum configuration such as
        # conditions to be configured (see https://docs.deepchecks.com/stable/user-guide/general/customizations/examples/plot_configure_check_conditions.html#sphx-glr-user-guide-general-customizations-examples-plot-configure-check-conditions-py)
        # for their execution to have meaning. For checks that don't have
        # custom configuration attributes explicitly specified in the
        # `check_kwargs` input parameter, we use the default check
        # instances extracted from the full suite shipped with Deepchecks.
        default_checks = {
            default.__class__: default for default in full_suite.checks.values()
        }
        for check_name in check_list:
            check_class = check_enum.get_check_class(check_name)
            extra_kwargs = check_kwargs.get(check_name, {})
            default_check = default_checks.get(check_class)
            check: BaseCheck
            if extra_kwargs or default_check is None:
                # Build the check from the arguments configured for THIS
                # check only. `check_kwargs` as a whole is keyed by check
                # name and must not be unpacked into the constructor.
                # Condition entries are applied separately below.
                init_kwargs = {
                    arg_name: arg_value
                    for arg_name, arg_value in extra_kwargs.items()
                    if not (
                        arg_name.startswith("condition_")
                        and isinstance(arg_value, dict)
                    )
                }
                check = check_class(**init_kwargs)
            else:
                check = default_check

            # extract the condition kwargs from the check kwargs
            for arg_name, condition_kwargs in extra_kwargs.items():
                if not arg_name.startswith("condition_") or not isinstance(
                    condition_kwargs, dict
                ):
                    continue
                condition_method = getattr(check, f"add_{arg_name}", None)
                if not condition_method or not callable(condition_method):
                    logger.warning(
                        f"Deepchecks check type {check.__class__} has no "
                        f"condition named {arg_name}. Ignoring the check "
                        f"argument."
                    )
                    continue
                condition_method(**condition_kwargs)

            suite.add(check)
        return suite.run(
            train_dataset=train_dataset,
            test_dataset=test_dataset,
            model=model,
            **run_kwargs,
        )

    def data_validation(
        self,
        dataset: Union[pd.DataFrame, DataLoader[Any]],
        comparison_dataset: Optional[Any] = None,
        check_list: Optional[Sequence[str]] = None,
        dataset_kwargs: Dict[str, Any] = {},
        check_kwargs: Dict[str, Dict[str, Any]] = {},
        run_kwargs: Dict[str, Any] = {},
        **kwargs: Any,
    ) -> SuiteResult:
        """Run one or more Deepchecks data validation checks on a dataset.

        Use this method to look for integrity problems in a single dataset
        (e.g. missing values, conflicting labels, mixed data types etc.) or
        to run dataset comparison checks (e.g. data drift checks). Dataset
        comparison checks require that a second dataset be supplied via the
        `comparison_dataset` argument.

        A custom set of checks may be selected through the `check_list`
        argument, using `DeepchecksDataIntegrityCheck` and
        `DeepchecksDataDriftCheck` enum values. If omitted:

        * if the `comparison_dataset` is omitted, a suite with all available
        data integrity checks will be performed on the input data. See
        `DeepchecksDataIntegrityCheck` for a list of Deepchecks builtin
        checks that are compatible with this method.

        * if the `comparison_dataset` is supplied, a suite with all
        available data drift checks will be performed on the input
        data. See `DeepchecksDataDriftCheck` for a list of Deepchecks
        builtin checks that are compatible with this method.

        Args:
            dataset: Target dataset to be validated.
            comparison_dataset: Optional second dataset to be used for data
                comparison checks (e.g. data drift checks).
            check_list: Optional list of ZenML Deepchecks check identifiers
                specifying the data validation checks to be performed.
                `DeepchecksDataIntegrityCheck` enum values should be used for
                single data validation checks and `DeepchecksDataDriftCheck`
                enum values for data comparison checks. If not supplied, the
                entire set of checks applicable to the input dataset(s)
                will be performed.
            dataset_kwargs: Additional keyword arguments to be passed to the
                Deepchecks `tabular.Dataset` or `vision.VisionData` constructor.
            check_kwargs: Additional keyword arguments to be passed to the
                Deepchecks check object constructors. Arguments are grouped for
                each check and indexed using the full check class name or
                check enum value as dictionary keys.
            run_kwargs: Additional keyword arguments to be passed to the
                Deepchecks Suite `run` method.
            kwargs: Additional keyword arguments (unused).

        Returns:
            A Deepchecks SuiteResult with the results of the validation.
        """
        # The presence of a comparison dataset selects which enum supplies
        # the default checks: drift checks with it, integrity checks without.
        check_enum: Type[DeepchecksValidationCheck] = (
            DeepchecksDataDriftCheck
            if comparison_dataset is not None
            else DeepchecksDataIntegrityCheck
        )

        return self._create_and_run_check_suite(
            check_enum=check_enum,
            reference_dataset=dataset,
            comparison_dataset=comparison_dataset,
            check_list=check_list,
            dataset_kwargs=dataset_kwargs,
            check_kwargs=check_kwargs,
            run_kwargs=run_kwargs,
        )

    def model_validation(
        self,
        dataset: Union[pd.DataFrame, DataLoader[Any]],
        model: Union[ClassifierMixin, Module],
        comparison_dataset: Optional[Any] = None,
        check_list: Optional[Sequence[str]] = None,
        dataset_kwargs: Dict[str, Any] = {},
        check_kwargs: Dict[str, Dict[str, Any]] = {},
        run_kwargs: Dict[str, Any] = {},
        **kwargs: Any,
    ) -> SuiteResult:
        """Run one or more Deepchecks model validation checks.

        Call this method to perform model validation checks (e.g. confusion
        matrix validation, performance reports, model error analyses, etc).
        A second dataset is required for model performance comparison tests
        (i.e. tests that identify changes in a model's behavior by comparing
        how it performs on two different datasets).

        The `check_list` argument may be used to specify a custom set of
        Deepchecks model validation checks to perform, identified by
        `DeepchecksModelValidationCheck` and `DeepchecksModelDriftCheck` enum
        values. If omitted:

        * if the `comparison_dataset` is omitted, a suite with all available
        model validation checks will be performed on the input data. See
        `DeepchecksModelValidationCheck` for a list of Deepchecks builtin
        checks that are compatible with this method.

        * if the `comparison_dataset` is supplied, a suite with all
        available model comparison checks will be performed on the input
        data. See `DeepchecksModelDriftCheck` for a list of Deepchecks
        builtin checks that are compatible with this method.

        Args:
            dataset: Target dataset to be validated.
            model: Target model to be validated.
            comparison_dataset: Optional second dataset to be used for model
                comparison checks.
            check_list: Optional list of ZenML Deepchecks check identifiers
                specifying the model validation checks to be performed.
                `DeepchecksModelValidationCheck` enum values should be used for
                model validation checks and `DeepchecksModelDriftCheck` enum
                values for model comparison checks. If not supplied, the
                entire set of checks applicable to the input dataset(s)
                will be performed.
            dataset_kwargs: Additional keyword arguments to be passed to the
                Deepchecks tabular.Dataset or vision.VisionData constructor.
            check_kwargs: Additional keyword arguments to be passed to the
                Deepchecks check object constructors. Arguments are grouped for
                each check and indexed using the full check class name or
                check enum value as dictionary keys.
            run_kwargs: Additional keyword arguments to be passed to the
                Deepchecks Suite `run` method.
            kwargs: Additional keyword arguments (unused).

        Returns:
            A Deepchecks SuiteResult with the results of the validation.
        """
        # Without a comparison dataset the default checks come from the
        # model validation enum; with one, from the model drift enum.
        check_enum: Type[DeepchecksValidationCheck]
        if comparison_dataset is None:
            check_enum = DeepchecksModelValidationCheck
        else:
            check_enum = DeepchecksModelDriftCheck

        return self._create_and_run_check_suite(
            check_enum=check_enum,
            reference_dataset=dataset,
            comparison_dataset=comparison_dataset,
            model=model,
            check_list=check_list,
            dataset_kwargs=dataset_kwargs,
            check_kwargs=check_kwargs,
            run_kwargs=run_kwargs,
        )
FLAVOR (BaseDataValidatorFlavor)

Flavor of the Deepchecks data validator.

Source code in zenml/integrations/deepchecks/data_validators/deepchecks_data_validator.py
class DeepchecksDataValidatorFlavor(BaseDataValidatorFlavor):
    """Flavor of the Deepchecks data validator.

    Exposes the flavor name, documentation links, dashboard logo and the
    stack component class that implements the flavor.
    """

    @property
    def name(self) -> str:
        """Name of the flavor.

        Returns:
            The name of the flavor.
        """
        return DEEPCHECKS_DATA_VALIDATOR_FLAVOR

    @property
    def docs_url(self) -> Optional[str]:
        """A url to point at docs explaining this flavor.

        Returns:
            A flavor docs url, as produced by `generate_default_docs_url`.
        """
        default_docs_url = self.generate_default_docs_url()
        return default_docs_url

    @property
    def sdk_docs_url(self) -> Optional[str]:
        """A url to point at SDK docs explaining this flavor.

        Returns:
            A flavor SDK docs url, as produced by
            `generate_default_sdk_docs_url`.
        """
        default_sdk_docs_url = self.generate_default_sdk_docs_url()
        return default_sdk_docs_url

    @property
    def logo_url(self) -> str:
        """A url to represent the flavor in the dashboard.

        Returns:
            The flavor logo.
        """
        return (
            "https://public-flavor-logos.s3.eu-central-1.amazonaws.com/"
            "data_validator/deepchecks.png"
        )

    @property
    def implementation_class(self) -> Type["DeepchecksDataValidator"]:
        """Implementation class.

        The data validators package is imported inside the property rather
        than at module import time.

        Returns:
            The implementation class.
        """
        from zenml.integrations.deepchecks import data_validators

        return data_validators.DeepchecksDataValidator
docs_url: Optional[str] property readonly

A url to point at docs explaining this flavor.

Returns:

Type Description
Optional[str]

A flavor docs url.

implementation_class: Type[DeepchecksDataValidator] property readonly

Implementation class.

Returns:

Type Description
Type[DeepchecksDataValidator]

The implementation class.

logo_url: str property readonly

A url to represent the flavor in the dashboard.

Returns:

Type Description
str

The flavor logo.

name: str property readonly

Name of the flavor.

Returns:

Type Description
str

The name of the flavor.

sdk_docs_url: Optional[str] property readonly

A url to point at SDK docs explaining this flavor.

Returns:

Type Description
Optional[str]

A flavor SDK docs url.

data_validation(self, dataset, comparison_dataset=None, check_list=None, dataset_kwargs={}, check_kwargs={}, run_kwargs={}, **kwargs)

Run one or more Deepchecks data validation checks on a dataset.

Call this method to analyze and identify potential integrity problems with a single dataset (e.g. missing values, conflicting labels, mixed data types etc.) and dataset comparison checks (e.g. data drift checks). Dataset comparison checks require that a second dataset be supplied via the comparison_dataset argument.

The check_list argument may be used to specify a custom set of Deepchecks data integrity checks to perform, identified by DeepchecksDataIntegrityCheck and DeepchecksDataDriftCheck enum values. If omitted:

  • if the comparison_dataset is omitted, a suite with all available data integrity checks will be performed on the input data. See DeepchecksDataIntegrityCheck for a list of Deepchecks builtin checks that are compatible with this method.

  • if the comparison_dataset is supplied, a suite with all available data drift checks will be performed on the input data. See DeepchecksDataDriftCheck for a list of Deepchecks builtin checks that are compatible with this method.

Parameters:

Name Type Description Default
dataset Union[pandas.core.frame.DataFrame, torch.utils.data.dataloader.DataLoader[Any]]

Target dataset to be validated.

required
comparison_dataset Optional[Any]

Optional second dataset to be used for data comparison checks (e.g. data drift checks).

None
check_list Optional[Sequence[str]]

Optional list of ZenML Deepchecks check identifiers specifying the data validation checks to be performed. DeepchecksDataIntegrityCheck enum values should be used for single data validation checks and DeepchecksDataDriftCheck enum values for data comparison checks. If not supplied, the entire set of checks applicable to the input dataset(s) will be performed.

None
dataset_kwargs Dict[str, Any]

Additional keyword arguments to be passed to the Deepchecks tabular.Dataset or vision.VisionData constructor.

{}
check_kwargs Dict[str, Dict[str, Any]]

Additional keyword arguments to be passed to the Deepchecks check object constructors. Arguments are grouped for each check and indexed using the full check class name or check enum value as dictionary keys.

{}
run_kwargs Dict[str, Any]

Additional keyword arguments to be passed to the Deepchecks Suite run method.

{}
kwargs Any

Additional keyword arguments (unused).

{}

Returns:

Type Description
SuiteResult

A Deepchecks SuiteResult with the results of the validation.

Source code in zenml/integrations/deepchecks/data_validators/deepchecks_data_validator.py
def data_validation(
    self,
    dataset: Union[pd.DataFrame, DataLoader[Any]],
    comparison_dataset: Optional[Any] = None,
    check_list: Optional[Sequence[str]] = None,
    dataset_kwargs: Dict[str, Any] = {},
    check_kwargs: Dict[str, Dict[str, Any]] = {},
    run_kwargs: Dict[str, Any] = {},
    **kwargs: Any,
) -> SuiteResult:
    """Run one or more Deepchecks data validation checks on a dataset.

    Use this method to look for integrity problems in a single dataset
    (e.g. missing values, conflicting labels, mixed data types etc.) or
    to run dataset comparison checks (e.g. data drift checks). Dataset
    comparison checks require that a second dataset be supplied via the
    `comparison_dataset` argument.

    A custom set of checks may be selected through the `check_list`
    argument, using `DeepchecksDataIntegrityCheck` and
    `DeepchecksDataDriftCheck` enum values. If omitted:

    * if the `comparison_dataset` is omitted, a suite with all available
    data integrity checks will be performed on the input data. See
    `DeepchecksDataIntegrityCheck` for a list of Deepchecks builtin
    checks that are compatible with this method.

    * if the `comparison_dataset` is supplied, a suite with all
    available data drift checks will be performed on the input
    data. See `DeepchecksDataDriftCheck` for a list of Deepchecks
    builtin checks that are compatible with this method.

    Args:
        dataset: Target dataset to be validated.
        comparison_dataset: Optional second dataset to be used for data
            comparison checks (e.g. data drift checks).
        check_list: Optional list of ZenML Deepchecks check identifiers
            specifying the data validation checks to be performed.
            `DeepchecksDataIntegrityCheck` enum values should be used for
            single data validation checks and `DeepchecksDataDriftCheck`
            enum values for data comparison checks. If not supplied, the
            entire set of checks applicable to the input dataset(s)
            will be performed.
        dataset_kwargs: Additional keyword arguments to be passed to the
            Deepchecks `tabular.Dataset` or `vision.VisionData` constructor.
        check_kwargs: Additional keyword arguments to be passed to the
            Deepchecks check object constructors. Arguments are grouped for
            each check and indexed using the full check class name or
            check enum value as dictionary keys.
        run_kwargs: Additional keyword arguments to be passed to the
            Deepchecks Suite `run` method.
        kwargs: Additional keyword arguments (unused).

    Returns:
        A Deepchecks SuiteResult with the results of the validation.
    """
    # The presence of a comparison dataset selects which enum supplies
    # the default checks: drift checks with it, integrity checks without.
    check_enum: Type[DeepchecksValidationCheck] = (
        DeepchecksDataDriftCheck
        if comparison_dataset is not None
        else DeepchecksDataIntegrityCheck
    )

    return self._create_and_run_check_suite(
        check_enum=check_enum,
        reference_dataset=dataset,
        comparison_dataset=comparison_dataset,
        check_list=check_list,
        dataset_kwargs=dataset_kwargs,
        check_kwargs=check_kwargs,
        run_kwargs=run_kwargs,
    )
model_validation(self, dataset, model, comparison_dataset=None, check_list=None, dataset_kwargs={}, check_kwargs={}, run_kwargs={}, **kwargs)

Run one or more Deepchecks model validation checks.

Call this method to perform model validation checks (e.g. confusion matrix validation, performance reports, model error analyses, etc). A second dataset is required for model performance comparison tests (i.e. tests that identify changes in a model's behavior by comparing how it performs on two different datasets).

The check_list argument may be used to specify a custom set of Deepchecks model validation checks to perform, identified by DeepchecksModelValidationCheck and DeepchecksModelDriftCheck enum values. If omitted:

* if the `comparison_dataset` is omitted, a suite with all available
model validation checks will be performed on the input data. See
`DeepchecksModelValidationCheck` for a list of Deepchecks builtin
checks that are compatible with this method.

* if the `comparison_dataset` is supplied, a suite with all
available model comparison checks will be performed on the input
data. See `DeepchecksModelDriftCheck` for a list of Deepchecks
builtin checks that are compatible with this method.

Parameters:

Name Type Description Default
dataset Union[pandas.core.frame.DataFrame, torch.utils.data.dataloader.DataLoader[Any]]

Target dataset to be validated.

required
model Union[sklearn.base.ClassifierMixin, torch.nn.modules.module.Module]

Target model to be validated.

required
comparison_dataset Optional[Any]

Optional second dataset to be used for model comparison checks.

None
check_list Optional[Sequence[str]]

Optional list of ZenML Deepchecks check identifiers specifying the model validation checks to be performed. DeepchecksModelValidationCheck enum values should be used for model validation checks and DeepchecksModelDriftCheck enum values for model comparison checks. If not supplied, the entire set of checks applicable to the input dataset(s) will be performed.

None
dataset_kwargs Dict[str, Any]

Additional keyword arguments to be passed to the Deepchecks tabular.Dataset or vision.VisionData constructor.

{}
check_kwargs Dict[str, Dict[str, Any]]

Additional keyword arguments to be passed to the Deepchecks check object constructors. Arguments are grouped for each check and indexed using the full check class name or check enum value as dictionary keys.

{}
run_kwargs Dict[str, Any]

Additional keyword arguments to be passed to the Deepchecks Suite run method.

{}
kwargs Any

Additional keyword arguments (unused).

{}

Returns:

Type Description
Any

A Deepchecks SuiteResult with the results of the validation.

Source code in zenml/integrations/deepchecks/data_validators/deepchecks_data_validator.py
def model_validation(
    self,
    dataset: Union[pd.DataFrame, DataLoader[Any]],
    model: Union[ClassifierMixin, Module],
    comparison_dataset: Optional[Any] = None,
    check_list: Optional[Sequence[str]] = None,
    dataset_kwargs: Dict[str, Any] = {},
    check_kwargs: Dict[str, Dict[str, Any]] = {},
    run_kwargs: Dict[str, Any] = {},
    **kwargs: Any,
) -> Any:
    """Validate a model by running a suite of Deepchecks checks against it.

    Use this method for model validation checks (e.g. confusion matrix
    validation, performance reports, model error analyses, etc). Checks that
    compare how the model behaves on two different datasets additionally
    need the `comparison_dataset` argument.

    A custom selection of checks can be requested through `check_list`,
    using `DeepchecksModelValidationCheck` and `DeepchecksModelDriftCheck`
    enum values. When `check_list` is omitted:

        * without a `comparison_dataset`, the checks are taken from
        `DeepchecksModelValidationCheck` (single dataset model validation
        checks).

        * with a `comparison_dataset`, the checks are taken from
        `DeepchecksModelDriftCheck` (model comparison checks).

    Args:
        dataset: Target dataset to be validated.
        model: Target model to be validated.
        comparison_dataset: Optional second dataset to be used for model
            comparison checks.
        check_list: Optional list of ZenML Deepchecks check identifiers
            specifying the model validation checks to be performed.
            `DeepchecksModelValidationCheck` enum values should be used for
            model validation checks and `DeepchecksModelDriftCheck` enum
            values for model comparison checks. If not supplied, the
            entire set of checks applicable to the input dataset(s)
            will be performed.
        dataset_kwargs: Additional keyword arguments to be passed to the
            Deepchecks tabular.Dataset or vision.VisionData constructor.
        check_kwargs: Additional keyword arguments to be passed to the
            Deepchecks check object constructors. Arguments are grouped for
            each check and indexed using the full check class name or
            check enum value as dictionary keys.
        run_kwargs: Additional keyword arguments to be passed to the
            Deepchecks Suite `run` method.
        kwargs: Additional keyword arguments (unused).

    Returns:
        A Deepchecks SuiteResult with the results of the validation.
    """
    # The presence of a second dataset decides which family of checks is
    # used: single dataset validation or two dataset comparison (drift).
    check_enum: Type[DeepchecksValidationCheck] = (
        DeepchecksModelValidationCheck
        if comparison_dataset is None
        else DeepchecksModelDriftCheck
    )

    # NOTE: the `{}` defaults in the signature are shared between calls;
    # this method only forwards them and never modifies them itself.
    suite_result = self._create_and_run_check_suite(
        check_enum=check_enum,
        reference_dataset=dataset,
        comparison_dataset=comparison_dataset,
        model=model,
        check_list=check_list,
        dataset_kwargs=dataset_kwargs,
        check_kwargs=check_kwargs,
        run_kwargs=run_kwargs,
    )
    return suite_result

flavors special

Deepchecks integration flavors.

deepchecks_data_validator_flavor

Deepchecks data validator flavor.

DeepchecksDataValidatorFlavor (BaseDataValidatorFlavor)

Flavor of the Deepchecks data validator.

Source code in zenml/integrations/deepchecks/flavors/deepchecks_data_validator_flavor.py
class DeepchecksDataValidatorFlavor(BaseDataValidatorFlavor):
    """Data validator flavor backed by Deepchecks."""

    @property
    def name(self) -> str:
        """Identifier under which this flavor is registered.

        Returns:
            The name of the flavor.
        """
        flavor_name = DEEPCHECKS_DATA_VALIDATOR_FLAVOR
        return flavor_name

    @property
    def logo_url(self) -> str:
        """Location of the logo shown for this flavor in the dashboard.

        Returns:
            The flavor logo.
        """
        logo = (
            "https://public-flavor-logos.s3.eu-central-1.amazonaws.com"
            "/data_validator/deepchecks.png"
        )
        return logo

    @property
    def docs_url(self) -> Optional[str]:
        """Link to the user documentation of this flavor.

        Returns:
            A flavor docs url.
        """
        default_docs_url = self.generate_default_docs_url()
        return default_docs_url

    @property
    def sdk_docs_url(self) -> Optional[str]:
        """Link to the SDK documentation of this flavor.

        Returns:
            A flavor SDK docs url.
        """
        default_sdk_docs_url = self.generate_default_sdk_docs_url()
        return default_sdk_docs_url

    @property
    def implementation_class(self) -> Type["DeepchecksDataValidator"]:
        """Class implementing the stack component for this flavor.

        Returns:
            The implementation class.
        """
        # Imported inside the property rather than at module level, so the
        # data validator module is only loaded when this property is read.
        from zenml.integrations.deepchecks.data_validators import (
            DeepchecksDataValidator,
        )

        return DeepchecksDataValidator
docs_url: Optional[str] property readonly

A url to point at docs explaining this flavor.

Returns:

Type Description
Optional[str]

A flavor docs url.

implementation_class: Type[DeepchecksDataValidator] property readonly

Implementation class.

Returns:

Type Description
Type[DeepchecksDataValidator]

The implementation class.

logo_url: str property readonly

A url to represent the flavor in the dashboard.

Returns:

Type Description
str

The flavor logo.

name: str property readonly

Name of the flavor.

Returns:

Type Description
str

The name of the flavor.

sdk_docs_url: Optional[str] property readonly

A url to point at SDK docs explaining this flavor.

Returns:

Type Description
Optional[str]

A flavor SDK docs url.

materializers special

Deepchecks materializers.

deepchecks_dataset_materializer

Implementation of Deepchecks dataset materializer.

DeepchecksDatasetMaterializer (PandasMaterializer)

Materializer to read data to and from Deepchecks dataset.

Source code in zenml/integrations/deepchecks/materializers/deepchecks_dataset_materializer.py
class DeepchecksDatasetMaterializer(PandasMaterializer):
    """Materializer that stores Deepchecks datasets as pandas dataframes.

    Every operation is delegated to the `PandasMaterializer` parent class and
    acts on the dataframe exposed by the `data` attribute of the `Dataset`.
    """

    ASSOCIATED_ARTIFACT_TYPE: ClassVar[ArtifactType] = ArtifactType.DATA
    ASSOCIATED_TYPES: ClassVar[Tuple[Type[Any], ...]] = (Dataset,)

    def load(self, data_type: Type[Any]) -> Dataset:
        """Load the stored dataframe and wrap it in a `deepchecks.Dataset`.

        The `Dataset` is built from the dataframe alone; no other constructor
        arguments are supplied.

        Args:
            data_type: The type of the data to read.

        Returns:
            A Deepchecks Dataset.
        """
        return Dataset(super().load(data_type))

    def save(self, dataset: Dataset) -> None:
        """Persist the dataframe held by a `Dataset` object.

        Args:
            dataset: A deepchecks.Dataset object.
        """
        dataframe = dataset.data
        super().save(dataframe)

    def save_visualizations(
        self, dataset: Dataset
    ) -> Dict[str, VisualizationType]:
        """Save the pandas visualizations of the dataframe in the dataset.

        Args:
            dataset: The Deepchecks dataset to save visualizations for.

        Returns:
            A dictionary of visualization URIs and their types.
        """
        dataframe = dataset.data
        return super().save_visualizations(dataframe)

    def extract_metadata(self, dataset: Dataset) -> Dict[str, "MetadataType"]:
        """Collect the pandas metadata of the dataframe in the dataset.

        Args:
            dataset: The `Dataset` object to extract metadata from.

        Returns:
            The extracted metadata as a dictionary.
        """
        dataframe = dataset.data
        return super().extract_metadata(dataframe)
extract_metadata(self, dataset)

Extract metadata from the given Dataset object.

Parameters:

Name Type Description Default
dataset Dataset

The Dataset object to extract metadata from.

required

Returns:

Type Description
Dict[str, MetadataType]

The extracted metadata as a dictionary.

Source code in zenml/integrations/deepchecks/materializers/deepchecks_dataset_materializer.py
def extract_metadata(self, dataset: Dataset) -> Dict[str, "MetadataType"]:
    """Collect the pandas metadata of the dataframe in the dataset.

    Args:
        dataset: The `Dataset` object to extract metadata from.

    Returns:
        The extracted metadata as a dictionary.
    """
    dataframe = dataset.data
    return super().extract_metadata(dataframe)
load(self, data_type)

Reads pandas dataframes and creates deepchecks.Dataset from it.

Parameters:

Name Type Description Default
data_type Type[Any]

The type of the data to read.

required

Returns:

Type Description
Dataset

A Deepchecks Dataset.

Source code in zenml/integrations/deepchecks/materializers/deepchecks_dataset_materializer.py
def load(self, data_type: Type[Any]) -> Dataset:
    """Load the stored dataframe and wrap it in a `deepchecks.Dataset`.

    The `Dataset` is built from the dataframe alone; no other constructor
    arguments are supplied.

    Args:
        data_type: The type of the data to read.

    Returns:
        A Deepchecks Dataset.
    """
    return Dataset(super().load(data_type))
save(self, dataset)

Serializes pandas dataframe within a Dataset object.

Parameters:

Name Type Description Default
dataset Dataset

A deepchecks.Dataset object.

required
Source code in zenml/integrations/deepchecks/materializers/deepchecks_dataset_materializer.py
def save(self, dataset: Dataset) -> None:
    """Persist the dataframe held by a `Dataset` object.

    Args:
        dataset: A deepchecks.Dataset object.
    """
    dataframe = dataset.data
    super().save(dataframe)
save_visualizations(self, dataset)

Saves visualizations for the given Deepchecks dataset.

Parameters:

Name Type Description Default
dataset Dataset

The Deepchecks dataset to save visualizations for.

required

Returns:

Type Description
Dict[str, zenml.enums.VisualizationType]

A dictionary of visualization URIs and their types.

Source code in zenml/integrations/deepchecks/materializers/deepchecks_dataset_materializer.py
def save_visualizations(
    self, dataset: Dataset
) -> Dict[str, VisualizationType]:
    """Save the pandas visualizations of the dataframe in the dataset.

    Args:
        dataset: The Deepchecks dataset to save visualizations for.

    Returns:
        A dictionary of visualization URIs and their types.
    """
    dataframe = dataset.data
    return super().save_visualizations(dataframe)

deepchecks_results_materializer

Implementation of Deepchecks suite results materializer.

DeepchecksResultMaterializer (BaseMaterializer)

Materializer to read data to and from CheckResult and SuiteResult objects.

Source code in zenml/integrations/deepchecks/materializers/deepchecks_results_materializer.py
class DeepchecksResultMaterializer(BaseMaterializer):
    """Materializer for Deepchecks `CheckResult` and `SuiteResult` objects.

    Results are serialized as JSON into `RESULTS_FILENAME` under the artifact
    URI. An HTML rendering is written to `HTML_FILENAME` as a visualization.
    """

    ASSOCIATED_TYPES: ClassVar[Tuple[Type[Any], ...]] = (
        CheckResult,
        SuiteResult,
    )
    ASSOCIATED_ARTIFACT_TYPE: ClassVar[ArtifactType] = (
        ArtifactType.DATA_ANALYSIS
    )

    def load(self, data_type: Type[Any]) -> Union[CheckResult, SuiteResult]:
        """Rebuild a Deepchecks check or suite result from its JSON file.

        Args:
            data_type: The type of the data to read.

        Returns:
            A Deepchecks CheckResult or SuiteResult.

        Raises:
            RuntimeError: if the input data type is not supported.
        """
        results_path = os.path.join(self.uri, RESULTS_FILENAME)
        # The file is read before the requested type is inspected.
        serialized = io_utils.read_file_contents_as_string(results_path)

        if data_type == SuiteResult:
            return SuiteResult.from_json(serialized)
        if data_type == CheckResult:
            return CheckResult.from_json(serialized)
        raise RuntimeError(f"Unknown data type: {data_type}")

    def save(self, result: Union[CheckResult, SuiteResult]) -> None:
        """Write the JSON serialization of a CheckResult or SuiteResult.

        Args:
            result: A Deepchecks CheckResult or SuiteResult.
        """
        results_path = os.path.join(self.uri, RESULTS_FILENAME)
        # NOTE(review): the positional `True` is forwarded to `to_json`
        # unchanged; its meaning is defined by Deepchecks - confirm there.
        io_utils.write_file_contents_as_string(
            results_path, result.to_json(True)
        )

    def save_visualizations(
        self, result: Union[CheckResult, SuiteResult]
    ) -> Dict[str, VisualizationType]:
        """Write an HTML rendering of the given Deepchecks result.

        Args:
            result: The Deepchecks result to save visualizations for.

        Returns:
            A dictionary of visualization URIs and their types.
        """
        html_path = os.path.join(self.uri, HTML_FILENAME)
        with fileio.open(html_path, "w") as html_file:
            result.save_as_html(html_file)
        return {html_path: VisualizationType.HTML}

    def extract_metadata(
        self, result: Union[CheckResult, SuiteResult]
    ) -> Dict[str, "MetadataType"]:
        """Summarize a Deepchecks result as name and pass/fail metadata.

        Args:
            result: The Deepchecks result to extract metadata from.

        Returns:
            The extracted metadata as a dictionary. It is empty when the
            result is neither a `CheckResult` nor a `SuiteResult`.
        """
        if isinstance(result, CheckResult):
            check_name = result.get_header()
            check_passed = result.passed_conditions()
            return {
                "deepchecks_check_name": check_name,
                "deepchecks_check_passed": check_passed,
            }

        if isinstance(result, SuiteResult):
            suite_name = result.name
            suite_passed = result.passed()
            return {
                "deepchecks_suite_name": suite_name,
                "deepchecks_suite_passed": suite_passed,
            }

        return {}
extract_metadata(self, result)

Extract metadata from the given Deepchecks result.

Parameters:

Name Type Description Default
result Union[deepchecks.core.check_result.CheckResult, deepchecks.core.suite.SuiteResult]

The Deepchecks result to extract metadata from.

required

Returns:

Type Description
Dict[str, MetadataType]

The extracted metadata as a dictionary.

Source code in zenml/integrations/deepchecks/materializers/deepchecks_results_materializer.py
def extract_metadata(
    self, result: Union[CheckResult, SuiteResult]
) -> Dict[str, "MetadataType"]:
    """Summarize a Deepchecks result as name and pass/fail metadata.

    Args:
        result: The Deepchecks result to extract metadata from.

    Returns:
        The extracted metadata as a dictionary. It is empty when the
        result is neither a `CheckResult` nor a `SuiteResult`.
    """
    if isinstance(result, CheckResult):
        check_name = result.get_header()
        check_passed = result.passed_conditions()
        return {
            "deepchecks_check_name": check_name,
            "deepchecks_check_passed": check_passed,
        }

    if isinstance(result, SuiteResult):
        suite_name = result.name
        suite_passed = result.passed()
        return {
            "deepchecks_suite_name": suite_name,
            "deepchecks_suite_passed": suite_passed,
        }

    return {}
load(self, data_type)

Reads a Deepchecks check or suite result from a serialized JSON file.

Parameters:

Name Type Description Default
data_type Type[Any]

The type of the data to read.

required

Returns:

Type Description
Union[deepchecks.core.check_result.CheckResult, deepchecks.core.suite.SuiteResult]

A Deepchecks CheckResult or SuiteResult.

Exceptions:

Type Description
RuntimeError

if the input data type is not supported.

Source code in zenml/integrations/deepchecks/materializers/deepchecks_results_materializer.py
def load(self, data_type: Type[Any]) -> Union[CheckResult, SuiteResult]:
    """Rebuild a Deepchecks check or suite result from its JSON file.

    Args:
        data_type: The type of the data to read.

    Returns:
        A Deepchecks CheckResult or SuiteResult.

    Raises:
        RuntimeError: if the input data type is not supported.
    """
    results_path = os.path.join(self.uri, RESULTS_FILENAME)
    # The file is read before the requested type is inspected.
    serialized = io_utils.read_file_contents_as_string(results_path)

    if data_type == SuiteResult:
        return SuiteResult.from_json(serialized)
    if data_type == CheckResult:
        return CheckResult.from_json(serialized)
    raise RuntimeError(f"Unknown data type: {data_type}")
save(self, result)

Creates a JSON serialization for a CheckResult or SuiteResult.

Parameters:

Name Type Description Default
result Union[deepchecks.core.check_result.CheckResult, deepchecks.core.suite.SuiteResult]

A Deepchecks CheckResult or SuiteResult.

required
Source code in zenml/integrations/deepchecks/materializers/deepchecks_results_materializer.py
def save(self, result: Union[CheckResult, SuiteResult]) -> None:
    """Write the JSON serialization of a CheckResult or SuiteResult.

    Args:
        result: A Deepchecks CheckResult or SuiteResult.
    """
    results_path = os.path.join(self.uri, RESULTS_FILENAME)
    # NOTE(review): the positional `True` is forwarded to `to_json`
    # unchanged; its meaning is defined by Deepchecks - confirm there.
    io_utils.write_file_contents_as_string(
        results_path, result.to_json(True)
    )
save_visualizations(self, result)

Saves visualizations for the given Deepchecks result.

Parameters:

Name Type Description Default
result Union[deepchecks.core.check_result.CheckResult, deepchecks.core.suite.SuiteResult]

The Deepchecks result to save visualizations for.

required

Returns:

Type Description
Dict[str, zenml.enums.VisualizationType]

A dictionary of visualization URIs and their types.

Source code in zenml/integrations/deepchecks/materializers/deepchecks_results_materializer.py
def save_visualizations(
    self, result: Union[CheckResult, SuiteResult]
) -> Dict[str, VisualizationType]:
    """Write an HTML rendering of the given Deepchecks result.

    Args:
        result: The Deepchecks result to save visualizations for.

    Returns:
        A dictionary of visualization URIs and their types.
    """
    html_path = os.path.join(self.uri, HTML_FILENAME)
    with fileio.open(html_path, "w") as html_file:
        result.save_as_html(html_file)
    return {html_path: VisualizationType.HTML}

steps special

Initialization of the Deepchecks Standard Steps.

deepchecks_data_drift

Implementation of the Deepchecks data drift validation step.

deepchecks_data_integrity

Implementation of the Deepchecks data integrity validation step.

deepchecks_model_drift

Implementation of the Deepchecks model drift validation step.

deepchecks_model_validation

Implementation of the Deepchecks model validation step.

validation_checks

Definition of the Deepchecks validation check types.

DeepchecksDataDriftCheck (DeepchecksValidationCheck)

Categories of Deepchecks data drift checks.

This list reflects the set of train-test validation checks provided by Deepchecks:

All these checks inherit from deepchecks.tabular.TrainTestCheck or deepchecks.vision.TrainTestCheck and require two datasets as input.

Source code in zenml/integrations/deepchecks/validation_checks.py
class DeepchecksDataDriftCheck(DeepchecksValidationCheck):
    """Categories of Deepchecks data drift checks.

    This list reflects the set of train-test validation checks provided by
    Deepchecks:

      * [for tabular data](https://docs.deepchecks.com/stable/checks_gallery/tabular.html#train-test-validation)
      * [for computer vision](https://docs.deepchecks.com/stable/checks_gallery/vision.html#train-test-validation)

    All these checks inherit from `deepchecks.tabular.TrainTestCheck` or
    `deepchecks.vision.TrainTestCheck` and require two datasets as input.

    The value of every member is the `import_path` of the source that
    `source_utils.resolve` returns for the corresponding Deepchecks check
    class.
    """

    # Checks taken from the Deepchecks tabular checks module.
    TABULAR_CATEGORY_MISMATCH_TRAIN_TEST = source_utils.resolve(
        tabular_checks.CategoryMismatchTrainTest
    ).import_path
    TABULAR_DATASET_SIZE_COMPARISON = source_utils.resolve(
        tabular_checks.DatasetsSizeComparison
    ).import_path
    TABULAR_DATE_TRAIN_TEST_LEAKAGE_DUPLICATES = source_utils.resolve(
        tabular_checks.DateTrainTestLeakageDuplicates
    ).import_path
    TABULAR_DATE_TRAIN_TEST_LEAKAGE_OVERLAP = source_utils.resolve(
        tabular_checks.DateTrainTestLeakageOverlap
    ).import_path
    TABULAR_DOMINANT_FREQUENCY_CHANGE = source_utils.resolve(
        tabular_checks.DominantFrequencyChange
    ).import_path
    TABULAR_FEATURE_LABEL_CORRELATION_CHANGE = source_utils.resolve(
        tabular_checks.FeatureLabelCorrelationChange
    ).import_path
    TABULAR_INDEX_LEAKAGE = source_utils.resolve(
        tabular_checks.IndexTrainTestLeakage
    ).import_path
    TABULAR_NEW_LABEL_TRAIN_TEST = source_utils.resolve(
        tabular_checks.NewLabelTrainTest
    ).import_path
    TABULAR_STRING_MISMATCH_COMPARISON = source_utils.resolve(
        tabular_checks.StringMismatchComparison
    ).import_path
    TABULAR_TRAIN_TEST_FEATURE_DRIFT = source_utils.resolve(
        tabular_checks.TrainTestFeatureDrift
    ).import_path
    TABULAR_TRAIN_TEST_LABEL_DRIFT = source_utils.resolve(
        tabular_checks.TrainTestLabelDrift
    ).import_path
    TABULAR_TRAIN_TEST_SAMPLES_MIX = source_utils.resolve(
        tabular_checks.TrainTestSamplesMix
    ).import_path
    TABULAR_WHOLE_DATASET_DRIFT = source_utils.resolve(
        tabular_checks.WholeDatasetDrift
    ).import_path

    # Checks taken from the Deepchecks vision checks module.
    VISION_FEATURE_LABEL_CORRELATION_CHANGE = source_utils.resolve(
        vision_checks.FeatureLabelCorrelationChange
    ).import_path
    VISION_HEATMAP_COMPARISON = source_utils.resolve(
        vision_checks.HeatmapComparison
    ).import_path
    VISION_IMAGE_DATASET_DRIFT = source_utils.resolve(
        vision_checks.ImageDatasetDrift
    ).import_path
    VISION_IMAGE_PROPERTY_DRIFT = source_utils.resolve(
        vision_checks.ImagePropertyDrift
    ).import_path
    VISION_NEW_LABELS = source_utils.resolve(
        vision_checks.NewLabels
    ).import_path
    VISION_SIMILAR_IMAGE_LEAKAGE = source_utils.resolve(
        vision_checks.SimilarImageLeakage
    ).import_path
    VISION_TRAIN_TEST_LABEL_DRIFT = source_utils.resolve(
        vision_checks.TrainTestLabelDrift
    ).import_path

DeepchecksDataIntegrityCheck (DeepchecksValidationCheck)

Categories of Deepchecks data integrity checks.

This list reflects the set of data integrity checks provided by Deepchecks:

All these checks inherit from deepchecks.tabular.SingleDatasetCheck or deepchecks.vision.SingleDatasetCheck and require a single dataset as input.

Source code in zenml/integrations/deepchecks/validation_checks.py
class DeepchecksDataIntegrityCheck(DeepchecksValidationCheck):
    """Categories of Deepchecks data integrity checks.

    This list reflects the set of data integrity checks provided by Deepchecks:

      * [for tabular data](https://docs.deepchecks.com/en/stable/checks_gallery/tabular.html#data-integrity)
      * [for computer vision](https://docs.deepchecks.com/en/stable/checks_gallery/vision.html#data-integrity)

    All these checks inherit from `deepchecks.tabular.SingleDatasetCheck` or
    `deepchecks.vision.SingleDatasetCheck` and require a single dataset as input.

    The value of every member is the `import_path` of the source that
    `source_utils.resolve` returns for the corresponding Deepchecks check
    class.
    """

    # Checks taken from the Deepchecks tabular checks module.
    TABULAR_COLUMNS_INFO = source_utils.resolve(
        tabular_checks.ColumnsInfo
    ).import_path
    TABULAR_CONFLICTING_LABELS = source_utils.resolve(
        tabular_checks.ConflictingLabels
    ).import_path

    TABULAR_DATA_DUPLICATES = source_utils.resolve(
        tabular_checks.DataDuplicates
    ).import_path
    # NOTE(review): unlike its siblings, this class is referenced by its bare
    # name instead of through `tabular_checks` - presumably it is imported
    # separately at module level; confirm against the file's imports.
    TABULAR_FEATURE_FEATURE_CORRELATION = source_utils.resolve(
        FeatureFeatureCorrelation
    ).import_path
    TABULAR_FEATURE_LABEL_CORRELATION = source_utils.resolve(
        tabular_checks.FeatureLabelCorrelation
    ).import_path
    TABULAR_IDENTIFIER_LEAKAGE = source_utils.resolve(
        tabular_checks.IdentifierLeakage
    ).import_path
    TABULAR_IS_SINGLE_VALUE = source_utils.resolve(
        tabular_checks.IsSingleValue
    ).import_path
    TABULAR_MIXED_DATA_TYPES = source_utils.resolve(
        tabular_checks.MixedDataTypes
    ).import_path
    TABULAR_MIXED_NULLS = source_utils.resolve(
        tabular_checks.MixedNulls
    ).import_path
    TABULAR_OUTLIER_SAMPLE_DETECTION = source_utils.resolve(
        tabular_checks.OutlierSampleDetection
    ).import_path
    TABULAR_SPECIAL_CHARS = source_utils.resolve(
        tabular_checks.SpecialCharacters
    ).import_path
    TABULAR_STRING_LENGTH_OUT_OF_BOUNDS = source_utils.resolve(
        tabular_checks.StringLengthOutOfBounds
    ).import_path
    TABULAR_STRING_MISMATCH = source_utils.resolve(
        tabular_checks.StringMismatch
    ).import_path

    # Checks taken from the Deepchecks vision checks module.
    VISION_IMAGE_PROPERTY_OUTLIERS = source_utils.resolve(
        vision_checks.ImagePropertyOutliers
    ).import_path
    VISION_LABEL_PROPERTY_OUTLIERS = source_utils.resolve(
        vision_checks.LabelPropertyOutliers
    ).import_path

DeepchecksModelDriftCheck (DeepchecksValidationCheck)

Categories of Deepchecks model drift checks.

This list includes a subset of the model evaluation checks provided by Deepchecks that require two datasets and a mandatory model as input:

All these checks inherit from deepchecks.tabular.TrainTestCheck or deepchecks.vision.TrainTestCheck and require two datasets and a mandatory model as input.

Source code in zenml/integrations/deepchecks/validation_checks.py
class DeepchecksModelDriftCheck(DeepchecksValidationCheck):
    """Categories of Deepchecks model drift checks.

    This list includes a subset of the model evaluation checks provided by
    Deepchecks that require two datasets and a mandatory model as input:

      * [for tabular data](https://docs.deepchecks.com/en/stable/checks_gallery/tabular.html#model-evaluation)
      * [for computer vision](https://docs.deepchecks.com/stable/checks_gallery/vision.html#model-evaluation)

    All these checks inherit from `deepchecks.tabular.TrainTestCheck` or
    `deepchecks.vision.TrainTestCheck` and require two datasets and a mandatory
    model as input.

    The value of every member is the `import_path` of the source that
    `source_utils.resolve` returns for the corresponding Deepchecks check
    class.
    """

    # Checks taken from the Deepchecks tabular checks module.
    TABULAR_BOOSTING_OVERFIT = source_utils.resolve(
        tabular_checks.BoostingOverfit
    ).import_path
    TABULAR_MODEL_ERROR_ANALYSIS = source_utils.resolve(
        tabular_checks.ModelErrorAnalysis
    ).import_path
    TABULAR_PERFORMANCE_REPORT = source_utils.resolve(
        tabular_checks.PerformanceReport
    ).import_path
    TABULAR_SIMPLE_MODEL_COMPARISON = source_utils.resolve(
        tabular_checks.SimpleModelComparison
    ).import_path
    TABULAR_TRAIN_TEST_PREDICTION_DRIFT = source_utils.resolve(
        tabular_checks.TrainTestPredictionDrift
    ).import_path
    TABULAR_UNUSED_FEATURES = source_utils.resolve(
        tabular_checks.UnusedFeatures
    ).import_path

    # Checks taken from the Deepchecks vision checks module.
    VISION_CLASS_PERFORMANCE = source_utils.resolve(
        vision_checks.ClassPerformance
    ).import_path
    VISION_MODEL_ERROR_ANALYSIS = source_utils.resolve(
        vision_checks.ModelErrorAnalysis
    ).import_path
    VISION_SIMPLE_MODEL_COMPARISON = source_utils.resolve(
        vision_checks.SimpleModelComparison
    ).import_path
    VISION_TRAIN_TEST_PREDICTION_DRIFT = source_utils.resolve(
        vision_checks.TrainTestPredictionDrift
    ).import_path

DeepchecksModelValidationCheck (DeepchecksValidationCheck)

Categories of Deepchecks model validation checks.

This list includes a subset of the model evaluation checks provided by Deepchecks that require a single dataset and a mandatory model as input:

All these checks inherit from deepchecks.tabular.SingleDatasetCheck or deepchecks.vision.SingleDatasetCheck and require a dataset and a mandatory model as input.

Source code in zenml/integrations/deepchecks/validation_checks.py
class DeepchecksModelValidationCheck(DeepchecksValidationCheck):
    """Categories of Deepchecks model validation checks.

    This list includes a subset of the model evaluation checks provided by
    Deepchecks that require a single dataset and a mandatory model as input:

      * [for tabular data](https://docs.deepchecks.com/en/stable/checks_gallery/tabular.html#model-evaluation)
      * [for computer vision](https://docs.deepchecks.com/stable/checks_gallery/vision.html#model-evaluation)

    All these checks inherit from `deepchecks.tabular.SingleDatasetCheck` or
    `deepchecks.vision.SingleDatasetCheck` and require a dataset and a
    mandatory model as input.

    The value of every member is the `import_path` of the source that
    `source_utils.resolve` returns for the corresponding Deepchecks check
    class.
    """

    # Checks taken from the Deepchecks tabular checks module.
    TABULAR_CALIBRATION_SCORE = source_utils.resolve(
        tabular_checks.CalibrationScore
    ).import_path
    TABULAR_CONFUSION_MATRIX_REPORT = source_utils.resolve(
        tabular_checks.ConfusionMatrixReport
    ).import_path
    TABULAR_MODEL_INFERENCE_TIME = source_utils.resolve(
        tabular_checks.ModelInferenceTime
    ).import_path
    TABULAR_REGRESSION_ERROR_DISTRIBUTION = source_utils.resolve(
        tabular_checks.RegressionErrorDistribution
    ).import_path
    TABULAR_REGRESSION_SYSTEMATIC_ERROR = source_utils.resolve(
        tabular_checks.RegressionSystematicError
    ).import_path
    TABULAR_ROC_REPORT = source_utils.resolve(
        tabular_checks.RocReport
    ).import_path
    TABULAR_SEGMENT_PERFORMANCE = source_utils.resolve(
        tabular_checks.SegmentPerformance
    ).import_path

    # Checks taken from the Deepchecks vision checks module.
    VISION_CONFUSION_MATRIX_REPORT = source_utils.resolve(
        vision_checks.ConfusionMatrixReport
    ).import_path
    VISION_IMAGE_SEGMENT_PERFORMANCE = source_utils.resolve(
        vision_checks.ImageSegmentPerformance
    ).import_path
    VISION_MEAN_AVERAGE_PRECISION_REPORT = source_utils.resolve(
        vision_checks.MeanAveragePrecisionReport
    ).import_path
    VISION_MEAN_AVERAGE_RECALL_REPORT = source_utils.resolve(
        vision_checks.MeanAverageRecallReport
    ).import_path
    VISION_ROBUSTNESS_REPORT = source_utils.resolve(
        vision_checks.RobustnessReport
    ).import_path
    VISION_SINGLE_DATASET_SCALAR_PERFORMANCE = source_utils.resolve(
        vision_checks.SingleDatasetScalarPerformance
    ).import_path

DeepchecksValidationCheck (StrEnum)

Base class for all Deepchecks categories of validation checks.

This base class defines some conventions used for all enum values used to identify the various validation checks that can be performed with Deepchecks:

  • enum values represent fully formed class paths pointing to Deepchecks BaseCheck subclasses
  • all tabular data checks are located under the deepchecks.tabular.checks module sub-tree
  • all computer vision data checks are located under the deepchecks.vision.checks module sub-tree
Source code in zenml/integrations/deepchecks/validation_checks.py
class DeepchecksValidationCheck(StrEnum):
    """Base class for all Deepchecks categories of validation checks.

    This base class defines some conventions used for all enum values used to
    identify the various validation checks that can be performed with
    Deepchecks:

      * enum values represent fully formed class paths pointing to Deepchecks
      BaseCheck subclasses
      * all tabular data checks are located under the
      `deepchecks.tabular.checks` module sub-tree
      * all computer vision data checks are located under the
      `deepchecks.vision.checks` module sub-tree
    """

    @classmethod
    def validate_check_name(cls, check_name: str) -> None:
        """Validate a Deepchecks check identifier.

        Args:
            check_name: Identifies a builtin Deepchecks check. The identifier
                must be formatted as `deepchecks.{tabular|vision}.checks.<...>.<class-name>`.

        Raises:
            ValueError: If the check identifier does not follow the convention
                used by ZenML to identify Deepchecks builtin checks.
        """
        if not re.match(
            r"^deepchecks\.(tabular|vision)\.checks\.",
            check_name,
        ):
            raise ValueError(
                f"The supplied Deepcheck check identifier does not follow the "
                f"convention used by ZenML: `{check_name}`. The identifier "
                f"must be formatted as `deepchecks.<tabular|vision>.checks...` "
                f"and must be resolvable to a valid Deepchecks BaseCheck "
                f"subclass."
            )

    @classmethod
    def is_tabular_check(cls, check_name: str) -> bool:
        """Check if a validation check is applicable to tabular data.

        Args:
            check_name: Identifies a builtin Deepchecks check.

        Returns:
            True if the check is applicable to tabular data, otherwise False.
        """
        cls.validate_check_name(check_name)
        return check_name.startswith("deepchecks.tabular.")

    @classmethod
    def is_vision_check(cls, check_name: str) -> bool:
        """Check if a validation check is applicable to computer vision data.

        Args:
            check_name: Identifies a builtin Deepchecks check.

        Returns:
            True if the check is applicable to compute vision data, otherwise
            False.
        """
        cls.validate_check_name(check_name)
        return check_name.startswith("deepchecks.vision.")

    @classmethod
    def get_check_class(cls, check_name: str) -> Type[BaseCheck]:
        """Get the Deepchecks check class associated with an enum value or a custom check name.

        Args:
            check_name: Identifies a builtin Deepchecks check. The identifier
                must be formatted as `deepchecks.{tabular|vision}.checks.<class-name>`
                and must be resolvable to a valid Deepchecks BaseCheck class.

        Returns:
            The Deepchecks check class associated with this enum value.

        Raises:
            ValueError: If the check name could not be converted to a valid
                Deepchecks check class. This can happen for example if the enum
                values fall out of sync with the Deepchecks code base or if a
                custom check name is supplied that cannot be resolved to a valid
                Deepchecks BaseCheck class.
        """
        cls.validate_check_name(check_name)

        try:
            check_class: Type[
                BaseCheck
            ] = source_utils.load_and_validate_class(
                check_name, expected_class=BaseCheck
            )
        except AttributeError:
            raise ValueError(
                f"Could not map the `{check_name}` check identifier to a valid "
                f"Deepchecks check class."
            )

        if check_name not in cls.values():
            logger.warning(
                f"You are using a custom Deepchecks check identifier that is "
                f"not listed in the `{str(cls)}` enum type. This could lead "
                f"to unexpected behavior."
            )

        return check_class

    @property
    def check_class(self) -> Type[BaseCheck]:
        """Convert the enum value to a valid Deepchecks check class.

        Returns:
            The Deepchecks check class associated with the enum value.
        """
        return self.get_check_class(self.value)