Deepchecks
zenml.integrations.deepchecks
special
Deepchecks integration for ZenML.
The Deepchecks integration provides a way to validate the data in your pipelines. It lets you detect data anomalies and define checks that ensure the quality of your data.
The integration includes custom materializers to store Deepchecks `SuiteResult` objects and a visualizer to display the results conveniently in a notebook or in your browser.
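For example, the standard data integrity step documented below can be wired into a pipeline in a few lines. The following is a minimal sketch, assuming the integration is installed (e.g. via `zenml integration install deepchecks`) and a Deepchecks data validator is part of the active stack; the importer step and the CSV path are illustrative:

```python
import pandas as pd

from zenml.integrations.deepchecks.steps import (
    DeepchecksDataIntegrityCheckStepParameters,
    deepchecks_data_integrity_check_step,
)
from zenml.pipelines import pipeline
from zenml.steps import step


@step
def importer() -> pd.DataFrame:
    """Hypothetical step that loads the dataset to validate."""
    return pd.read_csv("data.csv")


@pipeline
def data_validation_pipeline(importer, integrity_checker):
    df = importer()
    integrity_checker(dataset=df)


# instantiate the standard step with default parameters
# (i.e. the full set of data integrity checks)
integrity_checker = deepchecks_data_integrity_check_step(
    step_name="integrity_checker",
    params=DeepchecksDataIntegrityCheckStepParameters(),
)

data_validation_pipeline(
    importer=importer(),
    integrity_checker=integrity_checker,
).run()
```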
DeepchecksIntegration (Integration)
Definition of Deepchecks integration for ZenML.
Source code in zenml/integrations/deepchecks/__init__.py
class DeepchecksIntegration(Integration):
"""Definition of [Deepchecks](https://github.com/deepchecks/deepchecks) integration for ZenML."""
NAME = DEEPCHECKS
REQUIREMENTS = ["deepchecks[vision]==0.8.0", "torchvision==0.11.2"]
APT_PACKAGES = ["ffmpeg", "libsm6", "libxext6"]
@staticmethod
def activate() -> None:
"""Activate the Deepchecks integration."""
from zenml.integrations.deepchecks import materializers # noqa
from zenml.integrations.deepchecks import visualizers # noqa
@classmethod
def flavors(cls) -> List[Type[Flavor]]:
"""Declare the stack component flavors for the Deepchecks integration.
Returns:
List of stack component flavors for this integration.
"""
from zenml.integrations.deepchecks.flavors import (
DeepchecksDataValidatorFlavor,
)
return [DeepchecksDataValidatorFlavor]
activate()
staticmethod
Activate the Deepchecks integration.
Source code in zenml/integrations/deepchecks/__init__.py
@staticmethod
def activate() -> None:
"""Activate the Deepchecks integration."""
from zenml.integrations.deepchecks import materializers # noqa
from zenml.integrations.deepchecks import visualizers # noqa
flavors()
classmethod
Declare the stack component flavors for the Deepchecks integration.
Returns:
Type | Description |
---|---|
List[Type[zenml.stack.flavor.Flavor]] | List of stack component flavors for this integration. |
Source code in zenml/integrations/deepchecks/__init__.py
@classmethod
def flavors(cls) -> List[Type[Flavor]]:
"""Declare the stack component flavors for the Deepchecks integration.
Returns:
List of stack component flavors for this integration.
"""
from zenml.integrations.deepchecks.flavors import (
DeepchecksDataValidatorFlavor,
)
return [DeepchecksDataValidatorFlavor]
data_validators
special
Initialization of the Deepchecks data validator for ZenML.
deepchecks_data_validator
Implementation of the Deepchecks data validator.
DeepchecksDataValidator (BaseDataValidator)
Deepchecks data validator stack component.
Source code in zenml/integrations/deepchecks/data_validators/deepchecks_data_validator.py
class DeepchecksDataValidator(BaseDataValidator):
"""Deepchecks data validator stack component."""
NAME: ClassVar[str] = "Deepchecks"
FLAVOR: ClassVar[
Type[BaseDataValidatorFlavor]
] = DeepchecksDataValidatorFlavor
@staticmethod
def _split_checks(
check_list: Sequence[str],
) -> Tuple[Sequence[str], Sequence[str]]:
"""Split a list of check identifiers in two lists, one for tabular and one for computer vision checks.
Args:
check_list: A list of check identifiers.
Returns:
List of tabular check identifiers and list of computer vision
check identifiers.
"""
tabular_checks = list(
filter(
lambda check: DeepchecksValidationCheck.is_tabular_check(check),
check_list,
)
)
vision_checks = list(
filter(
lambda check: DeepchecksValidationCheck.is_vision_check(check),
check_list,
)
)
return tabular_checks, vision_checks
# flake8: noqa: C901
@classmethod
def _create_and_run_check_suite(
cls,
check_enum: Type[DeepchecksValidationCheck],
reference_dataset: Union[pd.DataFrame, DataLoader[Any]],
comparison_dataset: Optional[
Union[pd.DataFrame, DataLoader[Any]]
] = None,
model: Optional[Union[ClassifierMixin, Module]] = None,
check_list: Optional[Sequence[str]] = None,
dataset_kwargs: Dict[str, Any] = {},
check_kwargs: Dict[str, Dict[str, Any]] = {},
run_kwargs: Dict[str, Any] = {},
) -> SuiteResult:
"""Create and run a Deepchecks check suite corresponding to the input parameters.
This method contains generic logic common to all Deepchecks data
validator methods that validates the input arguments and uses them to
generate and run a Deepchecks check suite.
Args:
check_enum: ZenML enum type grouping together Deepchecks checks with
the same characteristics. This is used to generate a default
list of checks, if a custom list isn't provided via the
`check_list` argument.
reference_dataset: Primary (reference) dataset argument used during
validation.
comparison_dataset: Optional secondary (comparison) dataset argument
used during comparison checks.
model: Optional model argument used during validation.
check_list: Optional list of ZenML Deepchecks check identifiers
specifying the list of Deepchecks checks to be performed.
dataset_kwargs: Additional keyword arguments to be passed to the
Deepchecks tabular.Dataset or vision.VisionData constructor.
check_kwargs: Additional keyword arguments to be passed to the
Deepchecks check object constructors. Arguments are grouped for
each check and indexed using the full check class name or
check enum value as dictionary keys.
run_kwargs: Additional keyword arguments to be passed to the
Deepchecks Suite `run` method.
Returns:
Deepchecks SuiteResult object with the Suite run results.
Raises:
TypeError: If the datasets, model and check list arguments combine
data types and/or checks from different categories (tabular and
computer vision).
"""
# Detect what type of check to perform (tabular or computer vision) from
# the dataset/model datatypes and the check list. At the same time,
# validate the combination of data types used for dataset and model
# arguments and the check list.
is_tabular = False
is_vision = False
for dataset in [reference_dataset, comparison_dataset]:
if dataset is None:
continue
if isinstance(dataset, pd.DataFrame):
is_tabular = True
elif isinstance(dataset, DataLoader):
is_vision = True
else:
raise TypeError(
f"Unsupported dataset data type found: {type(dataset)}. "
f"Supported data types are {str(pd.DataFrame)} for tabular "
f"data and {str(DataLoader)} for computer vision data."
)
if model:
if isinstance(model, ClassifierMixin):
is_tabular = True
elif isinstance(model, Module):
is_vision = True
else:
raise TypeError(
f"Unsupported model data type found: {type(model)}. "
f"Supported data types are {str(ClassifierMixin)} for "
f"tabular data and {str(Module)} for computer vision "
f"data."
)
if is_tabular and is_vision:
raise TypeError(
f"Tabular and computer vision data types used for datasets and "
f"models cannot be mixed. They must all belong to the same "
f"category. Supported data types for tabular data are "
f"{str(pd.DataFrame)} for datasets and {str(ClassifierMixin)} "
f"for models. Supported data types for computer vision data "
f"are {str(pd.DataFrame)} for datasets and and {str(Module)} "
f"for models."
)
if not check_list:
# default to executing all the checks listed in the supplied
# checks enum type if a custom check list is not supplied
tabular_checks, vision_checks = cls._split_checks(
check_enum.values()
)
if is_tabular:
check_list = tabular_checks
vision_checks = []
else:
check_list = vision_checks
tabular_checks = []
else:
tabular_checks, vision_checks = cls._split_checks(check_list)
if tabular_checks and vision_checks:
raise TypeError(
f"The check list cannot mix tabular checks "
f"({tabular_checks}) and computer vision checks ("
f"{vision_checks})."
)
if is_tabular and vision_checks:
raise TypeError(
f"Tabular data types used for datasets and models can only "
f"be used with tabular validation checks. The following "
f"computer vision checks included in the check list are "
f"not valid: {vision_checks}."
)
if is_vision and tabular_checks:
raise TypeError(
f"Computer vision data types used for datasets and models "
f"can only be used with computer vision validation checks. "
f"The following tabular checks included in the check list "
f"are not valid: {tabular_checks}."
)
check_classes = map(
lambda check: (
check,
check_enum.get_check_class(check),
),
check_list,
)
# use the pipeline name and the step name to generate a unique suite
# name
try:
# get pipeline name and step name
step_env = cast(
StepEnvironment, Environment()[STEP_ENVIRONMENT_NAME]
)
suite_name = f"{step_env.pipeline_name}_{step_env.step_name}"
except KeyError:
# if not running inside a pipeline step, use random values
suite_name = f"suite_{random_str(5)}"
if is_tabular:
dataset_class = TabularData
suite_class = TabularSuite
full_suite = full_tabular_suite()
else:
dataset_class = VisionData
suite_class = VisionSuite
full_suite = full_vision_suite()
train_dataset = dataset_class(reference_dataset, **dataset_kwargs)
test_dataset = None
if comparison_dataset is not None:
test_dataset = dataset_class(comparison_dataset, **dataset_kwargs)
suite = suite_class(name=suite_name)
# Some Deepchecks checks require a minimum configuration such as
# conditions to be configured (see https://docs.deepchecks.com/stable/user-guide/general/customizations/examples/plot_configure_check_conditions.html#sphx-glr-user-guide-general-customizations-examples-plot-configure-check-conditions-py)
# for their execution to have meaning. For checks that don't have
# custom configuration attributes explicitly specified in the
# `check_kwargs` input parameter, we use the default check
# instances extracted from the full suite shipped with Deepchecks.
default_checks = {
check.__class__: check for check in full_suite.checks.values()
}
for check_name, check_class in check_classes:
extra_kwargs = check_kwargs.get(check_name, {})
default_check = default_checks.get(check_class)
            check: BaseCheck
            # only the non-condition entries are valid constructor arguments;
            # the `condition_*` entries are applied via `add_condition_*` below
            init_kwargs = {
                arg: value
                for arg, value in extra_kwargs.items()
                if not arg.startswith("condition_")
            }
            if init_kwargs or not default_check:
                check = check_class(**init_kwargs)
else:
check = default_check
# extract the condition kwargs from the check kwargs
for arg_name, condition_kwargs in extra_kwargs.items():
if not arg_name.startswith("condition_") or not isinstance(
condition_kwargs, dict
):
continue
condition_method = getattr(check, f"add_{arg_name}", None)
if not condition_method or not callable(condition_method):
logger.warning(
f"Deepchecks check type {check.__class__} has no "
f"condition named {arg_name}. Ignoring the check "
f"argument."
)
continue
condition_method(**condition_kwargs)
suite.add(check)
return suite.run(
train_dataset=train_dataset,
test_dataset=test_dataset,
model=model,
**run_kwargs,
)
def data_validation(
self,
dataset: Union[pd.DataFrame, DataLoader[Any]],
comparison_dataset: Optional[Any] = None,
check_list: Optional[Sequence[str]] = None,
dataset_kwargs: Dict[str, Any] = {},
check_kwargs: Dict[str, Dict[str, Any]] = {},
run_kwargs: Dict[str, Any] = {},
**kwargs: Any,
) -> SuiteResult:
"""Run one or more Deepchecks data validation checks on a dataset.
Call this method to analyze and identify potential integrity problems
with a single dataset (e.g. missing values, conflicting labels, mixed
data types etc.) and dataset comparison checks (e.g. data drift
checks). Dataset comparison checks require that a second dataset be
supplied via the `comparison_dataset` argument.
The `check_list` argument may be used to specify a custom set of
Deepchecks data integrity checks to perform, identified by
`DeepchecksDataIntegrityCheck` and `DeepchecksDataDriftCheck` enum
values. If omitted:
* if the `comparison_dataset` is omitted, a suite with all available
data integrity checks will be performed on the input data. See
`DeepchecksDataIntegrityCheck` for a list of Deepchecks builtin
checks that are compatible with this method.
* if the `comparison_dataset` is supplied, a suite with all
available data drift checks will be performed on the input
data. See `DeepchecksDataDriftCheck` for a list of Deepchecks
builtin checks that are compatible with this method.
Args:
dataset: Target dataset to be validated.
comparison_dataset: Optional second dataset to be used for data
comparison checks (e.g data drift checks).
check_list: Optional list of ZenML Deepchecks check identifiers
specifying the data validation checks to be performed.
`DeepchecksDataIntegrityCheck` enum values should be used for
single data validation checks and `DeepchecksDataDriftCheck`
enum values for data comparison checks. If not supplied, the
entire set of checks applicable to the input dataset(s)
will be performed.
dataset_kwargs: Additional keyword arguments to be passed to the
Deepchecks `tabular.Dataset` or `vision.VisionData` constructor.
check_kwargs: Additional keyword arguments to be passed to the
Deepchecks check object constructors. Arguments are grouped for
each check and indexed using the full check class name or
check enum value as dictionary keys.
run_kwargs: Additional keyword arguments to be passed to the
Deepchecks Suite `run` method.
kwargs: Additional keyword arguments (unused).
Returns:
A Deepchecks SuiteResult with the results of the validation.
"""
check_enum: Type[DeepchecksValidationCheck]
if comparison_dataset is None:
check_enum = DeepchecksDataIntegrityCheck
else:
check_enum = DeepchecksDataDriftCheck
return self._create_and_run_check_suite(
check_enum=check_enum,
reference_dataset=dataset,
comparison_dataset=comparison_dataset,
check_list=check_list,
dataset_kwargs=dataset_kwargs,
check_kwargs=check_kwargs,
run_kwargs=run_kwargs,
)
def model_validation(
self,
dataset: Union[pd.DataFrame, DataLoader[Any]],
model: Union[ClassifierMixin, Module],
comparison_dataset: Optional[Any] = None,
check_list: Optional[Sequence[str]] = None,
dataset_kwargs: Dict[str, Any] = {},
check_kwargs: Dict[str, Dict[str, Any]] = {},
run_kwargs: Dict[str, Any] = {},
**kwargs: Any,
) -> Any:
"""Run one or more Deepchecks model validation checks.
Call this method to perform model validation checks (e.g. confusion
matrix validation, performance reports, model error analyses, etc).
A second dataset is required for model performance comparison tests
(i.e. tests that identify changes in a model behavior by comparing how
it performs on two different datasets).
The `check_list` argument may be used to specify a custom set of
Deepchecks model validation checks to perform, identified by
`DeepchecksModelValidationCheck` and `DeepchecksModelDriftCheck` enum
values. If omitted:
* if the `comparison_dataset` is omitted, a suite with all available
model validation checks will be performed on the input data. See
`DeepchecksModelValidationCheck` for a list of Deepchecks builtin
checks that are compatible with this method.
* if the `comparison_dataset` is supplied, a suite with all
available model comparison checks will be performed on the input
          data. See `DeepchecksModelDriftCheck` for a list of Deepchecks
builtin checks that are compatible with this method.
Args:
dataset: Target dataset to be validated.
model: Target model to be validated.
comparison_dataset: Optional second dataset to be used for model
comparison checks.
check_list: Optional list of ZenML Deepchecks check identifiers
specifying the model validation checks to be performed.
`DeepchecksModelValidationCheck` enum values should be used for
model validation checks and `DeepchecksModelDriftCheck` enum
values for model comparison checks. If not supplied, the
entire set of checks applicable to the input dataset(s)
will be performed.
dataset_kwargs: Additional keyword arguments to be passed to the
Deepchecks tabular.Dataset or vision.VisionData constructor.
check_kwargs: Additional keyword arguments to be passed to the
Deepchecks check object constructors. Arguments are grouped for
each check and indexed using the full check class name or
check enum value as dictionary keys.
run_kwargs: Additional keyword arguments to be passed to the
Deepchecks Suite `run` method.
kwargs: Additional keyword arguments (unused).
Returns:
A Deepchecks SuiteResult with the results of the validation.
"""
check_enum: Type[DeepchecksValidationCheck]
if comparison_dataset is None:
check_enum = DeepchecksModelValidationCheck
else:
check_enum = DeepchecksModelDriftCheck
return self._create_and_run_check_suite(
check_enum=check_enum,
reference_dataset=dataset,
comparison_dataset=comparison_dataset,
model=model,
check_list=check_list,
dataset_kwargs=dataset_kwargs,
check_kwargs=check_kwargs,
run_kwargs=run_kwargs,
)
FLAVOR (BaseDataValidatorFlavor)
Flavor of the Deepchecks data validator.
Source code in zenml/integrations/deepchecks/data_validators/deepchecks_data_validator.py
class DeepchecksDataValidatorFlavor(BaseDataValidatorFlavor):
"""Flavor of the Deepchecks data validator."""
@property
def name(self) -> str:
"""Name of the flavor.
Returns:
The name of the flavor.
"""
return DEEPCHECKS_DATA_VALIDATOR_FLAVOR
@property
def implementation_class(self) -> Type["DeepchecksDataValidator"]:
"""Implementation class.
Returns:
The implementation class.
"""
from zenml.integrations.deepchecks.data_validators import (
DeepchecksDataValidator,
)
return DeepchecksDataValidator
implementation_class: Type[DeepchecksDataValidator]
property
readonly
Implementation class.
Returns:
Type | Description |
---|---|
Type[DeepchecksDataValidator] | The implementation class. |
name: str
property
readonly
Name of the flavor.
Returns:
Type | Description |
---|---|
str | The name of the flavor. |
data_validation(self, dataset, comparison_dataset=None, check_list=None, dataset_kwargs={}, check_kwargs={}, run_kwargs={}, **kwargs)
Run one or more Deepchecks data validation checks on a dataset.
Call this method to analyze and identify potential integrity problems
with a single dataset (e.g. missing values, conflicting labels, mixed
data types etc.) and dataset comparison checks (e.g. data drift
checks). Dataset comparison checks require that a second dataset be
supplied via the `comparison_dataset` argument.
The `check_list` argument may be used to specify a custom set of
Deepchecks data integrity checks to perform, identified by
`DeepchecksDataIntegrityCheck` and `DeepchecksDataDriftCheck` enum
values. If omitted:
* if the `comparison_dataset` is omitted, a suite with all available
data integrity checks will be performed on the input data. See
`DeepchecksDataIntegrityCheck` for a list of Deepchecks builtin
checks that are compatible with this method.
* if the `comparison_dataset` is supplied, a suite with all available
data drift checks will be performed on the input data. See
`DeepchecksDataDriftCheck` for a list of Deepchecks builtin checks
that are compatible with this method.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
dataset | Union[pandas.core.frame.DataFrame, torch.utils.data.dataloader.DataLoader[Any]] | Target dataset to be validated. | required |
comparison_dataset | Optional[Any] | Optional second dataset to be used for data comparison checks (e.g. data drift checks). | None |
check_list | Optional[Sequence[str]] | Optional list of ZenML Deepchecks check identifiers specifying the data validation checks to be performed. `DeepchecksDataIntegrityCheck` enum values should be used for single data validation checks and `DeepchecksDataDriftCheck` enum values for data comparison checks. If not supplied, the entire set of checks applicable to the input dataset(s) will be performed. | None |
dataset_kwargs | Dict[str, Any] | Additional keyword arguments to be passed to the Deepchecks `tabular.Dataset` or `vision.VisionData` constructor. | {} |
check_kwargs | Dict[str, Dict[str, Any]] | Additional keyword arguments to be passed to the Deepchecks check object constructors. Arguments are grouped for each check and indexed using the full check class name or check enum value as dictionary keys. | {} |
run_kwargs | Dict[str, Any] | Additional keyword arguments to be passed to the Deepchecks Suite `run` method. | {} |
kwargs | Any | Additional keyword arguments (unused). | {} |
Returns:
Type | Description |
---|---|
SuiteResult | A Deepchecks SuiteResult with the results of the validation. |
Source code in zenml/integrations/deepchecks/data_validators/deepchecks_data_validator.py
def data_validation(
self,
dataset: Union[pd.DataFrame, DataLoader[Any]],
comparison_dataset: Optional[Any] = None,
check_list: Optional[Sequence[str]] = None,
dataset_kwargs: Dict[str, Any] = {},
check_kwargs: Dict[str, Dict[str, Any]] = {},
run_kwargs: Dict[str, Any] = {},
**kwargs: Any,
) -> SuiteResult:
"""Run one or more Deepchecks data validation checks on a dataset.
Call this method to analyze and identify potential integrity problems
with a single dataset (e.g. missing values, conflicting labels, mixed
data types etc.) and dataset comparison checks (e.g. data drift
checks). Dataset comparison checks require that a second dataset be
supplied via the `comparison_dataset` argument.
The `check_list` argument may be used to specify a custom set of
Deepchecks data integrity checks to perform, identified by
`DeepchecksDataIntegrityCheck` and `DeepchecksDataDriftCheck` enum
values. If omitted:
* if the `comparison_dataset` is omitted, a suite with all available
data integrity checks will be performed on the input data. See
`DeepchecksDataIntegrityCheck` for a list of Deepchecks builtin
checks that are compatible with this method.
* if the `comparison_dataset` is supplied, a suite with all
available data drift checks will be performed on the input
data. See `DeepchecksDataDriftCheck` for a list of Deepchecks
builtin checks that are compatible with this method.
Args:
dataset: Target dataset to be validated.
comparison_dataset: Optional second dataset to be used for data
comparison checks (e.g data drift checks).
check_list: Optional list of ZenML Deepchecks check identifiers
specifying the data validation checks to be performed.
`DeepchecksDataIntegrityCheck` enum values should be used for
single data validation checks and `DeepchecksDataDriftCheck`
enum values for data comparison checks. If not supplied, the
entire set of checks applicable to the input dataset(s)
will be performed.
dataset_kwargs: Additional keyword arguments to be passed to the
Deepchecks `tabular.Dataset` or `vision.VisionData` constructor.
check_kwargs: Additional keyword arguments to be passed to the
Deepchecks check object constructors. Arguments are grouped for
each check and indexed using the full check class name or
check enum value as dictionary keys.
run_kwargs: Additional keyword arguments to be passed to the
Deepchecks Suite `run` method.
kwargs: Additional keyword arguments (unused).
Returns:
A Deepchecks SuiteResult with the results of the validation.
"""
check_enum: Type[DeepchecksValidationCheck]
if comparison_dataset is None:
check_enum = DeepchecksDataIntegrityCheck
else:
check_enum = DeepchecksDataDriftCheck
return self._create_and_run_check_suite(
check_enum=check_enum,
reference_dataset=dataset,
comparison_dataset=comparison_dataset,
check_list=check_list,
dataset_kwargs=dataset_kwargs,
check_kwargs=check_kwargs,
run_kwargs=run_kwargs,
)
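Beyond the standard steps, `data_validation` can also be called directly with a custom check list and grouped check arguments. A hedged sketch: the enum members and the `condition_ratio_less_or_equal` condition name are illustrative and depend on the installed Deepchecks version; see `DeepchecksDataIntegrityCheck` for the available identifiers:

```python
import pandas as pd

from zenml.integrations.deepchecks.data_validators import (
    DeepchecksDataValidator,
)
from zenml.integrations.deepchecks.validation_checks import (
    DeepchecksDataIntegrityCheck,
)

df = pd.read_csv("data.csv")  # hypothetical input

data_validator = DeepchecksDataValidator.get_active_data_validator()
suite_result = data_validator.data_validation(
    dataset=df,
    # run only two of the built-in data integrity checks
    check_list=[
        DeepchecksDataIntegrityCheck.TABULAR_MIXED_NULLS,
        DeepchecksDataIntegrityCheck.TABULAR_DATA_DUPLICATES,
    ],
    # kwargs are grouped per check; `condition_*` entries are turned into
    # `add_condition_*` calls on the corresponding check instance
    check_kwargs={
        DeepchecksDataIntegrityCheck.TABULAR_DATA_DUPLICATES: {
            "condition_ratio_less_or_equal": {"max_ratio": 0.1},
        },
    },
)
```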
model_validation(self, dataset, model, comparison_dataset=None, check_list=None, dataset_kwargs={}, check_kwargs={}, run_kwargs={}, **kwargs)
Run one or more Deepchecks model validation checks.
Call this method to perform model validation checks (e.g. confusion matrix validation, performance reports, model error analyses, etc). A second dataset is required for model performance comparison tests (i.e. tests that identify changes in a model behavior by comparing how it performs on two different datasets).
The `check_list` argument may be used to specify a custom set of
Deepchecks model validation checks to perform, identified by
`DeepchecksModelValidationCheck` and `DeepchecksModelDriftCheck` enum
values. If omitted:
* if the `comparison_dataset` is omitted, a suite with all available
model validation checks will be performed on the input data. See
`DeepchecksModelValidationCheck` for a list of Deepchecks builtin
checks that are compatible with this method.
* if the `comparison_dataset` is supplied, a suite with all available
model comparison checks will be performed on the input data. See
`DeepchecksModelDriftCheck` for a list of Deepchecks builtin checks
that are compatible with this method.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
dataset | Union[pandas.core.frame.DataFrame, torch.utils.data.dataloader.DataLoader[Any]] | Target dataset to be validated. | required |
model | Union[sklearn.base.ClassifierMixin, torch.nn.modules.module.Module] | Target model to be validated. | required |
comparison_dataset | Optional[Any] | Optional second dataset to be used for model comparison checks. | None |
check_list | Optional[Sequence[str]] | Optional list of ZenML Deepchecks check identifiers specifying the model validation checks to be performed. `DeepchecksModelValidationCheck` enum values should be used for model validation checks and `DeepchecksModelDriftCheck` enum values for model comparison checks. If not supplied, the entire set of checks applicable to the input dataset(s) will be performed. | None |
dataset_kwargs | Dict[str, Any] | Additional keyword arguments to be passed to the Deepchecks `tabular.Dataset` or `vision.VisionData` constructor. | {} |
check_kwargs | Dict[str, Dict[str, Any]] | Additional keyword arguments to be passed to the Deepchecks check object constructors. Arguments are grouped for each check and indexed using the full check class name or check enum value as dictionary keys. | {} |
run_kwargs | Dict[str, Any] | Additional keyword arguments to be passed to the Deepchecks Suite `run` method. | {} |
kwargs | Any | Additional keyword arguments (unused). | {} |
Returns:
Type | Description |
---|---|
Any | A Deepchecks SuiteResult with the results of the validation. |
Source code in zenml/integrations/deepchecks/data_validators/deepchecks_data_validator.py
def model_validation(
self,
dataset: Union[pd.DataFrame, DataLoader[Any]],
model: Union[ClassifierMixin, Module],
comparison_dataset: Optional[Any] = None,
check_list: Optional[Sequence[str]] = None,
dataset_kwargs: Dict[str, Any] = {},
check_kwargs: Dict[str, Dict[str, Any]] = {},
run_kwargs: Dict[str, Any] = {},
**kwargs: Any,
) -> Any:
"""Run one or more Deepchecks model validation checks.
Call this method to perform model validation checks (e.g. confusion
matrix validation, performance reports, model error analyses, etc).
A second dataset is required for model performance comparison tests
(i.e. tests that identify changes in a model behavior by comparing how
it performs on two different datasets).
The `check_list` argument may be used to specify a custom set of
Deepchecks model validation checks to perform, identified by
`DeepchecksModelValidationCheck` and `DeepchecksModelDriftCheck` enum
values. If omitted:
* if the `comparison_dataset` is omitted, a suite with all available
model validation checks will be performed on the input data. See
`DeepchecksModelValidationCheck` for a list of Deepchecks builtin
checks that are compatible with this method.
* if the `comparison_dataset` is supplied, a suite with all
available model comparison checks will be performed on the input
          data. See `DeepchecksModelDriftCheck` for a list of Deepchecks
builtin checks that are compatible with this method.
Args:
dataset: Target dataset to be validated.
model: Target model to be validated.
comparison_dataset: Optional second dataset to be used for model
comparison checks.
check_list: Optional list of ZenML Deepchecks check identifiers
specifying the model validation checks to be performed.
`DeepchecksModelValidationCheck` enum values should be used for
model validation checks and `DeepchecksModelDriftCheck` enum
values for model comparison checks. If not supplied, the
entire set of checks applicable to the input dataset(s)
will be performed.
dataset_kwargs: Additional keyword arguments to be passed to the
Deepchecks tabular.Dataset or vision.VisionData constructor.
check_kwargs: Additional keyword arguments to be passed to the
Deepchecks check object constructors. Arguments are grouped for
each check and indexed using the full check class name or
check enum value as dictionary keys.
run_kwargs: Additional keyword arguments to be passed to the
Deepchecks Suite `run` method.
kwargs: Additional keyword arguments (unused).
Returns:
A Deepchecks SuiteResult with the results of the validation.
"""
check_enum: Type[DeepchecksValidationCheck]
if comparison_dataset is None:
check_enum = DeepchecksModelValidationCheck
else:
check_enum = DeepchecksModelDriftCheck
return self._create_and_run_check_suite(
check_enum=check_enum,
reference_dataset=dataset,
comparison_dataset=comparison_dataset,
model=model,
check_list=check_list,
dataset_kwargs=dataset_kwargs,
check_kwargs=check_kwargs,
run_kwargs=run_kwargs,
)
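`model_validation` follows the same pattern, with an additional model argument. A hedged sketch for the tabular case; the file names, the `label` column and the train/test split are illustrative:

```python
import pandas as pd
from sklearn.ensemble import RandomForestClassifier

from zenml.integrations.deepchecks.data_validators import (
    DeepchecksDataValidator,
)

train_df = pd.read_csv("train.csv")  # hypothetical reference split
test_df = pd.read_csv("test.csv")  # hypothetical comparison split

model = RandomForestClassifier().fit(
    train_df.drop(columns=["label"]), train_df["label"]
)

data_validator = DeepchecksDataValidator.get_active_data_validator()
# with a comparison dataset, the model comparison (drift) suite is selected
suite_result = data_validator.model_validation(
    dataset=train_df,
    comparison_dataset=test_df,
    model=model,
    # tell Deepchecks which column holds the labels
    dataset_kwargs={"label": "label", "cat_features": []},
)
```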
flavors
special
Deepchecks integration flavors.
deepchecks_data_validator_flavor
Deepchecks data validator flavor.
DeepchecksDataValidatorFlavor (BaseDataValidatorFlavor)
Flavor of the Deepchecks data validator.
Source code in zenml/integrations/deepchecks/flavors/deepchecks_data_validator_flavor.py
class DeepchecksDataValidatorFlavor(BaseDataValidatorFlavor):
"""Flavor of the Deepchecks data validator."""
@property
def name(self) -> str:
"""Name of the flavor.
Returns:
The name of the flavor.
"""
return DEEPCHECKS_DATA_VALIDATOR_FLAVOR
@property
def implementation_class(self) -> Type["DeepchecksDataValidator"]:
"""Implementation class.
Returns:
The implementation class.
"""
from zenml.integrations.deepchecks.data_validators import (
DeepchecksDataValidator,
)
return DeepchecksDataValidator
implementation_class: Type[DeepchecksDataValidator]
property
readonly
Implementation class.
Returns:
Type | Description |
---|---|
Type[DeepchecksDataValidator] | The implementation class. |
name: str
property
readonly
Name of the flavor.
Returns:
Type | Description |
---|---|
str | The name of the flavor. |
materializers
special
Deepchecks materializers.
deepchecks_dataset_materializer
Implementation of Deepchecks dataset materializer.
DeepchecksDatasetMaterializer (BaseMaterializer)
Materializer to read and write Deepchecks Dataset objects.
Source code in zenml/integrations/deepchecks/materializers/deepchecks_dataset_materializer.py
class DeepchecksDatasetMaterializer(BaseMaterializer):
"""Materializer to read data to and from Deepchecks dataset."""
ASSOCIATED_TYPES = (Dataset,)
ASSOCIATED_ARTIFACT_TYPES = (DataArtifact,)
def handle_input(self, data_type: Type[Any]) -> Dataset:
"""Reads pandas dataframes and creates deepchecks.Dataset from it.
Args:
data_type: The type of the data to read.
Returns:
A Deepchecks Dataset.
"""
super().handle_input(data_type)
# Outsource to pandas
pandas_materializer = PandasMaterializer(self.artifact)
df = pandas_materializer.handle_input(data_type)
# Recreate from pandas dataframe
return Dataset(df)
def handle_return(self, df: Dataset) -> None:
"""Serializes pandas dataframe within a Dataset object.
Args:
df: A deepchecks.Dataset object.
"""
super().handle_return(df)
# Outsource to pandas
pandas_materializer = PandasMaterializer(self.artifact)
pandas_materializer.handle_return(df.data)
handle_input(self, data_type)
Reads a pandas dataframe and creates a deepchecks.Dataset from it.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
data_type | Type[Any] | The type of the data to read. | required |
Returns:
Type | Description |
---|---|
Dataset | A Deepchecks Dataset. |
Source code in zenml/integrations/deepchecks/materializers/deepchecks_dataset_materializer.py
def handle_input(self, data_type: Type[Any]) -> Dataset:
"""Reads pandas dataframes and creates deepchecks.Dataset from it.
Args:
data_type: The type of the data to read.
Returns:
A Deepchecks Dataset.
"""
super().handle_input(data_type)
# Outsource to pandas
pandas_materializer = PandasMaterializer(self.artifact)
df = pandas_materializer.handle_input(data_type)
# Recreate from pandas dataframe
return Dataset(df)
handle_return(self, df)
Serializes the pandas dataframe wrapped by a Dataset object.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
df | Dataset | A deepchecks.Dataset object. | required |
Source code in zenml/integrations/deepchecks/materializers/deepchecks_dataset_materializer.py
def handle_return(self, df: Dataset) -> None:
"""Serializes pandas dataframe within a Dataset object.
Args:
df: A deepchecks.Dataset object.
"""
super().handle_return(df)
# Outsource to pandas
pandas_materializer = PandasMaterializer(self.artifact)
pandas_materializer.handle_return(df.data)
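With the integration activated, this materializer lets a step return a `deepchecks.tabular.Dataset` directly. A minimal sketch with toy data; the step name and column names are illustrative:

```python
import pandas as pd
from deepchecks.tabular import Dataset

from zenml.steps import step


@step
def to_deepchecks_dataset() -> Dataset:
    """Hypothetical step wrapping a toy DataFrame into a Deepchecks Dataset."""
    df = pd.DataFrame({"feature": [1, 2, 3], "label": [0, 1, 0]})
    return Dataset(df, label="label", cat_features=[])
```

Note that only the wrapped DataFrame (`df.data`) is persisted, so `Dataset` metadata such as the label column is not restored when the artifact is read back.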
deepchecks_results_materializer
Implementation of Deepchecks suite results materializer.
DeepchecksResultMaterializer (BaseMaterializer)
Materializer to read and write CheckResult and SuiteResult objects.
Source code in zenml/integrations/deepchecks/materializers/deepchecks_results_materializer.py
class DeepchecksResultMaterializer(BaseMaterializer):
"""Materializer to read data to and from CheckResult and SuiteResult objects."""
ASSOCIATED_TYPES = (
CheckResult,
SuiteResult,
)
ASSOCIATED_ARTIFACT_TYPES = (DataAnalysisArtifact,)
def handle_input(
self, data_type: Type[Any]
) -> Union[CheckResult, SuiteResult]:
"""Reads a Deepchecks check or suite result from a serialized JSON file.
Args:
data_type: The type of the data to read.
Returns:
A Deepchecks CheckResult or SuiteResult.
Raises:
RuntimeError: if the input data type is not supported.
"""
super().handle_input(data_type)
filepath = os.path.join(self.artifact.uri, RESULTS_FILENAME)
json_res = io_utils.read_file_contents_as_string(filepath)
if data_type == SuiteResult:
res = SuiteResult.from_json(json_res)
elif data_type == CheckResult:
res = CheckResult.from_json(json_res)
else:
raise RuntimeError(f"Unknown data type: {data_type}")
return res
def handle_return(self, result: Union[CheckResult, SuiteResult]) -> None:
"""Creates a JSON serialization for a CheckResult or SuiteResult.
Args:
result: A Deepchecks CheckResult or SuiteResult.
"""
super().handle_return(result)
filepath = os.path.join(self.artifact.uri, RESULTS_FILENAME)
serialized_json = result.to_json(True)
io_utils.write_file_contents_as_string(filepath, serialized_json)
handle_input(self, data_type)
Reads a Deepchecks check or suite result from a serialized JSON file.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
data_type | Type[Any] | The type of the data to read. | required |
Returns:
Type | Description |
---|---|
Union[deepchecks.core.check_result.CheckResult, deepchecks.core.suite.SuiteResult] | A Deepchecks CheckResult or SuiteResult. |
Exceptions:
Type | Description |
---|---|
RuntimeError | If the input data type is not supported. |
Source code in zenml/integrations/deepchecks/materializers/deepchecks_results_materializer.py
def handle_input(
self, data_type: Type[Any]
) -> Union[CheckResult, SuiteResult]:
"""Reads a Deepchecks check or suite result from a serialized JSON file.
Args:
data_type: The type of the data to read.
Returns:
A Deepchecks CheckResult or SuiteResult.
Raises:
RuntimeError: if the input data type is not supported.
"""
super().handle_input(data_type)
filepath = os.path.join(self.artifact.uri, RESULTS_FILENAME)
json_res = io_utils.read_file_contents_as_string(filepath)
if data_type == SuiteResult:
res = SuiteResult.from_json(json_res)
elif data_type == CheckResult:
res = CheckResult.from_json(json_res)
else:
raise RuntimeError(f"Unknown data type: {data_type}")
return res
handle_return(self, result)
Creates a JSON serialization for a CheckResult or SuiteResult.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
result | Union[deepchecks.core.check_result.CheckResult, deepchecks.core.suite.SuiteResult] | A Deepchecks CheckResult or SuiteResult. | required |
Source code in zenml/integrations/deepchecks/materializers/deepchecks_results_materializer.py
def handle_return(self, result: Union[CheckResult, SuiteResult]) -> None:
"""Creates a JSON serialization for a CheckResult or SuiteResult.
Args:
result: A Deepchecks CheckResult or SuiteResult.
"""
super().handle_return(result)
filepath = os.path.join(self.artifact.uri, RESULTS_FILENAME)
serialized_json = result.to_json(True)
io_utils.write_file_contents_as_string(filepath, serialized_json)
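The JSON round trip this materializer performs can be reproduced in isolation. A hedged sketch that runs a single check on toy data to obtain a result object; the `DataDuplicates` check and the toy columns are illustrative:

```python
import pandas as pd
from deepchecks.core.check_result import CheckResult
from deepchecks.tabular import Dataset
from deepchecks.tabular.checks import DataDuplicates

# run one check to obtain a result object
ds = Dataset(
    pd.DataFrame({"a": [1, 1, 2], "b": [3, 3, 4]}), cat_features=[]
)
result = DataDuplicates().run(ds)

# the materializer serializes results to a JSON string...
serialized = result.to_json(True)
# ...and restores them with the matching `from_json` constructor
restored = CheckResult.from_json(serialized)
```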
steps
special
Initialization of the Deepchecks Standard Steps.
deepchecks_data_drift
Implementation of the Deepchecks data drift validation step.
DeepchecksDataDriftCheckStep (BaseStep)
Deepchecks data drift validator step.
Source code in zenml/integrations/deepchecks/steps/deepchecks_data_drift.py
class DeepchecksDataDriftCheckStep(BaseStep):
"""Deepchecks data drift validator step."""
def entrypoint( # type: ignore[override]
self,
reference_dataset: pd.DataFrame,
target_dataset: pd.DataFrame,
params: DeepchecksDataDriftCheckStepParameters,
) -> SuiteResult:
"""Main entrypoint for the Deepchecks data drift validator step.
Args:
reference_dataset: Reference dataset for the data drift check.
target_dataset: Target dataset to be used for the data drift check.
params: The parameters for the step
Returns:
A Deepchecks suite result with the validation results.
"""
data_validator = cast(
DeepchecksDataValidator,
DeepchecksDataValidator.get_active_data_validator(),
)
return data_validator.data_validation(
dataset=reference_dataset,
comparison_dataset=target_dataset,
check_list=cast(Optional[Sequence[str]], params.check_list),
dataset_kwargs=params.dataset_kwargs,
check_kwargs=params.check_kwargs,
run_kwargs=params.run_kwargs,
)
PARAMETERS_CLASS (BaseParameters)
pydantic-model
Parameter class for the Deepchecks data drift validator step.
Attributes:
Name | Type | Description |
---|---|---|
check_list | Optional[Sequence[zenml.integrations.deepchecks.validation_checks.DeepchecksDataDriftCheck]] | Optional list of DeepchecksDataDriftCheck identifiers specifying the subset of Deepchecks data drift checks to be performed. If not supplied, the entire set of data drift checks will be performed. |
dataset_kwargs | Dict[str, Any] | Additional keyword arguments to be passed to the Deepchecks `tabular.Dataset` or `vision.VisionData` constructor. |
check_kwargs | Dict[str, Dict[str, Any]] | Additional keyword arguments to be passed to the Deepchecks check object constructors. Arguments are grouped for each check and indexed using the full check class name or check enum value as dictionary keys. |
run_kwargs | Dict[str, Any] | Additional keyword arguments to be passed to the Deepchecks Suite `run` method. |
Source code in zenml/integrations/deepchecks/steps/deepchecks_data_drift.py
class DeepchecksDataDriftCheckStepParameters(BaseParameters):
"""Parameter class for the Deepchecks data drift validator step.
Attributes:
check_list: Optional list of DeepchecksDataDriftCheck identifiers
specifying the subset of Deepchecks data drift checks to be
performed. If not supplied, the entire set of data drift checks will
be performed.
dataset_kwargs: Additional keyword arguments to be passed to the
Deepchecks `tabular.Dataset` or `vision.VisionData` constructor.
check_kwargs: Additional keyword arguments to be passed to the
Deepchecks check object constructors. Arguments are grouped for
each check and indexed using the full check class name or
check enum value as dictionary keys.
run_kwargs: Additional keyword arguments to be passed to the
Deepchecks Suite `run` method.
"""
check_list: Optional[Sequence[DeepchecksDataDriftCheck]] = None
dataset_kwargs: Dict[str, Any] = Field(default_factory=dict)
check_kwargs: Dict[str, Dict[str, Any]] = Field(default_factory=dict)
run_kwargs: Dict[str, Any] = Field(default_factory=dict)
entrypoint(self, reference_dataset, target_dataset, params)
Main entrypoint for the Deepchecks data drift validator step.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
reference_dataset | DataFrame | Reference dataset for the data drift check. | required |
target_dataset | DataFrame | Target dataset to be used for the data drift check. | required |
params | DeepchecksDataDriftCheckStepParameters | The parameters for the step. | required |
Returns:
Type | Description |
---|---|
SuiteResult | A Deepchecks suite result with the validation results. |
Source code in zenml/integrations/deepchecks/steps/deepchecks_data_drift.py
def entrypoint( # type: ignore[override]
self,
reference_dataset: pd.DataFrame,
target_dataset: pd.DataFrame,
params: DeepchecksDataDriftCheckStepParameters,
) -> SuiteResult:
"""Main entrypoint for the Deepchecks data drift validator step.
Args:
reference_dataset: Reference dataset for the data drift check.
target_dataset: Target dataset to be used for the data drift check.
params: The parameters for the step
Returns:
A Deepchecks suite result with the validation results.
"""
data_validator = cast(
DeepchecksDataValidator,
DeepchecksDataValidator.get_active_data_validator(),
)
return data_validator.data_validation(
dataset=reference_dataset,
comparison_dataset=target_dataset,
check_list=cast(Optional[Sequence[str]], params.check_list),
dataset_kwargs=params.dataset_kwargs,
check_kwargs=params.check_kwargs,
run_kwargs=params.run_kwargs,
)
DeepchecksDataDriftCheckStepParameters (BaseParameters)
pydantic-model
Parameter class for the Deepchecks data drift validator step.
Attributes:
Name | Type | Description |
---|---|---|
check_list | Optional[Sequence[zenml.integrations.deepchecks.validation_checks.DeepchecksDataDriftCheck]] | Optional list of DeepchecksDataDriftCheck identifiers specifying the subset of Deepchecks data drift checks to be performed. If not supplied, the entire set of data drift checks will be performed. |
dataset_kwargs | Dict[str, Any] | Additional keyword arguments to be passed to the Deepchecks `tabular.Dataset` or `vision.VisionData` constructor. |
check_kwargs | Dict[str, Dict[str, Any]] | Additional keyword arguments to be passed to the Deepchecks check object constructors. Arguments are grouped for each check and indexed using the full check class name or check enum value as dictionary keys. |
run_kwargs | Dict[str, Any] | Additional keyword arguments to be passed to the Deepchecks Suite `run` method. |
Source code in zenml/integrations/deepchecks/steps/deepchecks_data_drift.py
class DeepchecksDataDriftCheckStepParameters(BaseParameters):
"""Parameter class for the Deepchecks data drift validator step.
Attributes:
check_list: Optional list of DeepchecksDataDriftCheck identifiers
specifying the subset of Deepchecks data drift checks to be
performed. If not supplied, the entire set of data drift checks will
be performed.
dataset_kwargs: Additional keyword arguments to be passed to the
Deepchecks `tabular.Dataset` or `vision.VisionData` constructor.
check_kwargs: Additional keyword arguments to be passed to the
Deepchecks check object constructors. Arguments are grouped for
each check and indexed using the full check class name or
check enum value as dictionary keys.
run_kwargs: Additional keyword arguments to be passed to the
Deepchecks Suite `run` method.
"""
check_list: Optional[Sequence[DeepchecksDataDriftCheck]] = None
dataset_kwargs: Dict[str, Any] = Field(default_factory=dict)
check_kwargs: Dict[str, Dict[str, Any]] = Field(default_factory=dict)
run_kwargs: Dict[str, Any] = Field(default_factory=dict)
deepchecks_data_drift_check_step(step_name, params)
Shortcut function to create a new instance of the DeepchecksDataDriftCheckStep step.
The returned DeepchecksDataDriftCheckStep can be used in a pipeline to run data drift checks on two input pd.DataFrame datasets and return the results as a Deepchecks SuiteResult object.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
step_name | str | The name of the step. | required |
params | DeepchecksDataDriftCheckStepParameters | The parameters for the step. | required |
Returns:
Type | Description |
---|---|
BaseStep | A DeepchecksDataDriftCheckStep step instance. |
Source code in zenml/integrations/deepchecks/steps/deepchecks_data_drift.py
def deepchecks_data_drift_check_step(
step_name: str,
params: DeepchecksDataDriftCheckStepParameters,
) -> BaseStep:
"""Shortcut function to create a new instance of the DeepchecksDataDriftCheckStep step.
The returned DeepchecksDataDriftCheckStep can be used in a pipeline to
    run data drift checks on two input pd.DataFrame datasets and return the
as a Deepchecks SuiteResult object.
Args:
step_name: The name of the step
params: The parameters for the step
Returns:
a DeepchecksDataDriftCheckStep step instance
"""
return DeepchecksDataDriftCheckStep(name=step_name, params=params)
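A hedged sketch of this step wired into a pipeline, assuming two importer steps that produce the reference and target DataFrames; the step and file names are illustrative:

```python
import pandas as pd

from zenml.integrations.deepchecks.steps import (
    DeepchecksDataDriftCheckStepParameters,
    deepchecks_data_drift_check_step,
)
from zenml.pipelines import pipeline
from zenml.steps import step


@step
def reference_importer() -> pd.DataFrame:
    return pd.read_csv("reference.csv")  # hypothetical training-time data


@step
def target_importer() -> pd.DataFrame:
    return pd.read_csv("target.csv")  # hypothetical new data


@pipeline
def drift_detection_pipeline(reference_importer, target_importer, drift_checker):
    drift_checker(
        reference_dataset=reference_importer(),
        target_dataset=target_importer(),
    )


drift_detection_pipeline(
    reference_importer=reference_importer(),
    target_importer=target_importer(),
    drift_checker=deepchecks_data_drift_check_step(
        step_name="drift_checker",
        params=DeepchecksDataDriftCheckStepParameters(),
    ),
).run()
```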
deepchecks_data_integrity
Implementation of the Deepchecks data integrity validation step.
DeepchecksDataIntegrityCheckStep (BaseStep)
Deepchecks data integrity validator step.
Source code in zenml/integrations/deepchecks/steps/deepchecks_data_integrity.py
class DeepchecksDataIntegrityCheckStep(BaseStep):
"""Deepchecks data integrity validator step."""
def entrypoint( # type: ignore[override]
self,
dataset: pd.DataFrame,
params: DeepchecksDataIntegrityCheckStepParameters,
) -> SuiteResult:
"""Main entrypoint for the Deepchecks data integrity validator step.
Args:
dataset: a Pandas DataFrame to validate
params: The parameters for the step
Returns:
A Deepchecks suite result with the validation results.
"""
data_validator = cast(
DeepchecksDataValidator,
DeepchecksDataValidator.get_active_data_validator(),
)
return data_validator.data_validation(
dataset=dataset,
check_list=cast(Optional[Sequence[str]], params.check_list),
dataset_kwargs=params.dataset_kwargs,
check_kwargs=params.check_kwargs,
run_kwargs=params.run_kwargs,
)
PARAMETERS_CLASS (BaseParameters)
pydantic-model
Parameters class for the Deepchecks data integrity validator step.
Attributes:
Name | Type | Description |
---|---|---|
check_list | Optional[Sequence[zenml.integrations.deepchecks.validation_checks.DeepchecksDataIntegrityCheck]] | Optional list of DeepchecksDataIntegrityCheck identifiers specifying the subset of Deepchecks data integrity checks to be performed. If not supplied, the entire set of data integrity checks will be performed. |
dataset_kwargs | Dict[str, Any] | Additional keyword arguments to be passed to the Deepchecks `tabular.Dataset` or `vision.VisionData` constructor. |
check_kwargs | Dict[str, Dict[str, Any]] | Additional keyword arguments to be passed to the Deepchecks check object constructors. Arguments are grouped for each check and indexed using the full check class name or check enum value as dictionary keys. |
run_kwargs | Dict[str, Any] | Additional keyword arguments to be passed to the Deepchecks Suite `run` method. |
Source code in zenml/integrations/deepchecks/steps/deepchecks_data_integrity.py
class DeepchecksDataIntegrityCheckStepParameters(BaseParameters):
"""Parameters class for the Deepchecks data integrity validator step.
Attributes:
check_list: Optional list of DeepchecksDataIntegrityCheck identifiers
specifying the subset of Deepchecks data integrity checks to be
performed. If not supplied, the entire set of data integrity checks
will be performed.
dataset_kwargs: Additional keyword arguments to be passed to the
Deepchecks `tabular.Dataset` or `vision.VisionData` constructor.
check_kwargs: Additional keyword arguments to be passed to the
Deepchecks check object constructors. Arguments are grouped for
each check and indexed using the full check class name or
check enum value as dictionary keys.
run_kwargs: Additional keyword arguments to be passed to the
Deepchecks Suite `run` method.
"""
check_list: Optional[Sequence[DeepchecksDataIntegrityCheck]] = None
dataset_kwargs: Dict[str, Any] = Field(default_factory=dict)
check_kwargs: Dict[str, Dict[str, Any]] = Field(default_factory=dict)
run_kwargs: Dict[str, Any] = Field(default_factory=dict)
entrypoint(self, dataset, params)
Main entrypoint for the Deepchecks data integrity validator step.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
dataset | DataFrame | A Pandas DataFrame to validate. | required |
params | DeepchecksDataIntegrityCheckStepParameters | The parameters for the step. | required |
Returns:
Type | Description |
---|---|
SuiteResult | A Deepchecks suite result with the validation results. |
Source code in zenml/integrations/deepchecks/steps/deepchecks_data_integrity.py
def entrypoint( # type: ignore[override]
self,
dataset: pd.DataFrame,
params: DeepchecksDataIntegrityCheckStepParameters,
) -> SuiteResult:
"""Main entrypoint for the Deepchecks data integrity validator step.
Args:
dataset: a Pandas DataFrame to validate
params: The parameters for the step
Returns:
A Deepchecks suite result with the validation results.
"""
data_validator = cast(
DeepchecksDataValidator,
DeepchecksDataValidator.get_active_data_validator(),
)
return data_validator.data_validation(
dataset=dataset,
check_list=cast(Optional[Sequence[str]], params.check_list),
dataset_kwargs=params.dataset_kwargs,
check_kwargs=params.check_kwargs,
run_kwargs=params.run_kwargs,
)
DeepchecksDataIntegrityCheckStepParameters (BaseParameters)
pydantic-model
Parameters class for the Deepchecks data integrity validator step.
Attributes:
Name | Type | Description |
---|---|---|
check_list | Optional[Sequence[zenml.integrations.deepchecks.validation_checks.DeepchecksDataIntegrityCheck]] | Optional list of DeepchecksDataIntegrityCheck identifiers specifying the subset of Deepchecks data integrity checks to be performed. If not supplied, the entire set of data integrity checks will be performed. |
dataset_kwargs | Dict[str, Any] | Additional keyword arguments to be passed to the Deepchecks `tabular.Dataset` or `vision.VisionData` constructor. |
check_kwargs | Dict[str, Dict[str, Any]] | Additional keyword arguments to be passed to the Deepchecks check object constructors. Arguments are grouped for each check and indexed using the full check class name or check enum value as dictionary keys. |
run_kwargs | Dict[str, Any] | Additional keyword arguments to be passed to the Deepchecks Suite `run` method. |
Source code in zenml/integrations/deepchecks/steps/deepchecks_data_integrity.py
class DeepchecksDataIntegrityCheckStepParameters(BaseParameters):
"""Parameters class for the Deepchecks data integrity validator step.
Attributes:
check_list: Optional list of DeepchecksDataIntegrityCheck identifiers
specifying the subset of Deepchecks data integrity checks to be
performed. If not supplied, the entire set of data integrity checks
will be performed.
dataset_kwargs: Additional keyword arguments to be passed to the
Deepchecks `tabular.Dataset` or `vision.VisionData` constructor.
check_kwargs: Additional keyword arguments to be passed to the
Deepchecks check object constructors. Arguments are grouped for
each check and indexed using the full check class name or
check enum value as dictionary keys.
run_kwargs: Additional keyword arguments to be passed to the
Deepchecks Suite `run` method.
"""
check_list: Optional[Sequence[DeepchecksDataIntegrityCheck]] = None
dataset_kwargs: Dict[str, Any] = Field(default_factory=dict)
check_kwargs: Dict[str, Dict[str, Any]] = Field(default_factory=dict)
run_kwargs: Dict[str, Any] = Field(default_factory=dict)
deepchecks_data_integrity_check_step(step_name, params)
Shortcut function to create a new instance of the DeepchecksDataIntegrityCheckStep step.
The returned DeepchecksDataIntegrityCheckStep can be used in a pipeline to run data integrity checks on an input pd.DataFrame and return the results as a Deepchecks SuiteResult object.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
step_name | str | The name of the step. | required |
params | DeepchecksDataIntegrityCheckStepParameters | The parameters for the step. | required |
Returns:
Type | Description |
---|---|
BaseStep | A DeepchecksDataIntegrityCheckStep step instance. |
Source code in zenml/integrations/deepchecks/steps/deepchecks_data_integrity.py
def deepchecks_data_integrity_check_step(
step_name: str,
params: DeepchecksDataIntegrityCheckStepParameters,
) -> BaseStep:
"""Shortcut function to create a new instance of the DeepchecksDataIntegrityCheckStep step.
The returned DeepchecksDataIntegrityCheckStep can be used in a pipeline to
run data integrity checks on an input pd.DataFrame and return the results
as a Deepchecks SuiteResult object.
Args:
step_name: The name of the step
params: The parameters for the step
Returns:
a DeepchecksDataIntegrityCheckStep step instance
"""
return DeepchecksDataIntegrityCheckStep(name=step_name, params=params)
deepchecks_model_drift
Implementation of the Deepchecks model drift validation step.
DeepchecksModelDriftCheckStep (BaseStep)
Deepchecks model drift step.
Source code in zenml/integrations/deepchecks/steps/deepchecks_model_drift.py
class DeepchecksModelDriftCheckStep(BaseStep):
"""Deepchecks model drift step."""
def entrypoint( # type: ignore[override]
self,
reference_dataset: pd.DataFrame,
target_dataset: pd.DataFrame,
model: ClassifierMixin,
params: DeepchecksModelDriftCheckStepParameters,
) -> SuiteResult:
"""Main entrypoint for the Deepchecks model drift step.
Args:
reference_dataset: Reference dataset for the model drift check.
target_dataset: Target dataset to be used for the model drift check.
model: a scikit-learn model to validate
params: the parameters for the step
Returns:
A Deepchecks suite result with the validation results.
"""
data_validator = cast(
DeepchecksDataValidator,
DeepchecksDataValidator.get_active_data_validator(),
)
return data_validator.model_validation(
dataset=reference_dataset,
comparison_dataset=target_dataset,
model=model,
check_list=cast(Optional[Sequence[str]], params.check_list),
dataset_kwargs=params.dataset_kwargs,
check_kwargs=params.check_kwargs,
run_kwargs=params.run_kwargs,
)
PARAMETERS_CLASS (BaseParameters)
pydantic-model
Parameters class for the Deepchecks model drift validator step.
Attributes:
Name | Type | Description |
---|---|---|
check_list | Optional[Sequence[zenml.integrations.deepchecks.validation_checks.DeepchecksModelDriftCheck]] | Optional list of DeepchecksModelDriftCheck identifiers specifying the subset of Deepchecks model drift checks to be performed. If not supplied, the entire set of model drift checks will be performed. |
dataset_kwargs | Dict[str, Any] | Additional keyword arguments to be passed to the Deepchecks `tabular.Dataset` or `vision.VisionData` constructor. |
check_kwargs | Dict[str, Dict[str, Any]] | Additional keyword arguments to be passed to the Deepchecks check object constructors. Arguments are grouped for each check and indexed using the full check class name or check enum value as dictionary keys. |
run_kwargs | Dict[str, Any] | Additional keyword arguments to be passed to the Deepchecks Suite `run` method. |
Source code in zenml/integrations/deepchecks/steps/deepchecks_model_drift.py
class DeepchecksModelDriftCheckStepParameters(BaseParameters):
"""Parameters class for the Deepchecks model drift validator step.
Attributes:
check_list: Optional list of DeepchecksModelDriftCheck identifiers
specifying the subset of Deepchecks model drift checks to be
performed. If not supplied, the entire set of model drift checks
will be performed.
dataset_kwargs: Additional keyword arguments to be passed to the
Deepchecks `tabular.Dataset` or `vision.VisionData` constructor.
check_kwargs: Additional keyword arguments to be passed to the
Deepchecks check object constructors. Arguments are grouped for
each check and indexed using the full check class name or
check enum value as dictionary keys.
run_kwargs: Additional keyword arguments to be passed to the
Deepchecks Suite `run` method.
"""
check_list: Optional[Sequence[DeepchecksModelDriftCheck]] = None
dataset_kwargs: Dict[str, Any] = Field(default_factory=dict)
check_kwargs: Dict[str, Dict[str, Any]] = Field(default_factory=dict)
run_kwargs: Dict[str, Any] = Field(default_factory=dict)
entrypoint(self, reference_dataset, target_dataset, model, params)
Main entrypoint for the Deepchecks model drift step.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
reference_dataset | DataFrame | Reference dataset for the model drift check. | required |
target_dataset | DataFrame | Target dataset to be used for the model drift check. | required |
model | ClassifierMixin | A scikit-learn model to validate. | required |
params | DeepchecksModelDriftCheckStepParameters | The parameters for the step. | required |
Returns:
Type | Description |
---|---|
SuiteResult | A Deepchecks suite result with the validation results. |
Source code in zenml/integrations/deepchecks/steps/deepchecks_model_drift.py
def entrypoint( # type: ignore[override]
self,
reference_dataset: pd.DataFrame,
target_dataset: pd.DataFrame,
model: ClassifierMixin,
params: DeepchecksModelDriftCheckStepParameters,
) -> SuiteResult:
"""Main entrypoint for the Deepchecks model drift step.
Args:
reference_dataset: Reference dataset for the model drift check.
target_dataset: Target dataset to be used for the model drift check.
model: a scikit-learn model to validate
params: the parameters for the step
Returns:
A Deepchecks suite result with the validation results.
"""
data_validator = cast(
DeepchecksDataValidator,
DeepchecksDataValidator.get_active_data_validator(),
)
return data_validator.model_validation(
dataset=reference_dataset,
comparison_dataset=target_dataset,
model=model,
check_list=cast(Optional[Sequence[str]], params.check_list),
dataset_kwargs=params.dataset_kwargs,
check_kwargs=params.check_kwargs,
run_kwargs=params.run_kwargs,
)
DeepchecksModelDriftCheckStepParameters (BaseParameters)
pydantic-model
Parameters class for the Deepchecks model drift validator step.
Attributes:
Name | Type | Description |
---|---|---|
check_list | Optional[Sequence[zenml.integrations.deepchecks.validation_checks.DeepchecksModelDriftCheck]] | Optional list of DeepchecksModelDriftCheck identifiers specifying the subset of Deepchecks model drift checks to be performed. If not supplied, the entire set of model drift checks will be performed. |
dataset_kwargs | Dict[str, Any] | Additional keyword arguments to be passed to the Deepchecks `tabular.Dataset` or `vision.VisionData` constructor. |
check_kwargs | Dict[str, Dict[str, Any]] | Additional keyword arguments to be passed to the Deepchecks check object constructors. Arguments are grouped for each check and indexed using the full check class name or check enum value as dictionary keys. |
run_kwargs | Dict[str, Any] | Additional keyword arguments to be passed to the Deepchecks Suite `run` method. |
Source code in zenml/integrations/deepchecks/steps/deepchecks_model_drift.py
class DeepchecksModelDriftCheckStepParameters(BaseParameters):
"""Parameters class for the Deepchecks model drift validator step.
Attributes:
check_list: Optional list of DeepchecksModelDriftCheck identifiers
specifying the subset of Deepchecks model drift checks to be
performed. If not supplied, the entire set of model drift checks
will be performed.
dataset_kwargs: Additional keyword arguments to be passed to the
Deepchecks `tabular.Dataset` or `vision.VisionData` constructor.
check_kwargs: Additional keyword arguments to be passed to the
Deepchecks check object constructors. Arguments are grouped for
each check and indexed using the full check class name or
check enum value as dictionary keys.
run_kwargs: Additional keyword arguments to be passed to the
Deepchecks Suite `run` method.
"""
check_list: Optional[Sequence[DeepchecksModelDriftCheck]] = None
dataset_kwargs: Dict[str, Any] = Field(default_factory=dict)
check_kwargs: Dict[str, Dict[str, Any]] = Field(default_factory=dict)
run_kwargs: Dict[str, Any] = Field(default_factory=dict)
deepchecks_model_drift_check_step(step_name, params)
Shortcut function to create a new instance of the DeepchecksModelDriftCheckStep step.
The returned DeepchecksModelDriftCheckStep can be used in a pipeline to run model drift checks on two input pd.DataFrame datasets and an input scikit-learn ClassifierMixin model and return the results as a Deepchecks SuiteResult object.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
step_name | str | The name of the step | required |
params | DeepchecksModelDriftCheckStepParameters | The parameters for the step | required |
Returns:
Type | Description |
---|---|
BaseStep | a DeepchecksModelDriftCheckStep step instance |
Source code in zenml/integrations/deepchecks/steps/deepchecks_model_drift.py
def deepchecks_model_drift_check_step(
step_name: str,
params: DeepchecksModelDriftCheckStepParameters,
) -> BaseStep:
"""Shortcut function to create a new instance of the DeepchecksModelDriftCheckStep step.
The returned DeepchecksModelDriftCheckStep can be used in a pipeline to
run model drift checks on two input pd.DataFrame datasets and an input
scikit-learn ClassifierMixin model and return the results as a Deepchecks
SuiteResult object.
Args:
step_name: The name of the step
params: The parameters for the step
Returns:
a DeepchecksModelDriftCheckStep step instance
"""
return DeepchecksModelDriftCheckStep(name=step_name, params=params)
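Example: a minimal pipeline sketch wiring the step between upstream producers (the `@pipeline` decorator is the classic ZenML pipeline API; `importer` and `trainer` are hypothetical steps standing in for whatever produces the two datasets and the model):
from zenml.integrations.deepchecks.steps.deepchecks_model_drift import (
    DeepchecksModelDriftCheckStepParameters,
    deepchecks_model_drift_check_step,
)
from zenml.pipelines import pipeline
# With default parameters the full set of model drift checks is run.
drift_check = deepchecks_model_drift_check_step(
    step_name="model_drift_check",
    params=DeepchecksModelDriftCheckStepParameters(),
)
@pipeline
def model_drift_pipeline(importer, trainer, drift_check):
    reference_dataset, target_dataset = importer()
    model = trainer(reference_dataset)
    drift_check(
        reference_dataset=reference_dataset,
        target_dataset=target_dataset,
        model=model,
    )
# Assemble and run with concrete step instances, e.g.:
# model_drift_pipeline(importer=..., trainer=..., drift_check=drift_check).run()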
deepchecks_model_validation
Implementation of the Deepchecks model validation step.
DeepchecksModelValidationCheckStep (BaseStep)
Deepchecks model validation step.
Source code in zenml/integrations/deepchecks/steps/deepchecks_model_validation.py
class DeepchecksModelValidationCheckStep(BaseStep):
"""Deepchecks model validation step."""
def entrypoint( # type: ignore[override]
self,
dataset: pd.DataFrame,
model: ClassifierMixin,
params: DeepchecksModelValidationCheckStepParameters,
) -> SuiteResult:
"""Main entrypoint for the Deepchecks model validation step.
Args:
dataset: a Pandas DataFrame to use for the validation
model: a scikit-learn model to validate
params: the parameters for the step
Returns:
A Deepchecks suite result with the validation results.
"""
data_validator = cast(
DeepchecksDataValidator,
DeepchecksDataValidator.get_active_data_validator(),
)
return data_validator.model_validation(
dataset=dataset,
model=model,
check_list=cast(Optional[Sequence[str]], params.check_list),
dataset_kwargs=params.dataset_kwargs,
check_kwargs=params.check_kwargs,
run_kwargs=params.run_kwargs,
)
PARAMETERS_CLASS (BaseParameters)
pydantic-model
Parameters class for the Deepchecks model validation validator step.
Attributes:
Name | Type | Description |
---|---|---|
check_list | Optional[Sequence[zenml.integrations.deepchecks.validation_checks.DeepchecksModelValidationCheck]] | Optional list of DeepchecksModelValidationCheck identifiers specifying the subset of Deepchecks model validation checks to be performed. If not supplied, the entire set of model validation checks will be performed. |
dataset_kwargs | Dict[str, Any] | Additional keyword arguments to be passed to the Deepchecks `tabular.Dataset` or `vision.VisionData` constructor. |
check_kwargs | Dict[str, Dict[str, Any]] | Additional keyword arguments to be passed to the Deepchecks check object constructors. Arguments are grouped for each check and indexed using the full check class name or check enum value as dictionary keys. |
run_kwargs | Dict[str, Any] | Additional keyword arguments to be passed to the Deepchecks Suite `run` method. |
Source code in zenml/integrations/deepchecks/steps/deepchecks_model_validation.py
class DeepchecksModelValidationCheckStepParameters(BaseParameters):
"""Parameters class for the Deepchecks model validation validator step.
Attributes:
check_list: Optional list of DeepchecksModelValidationCheck identifiers
specifying the subset of Deepchecks model validation checks to be
performed. If not supplied, the entire set of model validation checks
will be performed.
dataset_kwargs: Additional keyword arguments to be passed to the
Deepchecks `tabular.Dataset` or `vision.VisionData` constructor.
check_kwargs: Additional keyword arguments to be passed to the
Deepchecks check object constructors. Arguments are grouped for
each check and indexed using the full check class name or
check enum value as dictionary keys.
run_kwargs: Additional keyword arguments to be passed to the
Deepchecks Suite `run` method.
"""
check_list: Optional[Sequence[DeepchecksModelValidationCheck]] = None
dataset_kwargs: Dict[str, Any] = Field(default_factory=dict)
check_kwargs: Dict[str, Dict[str, Any]] = Field(default_factory=dict)
run_kwargs: Dict[str, Any] = Field(default_factory=dict)
entrypoint(self, dataset, model, params)
Main entrypoint for the Deepchecks model validation step.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
dataset | DataFrame | a Pandas DataFrame to use for the validation | required |
model | ClassifierMixin | a scikit-learn model to validate | required |
params | DeepchecksModelValidationCheckStepParameters | the parameters for the step | required |
Returns:
Type | Description |
---|---|
SuiteResult | A Deepchecks suite result with the validation results. |
Source code in zenml/integrations/deepchecks/steps/deepchecks_model_validation.py
def entrypoint( # type: ignore[override]
self,
dataset: pd.DataFrame,
model: ClassifierMixin,
params: DeepchecksModelValidationCheckStepParameters,
) -> SuiteResult:
"""Main entrypoint for the Deepchecks model validation step.
Args:
dataset: a Pandas DataFrame to use for the validation
model: a scikit-learn model to validate
params: the parameters for the step
Returns:
A Deepchecks suite result with the validation results.
"""
data_validator = cast(
DeepchecksDataValidator,
DeepchecksDataValidator.get_active_data_validator(),
)
return data_validator.model_validation(
dataset=dataset,
model=model,
check_list=cast(Optional[Sequence[str]], params.check_list),
dataset_kwargs=params.dataset_kwargs,
check_kwargs=params.check_kwargs,
run_kwargs=params.run_kwargs,
)
DeepchecksModelValidationCheckStepParameters (BaseParameters)
pydantic-model
Parameters class for the Deepchecks model validation validator step.
Attributes:
Name | Type | Description |
---|---|---|
check_list | Optional[Sequence[zenml.integrations.deepchecks.validation_checks.DeepchecksModelValidationCheck]] | Optional list of DeepchecksModelValidationCheck identifiers specifying the subset of Deepchecks model validation checks to be performed. If not supplied, the entire set of model validation checks will be performed. |
dataset_kwargs | Dict[str, Any] | Additional keyword arguments to be passed to the Deepchecks `tabular.Dataset` or `vision.VisionData` constructor. |
check_kwargs | Dict[str, Dict[str, Any]] | Additional keyword arguments to be passed to the Deepchecks check object constructors. Arguments are grouped for each check and indexed using the full check class name or check enum value as dictionary keys. |
run_kwargs | Dict[str, Any] | Additional keyword arguments to be passed to the Deepchecks Suite `run` method. |
Source code in zenml/integrations/deepchecks/steps/deepchecks_model_validation.py
class DeepchecksModelValidationCheckStepParameters(BaseParameters):
"""Parameters class for the Deepchecks model validation validator step.
Attributes:
check_list: Optional list of DeepchecksModelValidationCheck identifiers
specifying the subset of Deepchecks model validation checks to be
performed. If not supplied, the entire set of model validation checks
will be performed.
dataset_kwargs: Additional keyword arguments to be passed to the
Deepchecks `tabular.Dataset` or `vision.VisionData` constructor.
check_kwargs: Additional keyword arguments to be passed to the
Deepchecks check object constructors. Arguments are grouped for
each check and indexed using the full check class name or
check enum value as dictionary keys.
run_kwargs: Additional keyword arguments to be passed to the
Deepchecks Suite `run` method.
"""
check_list: Optional[Sequence[DeepchecksModelValidationCheck]] = None
dataset_kwargs: Dict[str, Any] = Field(default_factory=dict)
check_kwargs: Dict[str, Dict[str, Any]] = Field(default_factory=dict)
run_kwargs: Dict[str, Any] = Field(default_factory=dict)
deepchecks_model_validation_check_step(step_name, params)
Shortcut function to create a new instance of the DeepchecksModelValidationCheckStep step.
The returned DeepchecksModelValidationCheckStep can be used in a pipeline to run model validation checks on an input pd.DataFrame dataset and an input scikit-learn ClassifierMixin model and return the results as a Deepchecks SuiteResult object.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
step_name | str | The name of the step | required |
params | DeepchecksModelValidationCheckStepParameters | The parameters for the step | required |
Returns:
Type | Description |
---|---|
BaseStep | a DeepchecksModelValidationCheckStep step instance |
Source code in zenml/integrations/deepchecks/steps/deepchecks_model_validation.py
def deepchecks_model_validation_check_step(
step_name: str,
params: DeepchecksModelValidationCheckStepParameters,
) -> BaseStep:
"""Shortcut function to create a new instance of the DeepchecksModelValidationCheckStep step.
The returned DeepchecksModelValidationCheckStep can be used in a pipeline to
run model validation checks on an input pd.DataFrame dataset and an input
scikit-learn ClassifierMixin model and return the results as a Deepchecks
SuiteResult object.
Args:
step_name: The name of the step
params: The parameters for the step
Returns:
a DeepchecksModelValidationCheckStep step instance
"""
return DeepchecksModelValidationCheckStep(name=step_name, params=params)
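Example: analogous to the model drift step above, a sketch that restricts the suite to two checks (the enum members are documented in the validation_checks section below):
from zenml.integrations.deepchecks.steps.deepchecks_model_validation import (
    DeepchecksModelValidationCheckStepParameters,
    deepchecks_model_validation_check_step,
)
from zenml.integrations.deepchecks.validation_checks import (
    DeepchecksModelValidationCheck,
)
# Only compute the ROC report and the confusion matrix instead of the
# whole model validation suite.
model_validation_check = deepchecks_model_validation_check_step(
    step_name="model_validation_check",
    params=DeepchecksModelValidationCheckStepParameters(
        check_list=[
            DeepchecksModelValidationCheck.TABULAR_ROC_REPORT,
            DeepchecksModelValidationCheck.TABULAR_CONFUSION_MATRIX_REPORT,
        ],
    ),
)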
validation_checks
Definition of the Deepchecks validation check types.
DeepchecksDataDriftCheck (DeepchecksValidationCheck)
Categories of Deepchecks data drift checks.
This list reflects the set of train-test validation checks provided by Deepchecks:
* [for tabular data](https://docs.deepchecks.com/stable/checks_gallery/tabular.html#train-test-validation)
* [for computer vision](https://docs.deepchecks.com/stable/checks_gallery/vision.html#train-test-validation)
All these checks inherit from `deepchecks.tabular.TrainTestCheck` or `deepchecks.vision.TrainTestCheck` and require two datasets as input.
Source code in zenml/integrations/deepchecks/validation_checks.py
class DeepchecksDataDriftCheck(DeepchecksValidationCheck):
"""Categories of Deepchecks data drift checks.
This list reflects the set of train-test validation checks provided by
Deepchecks:
* [for tabular data](https://docs.deepchecks.com/stable/checks_gallery/tabular.html#train-test-validation)
* [for computer vision](https://docs.deepchecks.com/stable/checks_gallery/vision.html#train-test-validation)
All these checks inherit from `deepchecks.tabular.TrainTestCheck` or
`deepchecks.vision.TrainTestCheck` and require two datasets as input.
"""
TABULAR_CATEGORY_MISMATCH_TRAIN_TEST = resolve_class(
tabular_checks.CategoryMismatchTrainTest
)
TABULAR_DATASET_SIZE_COMPARISON = resolve_class(
tabular_checks.DatasetsSizeComparison
)
TABULAR_DATE_TRAIN_TEST_LEAKAGE_DUPLICATES = resolve_class(
tabular_checks.DateTrainTestLeakageDuplicates
)
TABULAR_DATE_TRAIN_TEST_LEAKAGE_OVERLAP = resolve_class(
tabular_checks.DateTrainTestLeakageOverlap
)
TABULAR_DOMINANT_FREQUENCY_CHANGE = resolve_class(
tabular_checks.DominantFrequencyChange
)
TABULAR_FEATURE_LABEL_CORRELATION_CHANGE = resolve_class(
tabular_checks.FeatureLabelCorrelationChange
)
TABULAR_INDEX_LEAKAGE = resolve_class(tabular_checks.IndexTrainTestLeakage)
TABULAR_NEW_LABEL_TRAIN_TEST = resolve_class(
tabular_checks.NewLabelTrainTest
)
TABULAR_STRING_MISMATCH_COMPARISON = resolve_class(
tabular_checks.StringMismatchComparison
)
TABULAR_TRAIN_TEST_FEATURE_DRIFT = resolve_class(
tabular_checks.TrainTestFeatureDrift
)
TABULAR_TRAIN_TEST_LABEL_DRIFT = resolve_class(
tabular_checks.TrainTestLabelDrift
)
TABULAR_TRAIN_TEST_SAMPLES_MIX = resolve_class(
tabular_checks.TrainTestSamplesMix
)
TABULAR_WHOLE_DATASET_DRIFT = resolve_class(
tabular_checks.WholeDatasetDrift
)
VISION_FEATURE_LABEL_CORRELATION_CHANGE = resolve_class(
vision_checks.FeatureLabelCorrelationChange
)
VISION_HEATMAP_COMPARISON = resolve_class(vision_checks.HeatmapComparison)
VISION_IMAGE_DATASET_DRIFT = resolve_class(vision_checks.ImageDatasetDrift)
VISION_IMAGE_PROPERTY_DRIFT = resolve_class(
vision_checks.ImagePropertyDrift
)
VISION_NEW_LABELS = resolve_class(vision_checks.NewLabels)
VISION_SIMILAR_IMAGE_LEAKAGE = resolve_class(
vision_checks.SimilarImageLeakage
)
VISION_TRAIN_TEST_LABEL_DRIFT = resolve_class(
vision_checks.TrainTestLabelDrift
)
DeepchecksDataIntegrityCheck (DeepchecksValidationCheck)
Categories of Deepchecks data integrity checks.
This list reflects the set of data integrity checks provided by Deepchecks:
* [for tabular data](https://docs.deepchecks.com/en/stable/checks_gallery/tabular.html#data-integrity)
* [for computer vision](https://docs.deepchecks.com/en/stable/checks_gallery/vision.html#data-integrity)
All these checks inherit from `deepchecks.tabular.SingleDatasetCheck` or `deepchecks.vision.SingleDatasetCheck` and require a single dataset as input.
Source code in zenml/integrations/deepchecks/validation_checks.py
class DeepchecksDataIntegrityCheck(DeepchecksValidationCheck):
"""Categories of Deepchecks data integrity checks.
This list reflects the set of data integrity checks provided by Deepchecks:
* [for tabular data](https://docs.deepchecks.com/en/stable/checks_gallery/tabular.html#data-integrity)
* [for computer vision](https://docs.deepchecks.com/en/stable/checks_gallery/vision.html#data-integrity)
All these checks inherit from `deepchecks.tabular.SingleDatasetCheck` or
`deepchecks.vision.SingleDatasetCheck` and require a single dataset as input.
"""
TABULAR_COLUMNS_INFO = resolve_class(tabular_checks.ColumnsInfo)
TABULAR_CONFLICTING_LABELS = resolve_class(tabular_checks.ConflictingLabels)
TABULAR_DATA_DUPLICATES = resolve_class(tabular_checks.DataDuplicates)
TABULAR_FEATURE_FEATURE_CORRELATION = resolve_class(
FeatureFeatureCorrelation
)
TABULAR_FEATURE_LABEL_CORRELATION = resolve_class(
tabular_checks.FeatureLabelCorrelation
)
TABULAR_IDENTIFIER_LEAKAGE = resolve_class(tabular_checks.IdentifierLeakage)
TABULAR_IS_SINGLE_VALUE = resolve_class(tabular_checks.IsSingleValue)
TABULAR_MIXED_DATA_TYPES = resolve_class(tabular_checks.MixedDataTypes)
TABULAR_MIXED_NULLS = resolve_class(tabular_checks.MixedNulls)
TABULAR_OUTLIER_SAMPLE_DETECTION = resolve_class(
tabular_checks.OutlierSampleDetection
)
TABULAR_SPECIAL_CHARS = resolve_class(tabular_checks.SpecialCharacters)
TABULAR_STRING_LENGTH_OUT_OF_BOUNDS = resolve_class(
tabular_checks.StringLengthOutOfBounds
)
TABULAR_STRING_MISMATCH = resolve_class(tabular_checks.StringMismatch)
VISION_IMAGE_PROPERTY_OUTLIERS = resolve_class(
vision_checks.ImagePropertyOutliers
)
VISION_LABEL_PROPERTY_OUTLIERS = resolve_class(
vision_checks.LabelPropertyOutliers
)
DeepchecksModelDriftCheck (DeepchecksValidationCheck)
Categories of Deepchecks model drift checks.
This list includes a subset of the model evaluation checks provided by Deepchecks that require two datasets and a mandatory model as input:
* [for tabular data](https://docs.deepchecks.com/en/stable/checks_gallery/tabular.html#model-evaluation)
* [for computer vision](https://docs.deepchecks.com/stable/checks_gallery/vision.html#model-evaluation)
All these checks inherit from `deepchecks.tabular.TrainTestCheck` or `deepchecks.vision.TrainTestCheck` and require two datasets and a mandatory model as input.
Source code in zenml/integrations/deepchecks/validation_checks.py
class DeepchecksModelDriftCheck(DeepchecksValidationCheck):
"""Categories of Deepchecks model drift checks.
This list includes a subset of the model evaluation checks provided by
Deepchecks that require two datasets and a mandatory model as input:
* [for tabular data](https://docs.deepchecks.com/en/stable/checks_gallery/tabular.html#model-evaluation)
* [for computer vision](https://docs.deepchecks.com/stable/checks_gallery/vision.html#model-evaluation)
All these checks inherit from `deepchecks.tabular.TrainTestCheck` or
`deepchecks.vision.TrainTestCheck` and require two datasets and a mandatory
model as input.
"""
TABULAR_BOOSTING_OVERFIT = resolve_class(tabular_checks.BoostingOverfit)
TABULAR_MODEL_ERROR_ANALYSIS = resolve_class(
tabular_checks.ModelErrorAnalysis
)
TABULAR_PERFORMANCE_REPORT = resolve_class(tabular_checks.PerformanceReport)
TABULAR_SIMPLE_MODEL_COMPARISON = resolve_class(
tabular_checks.SimpleModelComparison
)
TABULAR_TRAIN_TEST_PREDICTION_DRIFT = resolve_class(
tabular_checks.TrainTestPredictionDrift
)
TABULAR_UNUSED_FEATURES = resolve_class(tabular_checks.UnusedFeatures)
VISION_CLASS_PERFORMANCE = resolve_class(vision_checks.ClassPerformance)
VISION_MODEL_ERROR_ANALYSIS = resolve_class(
vision_checks.ModelErrorAnalysis
)
VISION_SIMPLE_MODEL_COMPARISON = resolve_class(
vision_checks.SimpleModelComparison
)
VISION_TRAIN_TEST_PREDICTION_DRIFT = resolve_class(
vision_checks.TrainTestPredictionDrift
)
DeepchecksModelValidationCheck (DeepchecksValidationCheck)
Categories of Deepchecks model validation checks.
This list includes a subset of the model evaluation checks provided by Deepchecks that require a single dataset and a mandatory model as input:
* [for tabular data](https://docs.deepchecks.com/en/stable/checks_gallery/tabular.html#model-evaluation)
* [for computer vision](https://docs.deepchecks.com/stable/checks_gallery/vision.html#model-evaluation)
All these checks inherit from `deepchecks.tabular.SingleDatasetCheck` or `deepchecks.vision.SingleDatasetCheck` and require a dataset and a mandatory model as input.
Source code in zenml/integrations/deepchecks/validation_checks.py
class DeepchecksModelValidationCheck(DeepchecksValidationCheck):
"""Categories of Deepchecks model validation checks.
This list includes a subset of the model evaluation checks provided by
Deepchecks that require a single dataset and a mandatory model as input:
* [for tabular data](https://docs.deepchecks.com/en/stable/checks_gallery/tabular.html#model-evaluation)
* [for computer vision](https://docs.deepchecks.com/stable/checks_gallery/vision.html#model-evaluation)
All these checks inherit from `deepchecks.tabular.SingleDatasetCheck` or
`deepchecks.vision.SingleDatasetCheck` and require a dataset and a mandatory
model as input.
"""
TABULAR_CALIBRATION_SCORE = resolve_class(tabular_checks.CalibrationScore)
TABULAR_CONFUSION_MATRIX_REPORT = resolve_class(
tabular_checks.ConfusionMatrixReport
)
TABULAR_MODEL_INFERENCE_TIME = resolve_class(
tabular_checks.ModelInferenceTime
)
TABULAR_REGRESSION_ERROR_DISTRIBUTION = resolve_class(
tabular_checks.RegressionErrorDistribution
)
TABULAR_REGRESSION_SYSTEMATIC_ERROR = resolve_class(
tabular_checks.RegressionSystematicError
)
TABULAR_ROC_REPORT = resolve_class(tabular_checks.RocReport)
TABULAR_SEGMENT_PERFORMANCE = resolve_class(
tabular_checks.SegmentPerformance
)
VISION_CONFUSION_MATRIX_REPORT = resolve_class(
vision_checks.ConfusionMatrixReport
)
VISION_IMAGE_SEGMENT_PERFORMANCE = resolve_class(
vision_checks.ImageSegmentPerformance
)
VISION_MEAN_AVERAGE_PRECISION_REPORT = resolve_class(
vision_checks.MeanAveragePrecisionReport
)
VISION_MEAN_AVERAGE_RECALL_REPORT = resolve_class(
vision_checks.MeanAverageRecallReport
)
VISION_ROBUSTNESS_REPORT = resolve_class(vision_checks.RobustnessReport)
VISION_SINGLE_DATASET_SCALAR_PERFORMANCE = resolve_class(
vision_checks.SingleDatasetScalarPerformance
)
DeepchecksValidationCheck (StrEnum)
Base class for all Deepchecks categories of validation checks.
This base class defines some conventions used for all enum values used to identify the various validation checks that can be performed with Deepchecks:
* enum values represent fully formed class paths pointing to Deepchecks BaseCheck subclasses
* all tabular data checks are located under the `deepchecks.tabular.checks` module sub-tree
* all computer vision data checks are located under the `deepchecks.vision.checks` module sub-tree
Source code in zenml/integrations/deepchecks/validation_checks.py
class DeepchecksValidationCheck(StrEnum):
"""Base class for all Deepchecks categories of validation checks.
This base class defines some conventions used for all enum values used to
identify the various validation checks that can be performed with
Deepchecks:
* enum values represent fully formed class paths pointing to Deepchecks
BaseCheck subclasses
* all tabular data checks are located under the
`deepchecks.tabular.checks` module sub-tree
* all computer vision data checks are located under the
`deepchecks.vision.checks` module sub-tree
"""
@classmethod
def validate_check_name(cls, check_name: str) -> None:
"""Validate a Deepchecks check identifier.
Args:
check_name: Identifies a builtin Deepchecks check. The identifier
must be formatted as `deepchecks.{tabular|vision}.checks.<...>.<class-name>`.
Raises:
ValueError: If the check identifier does not follow the convention
used by ZenML to identify Deepchecks builtin checks.
"""
if not re.match(
r"^deepchecks\.(tabular|vision)\.checks\.",
check_name,
):
raise ValueError(
f"The supplied Deepcheck check identifier does not follow the "
f"convention used by ZenML: `{check_name}`. The identifier "
f"must be formatted as `deepchecks.<tabular|vision>.checks...` "
f"and must be resolvable to a valid Deepchecks BaseCheck "
f"subclass."
)
@classmethod
def is_tabular_check(cls, check_name: str) -> bool:
"""Check if a validation check is applicable to tabular data.
Args:
check_name: Identifies a builtin Deepchecks check.
Returns:
True if the check is applicable to tabular data, otherwise False.
"""
cls.validate_check_name(check_name)
return check_name.startswith("deepchecks.tabular.")
@classmethod
def is_vision_check(cls, check_name: str) -> bool:
"""Check if a validation check is applicable to computer vision data.
Args:
check_name: Identifies a builtin Deepchecks check.
Returns:
True if the check is applicable to computer vision data, otherwise
False.
"""
cls.validate_check_name(check_name)
return check_name.startswith("deepchecks.vision.")
@classmethod
def get_check_class(cls, check_name: str) -> Type[BaseCheck]:
"""Get the Deepchecks check class associated with an enum value or a custom check name.
Args:
check_name: Identifies a builtin Deepchecks check. The identifier
must be formatted as `deepchecks.{tabular|vision}.checks.<class-name>`
and must be resolvable to a valid Deepchecks BaseCheck class.
Returns:
The Deepchecks check class associated with this enum value.
Raises:
ValueError: If the check name could not be converted to a valid
Deepchecks check class. This can happen for example if the enum
values fall out of sync with the Deepchecks code base or if a
custom check name is supplied that cannot be resolved to a valid
Deepchecks BaseCheck class.
"""
cls.validate_check_name(check_name)
try:
check_class = import_class_by_path(check_name)
except AttributeError:
raise ValueError(
f"Could not map the `{check_name}` check identifier to a valid "
f"Deepchecks check class."
)
if not issubclass(check_class, BaseCheck):
raise ValueError(
f"The `{check_name}` check identifier is mapped to an invalid "
f"data type. Expected a {str(BaseCheck)} subclass, but instead "
f"got: {str(check_class)}."
)
if check_name not in cls.values():
logger.warning(
f"You are using a custom Deepchecks check identifier that is "
f"not listed in the `{str(cls)}` enum type. This could lead "
f"to unexpected behavior."
)
return check_class
@property
def check_class(self) -> Type[BaseCheck]:
"""Convert the enum value to a valid Deepchecks check class.
Returns:
The Deepchecks check class associated with the enum value.
"""
return self.get_check_class(self.value)
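Example: a short sketch of how these conventions play out (the exact class path printed depends on the installed Deepchecks version, so the comment only indicates its general shape):
from zenml.integrations.deepchecks.validation_checks import (
    DeepchecksDataIntegrityCheck,
    DeepchecksValidationCheck,
)
check = DeepchecksDataIntegrityCheck.TABULAR_DATA_DUPLICATES
# Enum values are fully formed class paths of the shape
# `deepchecks.tabular.checks.<...>.DataDuplicates`.
print(check.value)
# The `check_class` property resolves that path to the actual
# Deepchecks BaseCheck subclass.
print(check.check_class)
# Identifiers outside the `deepchecks.{tabular|vision}.checks`
# namespace are rejected.
try:
    DeepchecksValidationCheck.validate_check_name("my_module.MyCheck")
except ValueError as err:
    print(err)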
visualizers
special
Deepchecks visualizer.
deepchecks_visualizer
Implementation of the Deepchecks visualizer.
DeepchecksVisualizer (BaseVisualizer)
The implementation of a Deepchecks Visualizer.
Source code in zenml/integrations/deepchecks/visualizers/deepchecks_visualizer.py
class DeepchecksVisualizer(BaseVisualizer):
"""The implementation of a Deepchecks Visualizer."""
@abstractmethod
def visualize(self, object: StepView, *args: Any, **kwargs: Any) -> None:
"""Method to visualize components.
Args:
object: StepView fetched from run.get_step().
*args: Additional arguments (unused).
**kwargs: Additional keyword arguments (unused).
"""
for artifact_view in object.outputs.values():
# filter out anything but data analysis artifacts
if artifact_view.type == DataAnalysisArtifact.__name__:
artifact = artifact_view.read()
self.generate_report(artifact)
def generate_report(self, result: Union[CheckResult, SuiteResult]) -> None:
"""Generate a Deepchecks Report.
Args:
result: A Deepchecks CheckResult or SuiteResult.
"""
print(result)
if Environment.in_notebook():
result.show()
else:
logger.warning(
"The magic functions are only usable in a Jupyter notebook."
)
with tempfile.NamedTemporaryFile(
mode="w", delete=False, suffix=".html", encoding="utf-8"
) as f:
result.save_as_html(f)
url = f"file:///{f.name}"
logger.info("Opening %s in a new browser.." % f.name)
webbrowser.open(url, new=2)
generate_report(self, result)
Generate a Deepchecks Report.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
result | Union[deepchecks.core.check_result.CheckResult, deepchecks.core.suite.SuiteResult] | A Deepchecks CheckResult or SuiteResult. | required |
Source code in zenml/integrations/deepchecks/visualizers/deepchecks_visualizer.py
def generate_report(self, result: Union[CheckResult, SuiteResult]) -> None:
"""Generate a Deepchecks Report.
Args:
result: A Deepchecks CheckResult or SuiteResult.
"""
print(result)
if Environment.in_notebook():
result.show()
else:
logger.warning(
"The magic functions are only usable in a Jupyter notebook."
)
with tempfile.NamedTemporaryFile(
mode="w", delete=False, suffix=".html", encoding="utf-8"
) as f:
result.save_as_html(f)
url = f"file:///{f.name}"
logger.info("Opening %s in a new browser.." % f.name)
webbrowser.open(url, new=2)
visualize(self, object, *args, **kwargs)
Method to visualize components.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
object | StepView | StepView fetched from run.get_step(). | required |
*args | Any | Additional arguments (unused). | () |
**kwargs | Any | Additional keyword arguments (unused). | {} |
Source code in zenml/integrations/deepchecks/visualizers/deepchecks_visualizer.py
@abstractmethod
def visualize(self, object: StepView, *args: Any, **kwargs: Any) -> None:
"""Method to visualize components.
Args:
object: StepView fetched from run.get_step().
*args: Additional arguments (unused).
**kwargs: Additional keyword arguments (unused).
"""
for artifact_view in object.outputs.values():
# filter out anything but data analysis artifacts
if artifact_view.type == DataAnalysisArtifact.__name__:
artifact = artifact_view.read()
self.generate_report(artifact)
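Example: a sketch of visualizing the suite result of a finished run; the pipeline and step names reuse the hypothetical ones from the pipeline sketch above, and the post-execution API (Repository().get_pipeline(...), runs, get_step()) is assumed to match the ZenML version documented here:
from zenml.integrations.deepchecks.visualizers.deepchecks_visualizer import (
    DeepchecksVisualizer,
)
from zenml.repository import Repository
# Fetch the StepView of the check step from the latest run and render
# its SuiteResult (inline in a notebook, otherwise as HTML in the
# browser).
pipeline = Repository().get_pipeline("model_drift_pipeline")
last_run = pipeline.runs[-1]
step_view = last_run.get_step(step="model_drift_check")
DeepchecksVisualizer().visualize(step_view)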