Skip to content

Whylogs

zenml.integrations.whylogs special

Initialization of the whylogs integration.

WhylogsIntegration (Integration)

Definition of whylogs integration for ZenML.

Source code in zenml/integrations/whylogs/__init__.py
class WhylogsIntegration(Integration):
    """Definition of [whylogs](https://github.com/whylabs/whylogs) integration for ZenML."""

    NAME = WHYLOGS
    REQUIREMENTS = ["whylogs[viz]~=1.0.5", "whylogs[whylabs]~=1.0.5"]

    @classmethod
    def activate(cls) -> None:
        """Activates the integration.

        Imports the whylogs `materializers` and `secret_schemas` submodules.
        The imported names are not used here (hence the `noqa` markers); the
        imports exist only for whatever the modules do at import time.
        """
        from zenml.integrations.whylogs import materializers  # noqa
        from zenml.integrations.whylogs import secret_schemas  # noqa

    @classmethod
    def flavors(cls) -> List[Type[Flavor]]:
        """Declare the stack component flavors for the whylogs integration.

        Returns:
            List of stack component flavors for this integration.
        """
        # Imported lazily so that the flavors module is only loaded when the
        # flavors are actually requested.
        from zenml.integrations.whylogs.flavors import (
            WhylogsDataValidatorFlavor,
        )

        return [WhylogsDataValidatorFlavor]

activate() classmethod

Activates the integration.

Source code in zenml/integrations/whylogs/__init__.py
@classmethod
def activate(cls) -> None:
    """Activate the integration by importing its side-effect submodules.

    Neither imported name is used afterwards; the submodules are imported
    only for what they do at import time.
    """
    from zenml.integrations.whylogs import materializers, secret_schemas  # noqa

flavors() classmethod

Declare the stack component flavors for the whylogs integration.

Returns:

Type Description
List[Type[zenml.stack.flavor.Flavor]]

List of stack component flavors for this integration.

Source code in zenml/integrations/whylogs/__init__.py
@classmethod
def flavors(cls) -> List[Type[Flavor]]:
    """Declare the stack component flavors for the whylogs integration.

    Returns:
        List of stack component flavors for this integration.
    """
    # Imported lazily so that the flavors module is only loaded when the
    # flavors are actually requested.
    from zenml.integrations.whylogs.flavors import (
        WhylogsDataValidatorFlavor,
    )

    return [WhylogsDataValidatorFlavor]

constants

Whylogs integration constants.

data_validators special

Initialization of the whylogs data validator for ZenML.

whylogs_data_validator

Implementation of the whylogs data validator.

WhylogsDataValidator (BaseDataValidator, AuthenticationMixin)

Whylogs data validator stack component.

Attributes:

Name Type Description
authentication_secret

Optional ZenML secret with Whylabs credentials. If configured, all the data profiles returned by all pipeline steps will automatically be uploaded to Whylabs in addition to being stored in the ZenML Artifact Store.

Source code in zenml/integrations/whylogs/data_validators/whylogs_data_validator.py
class WhylogsDataValidator(BaseDataValidator, AuthenticationMixin):
    """Whylogs data validator stack component.

    Attributes:
        authentication_secret: Optional ZenML secret with Whylabs credentials.
            If configured, all the data profiles returned by all pipeline steps
            will automatically be uploaded to Whylabs in addition to being
            stored in the ZenML Artifact Store.
    """

    NAME: ClassVar[str] = "whylogs"
    FLAVOR: ClassVar[
        Type[BaseDataValidatorFlavor]
    ] = WhylogsDataValidatorFlavor

    @property
    def config(self) -> WhylogsDataValidatorConfig:
        """Returns the `WhylogsDataValidatorConfig` config.

        Returns:
            The configuration.
        """
        return cast(WhylogsDataValidatorConfig, self._config)

    @property
    def settings_class(self) -> Optional[Type["BaseSettings"]]:
        """Settings class for the Whylogs data validator.

        Returns:
            The settings class.
        """
        return WhylogsDataValidatorSettings

    def data_profiling(
        self,
        dataset: pd.DataFrame,
        comparison_dataset: Optional[pd.DataFrame] = None,
        profile_list: Optional[Sequence[str]] = None,
        dataset_timestamp: Optional[datetime.datetime] = None,
        **kwargs: Any,
    ) -> DatasetProfileView:
        """Analyze a dataset and generate a data profile with whylogs.

        Args:
            dataset: Target dataset to be profiled.
            comparison_dataset: Optional dataset to be used for data profiles
                that require a baseline for comparison (e.g. data drift
                profiles). Not used by this implementation.
            profile_list: Optional list identifying the categories of whylogs
                data profiles to be generated (unused).
            dataset_timestamp: timestamp to associate with the generated
                dataset profile (Optional). The current UTC time is used if
                not supplied.
            **kwargs: Extra keyword arguments (unused).

        Returns:
            A whylogs profile view object.
        """
        results = why.log(pandas=dataset)
        profile = results.profile()
        if dataset_timestamp is None:
            # Default to a timezone-aware UTC timestamp. The previous
            # `utcnow()` call is deprecated and returns a naive datetime,
            # which carries no timezone information and is liable to be
            # interpreted as local time downstream (whylogs appears to do
            # so - confirm against its documentation), shifting the
            # timestamp on hosts that do not run in UTC.
            dataset_timestamp = datetime.datetime.now(datetime.timezone.utc)
        profile.set_dataset_timestamp(dataset_timestamp=dataset_timestamp)
        return profile.view()

    def upload_profile_view(
        self,
        profile_view: DatasetProfileView,
        dataset_id: Optional[str] = None,
    ) -> None:
        """Upload a whylogs data profile view to Whylabs, if configured to do so.

        Nothing is uploaded when no authentication secret is available.

        Args:
            profile_view: Whylogs profile view to upload.
            dataset_id: Optional dataset identifier to use for the uploaded
                data profile. If omitted, a dataset identifier will be
                retrieved using other means, in order:

                1. the default dataset identifier configured in the Data
                   Validator secret
                2. a dataset ID generated automatically based on the
                   current pipeline/step information.

        Raises:
            ValueError: If the dataset ID was not provided and could not be
                retrieved or inferred from other sources.
        """
        secret = self.get_authentication_secret(
            expected_schema_type=WhylabsSecretSchema
        )
        if not secret:
            # No Whylabs credentials configured: uploading is a no-op.
            return

        dataset_id = dataset_id or secret.whylabs_default_dataset_id

        if not dataset_id:
            # use the current pipeline name and the step name to generate a
            # unique dataset name
            try:
                # get pipeline name and step name
                step_env = cast(
                    StepEnvironment, Environment()[STEP_ENVIRONMENT_NAME]
                )
                dataset_id = f"{step_env.pipeline_name}_{step_env.step_name}"
            except KeyError as e:
                # The step environment is not registered, so there is nothing
                # to derive a name from. Chain the original error so the root
                # cause stays visible in the traceback.
                raise ValueError(
                    "A dataset ID was not specified and could not be "
                    "generated from the current pipeline and step name."
                ) from e

        # Instantiate WhyLabs Writer
        writer = WhyLabsWriter(
            org_id=secret.whylabs_default_org_id,
            api_key=secret.whylabs_api_key,
            dataset_id=dataset_id,
        )

        # pass a profile view to the writer's write method
        writer.write(profile=profile_view)

        logger.info(
            f"Uploaded data profile for dataset {dataset_id} to Whylabs."
        )
config: WhylogsDataValidatorConfig property readonly

Returns the WhylogsDataValidatorConfig config.

Returns:

Type Description
WhylogsDataValidatorConfig

The configuration.

settings_class: Optional[Type[BaseSettings]] property readonly

Settings class for the Whylogs data validator.

Returns:

Type Description
Optional[Type[BaseSettings]]

The settings class.

FLAVOR (BaseDataValidatorFlavor)

Whylogs data validator flavor.

Source code in zenml/integrations/whylogs/data_validators/whylogs_data_validator.py
class WhylogsDataValidatorFlavor(BaseDataValidatorFlavor):
    """Flavor definition for the whylogs data validator."""

    @property
    def name(self) -> str:
        """Identifier of this flavor.

        Returns:
            The value of `WHYLOGS_DATA_VALIDATOR_FLAVOR`.
        """
        flavor_name = WHYLOGS_DATA_VALIDATOR_FLAVOR
        return flavor_name

    @property
    def docs_url(self) -> Optional[str]:
        """Link to the user documentation for this flavor.

        Returns:
            The URL produced by `generate_default_docs_url()`.
        """
        docs_link = self.generate_default_docs_url()
        return docs_link

    @property
    def sdk_docs_url(self) -> Optional[str]:
        """Link to the SDK documentation for this flavor.

        Returns:
            The URL produced by `generate_default_sdk_docs_url()`.
        """
        sdk_docs_link = self.generate_default_sdk_docs_url()
        return sdk_docs_link

    @property
    def logo_url(self) -> str:
        """Link to the logo shown for this flavor in the dashboard.

        Returns:
            The logo URL.
        """
        logo_link = "https://public-flavor-logos.s3.eu-central-1.amazonaws.com/data_validator/whylogs.png"
        return logo_link

    @property
    def config_class(self) -> Type[WhylogsDataValidatorConfig]:
        """Configuration class used by this flavor.

        Returns:
            The `WhylogsDataValidatorConfig` class.
        """
        return WhylogsDataValidatorConfig

    @property
    def implementation_class(self) -> Type["WhylogsDataValidator"]:
        """Stack component class implementing this flavor.

        Returns:
            The `WhylogsDataValidator` class.
        """
        # Resolved at call time rather than at module import time.
        from zenml.integrations.whylogs.data_validators import (
            WhylogsDataValidator as implementation,
        )

        return implementation
config_class: Type[zenml.integrations.whylogs.flavors.whylogs_data_validator_flavor.WhylogsDataValidatorConfig] property readonly

Returns WhylogsDataValidatorConfig config class.

Returns:

Type Description
Type[zenml.integrations.whylogs.flavors.whylogs_data_validator_flavor.WhylogsDataValidatorConfig]

The config class.

docs_url: Optional[str] property readonly

A url to point at docs explaining this flavor.

Returns:

Type Description
Optional[str]

A flavor docs url.

implementation_class: Type[WhylogsDataValidator] property readonly

Implementation class for this flavor.

Returns:

Type Description
Type[WhylogsDataValidator]

The implementation class.

logo_url: str property readonly

A url to represent the flavor in the dashboard.

Returns:

Type Description
str

The flavor logo.

name: str property readonly

Name of the flavor.

Returns:

Type Description
str

The name of the flavor.

sdk_docs_url: Optional[str] property readonly

A url to point at SDK docs explaining this flavor.

Returns:

Type Description
Optional[str]

A flavor SDK docs url.

data_profiling(self, dataset, comparison_dataset=None, profile_list=None, dataset_timestamp=None, **kwargs)

Analyze a dataset and generate a data profile with whylogs.

Parameters:

Name Type Description Default
dataset DataFrame

Target dataset to be profiled.

required
comparison_dataset Optional[pandas.core.frame.DataFrame]

Optional dataset to be used for data profiles that require a baseline for comparison (e.g. data drift profiles).

None
profile_list Optional[Sequence[str]]

Optional list identifying the categories of whylogs data profiles to be generated (unused).

None
dataset_timestamp Optional[datetime.datetime]

timestamp to associate with the generated dataset profile (Optional). The current time is used if not supplied.

None
**kwargs Any

Extra keyword arguments (unused).

{}

Returns:

Type Description
DatasetProfileView

A whylogs profile view object.

Source code in zenml/integrations/whylogs/data_validators/whylogs_data_validator.py
def data_profiling(
    self,
    dataset: pd.DataFrame,
    comparison_dataset: Optional[pd.DataFrame] = None,
    profile_list: Optional[Sequence[str]] = None,
    dataset_timestamp: Optional[datetime.datetime] = None,
    **kwargs: Any,
) -> DatasetProfileView:
    """Analyze a dataset and generate a data profile with whylogs.

    Args:
        dataset: Target dataset to be profiled.
        comparison_dataset: Optional dataset to be used for data profiles
            that require a baseline for comparison (e.g. data drift
            profiles). Not used by this implementation.
        profile_list: Optional list identifying the categories of whylogs
            data profiles to be generated (unused).
        dataset_timestamp: timestamp to associate with the generated
            dataset profile (Optional). The current UTC time is used if not
            supplied.
        **kwargs: Extra keyword arguments (unused).

    Returns:
        A whylogs profile view object.
    """
    results = why.log(pandas=dataset)
    profile = results.profile()
    if dataset_timestamp is None:
        # Default to a timezone-aware UTC timestamp. The previous `utcnow()`
        # call is deprecated and returns a naive datetime, which carries no
        # timezone information and is liable to be interpreted as local time
        # downstream (whylogs appears to do so - confirm against its
        # documentation), shifting the timestamp on non-UTC hosts.
        dataset_timestamp = datetime.datetime.now(datetime.timezone.utc)
    profile.set_dataset_timestamp(dataset_timestamp=dataset_timestamp)
    return profile.view()
upload_profile_view(self, profile_view, dataset_id=None)

Upload a whylogs data profile view to Whylabs, if configured to do so.

Parameters:

Name Type Description Default
profile_view DatasetProfileView

Whylogs profile view to upload.

required
dataset_id Optional[str]

Optional dataset identifier to use for the uploaded data profile. If omitted, a dataset identifier will be retrieved using other means, in order: (1) the default dataset identifier configured in the Data Validator secret; (2) a dataset ID generated automatically based on the current pipeline/step information.

None

Exceptions:

Type Description
ValueError

If the dataset ID was not provided and could not be retrieved or inferred from other sources.

Source code in zenml/integrations/whylogs/data_validators/whylogs_data_validator.py
def upload_profile_view(
    self,
    profile_view: DatasetProfileView,
    dataset_id: Optional[str] = None,
) -> None:
    """Upload a whylogs data profile view to Whylabs, if configured to do so.

    Nothing is uploaded when no authentication secret is available.

    Args:
        profile_view: Whylogs profile view to upload.
        dataset_id: Optional dataset identifier to use for the uploaded
            data profile. If omitted, a dataset identifier will be retrieved
            using other means, in order:

            1. the default dataset identifier configured in the Data
               Validator secret
            2. a dataset ID generated automatically based on the current
               pipeline/step information.

    Raises:
        ValueError: If the dataset ID was not provided and could not be
            retrieved or inferred from other sources.
    """
    secret = self.get_authentication_secret(
        expected_schema_type=WhylabsSecretSchema
    )
    if not secret:
        # No Whylabs credentials configured: uploading is a no-op.
        return

    dataset_id = dataset_id or secret.whylabs_default_dataset_id

    if not dataset_id:
        # use the current pipeline name and the step name to generate a
        # unique dataset name
        try:
            # get pipeline name and step name
            step_env = cast(
                StepEnvironment, Environment()[STEP_ENVIRONMENT_NAME]
            )
            dataset_id = f"{step_env.pipeline_name}_{step_env.step_name}"
        except KeyError as e:
            # The step environment is not registered, so there is nothing to
            # derive a name from. Chain the original error so the root cause
            # stays visible in the traceback.
            raise ValueError(
                "A dataset ID was not specified and could not be "
                "generated from the current pipeline and step name."
            ) from e

    # Instantiate WhyLabs Writer
    writer = WhyLabsWriter(
        org_id=secret.whylabs_default_org_id,
        api_key=secret.whylabs_api_key,
        dataset_id=dataset_id,
    )

    # pass a profile view to the writer's write method
    writer.write(profile=profile_view)

    logger.info(
        f"Uploaded data profile for dataset {dataset_id} to Whylabs."
    )

flavors special

WhyLabs whylogs integration flavors.

whylogs_data_validator_flavor

WhyLabs whylogs data validator flavor.

WhylogsDataValidatorConfig (BaseDataValidatorConfig, AuthenticationConfigMixin, WhylogsDataValidatorSettings) pydantic-model

Config for the whylogs data validator.

Source code in zenml/integrations/whylogs/flavors/whylogs_data_validator_flavor.py
class WhylogsDataValidatorConfig(  # type: ignore[misc] # https://github.com/pydantic/pydantic/issues/4173
    BaseDataValidatorConfig,
    AuthenticationConfigMixin,
    WhylogsDataValidatorSettings,
):
    """Config for the whylogs data validator.

    The class declares no fields of its own: its contents come entirely from
    the three base classes, which includes the `enable_whylabs` and
    `dataset_id` settings declared on `WhylogsDataValidatorSettings`.
    """
WhylogsDataValidatorFlavor (BaseDataValidatorFlavor)

Whylogs data validator flavor.

Source code in zenml/integrations/whylogs/flavors/whylogs_data_validator_flavor.py
class WhylogsDataValidatorFlavor(BaseDataValidatorFlavor):
    """Flavor definition for the whylogs data validator."""

    @property
    def name(self) -> str:
        """Identifier of this flavor.

        Returns:
            The value of `WHYLOGS_DATA_VALIDATOR_FLAVOR`.
        """
        flavor_name = WHYLOGS_DATA_VALIDATOR_FLAVOR
        return flavor_name

    @property
    def docs_url(self) -> Optional[str]:
        """Link to the user documentation for this flavor.

        Returns:
            The URL produced by `generate_default_docs_url()`.
        """
        docs_link = self.generate_default_docs_url()
        return docs_link

    @property
    def sdk_docs_url(self) -> Optional[str]:
        """Link to the SDK documentation for this flavor.

        Returns:
            The URL produced by `generate_default_sdk_docs_url()`.
        """
        sdk_docs_link = self.generate_default_sdk_docs_url()
        return sdk_docs_link

    @property
    def logo_url(self) -> str:
        """Link to the logo shown for this flavor in the dashboard.

        Returns:
            The logo URL.
        """
        logo_link = "https://public-flavor-logos.s3.eu-central-1.amazonaws.com/data_validator/whylogs.png"
        return logo_link

    @property
    def config_class(self) -> Type[WhylogsDataValidatorConfig]:
        """Configuration class used by this flavor.

        Returns:
            The `WhylogsDataValidatorConfig` class.
        """
        return WhylogsDataValidatorConfig

    @property
    def implementation_class(self) -> Type["WhylogsDataValidator"]:
        """Stack component class implementing this flavor.

        Returns:
            The `WhylogsDataValidator` class.
        """
        # Resolved at call time rather than at module import time.
        from zenml.integrations.whylogs.data_validators import (
            WhylogsDataValidator as implementation,
        )

        return implementation
config_class: Type[zenml.integrations.whylogs.flavors.whylogs_data_validator_flavor.WhylogsDataValidatorConfig] property readonly

Returns WhylogsDataValidatorConfig config class.

Returns:

Type Description
Type[zenml.integrations.whylogs.flavors.whylogs_data_validator_flavor.WhylogsDataValidatorConfig]

The config class.

docs_url: Optional[str] property readonly

A url to point at docs explaining this flavor.

Returns:

Type Description
Optional[str]

A flavor docs url.

implementation_class: Type[WhylogsDataValidator] property readonly

Implementation class for this flavor.

Returns:

Type Description
Type[WhylogsDataValidator]

The implementation class.

logo_url: str property readonly

A url to represent the flavor in the dashboard.

Returns:

Type Description
str

The flavor logo.

name: str property readonly

Name of the flavor.

Returns:

Type Description
str

The name of the flavor.

sdk_docs_url: Optional[str] property readonly

A url to point at SDK docs explaining this flavor.

Returns:

Type Description
Optional[str]

A flavor SDK docs url.

WhylogsDataValidatorSettings (BaseSettings) pydantic-model

Settings for the Whylogs data validator.

Attributes:

Name Type Description
enable_whylabs bool

If set to True for a step, all the whylogs data profile views returned by the step will automatically be uploaded to the Whylabs platform if Whylabs credentials are configured.

dataset_id Optional[str]

Dataset ID to use when uploading profiles to Whylabs.

Source code in zenml/integrations/whylogs/flavors/whylogs_data_validator_flavor.py
class WhylogsDataValidatorSettings(BaseSettings):
    """Settings for the Whylogs data validator.

    Attributes:
        enable_whylabs: If set to `True` for a step, all the whylogs data
            profile views returned by the step will automatically be uploaded
            to the Whylabs platform if Whylabs credentials are configured.
        dataset_id: Dataset ID to use when uploading profiles to Whylabs.
            When left unset, the data validator falls back to the default
            dataset ID from its secret and then to a name built from the
            current pipeline and step.
    """

    # Uploading to Whylabs is opt-in: disabled unless explicitly enabled.
    enable_whylabs: bool = False
    # No explicit dataset ID by default; see the fallback order above.
    dataset_id: Optional[str] = None

materializers special

Initialization of the whylogs materializer.

whylogs_materializer

Implementation of the whylogs materializer.

WhylogsMaterializer (BaseMaterializer)

Materializer to read/write whylogs dataset profile views.

Source code in zenml/integrations/whylogs/materializers/whylogs_materializer.py
class WhylogsMaterializer(BaseMaterializer):
    """Materializer to read/write whylogs dataset profile views."""

    ASSOCIATED_TYPES: ClassVar[Tuple[Type[Any], ...]] = (DatasetProfileView,)
    ASSOCIATED_ARTIFACT_TYPE: ClassVar[
        ArtifactType
    ] = ArtifactType.DATA_ANALYSIS

    def load(self, data_type: Type[Any]) -> DatasetProfileView:
        """Reads and returns a whylogs dataset profile view.

        The profile is first copied from the artifact store into a local
        temporary directory and read from there.

        Args:
            data_type: The type of the data to read.

        Returns:
            A loaded whylogs dataset profile view object.
        """
        filepath = os.path.join(self.uri, PROFILE_FILENAME)

        # Create a temporary folder
        temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
        try:
            temp_file = os.path.join(str(temp_dir), PROFILE_FILENAME)

            # Copy from artifact store to temporary file
            fileio.copy(filepath, temp_file)
            profile_view = DatasetProfileView.read(temp_file)
        finally:
            # Always clean up, even if the copy or the read failed, so that
            # no temporary directory is leaked.
            fileio.rmtree(temp_dir)

        return profile_view

    def save(self, profile_view: DatasetProfileView) -> None:
        """Writes a whylogs dataset profile view.

        The profile is written to a local temporary file, copied into the
        artifact store and then, on a best-effort basis, uploaded to Whylabs:
        a failed upload is logged and does not fail the save.

        Args:
            profile_view: A whylogs dataset profile view object.
        """
        filepath = os.path.join(self.uri, PROFILE_FILENAME)

        # Create a temporary folder
        temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
        try:
            temp_file = os.path.join(str(temp_dir), PROFILE_FILENAME)

            profile_view.write(temp_file)

            # Copy it into artifact store
            fileio.copy(temp_file, filepath)
        finally:
            # Always clean up, even if the write or the copy failed, so that
            # no temporary directory is leaked.
            fileio.rmtree(temp_dir)

        try:
            self._upload_to_whylabs(profile_view)
        except Exception as e:
            # Deliberately broad: the artifact is already stored, so an
            # upload problem is reported but must not fail the step.
            logger.error(
                "Failed to upload whylogs profile view to Whylabs: %s", e
            )

    def save_visualizations(
        self,
        profile_view: DatasetProfileView,
    ) -> Dict[str, VisualizationType]:
        """Saves visualizations for the given whylogs dataset profile view.

        Args:
            profile_view: The whylogs dataset profile view to visualize.

        Returns:
            A dictionary of visualization URIs and their types.
        """
        # currently, whylogs doesn't support visualizing a single profile, so
        # we trick it by using the same profile twice, both as reference and
        # target, in a drift report
        visualization = NotebookProfileVisualizer()
        visualization.set_profiles(
            target_profile_view=profile_view,
            reference_profile_view=profile_view,
        )
        rendered_html = visualization.summary_drift_report()
        filepath = os.path.join(self.uri, HTML_FILENAME)
        with fileio.open(filepath, "w") as f:
            f.write(rendered_html.data)
        return {filepath: VisualizationType.HTML}

    def _upload_to_whylabs(self, profile_view: DatasetProfileView) -> None:
        """Uploads a whylogs dataset profile view to Whylabs.

        The upload is skipped silently when no whylogs data validator is
        active, when the code is not running inside a step environment, or
        when `enable_whylabs` is not set in the resolved settings.

        Args:
            profile_view: A whylogs dataset profile view object.
        """
        from zenml.environment import Environment
        from zenml.integrations.whylogs.data_validators import (
            WhylogsDataValidator,
        )
        from zenml.integrations.whylogs.flavors.whylogs_data_validator_flavor import (
            WhylogsDataValidatorSettings,
        )
        from zenml.steps import STEP_ENVIRONMENT_NAME, StepEnvironment

        try:
            data_validator = WhylogsDataValidator.get_active_data_validator()
        except TypeError:
            # no whylogs data validator is active
            return

        if not isinstance(data_validator, WhylogsDataValidator):
            # the active data validator is not a whylogs data validator
            return

        try:
            step_env = cast(
                StepEnvironment, Environment()[STEP_ENVIRONMENT_NAME]
            )
        except KeyError:
            # we are not in a step environment
            return

        run_info = step_env.step_run_info
        settings = cast(
            WhylogsDataValidatorSettings, data_validator.get_settings(run_info)
        )

        if not settings.enable_whylabs:
            # whylabs is not enabled in the data validator
            return
        data_validator.upload_profile_view(
            profile_view, dataset_id=settings.dataset_id
        )
load(self, data_type)

Reads and returns a whylogs dataset profile view.

Parameters:

Name Type Description Default
data_type Type[Any]

The type of the data to read.

required

Returns:

Type Description
DatasetProfileView

A loaded whylogs dataset profile view object.

Source code in zenml/integrations/whylogs/materializers/whylogs_materializer.py
def load(self, data_type: Type[Any]) -> DatasetProfileView:
    """Reads and returns a whylogs dataset profile view.

    The profile is first copied from the artifact store into a local
    temporary directory and read from there.

    Args:
        data_type: The type of the data to read.

    Returns:
        A loaded whylogs dataset profile view object.
    """
    filepath = os.path.join(self.uri, PROFILE_FILENAME)

    # Create a temporary folder
    temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
    try:
        temp_file = os.path.join(str(temp_dir), PROFILE_FILENAME)

        # Copy from artifact store to temporary file
        fileio.copy(filepath, temp_file)
        profile_view = DatasetProfileView.read(temp_file)
    finally:
        # Always clean up, even if the copy or the read failed, so that no
        # temporary directory is leaked.
        fileio.rmtree(temp_dir)

    return profile_view
save(self, profile_view)

Writes a whylogs dataset profile view.

Parameters:

Name Type Description Default
profile_view DatasetProfileView

A whylogs dataset profile view object.

required
Source code in zenml/integrations/whylogs/materializers/whylogs_materializer.py
def save(self, profile_view: DatasetProfileView) -> None:
    """Writes a whylogs dataset profile view.

    The profile is written to a local temporary file, copied into the
    artifact store and then, on a best-effort basis, uploaded to Whylabs: a
    failed upload is logged and does not fail the save.

    Args:
        profile_view: A whylogs dataset profile view object.
    """
    filepath = os.path.join(self.uri, PROFILE_FILENAME)

    # Create a temporary folder
    temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
    try:
        temp_file = os.path.join(str(temp_dir), PROFILE_FILENAME)

        profile_view.write(temp_file)

        # Copy it into artifact store
        fileio.copy(temp_file, filepath)
    finally:
        # Always clean up, even if the write or the copy failed, so that no
        # temporary directory is leaked.
        fileio.rmtree(temp_dir)

    try:
        self._upload_to_whylabs(profile_view)
    except Exception as e:
        # Deliberately broad: the artifact is already stored, so an upload
        # problem is reported but must not fail the step.
        logger.error(
            "Failed to upload whylogs profile view to Whylabs: %s", e
        )
save_visualizations(self, profile_view)

Saves visualizations for the given whylogs dataset profile view.

Parameters:

Name Type Description Default
profile_view DatasetProfileView

The whylogs dataset profile view to visualize.

required

Returns:

Type Description
Dict[str, zenml.enums.VisualizationType]

A dictionary of visualization URIs and their types.

Source code in zenml/integrations/whylogs/materializers/whylogs_materializer.py
def save_visualizations(
    self,
    profile_view: DatasetProfileView,
) -> Dict[str, VisualizationType]:
    """Render an HTML report for a profile view and store it with the artifact.

    Args:
        profile_view: The whylogs dataset profile view to visualize.

    Returns:
        A single-entry dictionary mapping the URI of the written HTML file
        to `VisualizationType.HTML`.
    """
    # whylogs has no single-profile visualization at the moment, so a drift
    # report is produced with the same profile acting as both the target and
    # the reference.
    visualizer = NotebookProfileVisualizer()
    visualizer.set_profiles(
        target_profile_view=profile_view,
        reference_profile_view=profile_view,
    )
    drift_report = visualizer.summary_drift_report()

    html_uri = os.path.join(self.uri, HTML_FILENAME)
    with fileio.open(html_uri, "w") as html_file:
        html_file.write(drift_report.data)

    visualizations = {html_uri: VisualizationType.HTML}
    return visualizations

secret_schemas special

Initialization for the Whylabs secret schema.

This schema can be used to configure a ZenML secret that authenticates ZenML with the Whylabs platform, so that all whylogs data profiles generated by pipeline steps are logged there automatically.

whylabs_secret_schema

Implementation for Whylabs secret schemas.

WhylabsSecretSchema (BaseSecretSchema) pydantic-model

Whylabs credentials.

Attributes:

Name Type Description
whylabs_default_org_id str

the Whylabs organization ID.

whylabs_api_key str

Whylabs API key.

whylabs_default_dataset_id Optional[str]

default Whylabs dataset ID to use when logging data profiles.

Source code in zenml/integrations/whylogs/secret_schemas/whylabs_secret_schema.py
class WhylabsSecretSchema(BaseSecretSchema):
    """Whylabs credentials.

    Attributes:
        whylabs_default_org_id: the Whylabs organization ID.
        whylabs_api_key: Whylabs API key.
        whylabs_default_dataset_id: default Whylabs dataset ID to use when
            logging data profiles.
    """

    # Identifier of this secret schema type.
    TYPE: ClassVar[str] = WHYLABS_SECRET_SCHEMA_TYPE

    # Required credentials: both fields have no default value.
    whylabs_default_org_id: str
    whylabs_api_key: str
    # Optional fallback used when no dataset ID is given for an upload.
    whylabs_default_dataset_id: Optional[str] = None

steps special

Initialization of the whylogs steps.

whylogs_profiler

Implementation of the whylogs profiler step.

get_whylogs_profiler_step(dataset_timestamp=None, dataset_id=None, enable_whylabs=True)

Shortcut function to create a new instance of the WhylogsProfilerStep step.

The returned WhylogsProfilerStep can be used in a pipeline to generate a whylogs DatasetProfileView from a given pd.DataFrame and save it as an artifact.

Parameters:

Name Type Description Default
dataset_timestamp Optional[datetime.datetime]

The timestamp of the dataset.

None
dataset_id Optional[str]

Optional dataset ID to use to upload the profile to Whylabs.

None
enable_whylabs bool

Whether to upload the generated profile to Whylabs.

True

Returns:

Type Description
BaseStep

a WhylogsProfilerStep step instance

Source code in zenml/integrations/whylogs/steps/whylogs_profiler.py
def get_whylogs_profiler_step(
    dataset_timestamp: Optional[datetime.datetime] = None,
    dataset_id: Optional[str] = None,
    enable_whylabs: bool = True,
) -> BaseStep:
    """Build a pre-configured instance of the whylogs profiler step.

    The returned step can be used in a pipeline to generate a whylogs
    DatasetProfileView from a given pd.DataFrame and save it as an artifact.

    Args:
        dataset_timestamp: The timestamp of the dataset.
        dataset_id: Optional dataset ID to use to upload the profile to Whylabs.
        enable_whylabs: Whether to upload the generated profile to Whylabs.

    Returns:
        The `whylogs_profiler_step` with the given parameter and whylogs
        data validator settings applied.
    """
    # Settings are attached under the key derived from the whylogs data
    # validator flavor.
    settings_key = settings_utils.get_flavor_setting_key(
        WhylogsDataValidatorFlavor()
    )
    validator_settings = WhylogsDataValidatorSettings(
        enable_whylabs=enable_whylabs, dataset_id=dataset_id
    )
    return whylogs_profiler_step.with_options(
        parameters={"dataset_timestamp": dataset_timestamp},
        settings={settings_key: validator_settings},
    )