Skip to content

Whylogs

zenml.integrations.whylogs special

Initialization of the whylogs integration.

WhylogsIntegration (Integration)

Definition of whylogs integration for ZenML.

Source code in zenml/integrations/whylogs/__init__.py
class WhylogsIntegration(Integration):
    """Definition of [whylogs](https://github.com/whylabs/whylogs) integration for ZenML."""

    NAME = WHYLOGS
    REQUIREMENTS = ["whylogs[viz]~=1.0.5", "whylogs[whylabs]~=1.0.5"]

    @classmethod
    def activate(cls) -> None:
        """Activates the integration."""
        from zenml.integrations.whylogs import materializers  # noqa
        from zenml.integrations.whylogs import secret_schemas  # noqa
        from zenml.integrations.whylogs import visualizers  # noqa

    @classmethod
    def flavors(cls) -> List[Type[Flavor]]:
        """Declare the stack component flavors for the Great Expectations integration.

        Returns:
            List of stack component flavors for this integration.
        """
        from zenml.integrations.whylogs.flavors import (
            WhylogsDataValidatorFlavor,
        )

        return [WhylogsDataValidatorFlavor]

activate() classmethod

Activates the integration.

Source code in zenml/integrations/whylogs/__init__.py
@classmethod
def activate(cls) -> None:
    """Activates the integration."""
    from zenml.integrations.whylogs import materializers  # noqa
    from zenml.integrations.whylogs import secret_schemas  # noqa
    from zenml.integrations.whylogs import visualizers  # noqa

flavors() classmethod

Declare the stack component flavors for the Great Expectations integration.

Returns:

Type Description
List[Type[zenml.stack.flavor.Flavor]]

List of stack component flavors for this integration.

Source code in zenml/integrations/whylogs/__init__.py
@classmethod
def flavors(cls) -> List[Type[Flavor]]:
    """Declare the stack component flavors for the Great Expectations integration.

    Returns:
        List of stack component flavors for this integration.
    """
    from zenml.integrations.whylogs.flavors import (
        WhylogsDataValidatorFlavor,
    )

    return [WhylogsDataValidatorFlavor]

constants

Whylogs integration constants.

data_validators special

Initialization of the whylogs data validator for ZenML.

whylogs_data_validator

Implementation of the whylogs data validator.

WhylogsDataValidator (BaseDataValidator, AuthenticationMixin)

Whylogs data validator stack component.

Attributes:

Name Type Description
authentication_secret

Optional ZenML secret with Whylabs credentials. If configured, all the data profiles returned by all pipeline steps will automatically be uploaded to Whylabs in addition to being stored in the ZenML Artifact Store.

Source code in zenml/integrations/whylogs/data_validators/whylogs_data_validator.py
class WhylogsDataValidator(BaseDataValidator, AuthenticationMixin):
    """Whylogs data validator stack component.

    Attributes:
        authentication_secret: Optional ZenML secret with Whylabs credentials.
            If configured, all the data profiles returned by all pipeline steps
            will automatically be uploaded to Whylabs in addition to being
            stored in the ZenML Artifact Store.
    """

    NAME: ClassVar[str] = "whylogs"
    FLAVOR: ClassVar[Type[BaseDataValidatorFlavor]] = WhylogsDataValidatorFlavor

    @property
    def config(self) -> WhylogsDataValidatorConfig:
        """Returns the `WhylogsDataValidatorConfig` config.

        Returns:
            The configuration.
        """
        return cast(WhylogsDataValidatorConfig, self._config)

    @property
    def settings_class(self) -> Optional[Type["BaseSettings"]]:
        """Settings class for the Whylogs data validator.

        Returns:
            The settings class.
        """
        return WhylogsDataValidatorSettings

    def prepare_step_run(self, info: "StepRunInfo") -> None:
        """Configures Whylabs logging.

        Args:
            info: Info about the step that will be executed.
        """
        settings = cast(WhylogsDataValidatorSettings, self.get_settings(info))
        if settings.enable_whylabs:
            os.environ[WHYLABS_LOGGING_ENABLED_ENV] = "true"
        if settings.dataset_id:
            os.environ[WHYLABS_DATASET_ID_ENV] = settings.dataset_id

    def cleanup_step_run(self, info: "StepRunInfo") -> None:
        """Resets Whylabs configuration.

        Args:
            info: Info about the step that was executed.
        """
        settings = cast(WhylogsDataValidatorSettings, self.get_settings(info))
        if settings.enable_whylabs:
            del os.environ[WHYLABS_LOGGING_ENABLED_ENV]
        if settings.dataset_id:
            del os.environ[WHYLABS_DATASET_ID_ENV]

    def data_profiling(
        self,
        dataset: pd.DataFrame,
        comparison_dataset: Optional[pd.DataFrame] = None,
        profile_list: Optional[Sequence[str]] = None,
        dataset_timestamp: Optional[datetime.datetime] = None,
        **kwargs: Any,
    ) -> DatasetProfileView:
        """Analyze a dataset and generate a data profile with whylogs.

        Args:
            dataset: Target dataset to be profiled.
            comparison_dataset: Optional dataset to be used for data profiles
                that require a baseline for comparison (e.g data drift profiles).
            profile_list: Optional list identifying the categories of whylogs
                data profiles to be generated (unused).
            dataset_timestamp: timestamp to associate with the generated
                dataset profile (Optional). The current time is used if not
                supplied.
            **kwargs: Extra keyword arguments (unused).

        Returns:
            A whylogs profile view object.
        """
        results = why.log(pandas=dataset)
        profile = results.profile()
        dataset_timestamp = dataset_timestamp or datetime.datetime.utcnow()
        profile.set_dataset_timestamp(dataset_timestamp=dataset_timestamp)
        return profile.view()

    def upload_profile_view(
        self,
        profile_view: DatasetProfileView,
        dataset_id: Optional[str] = None,
    ) -> None:
        """Upload a whylogs data profile view to Whylabs, if configured to do so.

        Args:
            profile_view: Whylogs profile view to upload.
            dataset_id: Optional dataset identifier to use for the uploaded
                data profile. If omitted, a dataset identifier will be retrieved
                using other means, in order:
                    * the default dataset identifier configured in the Data
                    Validator secret
                    * a dataset ID will be generated automatically based on the
                    current pipeline/step information.

        Raises:
            ValueError: If the dataset ID was not provided and could not be
                retrieved or inferred from other sources.
        """
        secret = self.get_authentication_secret(
            expected_schema_type=WhylabsSecretSchema
        )
        if not secret:
            return

        dataset_id = dataset_id or secret.whylabs_default_dataset_id

        if not dataset_id:
            # use the current pipeline name and the step name to generate a
            # unique dataset name
            try:
                # get pipeline name and step name
                step_env = cast(
                    StepEnvironment, Environment()[STEP_ENVIRONMENT_NAME]
                )
                dataset_id = f"{step_env.pipeline_name}_{step_env.step_name}"
            except KeyError:
                raise ValueError(
                    "A dataset ID was not specified and could not be "
                    "generated from the current pipeline and step name."
                )

        # Instantiate WhyLabs Writer
        writer = WhyLabsWriter(
            org_id=secret.whylabs_default_org_id,
            api_key=secret.whylabs_api_key,
            dataset_id=dataset_id,
        )

        # pass a profile view to the writer's write method
        writer.write(profile=profile_view)
config: WhylogsDataValidatorConfig property readonly

Returns the WhylogsDataValidatorConfig config.

Returns:

Type Description
WhylogsDataValidatorConfig

The configuration.

settings_class: Optional[Type[BaseSettings]] property readonly

Settings class for the Whylogs data validator.

Returns:

Type Description
Optional[Type[BaseSettings]]

The settings class.

FLAVOR (BaseDataValidatorFlavor)

Whylogs data validator flavor.

Source code in zenml/integrations/whylogs/data_validators/whylogs_data_validator.py
class WhylogsDataValidatorFlavor(BaseDataValidatorFlavor):
    """Whylogs data validator flavor."""

    @property
    def name(self) -> str:
        """Name of the flavor.

        Returns:
            The name of the flavor.
        """
        return WHYLOGS_DATA_VALIDATOR_FLAVOR

    @property
    def config_class(self) -> Type[WhylogsDataValidatorConfig]:
        """Returns `WhylogsDataValidatorConfig` config class.

        Returns:
                The config class.
        """
        return WhylogsDataValidatorConfig

    @property
    def implementation_class(self) -> Type["WhylogsDataValidator"]:
        """Implementation class for this flavor.

        Returns:
            The implementation class.
        """
        from zenml.integrations.whylogs.data_validators import (
            WhylogsDataValidator,
        )

        return WhylogsDataValidator
config_class: Type[zenml.integrations.whylogs.flavors.whylogs_data_validator_flavor.WhylogsDataValidatorConfig] property readonly

Returns WhylogsDataValidatorConfig config class.

Returns:

Type Description
Type[zenml.integrations.whylogs.flavors.whylogs_data_validator_flavor.WhylogsDataValidatorConfig]

The config class.

implementation_class: Type[WhylogsDataValidator] property readonly

Implementation class for this flavor.

Returns:

Type Description
Type[WhylogsDataValidator]

The implementation class.

name: str property readonly

Name of the flavor.

Returns:

Type Description
str

The name of the flavor.

cleanup_step_run(self, info)

Resets Whylabs configuration.

Parameters:

Name Type Description Default
info StepRunInfo

Info about the step that was executed.

required
Source code in zenml/integrations/whylogs/data_validators/whylogs_data_validator.py
def cleanup_step_run(self, info: "StepRunInfo") -> None:
    """Resets Whylabs configuration.

    Args:
        info: Info about the step that was executed.
    """
    settings = cast(WhylogsDataValidatorSettings, self.get_settings(info))
    if settings.enable_whylabs:
        del os.environ[WHYLABS_LOGGING_ENABLED_ENV]
    if settings.dataset_id:
        del os.environ[WHYLABS_DATASET_ID_ENV]
data_profiling(self, dataset, comparison_dataset=None, profile_list=None, dataset_timestamp=None, **kwargs)

Analyze a dataset and generate a data profile with whylogs.

Parameters:

Name Type Description Default
dataset DataFrame

Target dataset to be profiled.

required
comparison_dataset Optional[pandas.core.frame.DataFrame]

Optional dataset to be used for data profiles that require a baseline for comparison (e.g data drift profiles).

None
profile_list Optional[Sequence[str]]

Optional list identifying the categories of whylogs data profiles to be generated (unused).

None
dataset_timestamp Optional[datetime.datetime]

timestamp to associate with the generated dataset profile (Optional). The current time is used if not supplied.

None
**kwargs Any

Extra keyword arguments (unused).

{}

Returns:

Type Description
DatasetProfileView

A whylogs profile view object.

Source code in zenml/integrations/whylogs/data_validators/whylogs_data_validator.py
def data_profiling(
    self,
    dataset: pd.DataFrame,
    comparison_dataset: Optional[pd.DataFrame] = None,
    profile_list: Optional[Sequence[str]] = None,
    dataset_timestamp: Optional[datetime.datetime] = None,
    **kwargs: Any,
) -> DatasetProfileView:
    """Analyze a dataset and generate a data profile with whylogs.

    Args:
        dataset: Target dataset to be profiled.
        comparison_dataset: Optional dataset to be used for data profiles
            that require a baseline for comparison (e.g data drift profiles).
        profile_list: Optional list identifying the categories of whylogs
            data profiles to be generated (unused).
        dataset_timestamp: timestamp to associate with the generated
            dataset profile (Optional). The current time is used if not
            supplied.
        **kwargs: Extra keyword arguments (unused).

    Returns:
        A whylogs profile view object.
    """
    results = why.log(pandas=dataset)
    profile = results.profile()
    dataset_timestamp = dataset_timestamp or datetime.datetime.utcnow()
    profile.set_dataset_timestamp(dataset_timestamp=dataset_timestamp)
    return profile.view()
prepare_step_run(self, info)

Configures Whylabs logging.

Parameters:

Name Type Description Default
info StepRunInfo

Info about the step that will be executed.

required
Source code in zenml/integrations/whylogs/data_validators/whylogs_data_validator.py
def prepare_step_run(self, info: "StepRunInfo") -> None:
    """Configures Whylabs logging.

    Args:
        info: Info about the step that will be executed.
    """
    settings = cast(WhylogsDataValidatorSettings, self.get_settings(info))
    if settings.enable_whylabs:
        os.environ[WHYLABS_LOGGING_ENABLED_ENV] = "true"
    if settings.dataset_id:
        os.environ[WHYLABS_DATASET_ID_ENV] = settings.dataset_id
upload_profile_view(self, profile_view, dataset_id=None)

Upload a whylogs data profile view to Whylabs, if configured to do so.

Parameters:

Name Type Description Default
profile_view DatasetProfileView

Whylogs profile view to upload.

required
dataset_id Optional[str]

Optional dataset identifier to use for the uploaded data profile. If omitted, a dataset identifier will be retrieved using other means, in order: * the default dataset identifier configured in the Data Validator secret * a dataset ID will be generated automatically based on the current pipeline/step information.

None

Exceptions:

Type Description
ValueError

If the dataset ID was not provided and could not be retrieved or inferred from other sources.

Source code in zenml/integrations/whylogs/data_validators/whylogs_data_validator.py
def upload_profile_view(
    self,
    profile_view: DatasetProfileView,
    dataset_id: Optional[str] = None,
) -> None:
    """Upload a whylogs data profile view to Whylabs, if configured to do so.

    Args:
        profile_view: Whylogs profile view to upload.
        dataset_id: Optional dataset identifier to use for the uploaded
            data profile. If omitted, a dataset identifier will be retrieved
            using other means, in order:
                * the default dataset identifier configured in the Data
                Validator secret
                * a dataset ID will be generated automatically based on the
                current pipeline/step information.

    Raises:
        ValueError: If the dataset ID was not provided and could not be
            retrieved or inferred from other sources.
    """
    secret = self.get_authentication_secret(
        expected_schema_type=WhylabsSecretSchema
    )
    if not secret:
        return

    dataset_id = dataset_id or secret.whylabs_default_dataset_id

    if not dataset_id:
        # use the current pipeline name and the step name to generate a
        # unique dataset name
        try:
            # get pipeline name and step name
            step_env = cast(
                StepEnvironment, Environment()[STEP_ENVIRONMENT_NAME]
            )
            dataset_id = f"{step_env.pipeline_name}_{step_env.step_name}"
        except KeyError:
            raise ValueError(
                "A dataset ID was not specified and could not be "
                "generated from the current pipeline and step name."
            )

    # Instantiate WhyLabs Writer
    writer = WhyLabsWriter(
        org_id=secret.whylabs_default_org_id,
        api_key=secret.whylabs_api_key,
        dataset_id=dataset_id,
    )

    # pass a profile view to the writer's write method
    writer.write(profile=profile_view)

flavors special

WhyLabs whylogs integration flavors.

whylogs_data_validator_flavor

WhyLabs whylogs data validator flavor.

WhylogsDataValidatorConfig (BaseDataValidatorConfig, AuthenticationConfigMixin, WhylogsDataValidatorSettings) pydantic-model

Config for the whylogs data validator.

Source code in zenml/integrations/whylogs/flavors/whylogs_data_validator_flavor.py
class WhylogsDataValidatorConfig(  # type: ignore[misc] # https://github.com/pydantic/pydantic/issues/4173
    BaseDataValidatorConfig,
    AuthenticationConfigMixin,
    WhylogsDataValidatorSettings,
):
    """Config for the whylogs data validator."""
WhylogsDataValidatorFlavor (BaseDataValidatorFlavor)

Whylogs data validator flavor.

Source code in zenml/integrations/whylogs/flavors/whylogs_data_validator_flavor.py
class WhylogsDataValidatorFlavor(BaseDataValidatorFlavor):
    """Whylogs data validator flavor."""

    @property
    def name(self) -> str:
        """Name of the flavor.

        Returns:
            The name of the flavor.
        """
        return WHYLOGS_DATA_VALIDATOR_FLAVOR

    @property
    def config_class(self) -> Type[WhylogsDataValidatorConfig]:
        """Returns `WhylogsDataValidatorConfig` config class.

        Returns:
                The config class.
        """
        return WhylogsDataValidatorConfig

    @property
    def implementation_class(self) -> Type["WhylogsDataValidator"]:
        """Implementation class for this flavor.

        Returns:
            The implementation class.
        """
        from zenml.integrations.whylogs.data_validators import (
            WhylogsDataValidator,
        )

        return WhylogsDataValidator
config_class: Type[zenml.integrations.whylogs.flavors.whylogs_data_validator_flavor.WhylogsDataValidatorConfig] property readonly

Returns WhylogsDataValidatorConfig config class.

Returns:

Type Description
Type[zenml.integrations.whylogs.flavors.whylogs_data_validator_flavor.WhylogsDataValidatorConfig]

The config class.

implementation_class: Type[WhylogsDataValidator] property readonly

Implementation class for this flavor.

Returns:

Type Description
Type[WhylogsDataValidator]

The implementation class.

name: str property readonly

Name of the flavor.

Returns:

Type Description
str

The name of the flavor.

WhylogsDataValidatorSettings (BaseSettings) pydantic-model

Settings for the Whylogs data validator.

Attributes:

Name Type Description
enable_whylabs bool

If set to True for a step, all the whylogs data profile views returned by the step will automatically be uploaded to the Whylabs platform if Whylabs credentials are configured.

dataset_id Optional[str]

Dataset ID to use when uploading profiles to Whylabs.

Source code in zenml/integrations/whylogs/flavors/whylogs_data_validator_flavor.py
class WhylogsDataValidatorSettings(BaseSettings):
    """Settings for the Whylogs data validator.

    Attributes:
        enable_whylabs: If set to `True` for a step, all the whylogs data
            profile views returned by the step will automatically be uploaded
            to the Whylabs platform if Whylabs credentials are configured.
        dataset_id: Dataset ID to use when uploading profiles to Whylabs.
    """

    enable_whylabs: bool = False
    dataset_id: Optional[str] = None

materializers special

Initialization of the whylogs materializer.

whylogs_materializer

Implementation of the whylogs materializer.

WhylogsMaterializer (BaseMaterializer)

Materializer to read/write whylogs dataset profile views.

Source code in zenml/integrations/whylogs/materializers/whylogs_materializer.py
class WhylogsMaterializer(BaseMaterializer):
    """Materializer to read/write whylogs dataset profile views."""

    ASSOCIATED_TYPES = (DatasetProfileView,)
    ASSOCIATED_ARTIFACT_TYPES = (StatisticsArtifact,)

    def handle_input(self, data_type: Type[Any]) -> DatasetProfileView:
        """Reads and returns a whylogs dataset profile view.

        Args:
            data_type: The type of the data to read.

        Returns:
            A loaded whylogs dataset profile view object.
        """
        super().handle_input(data_type)
        filepath = os.path.join(self.artifact.uri, PROFILE_FILENAME)

        # Create a temporary folder
        temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
        temp_file = os.path.join(str(temp_dir), PROFILE_FILENAME)

        # Copy from artifact store to temporary file
        fileio.copy(filepath, temp_file)
        profile_view = DatasetProfileView.read(temp_file)

        # Cleanup and return
        fileio.rmtree(temp_dir)

        return profile_view

    def handle_return(self, profile_view: DatasetProfileView) -> None:
        """Writes a whylogs dataset profile view.

        Args:
            profile_view: A whylogs dataset profile view object.
        """
        super().handle_return(profile_view)
        filepath = os.path.join(self.artifact.uri, PROFILE_FILENAME)

        # Create a temporary folder
        temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
        temp_file = os.path.join(str(temp_dir), PROFILE_FILENAME)

        profile_view.write(temp_file)

        # Copy it into artifact store
        fileio.copy(temp_file, filepath)
        fileio.rmtree(temp_dir)

        # Use the data validator to upload the profile view to Whylabs,
        # if configured to do so. This logic is only enabled if the pipeline
        # step was decorated with the `enable_whylabs` decorator
        whylabs_enabled = os.environ.get(WHYLABS_LOGGING_ENABLED_ENV)
        if not whylabs_enabled:
            return
        dataset_id = os.environ.get(WHYLABS_DATASET_ID_ENV)
        data_validator = cast(
            WhylogsDataValidator,
            WhylogsDataValidator.get_active_data_validator(),
        )
        data_validator.upload_profile_view(profile_view, dataset_id=dataset_id)
handle_input(self, data_type)

Reads and returns a whylogs dataset profile view.

Parameters:

Name Type Description Default
data_type Type[Any]

The type of the data to read.

required

Returns:

Type Description
DatasetProfileView

A loaded whylogs dataset profile view object.

Source code in zenml/integrations/whylogs/materializers/whylogs_materializer.py
def handle_input(self, data_type: Type[Any]) -> DatasetProfileView:
    """Reads and returns a whylogs dataset profile view.

    Args:
        data_type: The type of the data to read.

    Returns:
        A loaded whylogs dataset profile view object.
    """
    super().handle_input(data_type)
    filepath = os.path.join(self.artifact.uri, PROFILE_FILENAME)

    # Create a temporary folder
    temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
    temp_file = os.path.join(str(temp_dir), PROFILE_FILENAME)

    # Copy from artifact store to temporary file
    fileio.copy(filepath, temp_file)
    profile_view = DatasetProfileView.read(temp_file)

    # Cleanup and return
    fileio.rmtree(temp_dir)

    return profile_view
handle_return(self, profile_view)

Writes a whylogs dataset profile view.

Parameters:

Name Type Description Default
profile_view DatasetProfileView

A whylogs dataset profile view object.

required
Source code in zenml/integrations/whylogs/materializers/whylogs_materializer.py
def handle_return(self, profile_view: DatasetProfileView) -> None:
    """Writes a whylogs dataset profile view.

    Args:
        profile_view: A whylogs dataset profile view object.
    """
    super().handle_return(profile_view)
    filepath = os.path.join(self.artifact.uri, PROFILE_FILENAME)

    # Create a temporary folder
    temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
    temp_file = os.path.join(str(temp_dir), PROFILE_FILENAME)

    profile_view.write(temp_file)

    # Copy it into artifact store
    fileio.copy(temp_file, filepath)
    fileio.rmtree(temp_dir)

    # Use the data validator to upload the profile view to Whylabs,
    # if configured to do so. This logic is only enabled if the pipeline
    # step was decorated with the `enable_whylabs` decorator
    whylabs_enabled = os.environ.get(WHYLABS_LOGGING_ENABLED_ENV)
    if not whylabs_enabled:
        return
    dataset_id = os.environ.get(WHYLABS_DATASET_ID_ENV)
    data_validator = cast(
        WhylogsDataValidator,
        WhylogsDataValidator.get_active_data_validator(),
    )
    data_validator.upload_profile_view(profile_view, dataset_id=dataset_id)

secret_schemas special

Initialization for the Whylabs secret schema.

This schema can be used to configure a ZenML secret to authenticate ZenML to use the Whylabs platform to automatically log all whylogs data profiles generated and by pipeline steps.

whylabs_secret_schema

Implementation for Seldon secret schemas.

WhylabsSecretSchema (BaseSecretSchema) pydantic-model

Whylabs credentials.

Attributes:

Name Type Description
whylabs_default_org_id str

the Whylabs organization ID.

whylabs_api_key str

Whylabs API key.

whylabs_default_dataset_id Optional[str]

default Whylabs dataset ID to use when logging data profiles.

Source code in zenml/integrations/whylogs/secret_schemas/whylabs_secret_schema.py
class WhylabsSecretSchema(BaseSecretSchema):
    """Whylabs credentials.

    Attributes:
        whylabs_default_org_id: the Whylabs organization ID.
        whylabs_api_key: Whylabs API key.
        whylabs_default_dataset_id: default Whylabs dataset ID to use when
            logging data profiles.
    """

    TYPE: ClassVar[str] = WHYLABS_SECRET_SCHEMA_TYPE

    whylabs_default_org_id: str
    whylabs_api_key: str
    whylabs_default_dataset_id: Optional[str] = None

steps special

Initialization of the whylogs steps.

whylogs_profiler

Implementation of the whylogs profiler step.

WhylogsProfilerParameters (BaseAnalyzerParameters) pydantic-model

Parameters class for the WhylogsProfiler step.

Attributes:

Name Type Description
dataset_timestamp Optional[datetime.datetime]

timestamp to associate with the generated dataset profile (Optional). The current time is used if not supplied.

Source code in zenml/integrations/whylogs/steps/whylogs_profiler.py
class WhylogsProfilerParameters(BaseAnalyzerParameters):
    """Parameters class for the WhylogsProfiler step.

    Attributes:
        dataset_timestamp: timestamp to associate with the generated
            dataset profile (Optional). The current time is used if not
            supplied.
    """

    dataset_timestamp: Optional[datetime.datetime]
WhylogsProfilerStep (BaseAnalyzerStep)

Generates a whylogs data profile from a given pd.DataFrame.

Source code in zenml/integrations/whylogs/steps/whylogs_profiler.py
class WhylogsProfilerStep(BaseAnalyzerStep):
    """Generates a whylogs data profile from a given pd.DataFrame."""

    @staticmethod
    def entrypoint(  # type: ignore[override]
        dataset: pd.DataFrame,
        params: WhylogsProfilerParameters,
    ) -> DatasetProfileView:
        """Main entrypoint function for the whylogs profiler.

        Args:
            dataset: pd.DataFrame, the given dataset
            params: the parameters of the step

        Returns:
            whylogs profile with statistics generated for the input dataset
        """
        data_validator = cast(
            WhylogsDataValidator,
            WhylogsDataValidator.get_active_data_validator(),
        )
        return data_validator.data_profiling(
            dataset, dataset_timestamp=params.dataset_timestamp
        )
PARAMETERS_CLASS (BaseAnalyzerParameters) pydantic-model

Parameters class for the WhylogsProfiler step.

Attributes:

Name Type Description
dataset_timestamp Optional[datetime.datetime]

timestamp to associate with the generated dataset profile (Optional). The current time is used if not supplied.

Source code in zenml/integrations/whylogs/steps/whylogs_profiler.py
class WhylogsProfilerParameters(BaseAnalyzerParameters):
    """Parameters class for the WhylogsProfiler step.

    Attributes:
        dataset_timestamp: timestamp to associate with the generated
            dataset profile (Optional). The current time is used if not
            supplied.
    """

    dataset_timestamp: Optional[datetime.datetime]
entrypoint(dataset, params) staticmethod

Main entrypoint function for the whylogs profiler.

Parameters:

Name Type Description Default
dataset DataFrame

pd.DataFrame, the given dataset

required
params WhylogsProfilerParameters

the parameters of the step

required

Returns:

Type Description
DatasetProfileView

whylogs profile with statistics generated for the input dataset

Source code in zenml/integrations/whylogs/steps/whylogs_profiler.py
@staticmethod
def entrypoint(  # type: ignore[override]
    dataset: pd.DataFrame,
    params: WhylogsProfilerParameters,
) -> DatasetProfileView:
    """Main entrypoint function for the whylogs profiler.

    Args:
        dataset: pd.DataFrame, the given dataset
        params: the parameters of the step

    Returns:
        whylogs profile with statistics generated for the input dataset
    """
    data_validator = cast(
        WhylogsDataValidator,
        WhylogsDataValidator.get_active_data_validator(),
    )
    return data_validator.data_profiling(
        dataset, dataset_timestamp=params.dataset_timestamp
    )
whylogs_profiler_step(step_name, params, dataset_id=None)

Shortcut function to create a new instance of the WhylogsProfilerStep step.

The returned WhylogsProfilerStep can be used in a pipeline to generate a whylogs DatasetProfileView from a given pd.DataFrame and save it as an artifact.

Parameters:

Name Type Description Default
step_name str

The name of the step

required
params WhylogsProfilerParameters

The step parameters

required
dataset_id Optional[str]

Optional dataset ID to use to upload the profile to Whylabs.

None

Returns:

Type Description
BaseStep

a WhylogsProfilerStep step instance

Source code in zenml/integrations/whylogs/steps/whylogs_profiler.py
def whylogs_profiler_step(
    step_name: str,
    params: WhylogsProfilerParameters,
    dataset_id: Optional[str] = None,
) -> BaseStep:
    """Shortcut function to create a new instance of the WhylogsProfilerStep step.

    The returned WhylogsProfilerStep can be used in a pipeline to generate a
    whylogs DatasetProfileView from a given pd.DataFrame and save it as an
    artifact.

    Args:
        step_name: The name of the step
        params: The step parameters
        dataset_id: Optional dataset ID to use to upload the profile to Whylabs.

    Returns:
        a WhylogsProfilerStep step instance
    """
    step_instance = WhylogsProfilerStep(name=step_name, params=params)
    key = settings_utils.get_flavor_setting_key(WhylogsDataValidatorFlavor())

    settings = WhylogsDataValidatorSettings(
        enable_whylabs=True, dataset_id=dataset_id
    )
    step_instance.configure(settings={key: settings})
    return step_instance

visualizers special

Initialization of the whylogs visualizer.

whylogs_visualizer

Implementation of the whylogs visualizer step.

WhylogsVisualizer (BaseVisualizer)

The implementation of a Whylogs Visualizer.

Source code in zenml/integrations/whylogs/visualizers/whylogs_visualizer.py
class WhylogsVisualizer(BaseVisualizer):
    """The implementation of a Whylogs Visualizer."""

    def visualize(
        self,
        object: StepView,
        reference_step_view: Optional[StepView] = None,
        *args: Any,
        **kwargs: Any,
    ) -> None:
        """Visualize whylogs dataset profiles present as outputs in the step view.

        Args:
            object: StepView fetched from run.get_step().
            reference_step_view: second StepView fetched from run.get_step() to
                use as a reference to visualize data drift
            *args: additional positional arguments to pass to the visualize
                method
            **kwargs: additional keyword arguments to pass to the visualize
                method
        """

        def extract_profile(
            step_view: StepView,
        ) -> Optional[DatasetProfileView]:
            """Extract a whylogs DatasetProfileView from a step view.

            Args:
                step_view: a step view

            Returns:
                A whylogs DatasetProfileView object loaded from the step view,
                if one could be found, otherwise None.
            """
            whylogs_artifact_datatype = (
                f"{DatasetProfileView.__module__}.{DatasetProfileView.__name__}"
            )
            for _, artifact_view in step_view.outputs.items():
                # filter out anything but whylogs dataset profile artifacts
                if artifact_view.data_type == whylogs_artifact_datatype:
                    profile = artifact_view.read()
                    return cast(DatasetProfileView, profile)
            return None

        profile = extract_profile(object)
        reference_profile: Optional[DatasetProfileView] = None
        if reference_step_view:
            reference_profile = extract_profile(reference_step_view)

        self.visualize_profile(profile, reference_profile)

    def visualize_profile(
        self,
        profile: DatasetProfileView,
        reference_profile: Optional[DatasetProfileView] = None,
    ) -> None:
        """Generate a visualization of one or two whylogs dataset profile.

        Args:
            profile: whylogs DatasetProfileView to visualize
            reference_profile: second optional DatasetProfileView to use to
                generate a data drift visualization
        """
        # currently, whylogs doesn't support visualizing a single profile, so
        # we trick it by using the same profile twice, both as reference and
        # target, in a drift report
        reference_profile = reference_profile or profile
        visualization = NotebookProfileVisualizer()
        visualization.set_profiles(
            target_profile_view=profile,
            reference_profile_view=reference_profile,
        )
        rendered_html = visualization.summary_drift_report()

        if Environment.in_notebook():
            from IPython.core.display import display

            display(rendered_html)
            for column in sorted(list(profile.get_columns().keys())):
                display(visualization.double_histogram(feature_name=column))
        else:
            logger.warning(
                "The magic functions are only usable in a Jupyter notebook."
            )
            with tempfile.NamedTemporaryFile(
                mode="w", delete=False, suffix=".html", encoding="utf-8"
            ) as f:
                f.write(rendered_html.data)
                url = f"file:///{f.name}"
            logger.info("Opening %s in a new browser.." % f.name)
            webbrowser.open(url, new=2)
visualize(self, object, reference_step_view=None, *args, **kwargs)

Visualize whylogs dataset profiles present as outputs in the step view.

Parameters:

Name Type Description Default
object StepView

StepView fetched from run.get_step().

required
reference_step_view Optional[zenml.post_execution.step.StepView]

second StepView fetched from run.get_step() to use as a reference to visualize data drift

None
*args Any

additional positional arguments to pass to the visualize method

()
**kwargs Any

additional keyword arguments to pass to the visualize method

{}
Source code in zenml/integrations/whylogs/visualizers/whylogs_visualizer.py
def visualize(
    self,
    object: StepView,
    reference_step_view: Optional[StepView] = None,
    *args: Any,
    **kwargs: Any,
) -> None:
    """Visualize whylogs dataset profiles present as outputs in the step view.

    Args:
        object: StepView fetched from run.get_step().
        reference_step_view: second StepView fetched from run.get_step() to
            use as a reference to visualize data drift
        *args: additional positional arguments to pass to the visualize
            method
        **kwargs: additional keyword arguments to pass to the visualize
            method
    """

    def extract_profile(
        step_view: StepView,
    ) -> Optional[DatasetProfileView]:
        """Extract a whylogs DatasetProfileView from a step view.

        Args:
            step_view: a step view

        Returns:
            A whylogs DatasetProfileView object loaded from the step view,
            if one could be found, otherwise None.
        """
        whylogs_artifact_datatype = (
            f"{DatasetProfileView.__module__}.{DatasetProfileView.__name__}"
        )
        for _, artifact_view in step_view.outputs.items():
            # filter out anything but whylogs dataset profile artifacts
            if artifact_view.data_type == whylogs_artifact_datatype:
                profile = artifact_view.read()
                return cast(DatasetProfileView, profile)
        return None

    profile = extract_profile(object)
    reference_profile: Optional[DatasetProfileView] = None
    if reference_step_view:
        reference_profile = extract_profile(reference_step_view)

    self.visualize_profile(profile, reference_profile)
visualize_profile(self, profile, reference_profile=None)

Generate a visualization of one or two whylogs dataset profile.

Parameters:

Name Type Description Default
profile DatasetProfileView

whylogs DatasetProfileView to visualize

required
reference_profile Optional[whylogs.core.view.dataset_profile_view.DatasetProfileView]

second optional DatasetProfileView to use to generate a data drift visualization

None
Source code in zenml/integrations/whylogs/visualizers/whylogs_visualizer.py
def visualize_profile(
    self,
    profile: DatasetProfileView,
    reference_profile: Optional[DatasetProfileView] = None,
) -> None:
    """Generate a visualization of one or two whylogs dataset profile.

    Args:
        profile: whylogs DatasetProfileView to visualize
        reference_profile: second optional DatasetProfileView to use to
            generate a data drift visualization
    """
    # currently, whylogs doesn't support visualizing a single profile, so
    # we trick it by using the same profile twice, both as reference and
    # target, in a drift report
    reference_profile = reference_profile or profile
    visualization = NotebookProfileVisualizer()
    visualization.set_profiles(
        target_profile_view=profile,
        reference_profile_view=reference_profile,
    )
    rendered_html = visualization.summary_drift_report()

    if Environment.in_notebook():
        from IPython.core.display import display

        display(rendered_html)
        for column in sorted(list(profile.get_columns().keys())):
            display(visualization.double_histogram(feature_name=column))
    else:
        logger.warning(
            "The magic functions are only usable in a Jupyter notebook."
        )
        with tempfile.NamedTemporaryFile(
            mode="w", delete=False, suffix=".html", encoding="utf-8"
        ) as f:
            f.write(rendered_html.data)
            url = f"file:///{f.name}"
        logger.info("Opening %s in a new browser.." % f.name)
        webbrowser.open(url, new=2)