Whylogs

`zenml.integrations.whylogs` `special`

Initialization of the whylogs integration.

`WhylogsIntegration (Integration)`

Definition of whylogs integration for ZenML.

Source code in zenml/integrations/whylogs/__init__.py

class WhylogsIntegration(Integration):
    """Definition of [whylogs](https://github.com/whylabs/whylogs) integration for ZenML."""

    NAME = WHYLOGS
    REQUIREMENTS = ["whylogs[viz]~=1.0.5", "whylogs[whylabs]~=1.0.5"]

    @classmethod
    def activate(cls) -> None:
        """Activates the integration.

        Imports the whylogs materializers, secret schemas and visualizers
        sub-packages. The imported names are not used here (hence `# noqa`).
        """
        from zenml.integrations.whylogs import materializers  # noqa
        from zenml.integrations.whylogs import secret_schemas  # noqa
        from zenml.integrations.whylogs import visualizers  # noqa

    @classmethod
    def flavors(cls) -> List[Type[Flavor]]:
        """Declare the stack component flavors for the whylogs integration.

        Returns:
            List of stack component flavors for this integration.
        """
        # Imported inside the method rather than at module import time.
        from zenml.integrations.whylogs.flavors import (
            WhylogsDataValidatorFlavor,
        )

        return [WhylogsDataValidatorFlavor]

`activate()` `classmethod`

Activates the integration.

Source code in zenml/integrations/whylogs/__init__.py

@classmethod
def activate(cls) -> None:
    """Activate the whylogs integration.

    The materializers, secret schemas and visualizers sub-packages are
    imported, in that order; the imported names are not used afterwards.
    """
    from zenml.integrations.whylogs import (  # noqa
        materializers,
        secret_schemas,
        visualizers,
    )

`flavors()` `classmethod`

Declare the stack component flavors for the whylogs integration.

Returns:

Type	Description
`List[Type[zenml.stack.flavor.Flavor]]`	List of stack component flavors for this integration.

Source code in zenml/integrations/whylogs/__init__.py

@classmethod
def flavors(cls) -> List[Type[Flavor]]:
    """Declare the stack component flavors for the whylogs integration.

    Returns:
        List of stack component flavors for this integration.
    """
    # Imported inside the method rather than at module import time.
    from zenml.integrations.whylogs.flavors import (
        WhylogsDataValidatorFlavor,
    )

    return [WhylogsDataValidatorFlavor]

`constants`

Whylogs integration constants.

`data_validators` `special`

Initialization of the whylogs data validator for ZenML.

`whylogs_data_validator`

Implementation of the whylogs data validator.

`WhylogsDataValidator (BaseDataValidator, AuthenticationMixin)`

Whylogs data validator stack component.

Attributes:

Name	Type	Description
`authentication_secret`		Optional ZenML secret with Whylabs credentials. If configured, all the data profiles returned by all pipeline steps will automatically be uploaded to Whylabs in addition to being stored in the ZenML Artifact Store.

Source code in zenml/integrations/whylogs/data_validators/whylogs_data_validator.py

class WhylogsDataValidator(BaseDataValidator, AuthenticationMixin):
    """Whylogs data validator stack component.

    Attributes:
        authentication_secret: Optional ZenML secret with Whylabs credentials.
            If configured, all the data profiles returned by all pipeline steps
            will automatically be uploaded to Whylabs in addition to being
            stored in the ZenML Artifact Store.
    """

    NAME: ClassVar[str] = "whylogs"
    FLAVOR: ClassVar[Type[BaseDataValidatorFlavor]] = WhylogsDataValidatorFlavor

    @property
    def config(self) -> WhylogsDataValidatorConfig:
        """Returns the `WhylogsDataValidatorConfig` config.

        Returns:
            The configuration.
        """
        return cast(WhylogsDataValidatorConfig, self._config)

    @property
    def settings_class(self) -> Optional[Type["BaseSettings"]]:
        """Settings class for the Whylogs data validator.

        Returns:
            The settings class.
        """
        return WhylogsDataValidatorSettings

    def prepare_step_run(self, info: "StepRunInfo") -> None:
        """Configures Whylabs logging.

        Exports the step's Whylabs settings as environment variables.

        Args:
            info: Info about the step that will be executed.
        """
        settings = cast(WhylogsDataValidatorSettings, self.get_settings(info))
        if settings.enable_whylabs:
            os.environ[WHYLABS_LOGGING_ENABLED_ENV] = "true"
        if settings.dataset_id:
            os.environ[WHYLABS_DATASET_ID_ENV] = settings.dataset_id

    def cleanup_step_run(self, info: "StepRunInfo") -> None:
        """Resets Whylabs configuration.

        Removes the environment variables exported by `prepare_step_run`.

        Args:
            info: Info about the step that was executed.
        """
        settings = cast(WhylogsDataValidatorSettings, self.get_settings(info))
        # `pop` with a default instead of `del`: cleanup must not raise a
        # KeyError if a variable is already gone (e.g. it was removed by the
        # step itself or `prepare_step_run` did not complete).
        if settings.enable_whylabs:
            os.environ.pop(WHYLABS_LOGGING_ENABLED_ENV, None)
        if settings.dataset_id:
            os.environ.pop(WHYLABS_DATASET_ID_ENV, None)

    def data_profiling(
        self,
        dataset: pd.DataFrame,
        comparison_dataset: Optional[pd.DataFrame] = None,
        profile_list: Optional[Sequence[str]] = None,
        dataset_timestamp: Optional[datetime.datetime] = None,
        **kwargs: Any,
    ) -> DatasetProfileView:
        """Analyze a dataset and generate a data profile with whylogs.

        Args:
            dataset: Target dataset to be profiled.
            comparison_dataset: Optional dataset to be used for data profiles
                that require a baseline for comparison (e.g data drift
                profiles). Not used by this implementation.
            profile_list: Optional list identifying the categories of whylogs
                data profiles to be generated (unused).
            dataset_timestamp: timestamp to associate with the generated
                dataset profile (Optional). The current UTC time is used if
                not supplied.
            **kwargs: Extra keyword arguments (unused).

        Returns:
            A whylogs profile view object.
        """
        results = why.log(pandas=dataset)
        profile = results.profile()
        if dataset_timestamp is None:
            # Use a timezone-aware "now": a naive `utcnow()` value is treated
            # as local time by `astimezone()`/`timestamp()`, which shifts the
            # profile timestamp on hosts that are not running in UTC.
            dataset_timestamp = datetime.datetime.now(datetime.timezone.utc)
        profile.set_dataset_timestamp(dataset_timestamp=dataset_timestamp)
        return profile.view()

    def upload_profile_view(
        self,
        profile_view: DatasetProfileView,
        dataset_id: Optional[str] = None,
    ) -> None:
        """Upload a whylogs data profile view to Whylabs, if configured to do so.

        Nothing is uploaded when no authentication secret is available.

        Args:
            profile_view: Whylogs profile view to upload.
            dataset_id: Optional dataset identifier to use for the uploaded
                data profile. If omitted, a dataset identifier will be retrieved
                using other means, in order:
                    * the default dataset identifier configured in the Data
                    Validator secret
                    * a dataset ID will be generated automatically based on the
                    current pipeline/step information.

        Raises:
            ValueError: If the dataset ID was not provided and could not be
                retrieved or inferred from other sources.
        """
        secret = self.get_authentication_secret(
            expected_schema_type=WhylabsSecretSchema
        )
        if not secret:
            return

        dataset_id = dataset_id or secret.whylabs_default_dataset_id

        if not dataset_id:
            # use the current pipeline name and the step name to generate a
            # unique dataset name
            try:
                # get pipeline name and step name
                step_env = cast(
                    StepEnvironment, Environment()[STEP_ENVIRONMENT_NAME]
                )
                dataset_id = f"{step_env.pipeline_name}_{step_env.step_name}"
            except KeyError as err:
                # Chain the lookup failure so the root cause stays visible.
                raise ValueError(
                    "A dataset ID was not specified and could not be "
                    "generated from the current pipeline and step name."
                ) from err

        # Instantiate WhyLabs Writer
        writer = WhyLabsWriter(
            org_id=secret.whylabs_default_org_id,
            api_key=secret.whylabs_api_key,
            dataset_id=dataset_id,
        )

        # pass a profile view to the writer's write method
        writer.write(profile=profile_view)

`config: WhylogsDataValidatorConfig` `property` `readonly`

Returns the WhylogsDataValidatorConfig config.

Returns:

Type	Description
`WhylogsDataValidatorConfig`	The configuration.

`settings_class: Optional[Type[BaseSettings]]` `property` `readonly`

Settings class for the Whylogs data validator.

Returns:

Type	Description
`Optional[Type[BaseSettings]]`	The settings class.

`FLAVOR (BaseDataValidatorFlavor)`

Whylogs data validator flavor.

Source code in zenml/integrations/whylogs/data_validators/whylogs_data_validator.py

class WhylogsDataValidatorFlavor(BaseDataValidatorFlavor):
    """Flavor definition for the whylogs data validator."""

    @property
    def name(self) -> str:
        """Flavor name.

        Returns:
            The whylogs data validator flavor name constant.
        """
        flavor_name = WHYLOGS_DATA_VALIDATOR_FLAVOR
        return flavor_name

    @property
    def config_class(self) -> Type[WhylogsDataValidatorConfig]:
        """Config class used by this flavor.

        Returns:
            The `WhylogsDataValidatorConfig` class.
        """
        flavor_config_class = WhylogsDataValidatorConfig
        return flavor_config_class

    @property
    def implementation_class(self) -> Type["WhylogsDataValidator"]:
        """Stack component class implementing this flavor.

        Returns:
            The `WhylogsDataValidator` class.
        """
        # Imported on access, not at module import time.
        from zenml.integrations.whylogs.data_validators import (
            WhylogsDataValidator as validator_class,
        )

        return validator_class

`config_class: Type[zenml.integrations.whylogs.flavors.whylogs_data_validator_flavor.WhylogsDataValidatorConfig]` `property` `readonly`

Returns WhylogsDataValidatorConfig config class.

Returns:

Type	Description
`Type[zenml.integrations.whylogs.flavors.whylogs_data_validator_flavor.WhylogsDataValidatorConfig]`	The config class.

`implementation_class: Type[WhylogsDataValidator]` `property` `readonly`

Implementation class for this flavor.

Returns:

Type	Description
`Type[WhylogsDataValidator]`	The implementation class.

`name: str` `property` `readonly`

Name of the flavor.

Returns:

Type	Description
`str`	The name of the flavor.

`cleanup_step_run(self, info)`

Resets Whylabs configuration.

Parameters:

Name	Type	Description	Default
`info`	`StepRunInfo`	Info about the step that was executed.	required

Source code in zenml/integrations/whylogs/data_validators/whylogs_data_validator.py

def cleanup_step_run(self, info: "StepRunInfo") -> None:
    """Resets Whylabs configuration.

    Removes the Whylabs environment variables for the settings that are
    enabled on the step.

    Args:
        info: Info about the step that was executed.
    """
    settings = cast(WhylogsDataValidatorSettings, self.get_settings(info))
    # `pop` with a default instead of `del`: cleanup must not raise a
    # KeyError if a variable is already gone from the environment.
    if settings.enable_whylabs:
        os.environ.pop(WHYLABS_LOGGING_ENABLED_ENV, None)
    if settings.dataset_id:
        os.environ.pop(WHYLABS_DATASET_ID_ENV, None)

`data_profiling(self, dataset, comparison_dataset=None, profile_list=None, dataset_timestamp=None, **kwargs)`

Analyze a dataset and generate a data profile with whylogs.

Parameters:

Name	Type	Description	Default
`dataset`	`DataFrame`	Target dataset to be profiled.	required
`comparison_dataset`	`Optional[pandas.core.frame.DataFrame]`	Optional dataset to be used for data profiles that require a baseline for comparison (e.g data drift profiles).	`None`
`profile_list`	`Optional[Sequence[str]]`	Optional list identifying the categories of whylogs data profiles to be generated (unused).	`None`
`dataset_timestamp`	`Optional[datetime.datetime]`	timestamp to associate with the generated dataset profile (Optional). The current time is used if not supplied.	`None`
`**kwargs`	`Any`	Extra keyword arguments (unused).	`{}`

Returns:

Type	Description
`DatasetProfileView`	A whylogs profile view object.

Source code in zenml/integrations/whylogs/data_validators/whylogs_data_validator.py

def data_profiling(
    self,
    dataset: pd.DataFrame,
    comparison_dataset: Optional[pd.DataFrame] = None,
    profile_list: Optional[Sequence[str]] = None,
    dataset_timestamp: Optional[datetime.datetime] = None,
    **kwargs: Any,
) -> DatasetProfileView:
    """Analyze a dataset and generate a data profile with whylogs.

    Args:
        dataset: Target dataset to be profiled.
        comparison_dataset: Optional dataset to be used for data profiles
            that require a baseline for comparison (e.g data drift profiles).
            Not used by this implementation.
        profile_list: Optional list identifying the categories of whylogs
            data profiles to be generated (unused).
        dataset_timestamp: timestamp to associate with the generated
            dataset profile (Optional). The current UTC time is used if not
            supplied.
        **kwargs: Extra keyword arguments (unused).

    Returns:
        A whylogs profile view object.
    """
    results = why.log(pandas=dataset)
    profile = results.profile()
    if dataset_timestamp is None:
        # Use a timezone-aware "now": a naive `utcnow()` value is treated as
        # local time by `astimezone()`/`timestamp()`, which shifts the
        # profile timestamp on hosts that are not running in UTC.
        dataset_timestamp = datetime.datetime.now(datetime.timezone.utc)
    profile.set_dataset_timestamp(dataset_timestamp=dataset_timestamp)
    return profile.view()

`prepare_step_run(self, info)`

Configures Whylabs logging.

Parameters:

Name	Type	Description	Default
`info`	`StepRunInfo`	Info about the step that will be executed.	required

Source code in zenml/integrations/whylogs/data_validators/whylogs_data_validator.py

def prepare_step_run(self, info: "StepRunInfo") -> None:
    """Export the step's Whylabs settings as environment variables.

    Args:
        info: Info about the step that will be executed.
    """
    step_settings = cast(
        WhylogsDataValidatorSettings, self.get_settings(info)
    )
    whylabs_dataset_id = step_settings.dataset_id

    # Flag that Whylabs logging is turned on for this step.
    if step_settings.enable_whylabs:
        os.environ[WHYLABS_LOGGING_ENABLED_ENV] = "true"
    # Only export a dataset ID when one was configured.
    if whylabs_dataset_id:
        os.environ[WHYLABS_DATASET_ID_ENV] = whylabs_dataset_id

`upload_profile_view(self, profile_view, dataset_id=None)`

Upload a whylogs data profile view to Whylabs, if configured to do so.

Parameters:

Name	Type	Description	Default
`profile_view`	`DatasetProfileView`	Whylogs profile view to upload.	required
`dataset_id`	`Optional[str]`	Optional dataset identifier to use for the uploaded data profile. If omitted, a dataset identifier will be retrieved using other means, in order: * the default dataset identifier configured in the Data Validator secret * a dataset ID will be generated automatically based on the current pipeline/step information.	`None`

Exceptions:

Type	Description
`ValueError`	If the dataset ID was not provided and could not be retrieved or inferred from other sources.

Source code in zenml/integrations/whylogs/data_validators/whylogs_data_validator.py

def upload_profile_view(
    self,
    profile_view: DatasetProfileView,
    dataset_id: Optional[str] = None,
) -> None:
    """Upload a whylogs data profile view to Whylabs, if configured to do so.

    Nothing is uploaded when no authentication secret is available.

    Args:
        profile_view: Whylogs profile view to upload.
        dataset_id: Optional dataset identifier to use for the uploaded
            data profile. If omitted, a dataset identifier will be retrieved
            using other means, in order:
                * the default dataset identifier configured in the Data
                Validator secret
                * a dataset ID will be generated automatically based on the
                current pipeline/step information.

    Raises:
        ValueError: If the dataset ID was not provided and could not be
            retrieved or inferred from other sources.
    """
    secret = self.get_authentication_secret(
        expected_schema_type=WhylabsSecretSchema
    )
    if not secret:
        return

    dataset_id = dataset_id or secret.whylabs_default_dataset_id

    if not dataset_id:
        # use the current pipeline name and the step name to generate a
        # unique dataset name
        try:
            # get pipeline name and step name
            step_env = cast(
                StepEnvironment, Environment()[STEP_ENVIRONMENT_NAME]
            )
            dataset_id = f"{step_env.pipeline_name}_{step_env.step_name}"
        except KeyError as err:
            # Chain the lookup failure so the root cause stays visible.
            raise ValueError(
                "A dataset ID was not specified and could not be "
                "generated from the current pipeline and step name."
            ) from err

    # Instantiate WhyLabs Writer
    writer = WhyLabsWriter(
        org_id=secret.whylabs_default_org_id,
        api_key=secret.whylabs_api_key,
        dataset_id=dataset_id,
    )

    # pass a profile view to the writer's write method
    writer.write(profile=profile_view)

`flavors` `special`

WhyLabs whylogs integration flavors.

`whylogs_data_validator_flavor`

WhyLabs whylogs data validator flavor.

`WhylogsDataValidatorConfig (BaseDataValidatorConfig, AuthenticationConfigMixin, WhylogsDataValidatorSettings)` `pydantic-model`

Config for the whylogs data validator.

Source code in zenml/integrations/whylogs/flavors/whylogs_data_validator_flavor.py

class WhylogsDataValidatorConfig(  # type: ignore[misc] # https://github.com/pydantic/pydantic/issues/4173
    BaseDataValidatorConfig,
    AuthenticationConfigMixin,
    WhylogsDataValidatorSettings,
):
    """Config for the whylogs data validator.

    Declares no fields of its own: everything comes from the three base
    classes it combines (`BaseDataValidatorConfig`,
    `AuthenticationConfigMixin` and `WhylogsDataValidatorSettings`).
    """

`WhylogsDataValidatorFlavor (BaseDataValidatorFlavor)`

Whylogs data validator flavor.

Source code in zenml/integrations/whylogs/flavors/whylogs_data_validator_flavor.py

class WhylogsDataValidatorFlavor(BaseDataValidatorFlavor):
    """Flavor definition for the whylogs data validator."""

    @property
    def name(self) -> str:
        """Flavor name.

        Returns:
            The whylogs data validator flavor name constant.
        """
        flavor_name = WHYLOGS_DATA_VALIDATOR_FLAVOR
        return flavor_name

    @property
    def config_class(self) -> Type[WhylogsDataValidatorConfig]:
        """Config class used by this flavor.

        Returns:
            The `WhylogsDataValidatorConfig` class.
        """
        flavor_config_class = WhylogsDataValidatorConfig
        return flavor_config_class

    @property
    def implementation_class(self) -> Type["WhylogsDataValidator"]:
        """Stack component class implementing this flavor.

        Returns:
            The `WhylogsDataValidator` class.
        """
        # Imported on access, not at module import time.
        from zenml.integrations.whylogs.data_validators import (
            WhylogsDataValidator as validator_class,
        )

        return validator_class

`config_class: Type[zenml.integrations.whylogs.flavors.whylogs_data_validator_flavor.WhylogsDataValidatorConfig]` `property` `readonly`

Returns WhylogsDataValidatorConfig config class.

Returns:

Type	Description
`Type[zenml.integrations.whylogs.flavors.whylogs_data_validator_flavor.WhylogsDataValidatorConfig]`	The config class.

`implementation_class: Type[WhylogsDataValidator]` `property` `readonly`

Implementation class for this flavor.

Returns:

Type	Description
`Type[WhylogsDataValidator]`	The implementation class.

`name: str` `property` `readonly`

Name of the flavor.

Returns:

Type	Description
`str`	The name of the flavor.

`WhylogsDataValidatorSettings (BaseSettings)` `pydantic-model`

Settings for the Whylogs data validator.

Attributes:

Name	Type	Description
`enable_whylabs`	`bool`	If set to `True` for a step, all the whylogs data profile views returned by the step will automatically be uploaded to the Whylabs platform if Whylabs credentials are configured.
`dataset_id`	`Optional[str]`	Dataset ID to use when uploading profiles to Whylabs.

Source code in zenml/integrations/whylogs/flavors/whylogs_data_validator_flavor.py

class WhylogsDataValidatorSettings(BaseSettings):
    """Settings for the Whylogs data validator.

    Attributes:
        enable_whylabs: If set to `True` for a step, all the whylogs data
            profile views returned by the step will automatically be uploaded
            to the Whylabs platform if Whylabs credentials are configured.
        dataset_id: Dataset ID to use when uploading profiles to Whylabs.
    """

    # Uploading to Whylabs is opt-in: off unless explicitly enabled.
    enable_whylabs: bool = False
    # No dataset ID is configured by default.
    dataset_id: Optional[str] = None

`materializers` `special`

Initialization of the whylogs materializer.

`whylogs_materializer`

Implementation of the whylogs materializer.

`WhylogsMaterializer (BaseMaterializer)`

Materializer to read/write whylogs dataset profile views.

Source code in zenml/integrations/whylogs/materializers/whylogs_materializer.py

class WhylogsMaterializer(BaseMaterializer):
    """Materializer to read/write whylogs dataset profile views."""

    ASSOCIATED_TYPES = (DatasetProfileView,)
    ASSOCIATED_ARTIFACT_TYPES = (StatisticsArtifact,)

    def handle_input(self, data_type: Type[Any]) -> DatasetProfileView:
        """Reads and returns a whylogs dataset profile view.

        The profile is copied from the artifact store into a local temporary
        directory and read from there.

        Args:
            data_type: The type of the data to read.

        Returns:
            A loaded whylogs dataset profile view object.
        """
        super().handle_input(data_type)
        filepath = os.path.join(self.artifact.uri, PROFILE_FILENAME)

        # The context manager removes the temporary folder even when the
        # copy or the read fails, so no directory is leaked on error.
        with tempfile.TemporaryDirectory(prefix="zenml-temp-") as temp_dir:
            temp_file = os.path.join(temp_dir, PROFILE_FILENAME)

            # Copy from artifact store to temporary file
            fileio.copy(filepath, temp_file)
            profile_view = DatasetProfileView.read(temp_file)

        return profile_view

    def handle_return(self, profile_view: DatasetProfileView) -> None:
        """Writes a whylogs dataset profile view.

        The profile is written to a local temporary directory, copied into
        the artifact store and, when Whylabs logging is enabled through the
        environment, uploaded through the active data validator.

        Args:
            profile_view: A whylogs dataset profile view object.
        """
        super().handle_return(profile_view)
        filepath = os.path.join(self.artifact.uri, PROFILE_FILENAME)

        # The context manager removes the temporary folder even when the
        # write or the copy fails, so no directory is leaked on error.
        with tempfile.TemporaryDirectory(prefix="zenml-temp-") as temp_dir:
            temp_file = os.path.join(temp_dir, PROFILE_FILENAME)
            profile_view.write(temp_file)

            # Copy it into artifact store
            fileio.copy(temp_file, filepath)

        # Use the data validator to upload the profile view to Whylabs,
        # if configured to do so. This logic is only enabled when the
        # Whylabs logging environment variable is set, which the data
        # validator does for steps that have `enable_whylabs` turned on in
        # their settings.
        whylabs_enabled = os.environ.get(WHYLABS_LOGGING_ENABLED_ENV)
        if not whylabs_enabled:
            return
        dataset_id = os.environ.get(WHYLABS_DATASET_ID_ENV)
        data_validator = cast(
            WhylogsDataValidator,
            WhylogsDataValidator.get_active_data_validator(),
        )
        data_validator.upload_profile_view(profile_view, dataset_id=dataset_id)

`handle_input(self, data_type)`

Reads and returns a whylogs dataset profile view.

Parameters:

Name	Type	Description	Default
`data_type`	`Type[Any]`	The type of the data to read.	required

Returns:

Type	Description
`DatasetProfileView`	A loaded whylogs dataset profile view object.

Source code in zenml/integrations/whylogs/materializers/whylogs_materializer.py

def handle_input(self, data_type: Type[Any]) -> DatasetProfileView:
    """Reads and returns a whylogs dataset profile view.

    The profile is copied from the artifact store into a local temporary
    directory and read from there.

    Args:
        data_type: The type of the data to read.

    Returns:
        A loaded whylogs dataset profile view object.
    """
    super().handle_input(data_type)
    filepath = os.path.join(self.artifact.uri, PROFILE_FILENAME)

    # The context manager removes the temporary folder even when the copy
    # or the read fails, so no directory is leaked on error.
    with tempfile.TemporaryDirectory(prefix="zenml-temp-") as temp_dir:
        temp_file = os.path.join(temp_dir, PROFILE_FILENAME)

        # Copy from artifact store to temporary file
        fileio.copy(filepath, temp_file)
        profile_view = DatasetProfileView.read(temp_file)

    return profile_view

`handle_return(self, profile_view)`

Writes a whylogs dataset profile view.

Parameters:

Name	Type	Description	Default
`profile_view`	`DatasetProfileView`	A whylogs dataset profile view object.	required

Source code in zenml/integrations/whylogs/materializers/whylogs_materializer.py

def handle_return(self, profile_view: DatasetProfileView) -> None:
    """Writes a whylogs dataset profile view.

    The profile is written to a local temporary directory, copied into the
    artifact store and, when the Whylabs logging environment variable is
    set, uploaded through the active data validator.

    Args:
        profile_view: A whylogs dataset profile view object.
    """
    super().handle_return(profile_view)
    filepath = os.path.join(self.artifact.uri, PROFILE_FILENAME)

    # The context manager removes the temporary folder even when the write
    # or the copy fails, so no directory is leaked on error.
    with tempfile.TemporaryDirectory(prefix="zenml-temp-") as temp_dir:
        temp_file = os.path.join(temp_dir, PROFILE_FILENAME)
        profile_view.write(temp_file)

        # Copy it into artifact store
        fileio.copy(temp_file, filepath)

    # Use the data validator to upload the profile view to Whylabs,
    # if configured to do so. This logic is only enabled when the Whylabs
    # logging environment variable is set.
    whylabs_enabled = os.environ.get(WHYLABS_LOGGING_ENABLED_ENV)
    if not whylabs_enabled:
        return
    dataset_id = os.environ.get(WHYLABS_DATASET_ID_ENV)
    data_validator = cast(
        WhylogsDataValidator,
        WhylogsDataValidator.get_active_data_validator(),
    )
    data_validator.upload_profile_view(profile_view, dataset_id=dataset_id)

`secret_schemas` `special`

Initialization for the Whylabs secret schema.

This schema can be used to configure a ZenML secret that authenticates ZenML with the Whylabs platform, so that all whylogs data profiles generated by pipeline steps are logged there automatically.

`whylabs_secret_schema`

Implementation for the Whylabs secret schema.

`WhylabsSecretSchema (BaseSecretSchema)` `pydantic-model`

Whylabs credentials.

Attributes:

Name	Type	Description
`whylabs_default_org_id`	`str`	the Whylabs organization ID.
`whylabs_api_key`	`str`	Whylabs API key.
`whylabs_default_dataset_id`	`Optional[str]`	default Whylabs dataset ID to use when logging data profiles.

Source code in zenml/integrations/whylogs/secret_schemas/whylabs_secret_schema.py

class WhylabsSecretSchema(BaseSecretSchema):
    """Whylabs credentials.

    Attributes:
        whylabs_default_org_id: the Whylabs organization ID.
        whylabs_api_key: Whylabs API key.
        whylabs_default_dataset_id: default Whylabs dataset ID to use when
            logging data profiles.
    """

    # Class-level identifier of this secret schema (not a secret value).
    TYPE: ClassVar[str] = WHYLABS_SECRET_SCHEMA_TYPE

    # No defaults are declared for the organization ID and the API key.
    whylabs_default_org_id: str
    whylabs_api_key: str
    # Defaults to None when no default dataset ID is supplied.
    whylabs_default_dataset_id: Optional[str] = None

`steps` `special`

Initialization of the whylogs steps.

`whylogs_profiler`

Implementation of the whylogs profiler step.

`WhylogsProfilerParameters (BaseAnalyzerParameters)` `pydantic-model`

Parameters class for the WhylogsProfiler step.

Attributes:

Name	Type	Description
`dataset_timestamp`	`Optional[datetime.datetime]`	timestamp to associate with the generated dataset profile (Optional). The current time is used if not supplied.

Source code in zenml/integrations/whylogs/steps/whylogs_profiler.py

class WhylogsProfilerParameters(BaseAnalyzerParameters):
    """Parameters class for the WhylogsProfiler step.

    Attributes:
        dataset_timestamp: timestamp to associate with the generated
            dataset profile (Optional). The current time is used if not
            supplied.
    """

    # Explicit `None` default: the field is documented as optional, so it
    # should not depend on an implicit default for `Optional[...]` fields.
    dataset_timestamp: Optional[datetime.datetime] = None

`WhylogsProfilerStep (BaseAnalyzerStep)`

Generates a whylogs data profile from a given pd.DataFrame.

Source code in zenml/integrations/whylogs/steps/whylogs_profiler.py

class WhylogsProfilerStep(BaseAnalyzerStep):
    """Step that profiles a pd.DataFrame with whylogs."""

    @staticmethod
    def entrypoint(  # type: ignore[override]
        dataset: pd.DataFrame,
        params: WhylogsProfilerParameters,
    ) -> DatasetProfileView:
        """Profile the input dataset through the active data validator.

        Args:
            dataset: pd.DataFrame, the dataset to profile
            params: the step parameters; only `dataset_timestamp` is read

        Returns:
            whylogs profile view with statistics generated for the dataset
        """
        active_validator = WhylogsDataValidator.get_active_data_validator()
        whylogs_validator = cast(WhylogsDataValidator, active_validator)
        profile_view = whylogs_validator.data_profiling(
            dataset, dataset_timestamp=params.dataset_timestamp
        )
        return profile_view

`PARAMETERS_CLASS (BaseAnalyzerParameters)` `pydantic-model`

Parameters class for the WhylogsProfiler step.

Attributes:

Name	Type	Description
`dataset_timestamp`	`Optional[datetime.datetime]`	timestamp to associate with the generated dataset profile (Optional). The current time is used if not supplied.

Source code in zenml/integrations/whylogs/steps/whylogs_profiler.py

class WhylogsProfilerParameters(BaseAnalyzerParameters):
    """Parameters class for the WhylogsProfiler step.

    Attributes:
        dataset_timestamp: timestamp to associate with the generated
            dataset profile (Optional). The current time is used if not
            supplied.
    """

    # Explicit `None` default: the field is documented as optional, so it
    # should not depend on an implicit default for `Optional[...]` fields.
    dataset_timestamp: Optional[datetime.datetime] = None

`entrypoint(dataset, params)` `staticmethod`

Main entrypoint function for the whylogs profiler.

Parameters:

Name	Type	Description	Default
`dataset`	`DataFrame`	pd.DataFrame, the given dataset	required
`params`	`WhylogsProfilerParameters`	the parameters of the step	required

Returns:

Type	Description
`DatasetProfileView`	whylogs profile with statistics generated for the input dataset

Source code in zenml/integrations/whylogs/steps/whylogs_profiler.py

@staticmethod
def entrypoint(  # type: ignore[override]
    dataset: pd.DataFrame,
    params: WhylogsProfilerParameters,
) -> DatasetProfileView:
    """Profile the input dataset through the active data validator.

    Args:
        dataset: pd.DataFrame, the dataset to profile
        params: the step parameters; only `dataset_timestamp` is read

    Returns:
        whylogs profile view with statistics generated for the dataset
    """
    active_validator = WhylogsDataValidator.get_active_data_validator()
    whylogs_validator = cast(WhylogsDataValidator, active_validator)
    profile_view = whylogs_validator.data_profiling(
        dataset, dataset_timestamp=params.dataset_timestamp
    )
    return profile_view

`whylogs_profiler_step(step_name, params, dataset_id=None)`

Shortcut function to create a new instance of the WhylogsProfilerStep step.

The returned WhylogsProfilerStep can be used in a pipeline to generate a whylogs DatasetProfileView from a given pd.DataFrame and save it as an artifact.

Parameters:

Name	Type	Description	Default
`step_name`	`str`	The name of the step	required
`params`	`WhylogsProfilerParameters`	The step parameters	required
`dataset_id`	`Optional[str]`	Optional dataset ID to use to upload the profile to Whylabs.	`None`

Returns:

Type	Description
`BaseStep`	a WhylogsProfilerStep step instance

Source code in zenml/integrations/whylogs/steps/whylogs_profiler.py

def whylogs_profiler_step(
    step_name: str,
    params: WhylogsProfilerParameters,
    dataset_id: Optional[str] = None,
) -> BaseStep:
    """Build a WhylogsProfilerStep with Whylabs uploading switched on.

    The returned step can be used in a pipeline to generate a whylogs
    DatasetProfileView from a given pd.DataFrame and save it as an artifact.
    The step is configured with data validator settings that set
    `enable_whylabs=True` and carry the given dataset ID.

    Args:
        step_name: The name of the step
        params: The step parameters
        dataset_id: Optional dataset ID to use to upload the profile to Whylabs.

    Returns:
        a WhylogsProfilerStep step instance
    """
    profiler = WhylogsProfilerStep(name=step_name, params=params)

    # Settings are registered under the key derived from the flavor.
    flavor = WhylogsDataValidatorFlavor()
    settings_key = settings_utils.get_flavor_setting_key(flavor)
    whylabs_settings = WhylogsDataValidatorSettings(
        enable_whylabs=True,
        dataset_id=dataset_id,
    )

    profiler.configure(settings={settings_key: whylabs_settings})
    return profiler

`visualizers` `special`

Initialization of the whylogs visualizer.

`whylogs_visualizer`

Implementation of the whylogs visualizer.

`WhylogsVisualizer (BaseVisualizer)`

The implementation of a Whylogs Visualizer.

Source code in zenml/integrations/whylogs/visualizers/whylogs_visualizer.py

class WhylogsVisualizer(BaseVisualizer):
    """The implementation of a Whylogs Visualizer."""

    def visualize(
        self,
        object: StepView,
        reference_step_view: Optional[StepView] = None,
        *args: Any,
        **kwargs: Any,
    ) -> None:
        """Visualize whylogs dataset profiles present as outputs in the step view.

        The first whylogs DatasetProfileView artifact found among the outputs
        of `object` is visualized. If `reference_step_view` is given, the
        first profile found among its outputs is used as the reference in the
        drift report.

        Args:
            object: StepView fetched from run.get_step().
            reference_step_view: second StepView fetched from run.get_step() to
                use as a reference to visualize data drift
            *args: additional positional arguments to pass to the visualize
                method
            **kwargs: additional keyword arguments to pass to the visualize
                method

        Raises:
            ValueError: if `object` has no whylogs dataset profile among its
                output artifacts.
        """

        def extract_profile(
            step_view: StepView,
        ) -> Optional[DatasetProfileView]:
            """Extract a whylogs DatasetProfileView from a step view.

            Args:
                step_view: a step view

            Returns:
                A whylogs DatasetProfileView object loaded from the step view,
                if one could be found, otherwise None.
            """
            whylogs_artifact_datatype = (
                f"{DatasetProfileView.__module__}.{DatasetProfileView.__name__}"
            )
            for artifact_view in step_view.outputs.values():
                # filter out anything but whylogs dataset profile artifacts
                if artifact_view.data_type == whylogs_artifact_datatype:
                    return cast(DatasetProfileView, artifact_view.read())
            return None

        profile = extract_profile(object)
        if profile is None:
            # Fail early with a clear message instead of handing None to the
            # whylogs visualizer, which expects a real profile view.
            raise ValueError(
                "No whylogs dataset profile artifact was found among the "
                "outputs of the given step view."
            )

        reference_profile: Optional[DatasetProfileView] = None
        if reference_step_view:
            reference_profile = extract_profile(reference_step_view)

        self.visualize_profile(profile, reference_profile)

    def visualize_profile(
        self,
        profile: DatasetProfileView,
        reference_profile: Optional[DatasetProfileView] = None,
    ) -> None:
        """Generate a visualization of one or two whylogs dataset profiles.

        Inside a notebook, the summary drift report is displayed, followed by
        a double histogram for every column of `profile` in sorted order.
        Outside a notebook, the report HTML is written to a temporary `.html`
        file that is then opened in the web browser.

        Args:
            profile: whylogs DatasetProfileView to visualize
            reference_profile: second optional DatasetProfileView to use to
                generate a data drift visualization
        """
        # currently, whylogs doesn't support visualizing a single profile, so
        # we trick it by using the same profile twice, both as reference and
        # target, in a drift report
        if reference_profile is None:
            reference_profile = profile

        visualization = NotebookProfileVisualizer()
        visualization.set_profiles(
            target_profile_view=profile,
            reference_profile_view=reference_profile,
        )
        rendered_html = visualization.summary_drift_report()

        if Environment.in_notebook():
            # `display` is imported from its public location; importing it
            # from `IPython.core.display` is deprecated.
            from IPython.display import display

            display(rendered_html)
            for column in sorted(profile.get_columns().keys()):
                display(visualization.double_histogram(feature_name=column))
        else:
            from pathlib import Path

            logger.warning(
                "The magic functions are only usable in a Jupyter notebook."
            )
            # delete=False keeps the file on disk after the handle is closed,
            # so it is still there when the browser is asked to open it.
            with tempfile.NamedTemporaryFile(
                mode="w", delete=False, suffix=".html", encoding="utf-8"
            ) as f:
                f.write(rendered_html.data)
            # Build a well-formed file URI for the current platform instead
            # of concatenating the raw path onto "file:///".
            url = Path(f.name).resolve().as_uri()
            logger.info("Opening %s in a new browser..", f.name)
            webbrowser.open(url, new=2)

`visualize(self, object, reference_step_view=None, *args, **kwargs)`

Visualize whylogs dataset profiles present as outputs in the step view.

Parameters:

Name	Type	Description	Default
`object`	`StepView`	StepView fetched from run.get_step().	required
`reference_step_view`	`Optional[zenml.post_execution.step.StepView]`	second StepView fetched from run.get_step() to use as a reference to visualize data drift	`None`
`*args`	`Any`	additional positional arguments to pass to the visualize method	`()`
`**kwargs`	`Any`	additional keyword arguments to pass to the visualize method	`{}`

Source code in zenml/integrations/whylogs/visualizers/whylogs_visualizer.py

def visualize(
    self,
    object: StepView,
    reference_step_view: Optional[StepView] = None,
    *args: Any,
    **kwargs: Any,
) -> None:
    """Visualize whylogs dataset profiles present as outputs in the step view.

    The first whylogs DatasetProfileView artifact found among the outputs of
    `object` is visualized. If `reference_step_view` is given, the first
    profile found among its outputs is used as the reference in the drift
    report.

    Args:
        object: StepView fetched from run.get_step().
        reference_step_view: second StepView fetched from run.get_step() to
            use as a reference to visualize data drift
        *args: additional positional arguments to pass to the visualize
            method
        **kwargs: additional keyword arguments to pass to the visualize
            method

    Raises:
        ValueError: if `object` has no whylogs dataset profile among its
            output artifacts.
    """

    def extract_profile(
        step_view: StepView,
    ) -> Optional[DatasetProfileView]:
        """Extract a whylogs DatasetProfileView from a step view.

        Args:
            step_view: a step view

        Returns:
            A whylogs DatasetProfileView object loaded from the step view,
            if one could be found, otherwise None.
        """
        whylogs_artifact_datatype = (
            f"{DatasetProfileView.__module__}.{DatasetProfileView.__name__}"
        )
        for artifact_view in step_view.outputs.values():
            # filter out anything but whylogs dataset profile artifacts
            if artifact_view.data_type == whylogs_artifact_datatype:
                return cast(DatasetProfileView, artifact_view.read())
        return None

    profile = extract_profile(object)
    if profile is None:
        # Fail early with a clear message instead of handing None to the
        # whylogs visualizer, which expects a real profile view.
        raise ValueError(
            "No whylogs dataset profile artifact was found among the "
            "outputs of the given step view."
        )

    reference_profile: Optional[DatasetProfileView] = None
    if reference_step_view:
        reference_profile = extract_profile(reference_step_view)

    self.visualize_profile(profile, reference_profile)

`visualize_profile(self, profile, reference_profile=None)`

Generate a visualization of one or two whylogs dataset profiles.

Parameters:

Name	Type	Description	Default
`profile`	`DatasetProfileView`	whylogs DatasetProfileView to visualize	required
`reference_profile`	`Optional[whylogs.core.view.dataset_profile_view.DatasetProfileView]`	second optional DatasetProfileView to use to generate a data drift visualization	`None`

Source code in zenml/integrations/whylogs/visualizers/whylogs_visualizer.py

def visualize_profile(
    self,
    profile: DatasetProfileView,
    reference_profile: Optional[DatasetProfileView] = None,
) -> None:
    """Generate a visualization of one or two whylogs dataset profiles.

    Inside a notebook, the summary drift report is displayed, followed by a
    double histogram for every column of `profile` in sorted order. Outside a
    notebook, the report HTML is written to a temporary `.html` file that is
    then opened in the web browser.

    Args:
        profile: whylogs DatasetProfileView to visualize
        reference_profile: second optional DatasetProfileView to use to
            generate a data drift visualization
    """
    # currently, whylogs doesn't support visualizing a single profile, so
    # we trick it by using the same profile twice, both as reference and
    # target, in a drift report
    if reference_profile is None:
        reference_profile = profile

    visualization = NotebookProfileVisualizer()
    visualization.set_profiles(
        target_profile_view=profile,
        reference_profile_view=reference_profile,
    )
    rendered_html = visualization.summary_drift_report()

    if Environment.in_notebook():
        # `display` is imported from its public location; importing it from
        # `IPython.core.display` is deprecated.
        from IPython.display import display

        display(rendered_html)
        for column in sorted(profile.get_columns().keys()):
            display(visualization.double_histogram(feature_name=column))
    else:
        from pathlib import Path

        logger.warning(
            "The magic functions are only usable in a Jupyter notebook."
        )
        # delete=False keeps the file on disk after the handle is closed, so
        # it is still there when the browser is asked to open it.
        with tempfile.NamedTemporaryFile(
            mode="w", delete=False, suffix=".html", encoding="utf-8"
        ) as f:
            f.write(rendered_html.data)
        # Build a well-formed file URI for the current platform instead of
        # concatenating the raw path onto "file:///".
        url = Path(f.name).resolve().as_uri()
        logger.info("Opening %s in a new browser..", f.name)
        webbrowser.open(url, new=2)

Whylogs

zenml.integrations.whylogs special

WhylogsIntegration (Integration)

activate() classmethod

flavors() classmethod

constants

data_validators special

whylogs_data_validator

WhylogsDataValidator (BaseDataValidator, AuthenticationMixin)

config: WhylogsDataValidatorConfig property readonly

settings_class: Optional[Type[BaseSettings]] property readonly

FLAVOR (BaseDataValidatorFlavor)

cleanup_step_run(self, info)

data_profiling(self, dataset, comparison_dataset=None, profile_list=None, dataset_timestamp=None, **kwargs)

prepare_step_run(self, info)

upload_profile_view(self, profile_view, dataset_id=None)

flavors special

whylogs_data_validator_flavor

WhylogsDataValidatorConfig (BaseDataValidatorConfig, AuthenticationConfigMixin, WhylogsDataValidatorSettings) pydantic-model

WhylogsDataValidatorFlavor (BaseDataValidatorFlavor)

config_class: Type[zenml.integrations.whylogs.flavors.whylogs_data_validator_flavor.WhylogsDataValidatorConfig] property readonly

implementation_class: Type[WhylogsDataValidator] property readonly

name: str property readonly

WhylogsDataValidatorSettings (BaseSettings) pydantic-model

materializers special

whylogs_materializer

WhylogsMaterializer (BaseMaterializer)

handle_input(self, data_type)

handle_return(self, profile_view)

secret_schemas special

whylabs_secret_schema

WhylabsSecretSchema (BaseSecretSchema) pydantic-model

steps special

whylogs_profiler

WhylogsProfilerParameters (BaseAnalyzerParameters) pydantic-model

WhylogsProfilerStep (BaseAnalyzerStep)

PARAMETERS_CLASS (BaseAnalyzerParameters) pydantic-model

entrypoint(dataset, params) staticmethod

whylogs_profiler_step(step_name, params, dataset_id=None)

visualizers special

whylogs_visualizer

WhylogsVisualizer (BaseVisualizer)

visualize(self, object, reference_step_view=None, *args, **kwargs)

visualize_profile(self, profile, reference_profile=None)

`zenml.integrations.whylogs` `special`

`WhylogsIntegration (Integration)`

`activate()` `classmethod`

`flavors()` `classmethod`

`constants`

`data_validators` `special`

`whylogs_data_validator`

`WhylogsDataValidator (BaseDataValidator, AuthenticationMixin)`

`config: WhylogsDataValidatorConfig` `property` `readonly`

`settings_class: Optional[Type[BaseSettings]]` `property` `readonly`

`FLAVOR (BaseDataValidatorFlavor)`

`cleanup_step_run(self, info)`

`data_profiling(self, dataset, comparison_dataset=None, profile_list=None, dataset_timestamp=None, **kwargs)`

`prepare_step_run(self, info)`

`upload_profile_view(self, profile_view, dataset_id=None)`

`flavors` `special`

`whylogs_data_validator_flavor`

`WhylogsDataValidatorConfig (BaseDataValidatorConfig, AuthenticationConfigMixin, WhylogsDataValidatorSettings)` `pydantic-model`

`WhylogsDataValidatorFlavor (BaseDataValidatorFlavor)`

`config_class: Type[zenml.integrations.whylogs.flavors.whylogs_data_validator_flavor.WhylogsDataValidatorConfig]` `property` `readonly`

`implementation_class: Type[WhylogsDataValidator]` `property` `readonly`

`name: str` `property` `readonly`

`WhylogsDataValidatorSettings (BaseSettings)` `pydantic-model`

`materializers` `special`

`whylogs_materializer`

`WhylogsMaterializer (BaseMaterializer)`

`handle_input(self, data_type)`

`handle_return(self, profile_view)`

`secret_schemas` `special`

`whylabs_secret_schema`

`WhylabsSecretSchema (BaseSecretSchema)` `pydantic-model`

`steps` `special`

`whylogs_profiler`

`WhylogsProfilerParameters (BaseAnalyzerParameters)` `pydantic-model`

`WhylogsProfilerStep (BaseAnalyzerStep)`

`PARAMETERS_CLASS (BaseAnalyzerParameters)` `pydantic-model`

`entrypoint(dataset, params)` `staticmethod`

`whylogs_profiler_step(step_name, params, dataset_id=None)`

`visualizers` `special`

`whylogs_visualizer`

`WhylogsVisualizer (BaseVisualizer)`

`visualize(self, object, reference_step_view=None, *args, **kwargs)`

`visualize_profile(self, profile, reference_profile=None)`