Whylogs
zenml.integrations.whylogs
special
Initialization of the whylogs integration.
WhylogsIntegration (Integration)
Definition of whylogs integration for ZenML.
Source code in zenml/integrations/whylogs/__init__.py
class WhylogsIntegration(Integration):
    """Definition of [whylogs](https://github.com/whylabs/whylogs) integration for ZenML."""

    NAME = WHYLOGS
    REQUIREMENTS = ["whylogs[viz]~=1.0.5", "whylogs[whylabs]~=1.0.5"]

    @classmethod
    def activate(cls) -> None:
        """Activates the integration.

        The submodules are imported but never referenced here (hence the
        `noqa` markers); presumably the import itself is what makes them
        known to ZenML - confirm against the `Integration` base class.
        """
        from zenml.integrations.whylogs import materializers  # noqa
        from zenml.integrations.whylogs import secret_schemas  # noqa
        from zenml.integrations.whylogs import visualizers  # noqa

    @classmethod
    def flavors(cls) -> List[Type[Flavor]]:
        """Declare the stack component flavors for the whylogs integration.

        Returns:
            List of stack component flavors for this integration.
        """
        # The flavor is imported inside the method rather than at module
        # level, so it is only loaded when the flavors are requested.
        from zenml.integrations.whylogs.flavors import (
            WhylogsDataValidatorFlavor,
        )

        return [WhylogsDataValidatorFlavor]
activate()
classmethod
Activates the integration.
Source code in zenml/integrations/whylogs/__init__.py
@classmethod
def activate(cls) -> None:
    """Activate the whylogs integration.

    Imports the materializer, secret schema and visualizer submodules, in
    that order. None of the imported names is used afterwards.
    """
    from zenml.integrations.whylogs import (  # noqa
        materializers,
        secret_schemas,
        visualizers,
    )
flavors()
classmethod
Declare the stack component flavors for the whylogs integration.
Returns:
Type | Description |
---|---|
List[Type[zenml.stack.flavor.Flavor]] |
List of stack component flavors for this integration. |
Source code in zenml/integrations/whylogs/__init__.py
@classmethod
def flavors(cls) -> List[Type[Flavor]]:
    """Declare the stack component flavors for the whylogs integration.

    Returns:
        List of stack component flavors for this integration.
    """
    # The flavor is imported inside the method rather than at module level,
    # so it is only loaded when the flavors are requested.
    from zenml.integrations.whylogs.flavors import (
        WhylogsDataValidatorFlavor,
    )

    return [WhylogsDataValidatorFlavor]
constants
Whylogs integration constants.
data_validators
special
Initialization of the whylogs data validator for ZenML.
whylogs_data_validator
Implementation of the whylogs data validator.
WhylogsDataValidator (BaseDataValidator, AuthenticationMixin)
Whylogs data validator stack component.
Attributes:
Name | Type | Description |
---|---|---|
authentication_secret |
Optional ZenML secret with Whylabs credentials. If configured, all the data profiles returned by all pipeline steps will automatically be uploaded to Whylabs in addition to being stored in the ZenML Artifact Store. |
Source code in zenml/integrations/whylogs/data_validators/whylogs_data_validator.py
class WhylogsDataValidator(BaseDataValidator, AuthenticationMixin):
    """Whylogs data validator stack component.

    Attributes:
        authentication_secret: Optional ZenML secret with Whylabs credentials.
            If configured, all the data profiles returned by all pipeline steps
            will automatically be uploaded to Whylabs in addition to being
            stored in the ZenML Artifact Store.
    """

    NAME: ClassVar[str] = "whylogs"
    FLAVOR: ClassVar[Type[BaseDataValidatorFlavor]] = WhylogsDataValidatorFlavor

    @property
    def config(self) -> WhylogsDataValidatorConfig:
        """Returns the `WhylogsDataValidatorConfig` config.

        Returns:
            The configuration.
        """
        return cast(WhylogsDataValidatorConfig, self._config)

    @property
    def settings_class(self) -> Optional[Type["BaseSettings"]]:
        """Settings class for the Whylogs data validator.

        Returns:
            The settings class.
        """
        return WhylogsDataValidatorSettings

    def prepare_step_run(self, info: "StepRunInfo") -> None:
        """Configures Whylabs logging.

        Exports the step's Whylabs settings as environment variables.

        Args:
            info: Info about the step that will be executed.
        """
        settings = cast(WhylogsDataValidatorSettings, self.get_settings(info))
        if settings.enable_whylabs:
            os.environ[WHYLABS_LOGGING_ENABLED_ENV] = "true"
        if settings.dataset_id:
            os.environ[WHYLABS_DATASET_ID_ENV] = settings.dataset_id

    def cleanup_step_run(self, info: "StepRunInfo") -> None:
        """Resets Whylabs configuration.

        Removes the environment variables that `prepare_step_run` sets for
        the same settings.

        Args:
            info: Info about the step that was executed.
        """
        settings = cast(WhylogsDataValidatorSettings, self.get_settings(info))
        # `pop` with a default instead of `del`: cleanup must not fail with a
        # `KeyError` if a variable is already gone (e.g. it was removed in the
        # meantime or the prepare hook never ran).
        if settings.enable_whylabs:
            os.environ.pop(WHYLABS_LOGGING_ENABLED_ENV, None)
        if settings.dataset_id:
            os.environ.pop(WHYLABS_DATASET_ID_ENV, None)

    def data_profiling(
        self,
        dataset: pd.DataFrame,
        comparison_dataset: Optional[pd.DataFrame] = None,
        profile_list: Optional[Sequence[str]] = None,
        dataset_timestamp: Optional[datetime.datetime] = None,
        **kwargs: Any,
    ) -> DatasetProfileView:
        """Analyze a dataset and generate a data profile with whylogs.

        Args:
            dataset: Target dataset to be profiled.
            comparison_dataset: Optional dataset to be used for data profiles
                that require a baseline for comparison (e.g data drift
                profiles). Not used by this implementation.
            profile_list: Optional list identifying the categories of whylogs
                data profiles to be generated (unused).
            dataset_timestamp: timestamp to associate with the generated
                dataset profile (Optional). The current UTC time is used if
                not supplied.
            **kwargs: Extra keyword arguments (unused).

        Returns:
            A whylogs profile view object.
        """
        results = why.log(pandas=dataset)
        profile = results.profile()
        # Default to a timezone-aware "now". A naive `utcnow()` value carries
        # no tzinfo, so a consumer that interprets naive datetimes as local
        # time would shift it (whylogs is understood to do so - confirm).
        dataset_timestamp = dataset_timestamp or datetime.datetime.now(
            datetime.timezone.utc
        )
        profile.set_dataset_timestamp(dataset_timestamp=dataset_timestamp)
        return profile.view()

    def upload_profile_view(
        self,
        profile_view: DatasetProfileView,
        dataset_id: Optional[str] = None,
    ) -> None:
        """Upload a whylogs data profile view to Whylabs, if configured to do so.

        Nothing is uploaded when no authentication secret is available.

        Args:
            profile_view: Whylogs profile view to upload.
            dataset_id: Optional dataset identifier to use for the uploaded
                data profile. If omitted, a dataset identifier will be retrieved
                using other means, in order:
                    * the default dataset identifier configured in the Data
                    Validator secret
                    * a dataset ID will be generated automatically based on the
                    current pipeline/step information.

        Raises:
            ValueError: If the dataset ID was not provided and could not be
                retrieved or inferred from other sources.
        """
        secret = self.get_authentication_secret(
            expected_schema_type=WhylabsSecretSchema
        )
        if not secret:
            return

        dataset_id = dataset_id or secret.whylabs_default_dataset_id
        if not dataset_id:
            # use the current pipeline name and the step name to generate a
            # unique dataset name
            try:
                # get pipeline name and step name
                step_env = cast(
                    StepEnvironment, Environment()[STEP_ENVIRONMENT_NAME]
                )
                dataset_id = f"{step_env.pipeline_name}_{step_env.step_name}"
            except KeyError as err:
                # Chain the lookup failure so the root cause stays visible.
                raise ValueError(
                    "A dataset ID was not specified and could not be "
                    "generated from the current pipeline and step name."
                ) from err

        # Instantiate WhyLabs Writer
        writer = WhyLabsWriter(
            org_id=secret.whylabs_default_org_id,
            api_key=secret.whylabs_api_key,
            dataset_id=dataset_id,
        )

        # pass a profile view to the writer's write method
        writer.write(profile=profile_view)
config: WhylogsDataValidatorConfig
property
readonly
Returns the `WhylogsDataValidatorConfig` config.
Returns:
Type | Description |
---|---|
WhylogsDataValidatorConfig |
The configuration. |
settings_class: Optional[Type[BaseSettings]]
property
readonly
Settings class for the Whylogs data validator.
Returns:
Type | Description |
---|---|
Optional[Type[BaseSettings]] |
The settings class. |
FLAVOR (BaseDataValidatorFlavor)
Whylogs data validator flavor.
Source code in zenml/integrations/whylogs/data_validators/whylogs_data_validator.py
class WhylogsDataValidatorFlavor(BaseDataValidatorFlavor):
    """Flavor describing the whylogs data validator."""

    @property
    def name(self) -> str:
        """Flavor name.

        Returns:
            The value of `WHYLOGS_DATA_VALIDATOR_FLAVOR`.
        """
        return WHYLOGS_DATA_VALIDATOR_FLAVOR

    @property
    def config_class(self) -> Type[WhylogsDataValidatorConfig]:
        """Config class used by this flavor.

        Returns:
            The `WhylogsDataValidatorConfig` class.
        """
        return WhylogsDataValidatorConfig

    @property
    def implementation_class(self) -> Type["WhylogsDataValidator"]:
        """Class implementing this flavor.

        Returns:
            The `WhylogsDataValidator` class.
        """
        # Imported on access rather than at module level.
        from zenml.integrations.whylogs.data_validators import WhylogsDataValidator

        return WhylogsDataValidator
config_class: Type[zenml.integrations.whylogs.flavors.whylogs_data_validator_flavor.WhylogsDataValidatorConfig]
property
readonly
Returns `WhylogsDataValidatorConfig` config class.
Returns:
Type | Description |
---|---|
Type[zenml.integrations.whylogs.flavors.whylogs_data_validator_flavor.WhylogsDataValidatorConfig] |
The config class. |
implementation_class: Type[WhylogsDataValidator]
property
readonly
Implementation class for this flavor.
Returns:
Type | Description |
---|---|
Type[WhylogsDataValidator] |
The implementation class. |
name: str
property
readonly
Name of the flavor.
Returns:
Type | Description |
---|---|
str |
The name of the flavor. |
cleanup_step_run(self, info)
Resets Whylabs configuration.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
info |
StepRunInfo |
Info about the step that was executed. |
required |
Source code in zenml/integrations/whylogs/data_validators/whylogs_data_validator.py
def cleanup_step_run(self, info: "StepRunInfo") -> None:
    """Resets Whylabs configuration.

    Removes the Whylabs environment variables that correspond to the step
    settings. Variables that are already absent are ignored.

    Args:
        info: Info about the step that was executed.
    """
    settings = cast(WhylogsDataValidatorSettings, self.get_settings(info))
    # `pop` with a default instead of `del`: cleanup must not fail with a
    # `KeyError` if a variable is already gone.
    if settings.enable_whylabs:
        os.environ.pop(WHYLABS_LOGGING_ENABLED_ENV, None)
    if settings.dataset_id:
        os.environ.pop(WHYLABS_DATASET_ID_ENV, None)
data_profiling(self, dataset, comparison_dataset=None, profile_list=None, dataset_timestamp=None, **kwargs)
Analyze a dataset and generate a data profile with whylogs.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
dataset |
DataFrame |
Target dataset to be profiled. |
required |
comparison_dataset |
Optional[pandas.core.frame.DataFrame] |
Optional dataset to be used for data profiles that require a baseline for comparison (e.g data drift profiles). |
None |
profile_list |
Optional[Sequence[str]] |
Optional list identifying the categories of whylogs data profiles to be generated (unused). |
None |
dataset_timestamp |
Optional[datetime.datetime] |
timestamp to associate with the generated dataset profile (Optional). The current time is used if not supplied. |
None |
**kwargs |
Any |
Extra keyword arguments (unused). |
{} |
Returns:
Type | Description |
---|---|
DatasetProfileView |
A whylogs profile view object. |
Source code in zenml/integrations/whylogs/data_validators/whylogs_data_validator.py
def data_profiling(
    self,
    dataset: pd.DataFrame,
    comparison_dataset: Optional[pd.DataFrame] = None,
    profile_list: Optional[Sequence[str]] = None,
    dataset_timestamp: Optional[datetime.datetime] = None,
    **kwargs: Any,
) -> DatasetProfileView:
    """Analyze a dataset and generate a data profile with whylogs.

    Args:
        dataset: Target dataset to be profiled.
        comparison_dataset: Optional dataset to be used for data profiles
            that require a baseline for comparison (e.g data drift profiles).
            Not used by this implementation.
        profile_list: Optional list identifying the categories of whylogs
            data profiles to be generated (unused).
        dataset_timestamp: timestamp to associate with the generated
            dataset profile (Optional). The current UTC time is used if not
            supplied.
        **kwargs: Extra keyword arguments (unused).

    Returns:
        A whylogs profile view object.
    """
    results = why.log(pandas=dataset)
    profile = results.profile()
    # Default to a timezone-aware "now". A naive `utcnow()` value carries no
    # tzinfo, so a consumer that interprets naive datetimes as local time
    # would shift it (whylogs is understood to do so - confirm).
    dataset_timestamp = dataset_timestamp or datetime.datetime.now(
        datetime.timezone.utc
    )
    profile.set_dataset_timestamp(dataset_timestamp=dataset_timestamp)
    return profile.view()
prepare_step_run(self, info)
Configures Whylabs logging.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
info |
StepRunInfo |
Info about the step that will be executed. |
required |
Source code in zenml/integrations/whylogs/data_validators/whylogs_data_validator.py
def prepare_step_run(self, info: "StepRunInfo") -> None:
    """Export the step's Whylabs settings as environment variables.

    Args:
        info: Info about the step that will be executed.
    """
    step_settings = cast(
        WhylogsDataValidatorSettings, self.get_settings(info)
    )

    # Flag telling later code that Whylabs logging was requested.
    if step_settings.enable_whylabs:
        os.environ[WHYLABS_LOGGING_ENABLED_ENV] = "true"

    # The dataset ID is exported independently of the flag above.
    if step_settings.dataset_id:
        os.environ[WHYLABS_DATASET_ID_ENV] = step_settings.dataset_id
upload_profile_view(self, profile_view, dataset_id=None)
Upload a whylogs data profile view to Whylabs, if configured to do so.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
profile_view |
DatasetProfileView |
Whylogs profile view to upload. |
required |
dataset_id |
Optional[str] |
Optional dataset identifier to use for the uploaded data profile. If omitted, a dataset identifier will be retrieved using other means, in order: * the default dataset identifier configured in the Data Validator secret * a dataset ID will be generated automatically based on the current pipeline/step information. |
None |
Exceptions:
Type | Description |
---|---|
ValueError |
If the dataset ID was not provided and could not be retrieved or inferred from other sources. |
Source code in zenml/integrations/whylogs/data_validators/whylogs_data_validator.py
def upload_profile_view(
    self,
    profile_view: DatasetProfileView,
    dataset_id: Optional[str] = None,
) -> None:
    """Upload a whylogs data profile view to Whylabs, if configured to do so.

    Nothing is uploaded when no authentication secret is available.

    Args:
        profile_view: Whylogs profile view to upload.
        dataset_id: Optional dataset identifier to use for the uploaded
            data profile. If omitted, a dataset identifier will be retrieved
            using other means, in order:
                * the default dataset identifier configured in the Data
                Validator secret
                * a dataset ID will be generated automatically based on the
                current pipeline/step information.

    Raises:
        ValueError: If the dataset ID was not provided and could not be
            retrieved or inferred from other sources.
    """
    secret = self.get_authentication_secret(
        expected_schema_type=WhylabsSecretSchema
    )
    if not secret:
        return

    dataset_id = dataset_id or secret.whylabs_default_dataset_id
    if not dataset_id:
        # use the current pipeline name and the step name to generate a
        # unique dataset name
        try:
            # get pipeline name and step name
            step_env = cast(
                StepEnvironment, Environment()[STEP_ENVIRONMENT_NAME]
            )
            dataset_id = f"{step_env.pipeline_name}_{step_env.step_name}"
        except KeyError as err:
            # Chain the lookup failure so the root cause stays visible.
            raise ValueError(
                "A dataset ID was not specified and could not be "
                "generated from the current pipeline and step name."
            ) from err

    # Instantiate WhyLabs Writer
    writer = WhyLabsWriter(
        org_id=secret.whylabs_default_org_id,
        api_key=secret.whylabs_api_key,
        dataset_id=dataset_id,
    )

    # pass a profile view to the writer's write method
    writer.write(profile=profile_view)
flavors
special
WhyLabs whylogs integration flavors.
whylogs_data_validator_flavor
WhyLabs whylogs data validator flavor.
WhylogsDataValidatorConfig (BaseDataValidatorConfig, AuthenticationConfigMixin, WhylogsDataValidatorSettings)
pydantic-model
Config for the whylogs data validator.
Source code in zenml/integrations/whylogs/flavors/whylogs_data_validator_flavor.py
class WhylogsDataValidatorConfig(  # type: ignore[misc] # https://github.com/pydantic/pydantic/issues/4173
    BaseDataValidatorConfig,
    AuthenticationConfigMixin,
    WhylogsDataValidatorSettings,
):
    """Configuration of the whylogs data validator.

    Declares no fields of its own; everything is inherited from the three
    base classes.
    """
WhylogsDataValidatorFlavor (BaseDataValidatorFlavor)
Whylogs data validator flavor.
Source code in zenml/integrations/whylogs/flavors/whylogs_data_validator_flavor.py
class WhylogsDataValidatorFlavor(BaseDataValidatorFlavor):
    """Flavor describing the whylogs data validator."""

    @property
    def name(self) -> str:
        """Flavor name.

        Returns:
            The value of `WHYLOGS_DATA_VALIDATOR_FLAVOR`.
        """
        return WHYLOGS_DATA_VALIDATOR_FLAVOR

    @property
    def config_class(self) -> Type[WhylogsDataValidatorConfig]:
        """Config class used by this flavor.

        Returns:
            The `WhylogsDataValidatorConfig` class.
        """
        return WhylogsDataValidatorConfig

    @property
    def implementation_class(self) -> Type["WhylogsDataValidator"]:
        """Class implementing this flavor.

        Returns:
            The `WhylogsDataValidator` class.
        """
        # Imported on access rather than at module level.
        from zenml.integrations.whylogs.data_validators import WhylogsDataValidator

        return WhylogsDataValidator
config_class: Type[zenml.integrations.whylogs.flavors.whylogs_data_validator_flavor.WhylogsDataValidatorConfig]
property
readonly
Returns `WhylogsDataValidatorConfig` config class.
Returns:
Type | Description |
---|---|
Type[zenml.integrations.whylogs.flavors.whylogs_data_validator_flavor.WhylogsDataValidatorConfig] |
The config class. |
implementation_class: Type[WhylogsDataValidator]
property
readonly
Implementation class for this flavor.
Returns:
Type | Description |
---|---|
Type[WhylogsDataValidator] |
The implementation class. |
name: str
property
readonly
Name of the flavor.
Returns:
Type | Description |
---|---|
str |
The name of the flavor. |
WhylogsDataValidatorSettings (BaseSettings)
pydantic-model
Settings for the Whylogs data validator.
Attributes:
Name | Type | Description |
---|---|---|
enable_whylabs |
bool |
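If set to `True` for a step, all the whylogs data profile views returned by the step will automatically be uploaded to the Whylabs platform if Whylabs credentials are configured. |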
dataset_id |
Optional[str] |
Dataset ID to use when uploading profiles to Whylabs. |
Source code in zenml/integrations/whylogs/flavors/whylogs_data_validator_flavor.py
class WhylogsDataValidatorSettings(BaseSettings):
    """Per-step settings of the Whylogs data validator.

    Attributes:
        enable_whylabs: When `True` for a step, every whylogs data profile
            view the step returns is automatically uploaded to the Whylabs
            platform, provided Whylabs credentials are configured. Defaults
            to `False`.
        dataset_id: Dataset ID to use when uploading profiles to Whylabs.
            Defaults to `None`.
    """

    enable_whylabs: bool = False
    dataset_id: Optional[str] = None
materializers
special
Initialization of the whylogs materializer.
whylogs_materializer
Implementation of the whylogs materializer.
WhylogsMaterializer (BaseMaterializer)
Materializer to read/write whylogs dataset profile views.
Source code in zenml/integrations/whylogs/materializers/whylogs_materializer.py
class WhylogsMaterializer(BaseMaterializer):
    """Materializer to read/write whylogs dataset profile views."""

    ASSOCIATED_TYPES = (DatasetProfileView,)
    ASSOCIATED_ARTIFACT_TYPES = (StatisticsArtifact,)

    def handle_input(self, data_type: Type[Any]) -> DatasetProfileView:
        """Reads and returns a whylogs dataset profile view.

        The profile file is copied from the artifact URI into a temporary
        folder and read from there.

        Args:
            data_type: The type of the data to read.

        Returns:
            A loaded whylogs dataset profile view object.
        """
        super().handle_input(data_type)
        filepath = os.path.join(self.artifact.uri, PROFILE_FILENAME)

        # Create a temporary folder
        temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
        try:
            temp_file = os.path.join(str(temp_dir), PROFILE_FILENAME)

            # Copy from artifact store to temporary file
            fileio.copy(filepath, temp_file)
            profile_view = DatasetProfileView.read(temp_file)
        finally:
            # Remove the temporary folder even if the copy or read failed.
            fileio.rmtree(temp_dir)

        return profile_view

    def handle_return(self, profile_view: DatasetProfileView) -> None:
        """Writes a whylogs dataset profile view.

        The view is written to a temporary folder, copied to the artifact
        URI and, if Whylabs logging is enabled through the environment,
        uploaded with the active data validator.

        Args:
            profile_view: A whylogs dataset profile view object.
        """
        super().handle_return(profile_view)
        filepath = os.path.join(self.artifact.uri, PROFILE_FILENAME)

        # Create a temporary folder
        temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
        try:
            temp_file = os.path.join(str(temp_dir), PROFILE_FILENAME)
            profile_view.write(temp_file)

            # Copy it into artifact store
            fileio.copy(temp_file, filepath)
        finally:
            # Remove the temporary folder even if the write or copy failed.
            fileio.rmtree(temp_dir)

        # Use the data validator to upload the profile view to Whylabs,
        # if configured to do so. This logic is only enabled when the
        # `WHYLABS_LOGGING_ENABLED_ENV` environment variable is set.
        whylabs_enabled = os.environ.get(WHYLABS_LOGGING_ENABLED_ENV)
        if not whylabs_enabled:
            return

        dataset_id = os.environ.get(WHYLABS_DATASET_ID_ENV)
        data_validator = cast(
            WhylogsDataValidator,
            WhylogsDataValidator.get_active_data_validator(),
        )
        data_validator.upload_profile_view(profile_view, dataset_id=dataset_id)
handle_input(self, data_type)
Reads and returns a whylogs dataset profile view.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
data_type |
Type[Any] |
The type of the data to read. |
required |
Returns:
Type | Description |
---|---|
DatasetProfileView |
A loaded whylogs dataset profile view object. |
Source code in zenml/integrations/whylogs/materializers/whylogs_materializer.py
def handle_input(self, data_type: Type[Any]) -> DatasetProfileView:
    """Reads and returns a whylogs dataset profile view.

    The profile file is copied from the artifact URI into a temporary
    folder and read from there.

    Args:
        data_type: The type of the data to read.

    Returns:
        A loaded whylogs dataset profile view object.
    """
    super().handle_input(data_type)
    filepath = os.path.join(self.artifact.uri, PROFILE_FILENAME)

    # Create a temporary folder
    temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
    try:
        temp_file = os.path.join(str(temp_dir), PROFILE_FILENAME)

        # Copy from artifact store to temporary file
        fileio.copy(filepath, temp_file)
        profile_view = DatasetProfileView.read(temp_file)
    finally:
        # Remove the temporary folder even if the copy or read failed.
        fileio.rmtree(temp_dir)

    return profile_view
handle_return(self, profile_view)
Writes a whylogs dataset profile view.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
profile_view |
DatasetProfileView |
A whylogs dataset profile view object. |
required |
Source code in zenml/integrations/whylogs/materializers/whylogs_materializer.py
def handle_return(self, profile_view: DatasetProfileView) -> None:
    """Writes a whylogs dataset profile view.

    The view is written to a temporary folder, copied to the artifact URI
    and, if Whylabs logging is enabled through the environment, uploaded
    with the active data validator.

    Args:
        profile_view: A whylogs dataset profile view object.
    """
    super().handle_return(profile_view)
    filepath = os.path.join(self.artifact.uri, PROFILE_FILENAME)

    # Create a temporary folder
    temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
    try:
        temp_file = os.path.join(str(temp_dir), PROFILE_FILENAME)
        profile_view.write(temp_file)

        # Copy it into artifact store
        fileio.copy(temp_file, filepath)
    finally:
        # Remove the temporary folder even if the write or copy failed.
        fileio.rmtree(temp_dir)

    # Use the data validator to upload the profile view to Whylabs,
    # if configured to do so. This logic is only enabled when the
    # `WHYLABS_LOGGING_ENABLED_ENV` environment variable is set.
    whylabs_enabled = os.environ.get(WHYLABS_LOGGING_ENABLED_ENV)
    if not whylabs_enabled:
        return

    dataset_id = os.environ.get(WHYLABS_DATASET_ID_ENV)
    data_validator = cast(
        WhylogsDataValidator,
        WhylogsDataValidator.get_active_data_validator(),
    )
    data_validator.upload_profile_view(profile_view, dataset_id=dataset_id)
secret_schemas
special
Initialization for the Whylabs secret schema.
This schema can be used to configure a ZenML secret to authenticate ZenML to use the Whylabs platform to automatically log all whylogs data profiles generated by pipeline steps.
whylabs_secret_schema
Implementation for Whylabs secret schemas.
WhylabsSecretSchema (BaseSecretSchema)
pydantic-model
Whylabs credentials.
Attributes:
Name | Type | Description |
---|---|---|
whylabs_default_org_id |
str |
the Whylabs organization ID. |
whylabs_api_key |
str |
Whylabs API key. |
whylabs_default_dataset_id |
Optional[str] |
default Whylabs dataset ID to use when logging data profiles. |
Source code in zenml/integrations/whylogs/secret_schemas/whylabs_secret_schema.py
class WhylabsSecretSchema(BaseSecretSchema):
    """Secret schema holding Whylabs credentials.

    Attributes:
        whylabs_default_org_id: the Whylabs organization ID (required).
        whylabs_api_key: Whylabs API key (required).
        whylabs_default_dataset_id: default Whylabs dataset ID to use when
            logging data profiles. Defaults to `None`.
    """

    TYPE: ClassVar[str] = WHYLABS_SECRET_SCHEMA_TYPE

    whylabs_default_org_id: str
    whylabs_api_key: str
    whylabs_default_dataset_id: Optional[str] = None
steps
special
Initialization of the whylogs steps.
whylogs_profiler
Implementation of the whylogs profiler step.
WhylogsProfilerParameters (BaseAnalyzerParameters)
pydantic-model
Parameters class for the WhylogsProfiler step.
Attributes:
Name | Type | Description |
---|---|---|
dataset_timestamp |
Optional[datetime.datetime] |
timestamp to associate with the generated dataset profile (Optional). The current time is used if not supplied. |
Source code in zenml/integrations/whylogs/steps/whylogs_profiler.py
class WhylogsProfilerParameters(BaseAnalyzerParameters):
    """Parameters class for the WhylogsProfiler step.

    Attributes:
        dataset_timestamp: timestamp to associate with the generated
            dataset profile (Optional). The current time is used if not
            supplied.
    """

    # Explicit `None` default: the field is documented as optional, so it
    # should not depend on the validation library treating a bare
    # `Optional[...]` annotation as "not required".
    dataset_timestamp: Optional[datetime.datetime] = None
WhylogsProfilerStep (BaseAnalyzerStep)
Generates a whylogs data profile from a given pd.DataFrame.
Source code in zenml/integrations/whylogs/steps/whylogs_profiler.py
class WhylogsProfilerStep(BaseAnalyzerStep):
    """Step producing a whylogs data profile for a pd.DataFrame."""

    @staticmethod
    def entrypoint(  # type: ignore[override]
        dataset: pd.DataFrame,
        params: WhylogsProfilerParameters,
    ) -> DatasetProfileView:
        """Profile the dataset with the active whylogs data validator.

        Args:
            dataset: pd.DataFrame, the given dataset
            params: the parameters of the step

        Returns:
            whylogs profile with statistics generated for the input dataset
        """
        active_validator = WhylogsDataValidator.get_active_data_validator()
        whylogs_validator = cast(WhylogsDataValidator, active_validator)
        profile_view = whylogs_validator.data_profiling(
            dataset, dataset_timestamp=params.dataset_timestamp
        )
        return profile_view
PARAMETERS_CLASS (BaseAnalyzerParameters)
pydantic-model
Parameters class for the WhylogsProfiler step.
Attributes:
Name | Type | Description |
---|---|---|
dataset_timestamp |
Optional[datetime.datetime] |
timestamp to associate with the generated dataset profile (Optional). The current time is used if not supplied. |
Source code in zenml/integrations/whylogs/steps/whylogs_profiler.py
class WhylogsProfilerParameters(BaseAnalyzerParameters):
    """Parameters class for the WhylogsProfiler step.

    Attributes:
        dataset_timestamp: timestamp to associate with the generated
            dataset profile (Optional). The current time is used if not
            supplied.
    """

    # Explicit `None` default: the field is documented as optional, so it
    # should not depend on the validation library treating a bare
    # `Optional[...]` annotation as "not required".
    dataset_timestamp: Optional[datetime.datetime] = None
entrypoint(dataset, params)
staticmethod
Main entrypoint function for the whylogs profiler.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
dataset |
DataFrame |
pd.DataFrame, the given dataset |
required |
params |
WhylogsProfilerParameters |
the parameters of the step |
required |
Returns:
Type | Description |
---|---|
DatasetProfileView |
whylogs profile with statistics generated for the input dataset |
Source code in zenml/integrations/whylogs/steps/whylogs_profiler.py
@staticmethod
def entrypoint(  # type: ignore[override]
    dataset: pd.DataFrame,
    params: WhylogsProfilerParameters,
) -> DatasetProfileView:
    """Profile the dataset with the active whylogs data validator.

    Args:
        dataset: pd.DataFrame, the given dataset
        params: the parameters of the step

    Returns:
        whylogs profile with statistics generated for the input dataset
    """
    active_validator = WhylogsDataValidator.get_active_data_validator()
    whylogs_validator = cast(WhylogsDataValidator, active_validator)
    profile_view = whylogs_validator.data_profiling(
        dataset, dataset_timestamp=params.dataset_timestamp
    )
    return profile_view
whylogs_profiler_step(step_name, params, dataset_id=None)
Shortcut function to create a new instance of the WhylogsProfilerStep step.
The returned WhylogsProfilerStep can be used in a pipeline to generate a whylogs DatasetProfileView from a given pd.DataFrame and save it as an artifact.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
step_name |
str |
The name of the step |
required |
params |
WhylogsProfilerParameters |
The step parameters |
required |
dataset_id |
Optional[str] |
Optional dataset ID to use to upload the profile to Whylabs. |
None |
Returns:
Type | Description |
---|---|
BaseStep |
a WhylogsProfilerStep step instance |
Source code in zenml/integrations/whylogs/steps/whylogs_profiler.py
def whylogs_profiler_step(
    step_name: str,
    params: WhylogsProfilerParameters,
    dataset_id: Optional[str] = None,
) -> BaseStep:
    """Build a WhylogsProfilerStep with Whylabs logging switched on.

    The returned step can be used in a pipeline to generate a whylogs
    DatasetProfileView from a given pd.DataFrame and save it as an artifact.
    It is configured with data validator settings that have
    `enable_whylabs=True` and the given dataset ID.

    Args:
        step_name: The name of the step
        params: The step parameters
        dataset_id: Optional dataset ID to use to upload the profile to Whylabs.

    Returns:
        a WhylogsProfilerStep step instance
    """
    profiler = WhylogsProfilerStep(name=step_name, params=params)

    # The settings are registered under the key derived from the flavor.
    flavor = WhylogsDataValidatorFlavor()
    settings_key = settings_utils.get_flavor_setting_key(flavor)
    whylabs_settings = WhylogsDataValidatorSettings(
        enable_whylabs=True,
        dataset_id=dataset_id,
    )

    profiler.configure(settings={settings_key: whylabs_settings})
    return profiler
visualizers
special
Initialization of the whylogs visualizer.
whylogs_visualizer
Implementation of the whylogs visualizer step.
WhylogsVisualizer (BaseVisualizer)
The implementation of a Whylogs Visualizer.
Source code in zenml/integrations/whylogs/visualizers/whylogs_visualizer.py
class WhylogsVisualizer(BaseVisualizer):
    """The implementation of a Whylogs Visualizer."""

    def visualize(
        self,
        object: StepView,
        reference_step_view: Optional[StepView] = None,
        *args: Any,
        **kwargs: Any,
    ) -> None:
        """Visualize whylogs dataset profiles present as outputs in the step view.

        If the step view has no whylogs profile output, a warning is logged
        and nothing is visualized.

        Args:
            object: StepView fetched from run.get_step().
            reference_step_view: second StepView fetched from run.get_step() to
                use as a reference to visualize data drift
            *args: additional positional arguments to pass to the visualize
                method
            **kwargs: additional keyword arguments to pass to the visualize
                method
        """

        def extract_profile(
            step_view: StepView,
        ) -> Optional[DatasetProfileView]:
            """Extract a whylogs DatasetProfileView from a step view.

            Args:
                step_view: a step view

            Returns:
                A whylogs DatasetProfileView object loaded from the step view,
                if one could be found, otherwise None.
            """
            whylogs_artifact_datatype = (
                f"{DatasetProfileView.__module__}.{DatasetProfileView.__name__}"
            )
            for artifact_view in step_view.outputs.values():
                # filter out anything but whylogs dataset profile artifacts
                if artifact_view.data_type == whylogs_artifact_datatype:
                    profile = artifact_view.read()
                    return cast(DatasetProfileView, profile)
            return None

        profile = extract_profile(object)
        if profile is None:
            # Without a target profile there is nothing to render; passing
            # `None` on would only fail later inside the visualizer.
            logger.warning(
                "No whylogs dataset profile was found in the step outputs; "
                "nothing to visualize."
            )
            return

        reference_profile: Optional[DatasetProfileView] = None
        if reference_step_view:
            reference_profile = extract_profile(reference_step_view)

        self.visualize_profile(profile, reference_profile)

    def visualize_profile(
        self,
        profile: DatasetProfileView,
        reference_profile: Optional[DatasetProfileView] = None,
    ) -> None:
        """Generate a visualization of one or two whylogs dataset profiles.

        In a notebook the report and one histogram per column are displayed
        inline; otherwise the report is written to a temporary HTML file
        that is opened in the web browser.

        Args:
            profile: whylogs DatasetProfileView to visualize
            reference_profile: second optional DatasetProfileView to use to
                generate a data drift visualization
        """
        # currently, whylogs doesn't support visualizing a single profile, so
        # we trick it by using the same profile twice, both as reference and
        # target, in a drift report
        reference_profile = reference_profile or profile
        visualization = NotebookProfileVisualizer()
        visualization.set_profiles(
            target_profile_view=profile,
            reference_profile_view=reference_profile,
        )
        rendered_html = visualization.summary_drift_report()

        if Environment.in_notebook():
            # `IPython.display` is the public location of `display`;
            # importing it from `IPython.core.display` is deprecated.
            from IPython.display import display

            display(rendered_html)
            for column in sorted(profile.get_columns().keys()):
                display(visualization.double_histogram(feature_name=column))
        else:
            from pathlib import Path

            logger.warning(
                "The magic functions are only usable in a Jupyter notebook."
            )
            # `delete=False` keeps the file on disk after the `with` block
            # so that the browser can still load it.
            with tempfile.NamedTemporaryFile(
                mode="w", delete=False, suffix=".html", encoding="utf-8"
            ) as f:
                f.write(rendered_html.data)
            # `as_uri` builds a well-formed, percent-encoded file URL on
            # every platform instead of gluing "file:///" onto the raw path.
            url = Path(f.name).as_uri()
            logger.info("Opening %s in a new browser..", f.name)
            webbrowser.open(url, new=2)
visualize(self, object, reference_step_view=None, *args, **kwargs)
Visualize whylogs dataset profiles present as outputs in the step view.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
object |
StepView |
StepView fetched from run.get_step(). |
required |
reference_step_view |
Optional[zenml.post_execution.step.StepView] |
second StepView fetched from run.get_step() to use as a reference to visualize data drift |
None |
*args |
Any |
additional positional arguments to pass to the visualize method |
() |
**kwargs |
Any |
additional keyword arguments to pass to the visualize method |
{} |
Source code in zenml/integrations/whylogs/visualizers/whylogs_visualizer.py
def visualize(
    self,
    object: StepView,
    reference_step_view: Optional[StepView] = None,
    *args: Any,
    **kwargs: Any,
) -> None:
    """Visualize whylogs dataset profiles present as outputs in the step view.

    If the step view has no whylogs profile output, a warning is logged and
    nothing is visualized.

    Args:
        object: StepView fetched from run.get_step().
        reference_step_view: second StepView fetched from run.get_step() to
            use as a reference to visualize data drift
        *args: additional positional arguments to pass to the visualize
            method
        **kwargs: additional keyword arguments to pass to the visualize
            method
    """

    def extract_profile(
        step_view: StepView,
    ) -> Optional[DatasetProfileView]:
        """Extract a whylogs DatasetProfileView from a step view.

        Args:
            step_view: a step view

        Returns:
            A whylogs DatasetProfileView object loaded from the step view,
            if one could be found, otherwise None.
        """
        whylogs_artifact_datatype = (
            f"{DatasetProfileView.__module__}.{DatasetProfileView.__name__}"
        )
        for artifact_view in step_view.outputs.values():
            # filter out anything but whylogs dataset profile artifacts
            if artifact_view.data_type == whylogs_artifact_datatype:
                profile = artifact_view.read()
                return cast(DatasetProfileView, profile)
        return None

    profile = extract_profile(object)
    if profile is None:
        # Without a target profile there is nothing to render; passing
        # `None` on would only fail later inside the visualizer.
        logger.warning(
            "No whylogs dataset profile was found in the step outputs; "
            "nothing to visualize."
        )
        return

    reference_profile: Optional[DatasetProfileView] = None
    if reference_step_view:
        reference_profile = extract_profile(reference_step_view)

    self.visualize_profile(profile, reference_profile)
visualize_profile(self, profile, reference_profile=None)
Generate a visualization of one or two whylogs dataset profiles.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
profile |
DatasetProfileView |
whylogs DatasetProfileView to visualize |
required |
reference_profile |
Optional[whylogs.core.view.dataset_profile_view.DatasetProfileView] |
second optional DatasetProfileView to use to generate a data drift visualization |
None |
Source code in zenml/integrations/whylogs/visualizers/whylogs_visualizer.py
def visualize_profile(
    self,
    profile: DatasetProfileView,
    reference_profile: Optional[DatasetProfileView] = None,
) -> None:
    """Generate a visualization of one or two whylogs dataset profiles.

    In a notebook the report and one histogram per column are displayed
    inline; otherwise the report is written to a temporary HTML file that
    is opened in the web browser.

    Args:
        profile: whylogs DatasetProfileView to visualize
        reference_profile: second optional DatasetProfileView to use to
            generate a data drift visualization
    """
    # currently, whylogs doesn't support visualizing a single profile, so
    # we trick it by using the same profile twice, both as reference and
    # target, in a drift report
    reference_profile = reference_profile or profile
    visualization = NotebookProfileVisualizer()
    visualization.set_profiles(
        target_profile_view=profile,
        reference_profile_view=reference_profile,
    )
    rendered_html = visualization.summary_drift_report()

    if Environment.in_notebook():
        # `IPython.display` is the public location of `display`; importing
        # it from `IPython.core.display` is deprecated.
        from IPython.display import display

        display(rendered_html)
        for column in sorted(profile.get_columns().keys()):
            display(visualization.double_histogram(feature_name=column))
    else:
        from pathlib import Path

        logger.warning(
            "The magic functions are only usable in a Jupyter notebook."
        )
        # `delete=False` keeps the file on disk after the `with` block so
        # that the browser can still load it.
        with tempfile.NamedTemporaryFile(
            mode="w", delete=False, suffix=".html", encoding="utf-8"
        ) as f:
            f.write(rendered_html.data)
        # `as_uri` builds a well-formed, percent-encoded file URL on every
        # platform instead of gluing "file:///" onto the raw path.
        url = Path(f.name).as_uri()
        logger.info("Opening %s in a new browser..", f.name)
        webbrowser.open(url, new=2)