Facets

`zenml.integrations.facets` `special`

Facets integration for ZenML.

The Facets integration provides a simple way to visualize post-execution objects such as PipelineView, PipelineRunView, and StepView. These objects can be extended using the BaseVisualization class. This integration requires `facets-overview` to be installed in your Python environment.

`FacetsIntegration (Integration)`

Definition of the Facets integration for ZenML.

Source code in zenml/integrations/facets/__init__.py

class FacetsIntegration(Integration):
    """ZenML integration definition for [Facets](https://pair-code.github.io/facets/).

    Declares the name under which the integration is identified and the
    requirement specifiers of the packages it depends on.
    """

    # Identifier of this integration.
    NAME = FACETS
    # Packages that have to be available for the integration to work.
    REQUIREMENTS = [
        "facets-overview>=1.0.0",
        "IPython",
    ]

`visualizers` `special`

Initialization of the Facet Visualizer.

`facet_statistics_visualizer`

Implementation of the Facet Statistics Visualizer.

`FacetStatisticsVisualizer (BaseVisualizer)`

Visualize and compare dataset statistics with Facets.

Source code in zenml/integrations/facets/visualizers/facet_statistics_visualizer.py

class FacetStatisticsVisualizer(BaseVisualizer):
    """Visualize and compare dataset statistics with Facets.

    The visualizer collects pandas DataFrames (either passed directly or read
    from the outputs of a step), computes their statistics, embeds them in the
    `stats.html` template that lives next to this module and then shows the
    resulting page either inline in a notebook or in a web browser.
    """

    # NOTE: `visualize` is a concrete implementation, so it must not be marked
    # with `@abstractmethod`; doing so would make the class impossible to
    # instantiate as soon as the base class uses `ABCMeta`.
    def visualize(
        self,
        object: Union[StepView, Dict[str, Union[ArtifactView, pd.DataFrame]]],
        magic: bool = False,
        *args: Any,
        **kwargs: Any,
    ) -> None:
        """Method to visualize components.

        Entries that are not pandas DataFrames are skipped with a warning.

        Args:
            object: Either a StepView fetched from run.get_step() whose outputs
                are all datasets that should be visualized, or a dict that maps
                dataset names to datasets.
            magic: Whether to render in a Jupyter notebook or not.
            *args: Additional arguments.
            **kwargs: Additional keyword arguments.
        """
        data_dict = object.outputs if isinstance(object, StepView) else object
        datasets = []
        for dataset_name, data in data_dict.items():
            # Artifacts have to be materialized before they can be inspected.
            df = data.read() if isinstance(data, ArtifactView) else data
            # `isinstance` (rather than an exact type comparison) also accepts
            # subclasses of `pd.DataFrame`.
            if not isinstance(df, pd.DataFrame):
                logger.warning(
                    "`%s` is not a pd.DataFrame. You can only visualize "
                    "statistics of steps that output pandas DataFrames. "
                    "Skipping this output..",
                    dataset_name,
                )
                continue
            datasets.append({"name": dataset_name, "table": df})

        html_ = self.generate_html(datasets)
        self.generate_facet(html_, magic)

    def generate_html(self, datasets: List[Dict[Text, pd.DataFrame]]) -> str:
        """Generates html for facet.

        Args:
            datasets: List of dicts of DataFrames to be visualized as stats.
                `visualize` builds each entry as
                `{"name": <dataset name>, "table": <DataFrame>}`.

        Returns:
            HTML template with proto string embedded.
        """
        proto = GenericFeatureStatisticsGenerator().ProtoFromDataFrames(
            datasets
        )
        # The serialized proto is embedded in the page as base64 text.
        protostr = base64.b64encode(proto.SerializeToString()).decode("utf-8")

        # The template is shipped in the same directory as this module.
        template = os.path.join(
            os.path.abspath(os.path.dirname(__file__)),
            "stats.html",
        )
        html_template = io_utils.read_file_contents_as_string(template)

        # Every occurrence of the `protostr` placeholder is substituted.
        html_ = html_template.replace("protostr", protostr)
        return html_

    def generate_facet(self, html_: str, magic: bool = False) -> None:
        """Generate a Facet Overview.

        With `magic=True` the HTML is displayed inline; otherwise it is
        written to a temporary `.html` file that is opened in a web browser.

        Args:
            html_: HTML represented as a string.
            magic: Whether to magically materialize facet in a notebook.

        Raises:
            EnvironmentError: If magic is True and not in a notebook.
        """
        if magic:
            if not (Environment.in_notebook() or Environment.in_google_colab()):
                raise EnvironmentError(
                    "The magic functions are only usable in a Jupyter notebook."
                )
            display(HTML(html_))
            return

        from pathlib import Path

        # Only reserve a unique file name here. The handle is closed before
        # the file is written by name, because a file that is still held open
        # by `NamedTemporaryFile` cannot be reopened on Windows.
        # `delete=False` keeps the file around so the browser can load it.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".html") as f:
            html_path = f.name

        io_utils.write_file_contents_as_string(html_path, html_)

        # `as_uri` yields a well-formed, percent-encoded `file://` URL on
        # every platform (the temporary path is always absolute).
        url = Path(html_path).as_uri()
        logger.info("Opening %s in a new browser..", html_path)
        webbrowser.open(url, new=2)

`generate_facet(self, html_, magic=False)`

Generate a Facet Overview.

Parameters:

Name	Type	Description	Default
`html_`	`str`	HTML represented as a string.	required
`magic`	`bool`	Whether to magically materialize facet in a notebook.	`False`

Exceptions:

Type	Description
`EnvironmentError`	If magic is True and not in a notebook.

Source code in zenml/integrations/facets/visualizers/facet_statistics_visualizer.py

def generate_facet(self, html_: str, magic: bool = False) -> None:
    """Generate a Facet Overview.

    With `magic=True` the HTML is displayed inline; otherwise it is written
    to a temporary `.html` file that is opened in a web browser.

    Args:
        html_: HTML represented as a string.
        magic: Whether to magically materialize facet in a notebook.

    Raises:
        EnvironmentError: If magic is True and not in a notebook.
    """
    if magic:
        if not (Environment.in_notebook() or Environment.in_google_colab()):
            raise EnvironmentError(
                "The magic functions are only usable in a Jupyter notebook."
            )
        display(HTML(html_))
        return

    from pathlib import Path

    # Only reserve a unique file name here. The handle is closed before the
    # file is written by name, because a file that is still held open by
    # `NamedTemporaryFile` cannot be reopened on Windows. `delete=False`
    # keeps the file around so the browser can load it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".html") as f:
        html_path = f.name

    io_utils.write_file_contents_as_string(html_path, html_)

    # `as_uri` yields a well-formed, percent-encoded `file://` URL on every
    # platform (the temporary path is always absolute).
    url = Path(html_path).as_uri()
    logger.info("Opening %s in a new browser..", html_path)
    webbrowser.open(url, new=2)

`generate_html(self, datasets)`

Generates html for facet.

Parameters:

Name	Type	Description	Default
`datasets`	`List[Dict[str, pandas.core.frame.DataFrame]]`	List of dicts of DataFrames to be visualized as stats.	required

Returns:

Type	Description
`str`	HTML template with proto string embedded.

Source code in zenml/integrations/facets/visualizers/facet_statistics_visualizer.py

def generate_html(self, datasets: List[Dict[Text, pd.DataFrame]]) -> str:
    """Render the Facets statistics page for the given datasets.

    Args:
        datasets: Dataset entries handed unchanged to
            `GenericFeatureStatisticsGenerator.ProtoFromDataFrames`.

    Returns:
        The contents of the `stats.html` template found next to this module,
        with each occurrence of `protostr` replaced by the base64-encoded
        serialized statistics proto.
    """
    stats_generator = GenericFeatureStatisticsGenerator()
    stats_proto = stats_generator.ProtoFromDataFrames(datasets)
    serialized_stats = stats_proto.SerializeToString()
    encoded_stats = base64.b64encode(serialized_stats).decode("utf-8")

    # The template sits in the same directory as this module.
    module_dir = os.path.abspath(os.path.dirname(__file__))
    template_path = os.path.join(module_dir, "stats.html")
    template_text = io_utils.read_file_contents_as_string(template_path)

    return template_text.replace("protostr", encoded_stats)

`visualize(self, object, magic=False, *args, **kwargs)`

Method to visualize components.

Parameters:

Name	Type	Description	Default
`object`	`Union[zenml.post_execution.step.StepView, Dict[str, Union[zenml.post_execution.artifact.ArtifactView, pandas.core.frame.DataFrame]]]`	Either a StepView fetched from run.get_step() whose outputs are all datasets that should be visualized, or a dict that maps dataset names to datasets.	required
`magic`	`bool`	Whether to render in a Jupyter notebook or not.	`False`
`*args`	`Any`	Additional arguments.	`()`
`**kwargs`	`Any`	Additional keyword arguments.	`{}`

Source code in zenml/integrations/facets/visualizers/facet_statistics_visualizer.py

def visualize(
    self,
    object: Union[StepView, Dict[str, Union[ArtifactView, pd.DataFrame]]],
    magic: bool = False,
    *args: Any,
    **kwargs: Any,
) -> None:
    """Method to visualize components.

    This is a concrete implementation and therefore is not marked with
    `@abstractmethod`. Entries that are not pandas DataFrames are skipped
    with a warning.

    Args:
        object: Either a StepView fetched from run.get_step() whose outputs
            are all datasets that should be visualized, or a dict that maps
            dataset names to datasets.
        magic: Whether to render in a Jupyter notebook or not.
        *args: Additional arguments.
        **kwargs: Additional keyword arguments.
    """
    data_dict = object.outputs if isinstance(object, StepView) else object
    datasets = []
    for dataset_name, data in data_dict.items():
        # Artifacts have to be materialized before they can be inspected.
        df = data.read() if isinstance(data, ArtifactView) else data
        # `isinstance` (rather than an exact type comparison) also accepts
        # subclasses of `pd.DataFrame`.
        if not isinstance(df, pd.DataFrame):
            logger.warning(
                "`%s` is not a pd.DataFrame. You can only visualize "
                "statistics of steps that output pandas DataFrames. "
                "Skipping this output..",
                dataset_name,
            )
            continue
        datasets.append({"name": dataset_name, "table": df})

    html_ = self.generate_html(datasets)
    self.generate_facet(html_, magic)

Facets

zenml.integrations.facets special

FacetsIntegration (Integration)

visualizers special

facet_statistics_visualizer

FacetStatisticsVisualizer (BaseVisualizer)

generate_facet(self, html_, magic=False)

generate_html(self, datasets)

visualize(self, object, magic=False, *args, **kwargs)

`zenml.integrations.facets` `special`

`FacetsIntegration (Integration)`

`visualizers` `special`

`facet_statistics_visualizer`

`FacetStatisticsVisualizer (BaseVisualizer)`

`generate_facet(self, html_, magic=False)`

`generate_html(self, datasets)`

`visualize(self, object, magic=False, *args, **kwargs)`