Skip to content

Facets

zenml.integrations.facets special

Facets integration for ZenML.

The Facets integration provides a simple way to visualize post-execution objects such as PipelineView, PipelineRunView and StepView. These objects can be extended using the BaseVisualization class. This integration requires facets-overview to be installed in your Python environment.

FacetsIntegration (Integration)

Definition of the Facets integration for ZenML.

Source code in zenml/integrations/facets/__init__.py
class FacetsIntegration(Integration):
    """Definition of the [Facets](https://pair-code.github.io/facets/) integration for ZenML.

    Declares the integration's name and the Python packages it requires.
    """

    # Name of this integration, taken from the shared FACETS constant.
    NAME = FACETS
    # Requirement specifiers for this integration. IPython is presumably
    # needed for the notebook rendering path (display/HTML) -- TODO confirm.
    REQUIREMENTS = ["facets-overview>=1.0.0", "IPython"]

visualizers special

Initialization of the Facet Visualizer.

facet_statistics_visualizer

Implementation of the Facet Statistics Visualizer.

FacetStatisticsVisualizer (BaseVisualizer)

Visualize and compare dataset statistics with Facets.

Source code in zenml/integrations/facets/visualizers/facet_statistics_visualizer.py
class FacetStatisticsVisualizer(BaseVisualizer):
    """Visualize and compare dataset statistics with Facets.

    The visualizer collects pandas DataFrames (directly, or by reading the
    outputs of a step), renders them into the bundled ``stats.html``
    template and shows the result either inline in a notebook or in the
    default web browser.
    """

    # NOTE: `visualize` is a concrete implementation, so it must not be
    # decorated with `@abstractmethod`.
    def visualize(
        self,
        object: Union[StepView, Dict[str, Union[ArtifactView, pd.DataFrame]]],
        magic: bool = False,
        *args: Any,
        **kwargs: Any,
    ) -> None:
        """Method to visualize components.

        Entries that are not pandas DataFrames are skipped with a warning.

        Args:
            object: Either a StepView fetched from run.get_step() whose outputs
                are all datasets that should be visualized, or a dict that maps
                dataset names to datasets.
            magic: Whether to render in a Jupyter notebook or not.
            *args: Additional arguments (unused).
            **kwargs: Additional keyword arguments (unused).
        """
        data_dict = object.outputs if isinstance(object, StepView) else object
        datasets = []
        for dataset_name, data in data_dict.items():
            # Artifacts have to be materialized before they can be inspected.
            df = data.read() if isinstance(data, ArtifactView) else data
            # isinstance (rather than an exact type check) also accepts
            # DataFrame subclasses.
            if not isinstance(df, pd.DataFrame):
                logger.warning(
                    "`%s` is not a pd.DataFrame. You can only visualize "
                    "statistics of steps that output pandas DataFrames. "
                    "Skipping this output..",
                    dataset_name,
                )
                continue
            datasets.append({"name": dataset_name, "table": df})

        html_ = self.generate_html(datasets)
        self.generate_facet(html_, magic)

    def generate_html(self, datasets: List[Dict[Text, pd.DataFrame]]) -> str:
        """Generates html for facet.

        Args:
            datasets: List of dicts of DataFrames to be visualized as stats.
                Each dict carries a ``"name"`` and a ``"table"`` entry.

        Returns:
            HTML template with proto string embedded.
        """
        proto = GenericFeatureStatisticsGenerator().ProtoFromDataFrames(
            datasets
        )
        # The serialized proto is base64-encoded so it can be embedded as
        # plain text in the HTML template.
        protostr = base64.b64encode(proto.SerializeToString()).decode("utf-8")

        # The template ships next to this module.
        template = os.path.join(
            os.path.abspath(os.path.dirname(__file__)),
            "stats.html",
        )
        html_template = io_utils.read_file_contents_as_string(template)

        # Every occurrence of the literal placeholder "protostr" is replaced.
        return html_template.replace("protostr", protostr)

    def generate_facet(self, html_: str, magic: bool = False) -> None:
        """Generate a Facet Overview.

        Args:
            html_: HTML represented as a string.
            magic: Whether to magically materialize facet in a notebook.

        Raises:
            EnvironmentError: If magic is True and not in a notebook.
        """
        if magic:
            if not (Environment.in_notebook() or Environment.in_google_colab()):
                raise EnvironmentError(
                    "The magic functions are only usable in a Jupyter notebook."
                )
            display(HTML(html_))
            return

        from pathlib import Path

        # Only reserve a file name here; the handle is closed before the
        # content is written so the file is never open twice at once.
        # delete=False keeps the file on disk for the browser to load.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".html") as f:
            html_path = f.name
        io_utils.write_file_contents_as_string(html_path, html_)

        # Path.as_uri() yields a well-formed, percent-encoded file URI on
        # every platform (no `file:////...` or raw backslashes).
        url = Path(html_path).resolve().as_uri()
        logger.info("Opening %s in a new browser..", html_path)
        webbrowser.open(url, new=2)
generate_facet(self, html_, magic=False)

Generate a Facet Overview.

Parameters:

Name Type Description Default
html_ str

HTML represented as a string.

required
magic bool

Whether to magically materialize facet in a notebook.

False

Exceptions:

Type Description
EnvironmentError

If magic is True and not in a notebook.

Source code in zenml/integrations/facets/visualizers/facet_statistics_visualizer.py
def generate_facet(self, html_: str, magic: bool = False) -> None:
    """Generate a Facet Overview.

    With ``magic`` set, the HTML is displayed inline via
    ``display(HTML(...))``. Otherwise it is written to a temporary
    ``.html`` file that is kept on disk and opened with ``webbrowser``.

    Args:
        html_: HTML represented as a string.
        magic: Whether to magically materialize facet in a notebook.

    Raises:
        EnvironmentError: If magic is True and not in a notebook.
    """
    if magic:
        if not (Environment.in_notebook() or Environment.in_google_colab()):
            raise EnvironmentError(
                "The magic functions are only usable in a Jupyter notebook."
            )
        display(HTML(html_))
        return

    from pathlib import Path

    # Only reserve a file name here; the handle is closed before the content
    # is written so the file is never open twice at once. delete=False keeps
    # the file on disk for the browser to load.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".html") as f:
        html_path = f.name
    io_utils.write_file_contents_as_string(html_path, html_)

    # Path.as_uri() yields a well-formed, percent-encoded file URI on every
    # platform (no `file:////...` or raw backslashes).
    url = Path(html_path).resolve().as_uri()
    logger.info("Opening %s in a new browser..", html_path)
    webbrowser.open(url, new=2)
generate_html(self, datasets)

Generates html for facet.

Parameters:

Name Type Description Default
datasets List[Dict[str, pandas.core.frame.DataFrame]]

List of dicts of DataFrames to be visualized as stats.

required

Returns:

Type Description
str

HTML template with proto string embedded.

Source code in zenml/integrations/facets/visualizers/facet_statistics_visualizer.py
def generate_html(self, datasets: List[Dict[Text, pd.DataFrame]]) -> str:
    """Render the Facets statistics page for the given datasets.

    Args:
        datasets: List of dicts describing the DataFrames whose statistics
            should be visualized.

    Returns:
        The contents of the ``stats.html`` template with the base64-encoded
        statistics proto substituted for the ``protostr`` placeholder.
    """
    # Compute the statistics proto and encode it as embeddable text.
    stats_generator = GenericFeatureStatisticsGenerator()
    stats_proto = stats_generator.ProtoFromDataFrames(datasets)
    serialized_stats = stats_proto.SerializeToString()
    encoded_stats = base64.b64encode(serialized_stats).decode("utf-8")

    # The HTML template lives in the same directory as this module.
    module_dir = os.path.abspath(os.path.dirname(__file__))
    template_path = os.path.join(module_dir, "stats.html")
    template_text = io_utils.read_file_contents_as_string(template_path)

    return template_text.replace("protostr", encoded_stats)
visualize(self, object, magic=False, *args, **kwargs)

Method to visualize components.

Parameters:

Name Type Description Default
object Union[zenml.post_execution.step.StepView, Dict[str, Union[zenml.post_execution.artifact.ArtifactView, pandas.core.frame.DataFrame]]]

Either a StepView fetched from run.get_step() whose outputs are all datasets that should be visualized, or a dict that maps dataset names to datasets.

required
magic bool

Whether to render in a Jupyter notebook or not.

False
*args Any

Additional arguments.

()
**kwargs Any

Additional keyword arguments.

{}
Source code in zenml/integrations/facets/visualizers/facet_statistics_visualizer.py
def visualize(
    self,
    object: Union[StepView, Dict[str, Union[ArtifactView, pd.DataFrame]]],
    magic: bool = False,
    *args: Any,
    **kwargs: Any,
) -> None:
    """Method to visualize components.

    This is a concrete implementation and therefore carries no
    ``@abstractmethod`` decorator. Entries that are not pandas DataFrames
    are skipped with a warning.

    Args:
        object: Either a StepView fetched from run.get_step() whose outputs
            are all datasets that should be visualized, or a dict that maps
            dataset names to datasets.
        magic: Whether to render in a Jupyter notebook or not.
        *args: Additional arguments (unused).
        **kwargs: Additional keyword arguments (unused).
    """
    data_dict = object.outputs if isinstance(object, StepView) else object
    datasets = []
    for dataset_name, data in data_dict.items():
        # Artifacts have to be materialized before they can be inspected.
        df = data.read() if isinstance(data, ArtifactView) else data
        # isinstance (rather than an exact type check) also accepts
        # DataFrame subclasses.
        if not isinstance(df, pd.DataFrame):
            logger.warning(
                "`%s` is not a pd.DataFrame. You can only visualize "
                "statistics of steps that output pandas DataFrames. "
                "Skipping this output..",
                dataset_name,
            )
            continue
        datasets.append({"name": dataset_name, "table": df})

    html_ = self.generate_html(datasets)
    self.generate_facet(html_, magic)