Skip to content

Pytorch

zenml.integrations.pytorch special

Initialization of the PyTorch integration.

PytorchIntegration (Integration)

Definition of PyTorch integration for ZenML.

Source code in zenml/integrations/pytorch/__init__.py
class PytorchIntegration(Integration):
    """PyTorch integration for ZenML.

    Declares the integration name and its pip requirements, and imports the
    PyTorch materializers subpackage when the integration is activated.
    """

    NAME = PYTORCH
    REQUIREMENTS = ["torch"]

    @classmethod
    def activate(cls) -> None:
        """Activate the integration by importing its materializers."""
        # The imported name is never used: the import is done purely for its
        # import-time effect (presumably materializer registration).
        from zenml.integrations.pytorch import materializers as _m  # noqa

activate() classmethod

Activates the integration.

Source code in zenml/integrations/pytorch/__init__.py
@classmethod
def activate(cls) -> None:
    """Activate the integration by importing its materializers."""
    # The imported name is never used: the import is done purely for its
    # import-time effect (presumably materializer registration).
    from zenml.integrations.pytorch import materializers as _m  # noqa

materializers special

Initialization of the PyTorch Materializer.

pytorch_dataloader_materializer

Implementation of the PyTorch DataLoader materializer.

PyTorchDataLoaderMaterializer (BaseMaterializer)

Materializer to read/write PyTorch dataloaders.

Source code in zenml/integrations/pytorch/materializers/pytorch_dataloader_materializer.py
class PyTorchDataLoaderMaterializer(BaseMaterializer):
    """Materializer to read/write PyTorch dataloaders.

    The dataloader is serialized as a whole with `torch.save` into a single
    file named `DEFAULT_FILENAME` inside the artifact directory (`self.uri`).
    """

    ASSOCIATED_TYPES = (DataLoader,)
    ASSOCIATED_ARTIFACT_TYPE = ArtifactType.DATA

    def load(self, data_type: Type[Any]) -> Any:
        """Reads and returns a PyTorch dataloader.

        Args:
            data_type: The type of the dataloader to load.

        Returns:
            A loaded PyTorch dataloader.
        """
        super().load(data_type)
        # NOTE(review): `torch.load` unpickles the file content, so only
        # artifacts coming from a trusted artifact store should be loaded.
        with fileio.open(os.path.join(self.uri, DEFAULT_FILENAME), "rb") as f:
            return torch.load(f)

    def save(self, dataloader: Any) -> None:
        """Writes a PyTorch dataloader.

        Args:
            dataloader: A `torch.utils.data.DataLoader` (or any other object
                that `torch.save` is able to serialize).
        """
        super().save(dataloader)

        # Save entire dataloader to artifact directory
        with fileio.open(os.path.join(self.uri, DEFAULT_FILENAME), "wb") as f:
            torch.save(dataloader, f)

    def extract_metadata(self, dataloader: Any) -> Dict[str, "MetadataType"]:
        """Extract metadata from the given `DataLoader` object.

        Entries that cannot be determined are left out instead of raising:
        `num_samples` and `num_batches` are skipped when the dataset or the
        dataloader has no length (e.g. an iterable-style dataset), and
        `batch_size` is skipped when it is `None` (e.g. when a custom
        `batch_sampler` is used).

        Args:
            dataloader: The `DataLoader` object to extract metadata from.

        Returns:
            The extracted metadata as a dictionary.
        """
        super().extract_metadata(dataloader)

        metadata: Dict[str, "MetadataType"] = {}

        try:
            metadata["num_samples"] = len(dataloader.dataset)
        except TypeError:
            # Unsized dataset (no `__len__`): the sample count is unknown.
            pass

        # NOTE(review): assumes `None` is not a valid metadata value, so the
        # key is omitted rather than stored as `None` -- confirm.
        if dataloader.batch_size is not None:
            metadata["batch_size"] = dataloader.batch_size

        try:
            metadata["num_batches"] = len(dataloader)
        except TypeError:
            # `len(dataloader)` needs a sized dataset/sampler; skip otherwise.
            pass

        return metadata
extract_metadata(self, dataloader)

Extract metadata from the given DataLoader object.

Parameters:

Name Type Description Default
dataloader Any

The DataLoader object to extract metadata from.

required

Returns:

Type Description
Dict[str, MetadataType]

The extracted metadata as a dictionary.

Source code in zenml/integrations/pytorch/materializers/pytorch_dataloader_materializer.py
def extract_metadata(self, dataloader: Any) -> Dict[str, "MetadataType"]:
    """Extract metadata from the given `DataLoader` object.

    Entries that cannot be determined are left out instead of raising:
    `num_samples` and `num_batches` are skipped when the dataset or the
    dataloader has no length (e.g. an iterable-style dataset), and
    `batch_size` is skipped when it is `None` (e.g. when a custom
    `batch_sampler` is used).

    Args:
        dataloader: The `DataLoader` object to extract metadata from.

    Returns:
        The extracted metadata as a dictionary.
    """
    super().extract_metadata(dataloader)

    metadata: Dict[str, "MetadataType"] = {}

    try:
        metadata["num_samples"] = len(dataloader.dataset)
    except TypeError:
        # Unsized dataset (no `__len__`): the sample count is unknown.
        pass

    # NOTE(review): assumes `None` is not a valid metadata value, so the
    # key is omitted rather than stored as `None` -- confirm.
    if dataloader.batch_size is not None:
        metadata["batch_size"] = dataloader.batch_size

    try:
        metadata["num_batches"] = len(dataloader)
    except TypeError:
        # `len(dataloader)` needs a sized dataset/sampler; skip otherwise.
        pass

    return metadata
load(self, data_type)

Reads and returns a PyTorch dataloader.

Parameters:

Name Type Description Default
data_type Type[Any]

The type of the dataloader to load.

required

Returns:

Type Description
Any

A loaded PyTorch dataloader.

Source code in zenml/integrations/pytorch/materializers/pytorch_dataloader_materializer.py
def load(self, data_type: Type[Any]) -> Any:
    """Load the dataloader stored in this artifact's directory.

    Args:
        data_type: The type of the dataloader to load.

    Returns:
        The object that `torch.load` reads back from the artifact file.
    """
    super().load(data_type)

    dataloader_path = os.path.join(self.uri, DEFAULT_FILENAME)
    # NOTE(review): `torch.load` unpickles the file content, so only
    # artifacts coming from a trusted artifact store should be loaded.
    with fileio.open(dataloader_path, "rb") as dataloader_file:
        loaded = torch.load(dataloader_file)
    return loaded
save(self, dataloader)

Writes a PyTorch dataloader.

Parameters:

Name Type Description Default
dataloader Any

A torch.utils.data.DataLoader or a dict to pass into dataloader.save

required
Source code in zenml/integrations/pytorch/materializers/pytorch_dataloader_materializer.py
def save(self, dataloader: Any) -> None:
    """Serialize the dataloader into this artifact's directory.

    Args:
        dataloader: The object handed to `torch.save`, normally a
            `torch.utils.data.DataLoader`.
    """
    super().save(dataloader)

    # The whole dataloader object goes into one file of the artifact dir.
    dataloader_path = os.path.join(self.uri, DEFAULT_FILENAME)
    with fileio.open(dataloader_path, "wb") as dataloader_file:
        torch.save(dataloader, dataloader_file)

pytorch_module_materializer

Implementation of the PyTorch Module materializer.

PyTorchModuleMaterializer (BaseMaterializer)

Materializer to read/write PyTorch models.

Inspired by the guide: https://pytorch.org/tutorials/beginner/saving_loading_models.html

Source code in zenml/integrations/pytorch/materializers/pytorch_module_materializer.py
class PyTorchModuleMaterializer(BaseMaterializer):
    """Materializer to read/write PyTorch models.

    Saving writes two files into the artifact directory: the full pickled
    model (`DEFAULT_FILENAME`) and, for `Module` instances, its state dict
    (`CHECKPOINT_FILENAME`). Loading only reads the full model back.

    Inspired by the guide:
    https://pytorch.org/tutorials/beginner/saving_loading_models.html
    """

    ASSOCIATED_TYPES = (Module,)
    ASSOCIATED_ARTIFACT_TYPE = ArtifactType.MODEL

    def load(self, data_type: Type[Any]) -> Module:
        """Load the full model stored in this artifact's directory.

        The checkpoint (state dict) file is not read.

        Args:
            data_type: The type of the model to load.

        Returns:
            The model read back by `torch.load`.
        """
        super().load(data_type)

        model_path = os.path.join(self.uri, DEFAULT_FILENAME)
        # NOTE(review): `torch.load` unpickles the file content, so only
        # artifacts coming from a trusted artifact store should be loaded.
        with fileio.open(model_path, "rb") as model_file:
            loaded = torch.load(model_file)
        return cast(Module, loaded)

    def save(self, model: Module) -> None:
        """Write the model, both as a full model and as a checkpoint.

        Args:
            model: A `torch.nn.Module`; any other object is still passed to
                `torch.save`, but then no checkpoint file is written.
        """
        super().save(model)

        # Full model: the default way to reload a model during development
        # (training, evaluation).
        model_path = os.path.join(self.uri, DEFAULT_FILENAME)
        with fileio.open(model_path, "wb") as model_file:
            torch.save(model, model_file)

        # Only real modules have a state dict to checkpoint.
        if not isinstance(model, Module):
            return

        # Checkpoint (state dict): the default way to reload a model in
        # production (inference).
        checkpoint_path = os.path.join(self.uri, CHECKPOINT_FILENAME)
        with fileio.open(checkpoint_path, "wb") as checkpoint_file:
            torch.save(model.state_dict(), checkpoint_file)

    def extract_metadata(self, model: Module) -> Dict[str, "MetadataType"]:
        """Extract metadata from the given model.

        Args:
            model: The module to extract metadata from.

        Returns:
            A new dictionary holding the parameter counts reported by
            `count_module_params`.
        """
        super().extract_metadata(model)
        param_counts = count_module_params(model)
        return dict(param_counts)
extract_metadata(self, model)

Extract metadata from the given Model object.

Parameters:

Name Type Description Default
model Module

The Model object to extract metadata from.

required

Returns:

Type Description
Dict[str, MetadataType]

The extracted metadata as a dictionary.

Source code in zenml/integrations/pytorch/materializers/pytorch_module_materializer.py
def extract_metadata(self, model: Module) -> Dict[str, "MetadataType"]:
    """Extract metadata from the given model.

    Args:
        model: The module to extract metadata from.

    Returns:
        A new dictionary holding the parameter counts reported by
        `count_module_params`.
    """
    super().extract_metadata(model)
    param_counts = count_module_params(model)
    return dict(param_counts)
load(self, data_type)

Reads and returns a PyTorch model.

Only loads the model, not the checkpoint.

Parameters:

Name Type Description Default
data_type Type[Any]

The type of the model to load.

required

Returns:

Type Description
Module

A loaded pytorch model.

Source code in zenml/integrations/pytorch/materializers/pytorch_module_materializer.py
def load(self, data_type: Type[Any]) -> Module:
    """Load the full model stored in this artifact's directory.

    The checkpoint (state dict) file is not read.

    Args:
        data_type: The type of the model to load.

    Returns:
        The model read back by `torch.load`.
    """
    super().load(data_type)

    model_path = os.path.join(self.uri, DEFAULT_FILENAME)
    # NOTE(review): `torch.load` unpickles the file content, so only
    # artifacts coming from a trusted artifact store should be loaded.
    with fileio.open(model_path, "rb") as model_file:
        loaded = torch.load(model_file)
    return cast(Module, loaded)
save(self, model)

Writes a PyTorch model, as a model and a checkpoint.

Parameters:

Name Type Description Default
model Module

A torch.nn.Module or a dict to pass into model.save

required
Source code in zenml/integrations/pytorch/materializers/pytorch_module_materializer.py
def save(self, model: Module) -> None:
    """Write the model, both as a full model and as a checkpoint.

    Args:
        model: A `torch.nn.Module`; any other object is still passed to
            `torch.save`, but then no checkpoint file is written.
    """
    super().save(model)

    # Full model: the default way to reload a model during development
    # (training, evaluation).
    model_path = os.path.join(self.uri, DEFAULT_FILENAME)
    with fileio.open(model_path, "wb") as model_file:
        torch.save(model, model_file)

    # Only real modules have a state dict to checkpoint.
    if not isinstance(model, Module):
        return

    # Checkpoint (state dict): the default way to reload a model in
    # production (inference).
    checkpoint_path = os.path.join(self.uri, CHECKPOINT_FILENAME)
    with fileio.open(checkpoint_path, "wb") as checkpoint_file:
        torch.save(model.state_dict(), checkpoint_file)

utils

PyTorch utils.

count_module_params(module)

Get the total and trainable parameters of a module.

Parameters:

Name Type Description Default
module Module

The module to get the parameters of.

required

Returns:

Type Description
Dict[str, int]

A dictionary with the total and trainable parameters.

Source code in zenml/integrations/pytorch/utils.py
def count_module_params(module: torch.nn.Module) -> Dict[str, int]:
    """Count the total and the trainable parameters of a module.

    A parameter counts as trainable when its `requires_grad` flag is set.

    Args:
        module: The module whose parameters are counted.

    Returns:
        A dictionary with the element count of all parameters under
        `num_params` and of the trainable ones under
        `num_trainable_params`.
    """
    num_params = 0
    num_trainable_params = 0
    for parameter in module.parameters():
        size = parameter.numel()
        num_params += size
        if parameter.requires_grad:
            num_trainable_params += size
    return {
        "num_params": num_params,
        "num_trainable_params": num_trainable_params,
    }