Skip to content

Xgboost

zenml.integrations.xgboost special

Initialization of the XGBoost integration.

XgboostIntegration (Integration)

Definition of xgboost integration for ZenML.

Source code in zenml/integrations/xgboost/__init__.py
class XgboostIntegration(Integration):
    """ZenML integration descriptor for XGBoost."""

    # Identifier under which this integration is known.
    NAME = XGBOOST
    # Requirement specifiers needed by this integration.
    REQUIREMENTS = ["xgboost>=1.0.0"]

    @classmethod
    def activate(cls) -> None:
        """Activate the integration by importing its materializers package.

        The import is done lazily inside the method and only for its import
        side effects; the imported name itself is not used here.
        """
        import zenml.integrations.xgboost.materializers  # noqa

activate() classmethod

Activates the integration.

Source code in zenml/integrations/xgboost/__init__.py
@classmethod
def activate(cls) -> None:
    """Activate the integration by importing its materializers package.

    The import is done lazily inside the method and only for its import
    side effects; the imported name itself is not used here.
    """
    import zenml.integrations.xgboost.materializers  # noqa

materializers special

Initialization of the XGBoost materializers.

xgboost_booster_materializer

Implementation of an XGBoost booster materializer.

XgboostBoosterMaterializer (BaseMaterializer)

Materializer to read data to and from xgboost.Booster.

Source code in zenml/integrations/xgboost/materializers/xgboost_booster_materializer.py
class XgboostBoosterMaterializer(BaseMaterializer):
    """Materializer to read data to and from xgboost.Booster."""

    ASSOCIATED_TYPES = (xgb.Booster,)
    ASSOCIATED_ARTIFACT_TYPE = ArtifactType.MODEL

    def load(self, data_type: Type[Any]) -> xgb.Booster:
        """Reads an xgboost Booster model from the artifact store.

        The stored file is first copied into a local temporary directory and
        xgboost loads the model from that local copy.

        Args:
            data_type: A xgboost Booster type.

        Returns:
            A xgboost Booster object.
        """
        super().load(data_type)
        filepath = os.path.join(self.uri, DEFAULT_FILENAME)

        # Create a temporary folder to hold the local copy.
        temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
        try:
            temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)

            # Copy from artifact store to temporary file
            fileio.copy(filepath, temp_file)
            booster = xgb.Booster()
            booster.load_model(temp_file)
        finally:
            # Always clean up, even if the copy or the model load failed.
            fileio.rmtree(temp_dir)
        return booster

    def save(self, booster: xgb.Booster) -> None:
        """Serializes a xgboost Booster model into the artifact store.

        The model is written to a local temporary ``.json`` file, which is
        then copied to the artifact URI and removed.

        Args:
            booster: A xgboost Booster model.
        """
        super().save(booster)

        filepath = os.path.join(self.uri, DEFAULT_FILENAME)

        # Reserve a temporary path and release the handle right away: some
        # platforms (e.g. Windows) do not allow a second writer to open the
        # file while the NamedTemporaryFile handle is still open.
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".json", delete=False
        ) as f:
            temp_path = f.name

        try:
            booster.save_model(temp_path)
            # Copy it into artifact store
            fileio.copy(temp_path, filepath)
        finally:
            # Remove the temporary file even if saving or copying failed.
            fileio.remove(temp_path)
load(self, data_type)

Reads an XGBoost Booster model from a serialized JSON file.

Parameters:

Name Type Description Default
data_type Type[Any]

A xgboost Booster type.

required

Returns:

Type Description
Booster

A xgboost Booster object.

Source code in zenml/integrations/xgboost/materializers/xgboost_booster_materializer.py
def load(self, data_type: Type[Any]) -> xgb.Booster:
    """Reads an xgboost Booster model from the artifact store.

    The stored file is first copied into a local temporary directory and
    xgboost loads the model from that local copy.

    Args:
        data_type: A xgboost Booster type.

    Returns:
        A xgboost Booster object.
    """
    super().load(data_type)
    filepath = os.path.join(self.uri, DEFAULT_FILENAME)

    # Create a temporary folder to hold the local copy.
    temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
    try:
        temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)

        # Copy from artifact store to temporary file
        fileio.copy(filepath, temp_file)
        booster = xgb.Booster()
        booster.load_model(temp_file)
    finally:
        # Always clean up, even if the copy or the model load failed.
        fileio.rmtree(temp_dir)
    return booster
save(self, booster)

Creates a JSON serialization for an XGBoost Booster model.

Parameters:

Name Type Description Default
booster Booster

A xgboost Booster model.

required
Source code in zenml/integrations/xgboost/materializers/xgboost_booster_materializer.py
def save(self, booster: xgb.Booster) -> None:
    """Serializes a xgboost Booster model into the artifact store.

    The model is written to a local temporary ``.json`` file, which is
    then copied to the artifact URI and removed.

    Args:
        booster: A xgboost Booster model.
    """
    super().save(booster)

    filepath = os.path.join(self.uri, DEFAULT_FILENAME)

    # Reserve a temporary path and release the handle right away: some
    # platforms (e.g. Windows) do not allow a second writer to open the
    # file while the NamedTemporaryFile handle is still open.
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".json", delete=False
    ) as f:
        temp_path = f.name

    try:
        booster.save_model(temp_path)
        # Copy it into artifact store
        fileio.copy(temp_path, filepath)
    finally:
        # Remove the temporary file even if saving or copying failed.
        fileio.remove(temp_path)

xgboost_dmatrix_materializer

Implementation of the XGBoost DMatrix materializer.

XgboostDMatrixMaterializer (BaseMaterializer)

Materializer to read data to and from xgboost.DMatrix.

Source code in zenml/integrations/xgboost/materializers/xgboost_dmatrix_materializer.py
class XgboostDMatrixMaterializer(BaseMaterializer):
    """Materializer to read data to and from xgboost.DMatrix."""

    ASSOCIATED_TYPES = (xgb.DMatrix,)
    ASSOCIATED_ARTIFACT_TYPE = ArtifactType.DATA

    def load(self, data_type: Type[Any]) -> xgb.DMatrix:
        """Reads a xgboost.DMatrix binary file and loads it.

        The stored file is first copied into a local temporary directory and
        the matrix is constructed from that local copy.

        Args:
            data_type: The datatype which should be read.

        Returns:
            Materialized xgboost matrix.
        """
        super().load(data_type)
        filepath = os.path.join(self.uri, DEFAULT_FILENAME)

        # Create a temporary folder to hold the local copy.
        temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
        try:
            temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)

            # Copy from artifact store to temporary file
            fileio.copy(filepath, temp_file)
            matrix = xgb.DMatrix(temp_file)
        finally:
            # Always clean up, even if the copy or the matrix load failed.
            fileio.rmtree(temp_dir)
        return matrix

    def save(self, matrix: xgb.DMatrix) -> None:
        """Creates a binary serialization for a xgboost.DMatrix object.

        The matrix is written to a local temporary file, which is then
        copied to the artifact URI and removed.

        Args:
            matrix: A xgboost.DMatrix object.
        """
        super().save(matrix)
        filepath = os.path.join(self.uri, DEFAULT_FILENAME)

        # Reserve a temporary path and release the handle right away: some
        # platforms (e.g. Windows) do not allow a second writer to open the
        # file while the NamedTemporaryFile handle is still open.
        with tempfile.NamedTemporaryFile(mode="wb", delete=False) as f:
            temp_path = f.name

        try:
            matrix.save_binary(temp_path)
            # Copy it into artifact store
            fileio.copy(temp_path, filepath)
        finally:
            # Remove the temporary file even if saving or copying failed.
            fileio.remove(temp_path)

    def extract_metadata(
        self, dataset: xgb.DMatrix
    ) -> Dict[str, "MetadataType"]:
        """Extract metadata from the given `DMatrix` object.

        Args:
            dataset: The `DMatrix` object to extract metadata from.

        Returns:
            A dictionary with a single ``"shape"`` entry holding the
            ``(num_row, num_col)`` tuple of the matrix.
        """
        super().extract_metadata(dataset)
        return {
            "shape": (dataset.num_row(), dataset.num_col()),
        }
extract_metadata(self, dataset)

Extract metadata from the given DMatrix object.

Parameters:

Name Type Description Default
dataset DMatrix

The DMatrix object to extract metadata from.

required

Returns:

Type Description
Dict[str, MetadataType]

The extracted metadata as a dictionary.

Source code in zenml/integrations/xgboost/materializers/xgboost_dmatrix_materializer.py
def extract_metadata(
    self, dataset: xgb.DMatrix
) -> Dict[str, "MetadataType"]:
    """Collect metadata describing the given `DMatrix`.

    Args:
        dataset: The `DMatrix` whose metadata should be extracted.

    Returns:
        A dictionary with a single ``"shape"`` entry holding the
        ``(num_row, num_col)`` tuple of the matrix.
    """
    # The parent implementation is invoked first; its result is not used.
    super().extract_metadata(dataset)

    row_count = dataset.num_row()
    column_count = dataset.num_col()
    metadata = {"shape": (row_count, column_count)}
    return metadata
load(self, data_type)

Reads a xgboost.DMatrix binary file and loads it.

Parameters:

Name Type Description Default
data_type Type[Any]

The datatype which should be read.

required

Returns:

Type Description
DMatrix

Materialized xgboost matrix.

Source code in zenml/integrations/xgboost/materializers/xgboost_dmatrix_materializer.py
def load(self, data_type: Type[Any]) -> xgb.DMatrix:
    """Reads a xgboost.DMatrix binary file and loads it.

    The stored file is first copied into a local temporary directory and
    the matrix is constructed from that local copy.

    Args:
        data_type: The datatype which should be read.

    Returns:
        Materialized xgboost matrix.
    """
    super().load(data_type)
    filepath = os.path.join(self.uri, DEFAULT_FILENAME)

    # Create a temporary folder to hold the local copy.
    temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
    try:
        temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)

        # Copy from artifact store to temporary file
        fileio.copy(filepath, temp_file)
        matrix = xgb.DMatrix(temp_file)
    finally:
        # Always clean up, even if the copy or the matrix load failed.
        fileio.rmtree(temp_dir)
    return matrix
save(self, matrix)

Creates a binary serialization for a xgboost.DMatrix object.

Parameters:

Name Type Description Default
matrix DMatrix

A xgboost.DMatrix object.

required
Source code in zenml/integrations/xgboost/materializers/xgboost_dmatrix_materializer.py
def save(self, matrix: xgb.DMatrix) -> None:
    """Creates a binary serialization for a xgboost.DMatrix object.

    The matrix is written to a local temporary file, which is then copied
    to the artifact URI and removed.

    Args:
        matrix: A xgboost.DMatrix object.
    """
    super().save(matrix)
    filepath = os.path.join(self.uri, DEFAULT_FILENAME)

    # Reserve a temporary path and release the handle right away: some
    # platforms (e.g. Windows) do not allow a second writer to open the
    # file while the NamedTemporaryFile handle is still open.
    with tempfile.NamedTemporaryFile(mode="wb", delete=False) as f:
        temp_path = f.name

    try:
        matrix.save_binary(temp_path)
        # Copy it into artifact store
        fileio.copy(temp_path, filepath)
    finally:
        # Remove the temporary file even if saving or copying failed.
        fileio.remove(temp_path)