Skip to content

Llama Index

zenml.integrations.llama_index special

Initialization of the Llama Index integration.

LlamaIndexIntegration (Integration)

Definition of Llama Index integration for ZenML.

Source code in zenml/integrations/llama_index/__init__.py
class LlamaIndexIntegration(Integration):
    """Definition of Llama Index integration for ZenML."""

    # Registry key under which this integration is known to ZenML.
    NAME = LLAMA_INDEX
    # Pip requirement pins for the integration's third-party package.
    REQUIREMENTS = ["llama_index>=0.4.28,<0.6.0"]

    @classmethod
    def activate(cls) -> None:
        """Activates the integration.

        Importing the materializers module registers the llama-index
        materializers with ZenML as an import side effect.
        """
        from zenml.integrations.llama_index import materializers  # noqa

activate() classmethod

Activates the integration.

Source code in zenml/integrations/llama_index/__init__.py
@classmethod
def activate(cls) -> None:
    """Activates the integration.

    Importing the materializers module registers the llama-index
    materializers with ZenML as an import side effect.
    """
    from zenml.integrations.llama_index import materializers  # noqa

materializers special

Initialization of the Llama Index materializers.

document_materializer

Implementation of the llama-index document materializer.

LlamaIndexDocumentMaterializer (LangchainDocumentMaterializer)

Handle serialization and deserialization of llama-index documents.

Source code in zenml/integrations/llama_index/materializers/document_materializer.py
class LlamaIndexDocumentMaterializer(LangchainDocumentMaterializer):
    """Handle serialization and deserialization of llama-index documents.

    Persistence is delegated entirely to the Langchain document
    materializer: documents are converted between the llama-index and
    Langchain formats on the way in and out.
    """

    ASSOCIATED_ARTIFACT_TYPE: ClassVar[ArtifactType] = ArtifactType.DATA
    ASSOCIATED_TYPES: ClassVar[Tuple[Type[Any], ...]] = (Document,)

    def load(self, data_type: Type[Any]) -> Any:
        """Reads a llama-index document from JSON.

        Args:
            data_type: The type of the data to read.

        Returns:
            The data read.
        """
        # Load via the parent as a Langchain document, then convert back.
        lc_document = super().load(LCDocument)
        return Document.from_langchain_format(lc_document)

    def save(self, data: Any) -> None:
        """Serialize a llama-index document as a Langchain document.

        Args:
            data: The data to store.
        """
        # Convert to the Langchain format and let the parent persist it.
        lc_document = data.to_langchain_format()
        super().save(lc_document)

    def extract_metadata(self, data: Any) -> Dict[str, "MetadataType"]:
        """Extract metadata from the given Llama Index document.

        Args:
            data: The BaseModel object to extract metadata from.

        Returns:
            The extracted metadata as a dictionary.
        """
        # Reuse the parent's extraction on the Langchain representation.
        lc_document = data.to_langchain_format()
        return super().extract_metadata(lc_document)
extract_metadata(self, data)

Extract metadata from the given Llama Index document.

Parameters:

Name Type Description Default
data Any

The BaseModel object to extract metadata from.

required

Returns:

Type Description
Dict[str, MetadataType]

The extracted metadata as a dictionary.

Source code in zenml/integrations/llama_index/materializers/document_materializer.py
def extract_metadata(self, data: Any) -> Dict[str, "MetadataType"]:
    """Extract metadata from the given Llama Index document.

    The document is first converted to its Langchain representation so
    the parent materializer's metadata extraction can be reused.

    Args:
        data: The BaseModel object to extract metadata from.

    Returns:
        The extracted metadata as a dictionary.
    """
    return super().extract_metadata(data.to_langchain_format())
load(self, data_type)

Reads a llama-index document from JSON.

Parameters:

Name Type Description Default
data_type Type[Any]

The type of the data to read.

required

Returns:

Type Description
Any

The data read.

Source code in zenml/integrations/llama_index/materializers/document_materializer.py
def load(self, data_type: Type[Any]) -> Any:
    """Reads a llama-index document from JSON.

    The payload is loaded as a Langchain document via the parent
    materializer and then converted to the llama-index format.

    Args:
        data_type: The type of the data to read.

    Returns:
        The data read.
    """
    return Document.from_langchain_format(super().load(LCDocument))
save(self, data)

Serialize a llama-index document as a Langchain document.

Parameters:

Name Type Description Default
data Any

The data to store.

required
Source code in zenml/integrations/llama_index/materializers/document_materializer.py
def save(self, data: Any) -> None:
    """Serialize a llama-index document as a Langchain document.

    The document is converted to its Langchain representation and
    persisted by the parent materializer.

    Args:
        data: The data to store.
    """
    super().save(data.to_langchain_format())

gpt_index_materializer

Implementation of the llama-index GPT index materializer.

LlamaIndexGPTFaissIndexMaterializer (BaseMaterializer)

Materializer for llama_index GPT faiss indices.

Source code in zenml/integrations/llama_index/materializers/gpt_index_materializer.py
class LlamaIndexGPTFaissIndexMaterializer(BaseMaterializer):
    """Materializer for llama_index GPT faiss indices.

    The serialized index JSON is staged through the local filesystem
    because llama_index's save/load APIs take local paths, while the
    artifact store (`self.uri`) may be remote — presumably the reason
    for the temp-file dance below.
    """

    ASSOCIATED_ARTIFACT_TYPE: ClassVar[ArtifactType] = ArtifactType.MODEL
    ASSOCIATED_TYPES: ClassVar[Tuple[Type[Any], ...]] = (GPTFaissIndex,)

    def load(self, data_type: Type[GPTFaissIndex]) -> GPTFaissIndex:
        """Load a llama-index GPT faiss index from disk.

        Args:
            data_type: The type of the index.

        Returns:
            The index.
        """
        filepath = os.path.join(self.uri, DEFAULT_FILENAME)
        faiss_filepath = os.path.join(self.uri, DEFAULT_FAISS_FILENAME)

        # Create a temporary folder and copy the serialized index from
        # the artifact store to local disk.
        temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
        temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)
        fileio.copy(filepath, temp_file)

        # Bug fix: load from the local copy (`temp_file`) rather than
        # the artifact store URI — previously the copy above was dead
        # code and `filepath` was read directly.
        index = data_type.load_from_disk(
            save_path=temp_file, faiss_index_save_path=faiss_filepath
        )

        # Cleanup and return
        fileio.rmtree(temp_dir)
        return cast(GPTFaissIndex, index)

    def save(self, index: GPTFaissIndex) -> None:
        """Save a llama-index GPT faiss index to disk.

        Args:
            index: The index to save.
        """
        filepath = os.path.join(self.uri, DEFAULT_FILENAME)
        faiss_filepath = os.path.join(self.uri, DEFAULT_FAISS_FILENAME)

        # Serialize to a local temporary file, then copy it into the
        # artifact store. `delete=False` keeps the file alive after the
        # `with` block so the explicit remove below can run.
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".json", delete=False
        ) as f:
            index.save_to_disk(
                save_path=f.name, faiss_index_save_path=faiss_filepath
            )
            # Copy it into artifact store
            fileio.copy(f.name, filepath)

        # The `with` block already closed the file; just remove it.
        fileio.remove(f.name)
load(self, data_type)

Load a llama-index GPT faiss index from disk.

Parameters:

Name Type Description Default
data_type Type[llama_index.indices.vector_store.vector_indices.GPTFaissIndex]

The type of the index.

required

Returns:

Type Description
GPTFaissIndex

The index.

Source code in zenml/integrations/llama_index/materializers/gpt_index_materializer.py
def load(self, data_type: Type[GPTFaissIndex]) -> GPTFaissIndex:
    """Load a llama-index GPT faiss index from disk.

    Args:
        data_type: The type of the index.

    Returns:
        The index.
    """
    filepath = os.path.join(self.uri, DEFAULT_FILENAME)
    faiss_filepath = os.path.join(self.uri, DEFAULT_FAISS_FILENAME)

    # Create a temporary folder and copy the serialized index from
    # the artifact store to local disk.
    temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
    temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)
    fileio.copy(filepath, temp_file)

    # Bug fix: load from the local copy (`temp_file`) rather than the
    # artifact store URI — previously the copy above was dead code and
    # `filepath` was read directly.
    index = data_type.load_from_disk(
        save_path=temp_file, faiss_index_save_path=faiss_filepath
    )

    # Cleanup and return
    fileio.rmtree(temp_dir)
    return cast(GPTFaissIndex, index)
save(self, index)

Save a llama-index GPT faiss index to disk.

Parameters:

Name Type Description Default
index GPTFaissIndex

The index to save.

required
Source code in zenml/integrations/llama_index/materializers/gpt_index_materializer.py
def save(self, index: GPTFaissIndex) -> None:
    """Save a llama-index GPT faiss index to disk.

    Args:
        index: The index to save.
    """
    target_path = os.path.join(self.uri, DEFAULT_FILENAME)
    faiss_target_path = os.path.join(self.uri, DEFAULT_FAISS_FILENAME)

    # Stage the serialized index in a local temporary JSON file, then
    # copy the result into the artifact store.
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".json", delete=False
    ) as tmp:
        index.save_to_disk(
            save_path=tmp.name, faiss_index_save_path=faiss_target_path
        )
        # Copy it into artifact store
        fileio.copy(tmp.name, target_path)

    # Close and remove the temporary file
    tmp.close()
    fileio.remove(tmp.name)
LlamaIndexGPTIndexMaterializer (Generic, BaseMaterializer)

Materializer for llama_index GPT indices.

Source code in zenml/integrations/llama_index/materializers/gpt_index_materializer.py
class LlamaIndexGPTIndexMaterializer(Generic[T], BaseMaterializer):
    """Materializer for llama_index GPT indices.

    The serialized index JSON is staged through the local filesystem
    because llama_index's save/load APIs take local paths, while the
    artifact store (`self.uri`) may be remote — presumably the reason
    for the temp-file dance below.
    """

    ASSOCIATED_ARTIFACT_TYPE: ClassVar[ArtifactType] = ArtifactType.MODEL
    ASSOCIATED_TYPES: ClassVar[Tuple[Type[Any], ...]] = (BaseGPTIndex,)

    def load(self, data_type: Type[T]) -> T:
        """Loads a llama-index GPT index from disk.

        Args:
            data_type: The type of the index.

        Returns:
            The index.
        """
        filepath = os.path.join(self.uri, DEFAULT_FILENAME)

        # Create a temporary folder and copy the serialized index from
        # the artifact store to local disk.
        temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
        temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)
        fileio.copy(filepath, temp_file)

        # Bug fix: load from the local copy (`temp_file`) rather than
        # the artifact store URI — previously the copy above was dead
        # code and `filepath` was read directly.
        index = data_type.load_from_disk(save_path=temp_file)
        assert isinstance(index, data_type)

        # Cleanup and return
        fileio.rmtree(temp_dir)
        return index

    def save(self, index: T) -> None:
        """Save a llama-index GPT index to disk.

        Args:
            index: The index to save.
        """
        filepath = os.path.join(self.uri, DEFAULT_FILENAME)

        # Serialize to a local temporary file, then copy it into the
        # artifact store. `delete=False` keeps the file alive after the
        # `with` block so the explicit remove below can run.
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".json", delete=False
        ) as f:
            index.save_to_disk(save_path=f.name)
            # Copy it into artifact store
            fileio.copy(f.name, filepath)

        # The `with` block already closed the file; just remove it.
        fileio.remove(f.name)
load(self, data_type)

Loads a llama-index GPT index from disk.

Parameters:

Name Type Description Default
data_type Type[~T]

The type of the index.

required

Returns:

Type Description
~T

The index.

Source code in zenml/integrations/llama_index/materializers/gpt_index_materializer.py
def load(self, data_type: Type[T]) -> T:
    """Loads a llama-index GPT index from disk.

    Args:
        data_type: The type of the index.

    Returns:
        The index.
    """
    filepath = os.path.join(self.uri, DEFAULT_FILENAME)

    # Create a temporary folder and copy the serialized index from
    # the artifact store to local disk.
    temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
    temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)
    fileio.copy(filepath, temp_file)

    # Bug fix: load from the local copy (`temp_file`) rather than the
    # artifact store URI — previously the copy above was dead code and
    # `filepath` was read directly.
    index = data_type.load_from_disk(save_path=temp_file)
    assert isinstance(index, data_type)

    # Cleanup and return
    fileio.rmtree(temp_dir)
    return index
save(self, index)

Save a llama-index GPT index to disk.

Parameters:

Name Type Description Default
index ~T

The index to save.

required
Source code in zenml/integrations/llama_index/materializers/gpt_index_materializer.py
def save(self, index: T) -> None:
    """Save a llama-index GPT index to disk.

    Args:
        index: The index to save.
    """
    target_path = os.path.join(self.uri, DEFAULT_FILENAME)

    # Stage the serialized index in a local temporary JSON file, then
    # copy the result into the artifact store.
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".json", delete=False
    ) as tmp:
        index.save_to_disk(save_path=tmp.name)
        # Copy it into artifact store
        fileio.copy(tmp.name, target_path)

    # Close and remove the temporary file
    tmp.close()
    fileio.remove(tmp.name)