Skip to content

Llama Index

zenml.integrations.llama_index special

Initialization of the Llama Index integration.

LlamaIndexIntegration (Integration)

Definition of Llama Index integration for ZenML.

Source code in zenml/integrations/llama_index/__init__.py
class LlamaIndexIntegration(Integration):
    """Registers the Llama Index integration with ZenML."""

    NAME = LLAMA_INDEX
    REQUIREMENTS = ["llama_index>=0.4.28"]

    @classmethod
    def activate(cls) -> None:
        """Activates the integration.

        Importing the materializers module has the side effect of
        registering the llama-index materializers with ZenML.
        """
        import zenml.integrations.llama_index.materializers  # noqa

activate() classmethod

Activates the integration.

Source code in zenml/integrations/llama_index/__init__.py
@classmethod
def activate(cls) -> None:
    """Activates the integration.

    The import is performed purely for its side effect: loading the
    materializers module registers the llama-index materializers.
    """
    import zenml.integrations.llama_index.materializers  # noqa

materializers special

Initialization of the Llama Index materializers.

document_materializer

Implementation of the llama-index document materializer.

LlamaIndexDocumentMaterializer (BaseMaterializer)

Handle serialization and deserialization of llama-index documents.

Source code in zenml/integrations/llama_index/materializers/document_materializer.py
class LlamaIndexDocumentMaterializer(BaseMaterializer):
    """Handle serialization and deserialization of llama-index documents.

    Documents are persisted via their Langchain representation: saving and
    metadata extraction delegate to the wrapped Langchain document
    materializer, while loading reads the JSON payload back from the
    artifact store and converts it into a llama-index ``Document``.
    """

    ASSOCIATED_ARTIFACT_TYPE = ArtifactType.DATA
    ASSOCIATED_TYPES = (Document,)

    def __init__(self, **kwargs: Any) -> None:
        """Initializes the llama-index document materializer.

        Args:
            **kwargs: Keyword arguments, forwarded both to the base
                materializer and to the wrapped Langchain materializer.
        """
        super().__init__(**kwargs)
        # Reuse the existing Langchain materializer for the Langchain
        # representation of the document.
        self._langchain_materializer = LangchainDocumentMaterializer(**kwargs)

    def load(self, data_type: Type[Document]) -> Document:
        """Reads a llama-index document from JSON.

        Args:
            data_type: The type of the data to read.

        Returns:
            The data read.
        """
        # Run base-class load hooks; their return value is not the
        # document payload, so it is deliberately ignored (the original
        # bound it to `contents` only to clobber it on the next line).
        super().load(data_type)
        data_path = os.path.join(self.uri, DEFAULT_FILENAME)
        contents = yaml_utils.read_json(data_path)
        langchain_document = LCDocument.parse_raw(contents)
        return Document.from_langchain_format(langchain_document)

    def save(self, data: Document) -> None:
        """Serialize a llama-index document as a Langchain document.

        Args:
            data: The data to store.
        """
        super().save(data)
        # Convert to the Langchain format and let the Langchain
        # materializer handle persistence.
        lc_doc = data.to_langchain_format()
        self._langchain_materializer.save(lc_doc)

    def extract_metadata(self, data: Document) -> Dict[str, "MetadataType"]:
        """Extract metadata from the given Llama Index document.

        Args:
            data: The document to extract metadata from.

        Returns:
            The extracted metadata as a dictionary.
        """
        return self._langchain_materializer.extract_metadata(
            data.to_langchain_format()
        )
__init__(self, **kwargs) special

Initializes the llama-index document materializer.

Parameters:

Name Type Description Default
**kwargs Any

Keyword arguments.

{}
Source code in zenml/integrations/llama_index/materializers/document_materializer.py
def __init__(self, **kwargs: Any) -> None:
    """Set up the materializer and its wrapped Langchain counterpart.

    Args:
        **kwargs: Keyword arguments forwarded to both the base
            materializer and the Langchain document materializer.
    """
    super().__init__(**kwargs)
    # The Langchain materializer does the actual (de)serialization work
    # for the Langchain representation of documents.
    self._langchain_materializer = LangchainDocumentMaterializer(**kwargs)
extract_metadata(self, data)

Extract metadata from the given Llama Index document.

Parameters:

Name Type Description Default
data Document

The BaseModel object to extract metadata from.

required

Returns:

Type Description
Dict[str, MetadataType]

The extracted metadata as a dictionary.

Source code in zenml/integrations/llama_index/materializers/document_materializer.py
def extract_metadata(self, data: Document) -> Dict[str, "MetadataType"]:
    """Extract metadata from the given Llama Index document.

    Args:
        data: The document to extract metadata from.

    Returns:
        The extracted metadata as a dictionary.
    """
    # Convert first, then delegate extraction to the Langchain materializer.
    lc_document = data.to_langchain_format()
    return self._langchain_materializer.extract_metadata(lc_document)
load(self, data_type)

Reads a llama-index document from JSON.

Parameters:

Name Type Description Default
data_type Type[llama_index.readers.schema.base.Document]

The type of the data to read.

required

Returns:

Type Description
Document

The data read.

Source code in zenml/integrations/llama_index/materializers/document_materializer.py
def load(self, data_type: Type[Document]) -> Document:
    """Reads a llama-index document from JSON.

    Args:
        data_type: The type of the data to read.

    Returns:
        The data read.
    """
    # Run base-class load hooks; their return value is not the document
    # payload, so it is deliberately ignored (the original bound it to
    # `contents` only to overwrite it on the next line).
    super().load(data_type)
    data_path = os.path.join(self.uri, DEFAULT_FILENAME)
    contents = yaml_utils.read_json(data_path)
    langchain_document = LCDocument.parse_raw(contents)
    return Document.from_langchain_format(langchain_document)
save(self, data)

Serialize a llama-index document as a Langchain document.

Parameters:

Name Type Description Default
data Document

The data to store.

required
Source code in zenml/integrations/llama_index/materializers/document_materializer.py
def save(self, data: Document) -> None:
    """Serialize a llama-index document as a Langchain document.

    Args:
        data: The data to store.
    """
    super().save(data)
    # Persist via the Langchain materializer using the Langchain format.
    self._langchain_materializer.save(data.to_langchain_format())

gpt_index_materializer

Implementation of the llama-index GPT index materializer.

LlamaIndexGPTFaissIndexMaterializer (BaseMaterializer)

Materializer for llama_index GPT faiss indices.

Source code in zenml/integrations/llama_index/materializers/gpt_index_materializer.py
class LlamaIndexGPTFaissIndexMaterializer(BaseMaterializer):
    """Materializer for llama_index GPT faiss indices.

    The serialized index is staged through a local temporary directory,
    since the artifact store URI may not be a local path that
    llama_index can read from or write to directly.
    """

    ASSOCIATED_ARTIFACT_TYPE = ArtifactType.MODEL
    ASSOCIATED_TYPES = (GPTFaissIndex,)

    def load(self, data_type: Type[GPTFaissIndex]) -> GPTFaissIndex:
        """Load a llama-index GPT faiss index from disk.

        Args:
            data_type: The type of the index.

        Returns:
            The index.
        """
        super().load(data_type)
        filepath = os.path.join(self.uri, DEFAULT_FILENAME)
        faiss_filepath = os.path.join(self.uri, DEFAULT_FAISS_FILENAME)

        # Stage both files in a local temporary folder.
        temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
        temp_file = os.path.join(temp_dir, DEFAULT_FILENAME)
        temp_faiss_file = os.path.join(temp_dir, DEFAULT_FAISS_FILENAME)

        # Copy from artifact store to the temporary files.
        fileio.copy(filepath, temp_file)
        fileio.copy(faiss_filepath, temp_faiss_file)

        # Load from the local copies. (Bug fix: the original copied the
        # index into `temp_file` but then loaded from the artifact-store
        # paths, leaving the staged copy unused and failing for
        # non-local artifact stores.)
        index = data_type.load_from_disk(
            save_path=temp_file, faiss_index_save_path=temp_faiss_file
        )

        # Cleanup and return
        fileio.rmtree(temp_dir)
        return cast(GPTFaissIndex, index)

    def save(self, index: GPTFaissIndex) -> None:
        """Save a llama-index GPT faiss index to disk.

        Args:
            index: The index to save.
        """
        super().save(index)
        filepath = os.path.join(self.uri, DEFAULT_FILENAME)
        faiss_filepath = os.path.join(self.uri, DEFAULT_FAISS_FILENAME)

        # NOTE(review): the faiss index is written straight to the
        # artifact store URI; this presumably only works for local
        # artifact stores — confirm.
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".json", delete=False
        ) as f:
            index.save_to_disk(
                save_path=f.name, faiss_index_save_path=faiss_filepath
            )
            # Copy it into the artifact store.
            fileio.copy(f.name, filepath)

        # The context manager already closed the file; just remove it.
        fileio.remove(f.name)
load(self, data_type)

Load a llama-index GPT faiss index from disk.

Parameters:

Name Type Description Default
data_type Type[GPTFaissIndex]

The type of the index.

required

Returns:

Type Description
GPTFaissIndex

The index.

Source code in zenml/integrations/llama_index/materializers/gpt_index_materializer.py
def load(self, data_type: Type[GPTFaissIndex]) -> GPTFaissIndex:
    """Load a llama-index GPT faiss index from disk.

    Args:
        data_type: The type of the index.

    Returns:
        The index.
    """
    super().load(data_type)
    filepath = os.path.join(self.uri, DEFAULT_FILENAME)
    faiss_filepath = os.path.join(self.uri, DEFAULT_FAISS_FILENAME)

    # Stage both files in a local temporary folder, since the artifact
    # store URI may not be a local path llama_index can read.
    temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
    temp_file = os.path.join(temp_dir, DEFAULT_FILENAME)
    temp_faiss_file = os.path.join(temp_dir, DEFAULT_FAISS_FILENAME)

    # Copy from artifact store to the temporary files.
    fileio.copy(filepath, temp_file)
    fileio.copy(faiss_filepath, temp_faiss_file)

    # Load from the local copies. (Bug fix: the original copied into
    # `temp_file` but then loaded from the artifact-store paths,
    # leaving the staged copy unused.)
    index = data_type.load_from_disk(
        save_path=temp_file, faiss_index_save_path=temp_faiss_file
    )

    # Cleanup and return
    fileio.rmtree(temp_dir)
    return cast(GPTFaissIndex, index)
save(self, index)

Save a llama-index GPT faiss index to disk.

Parameters:

Name Type Description Default
index GPTFaissIndex

The index to save.

required
Source code in zenml/integrations/llama_index/materializers/gpt_index_materializer.py
def save(self, index: GPTFaissIndex) -> None:
    """Save a llama-index GPT faiss index to disk.

    Args:
        index: The index to save.
    """
    super().save(index)
    filepath = os.path.join(self.uri, DEFAULT_FILENAME)
    faiss_filepath = os.path.join(self.uri, DEFAULT_FAISS_FILENAME)

    # Serialize to a local temporary file first, then copy it into the
    # artifact store.
    # NOTE(review): the faiss index is written straight to the artifact
    # store URI; this presumably only works for local stores — confirm.
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".json", delete=False
    ) as f:
        index.save_to_disk(
            save_path=f.name, faiss_index_save_path=faiss_filepath
        )
        # Copy it into the artifact store.
        fileio.copy(f.name, filepath)

    # The context manager already closed the file (the original's extra
    # f.close() was a redundant no-op); just remove it.
    fileio.remove(f.name)
LlamaIndexGPTIndexMaterializer (Generic, BaseMaterializer)

Materializer for llama_index GPT indices.

Source code in zenml/integrations/llama_index/materializers/gpt_index_materializer.py
class LlamaIndexGPTIndexMaterializer(Generic[T], BaseMaterializer):
    """Materializer for llama_index GPT indices.

    The serialized index is staged through a local temporary directory,
    since the artifact store URI may not be a local path that
    llama_index can read from or write to directly.
    """

    ASSOCIATED_ARTIFACT_TYPE = ArtifactType.MODEL
    ASSOCIATED_TYPES = (BaseGPTIndex,)

    def load(self, data_type: Type[T]) -> T:
        """Loads a llama-index GPT index from disk.

        Args:
            data_type: The type of the index.

        Returns:
            The index.
        """
        super().load(data_type)
        filepath = os.path.join(self.uri, DEFAULT_FILENAME)

        # Stage the serialized index in a local temporary folder.
        temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
        temp_file = os.path.join(temp_dir, DEFAULT_FILENAME)

        # Copy from artifact store to the temporary file.
        fileio.copy(filepath, temp_file)

        # Load from the local copy. (Bug fix: the original copied into
        # `temp_file` but then loaded from the artifact-store path,
        # leaving the staged copy unused.)
        index = data_type.load_from_disk(save_path=temp_file)
        assert isinstance(index, data_type)

        # Cleanup and return
        fileio.rmtree(temp_dir)
        return index

    def save(self, index: T) -> None:
        """Save a llama-index GPT index to disk.

        Args:
            index: The index to save.
        """
        super().save(index)
        filepath = os.path.join(self.uri, DEFAULT_FILENAME)

        # Serialize to a local temporary file first, then copy it into
        # the artifact store.
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".json", delete=False
        ) as f:
            index.save_to_disk(save_path=f.name)
            # Copy it into the artifact store.
            fileio.copy(f.name, filepath)

        # The context manager already closed the file; just remove it.
        fileio.remove(f.name)
load(self, data_type)

Loads a llama-index GPT index from disk.

Parameters:

Name Type Description Default
data_type Type[T]

The type of the index.

required

Returns:

Type Description
T

The index.

Source code in zenml/integrations/llama_index/materializers/gpt_index_materializer.py
def load(self, data_type: Type[T]) -> T:
    """Loads a llama-index GPT index from disk.

    Args:
        data_type: The type of the index.

    Returns:
        The index.
    """
    super().load(data_type)
    filepath = os.path.join(self.uri, DEFAULT_FILENAME)

    # Stage the serialized index in a local temporary folder, since the
    # artifact store URI may not be a local path llama_index can read.
    temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
    temp_file = os.path.join(temp_dir, DEFAULT_FILENAME)

    # Copy from artifact store to the temporary file.
    fileio.copy(filepath, temp_file)

    # Load from the local copy. (Bug fix: the original copied into
    # `temp_file` but then loaded from the artifact-store path, leaving
    # the staged copy unused.)
    index = data_type.load_from_disk(save_path=temp_file)
    assert isinstance(index, data_type)

    # Cleanup and return
    fileio.rmtree(temp_dir)
    return index
save(self, index)

Save a llama-index GPT index to disk.

Parameters:

Name Type Description Default
index T

The index to save.

required
Source code in zenml/integrations/llama_index/materializers/gpt_index_materializer.py
def save(self, index: T) -> None:
    """Save a llama-index GPT index to disk.

    Args:
        index: The index to save.
    """
    super().save(index)
    filepath = os.path.join(self.uri, DEFAULT_FILENAME)

    # Serialize to a local temporary file first, then copy it into the
    # artifact store.
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".json", delete=False
    ) as f:
        index.save_to_disk(save_path=f.name)
        # Copy it into the artifact store.
        fileio.copy(f.name, filepath)

    # The context manager already closed the file (the original's extra
    # f.close() was a redundant no-op); just remove it.
    fileio.remove(f.name)