Skip to content

Llama Index

zenml.integrations.llama_index special

Initialization of the Llama Index integration.

LlamaIndexIntegration (Integration)

Definition of Llama Index integration for ZenML.

Source code in zenml/integrations/llama_index/__init__.py
class LlamaIndexIntegration(Integration):
    """Registers the Llama Index integration with ZenML."""

    NAME = LLAMA_INDEX
    REQUIREMENTS = ["llama_index>=0.4.28"]

    @classmethod
    def activate(cls) -> None:
        """Activates the integration.

        Importing the materializers module has the side effect of
        registering the llama-index materializers with ZenML.
        """
        import zenml.integrations.llama_index.materializers  # noqa

activate() classmethod

Activates the integration.

Source code in zenml/integrations/llama_index/__init__.py
@classmethod
def activate(cls) -> None:
    """Activates the integration.

    The import is performed purely for its side effect: loading the
    materializers module registers the llama-index materializers.
    """
    import zenml.integrations.llama_index.materializers  # noqa

materializers special

Initialization of the Llama Index materializers.

document_materializer

Implementation of the llama-index document materializer.

LlamaIndexDocumentMaterializer (BaseMaterializer)

Handle serialization and deserialization of llama-index documents.

Source code in zenml/integrations/llama_index/materializers/document_materializer.py
class LlamaIndexDocumentMaterializer(BaseMaterializer):
    """Handle serialization and deserialization of llama-index documents.

    Documents are persisted via their Langchain representation: saving and
    metadata extraction delegate to the wrapped Langchain document
    materializer, while loading reads the JSON payload back from the
    artifact store and converts it into a llama-index ``Document``.
    """

    ASSOCIATED_ARTIFACT_TYPE = ArtifactType.DATA
    ASSOCIATED_TYPES = (Document,)

    def __init__(self, **kwargs: Any) -> None:
        """Initializes the llama-index document materializer.

        Args:
            **kwargs: Keyword arguments, forwarded both to the base
                materializer and to the wrapped Langchain materializer.
        """
        super().__init__(**kwargs)
        # Reuse the existing Langchain materializer for the Langchain
        # representation of the document.
        self._langchain_materializer = LangchainDocumentMaterializer(**kwargs)

    def load(self, data_type: Type[Document]) -> Document:
        """Reads a llama-index document from JSON.

        Args:
            data_type: The type of the data to read.

        Returns:
            The data read.
        """
        # Run base-class load hooks; their return value is not the
        # document payload, so it is deliberately ignored (the original
        # bound it to `contents` only to clobber it on the next line).
        super().load(data_type)
        data_path = os.path.join(self.uri, DEFAULT_FILENAME)
        contents = yaml_utils.read_json(data_path)
        langchain_document = LCDocument.parse_raw(contents)
        return Document.from_langchain_format(langchain_document)

    def save(self, data: Document) -> None:
        """Serialize a llama-index document as a Langchain document.

        Args:
            data: The data to store.
        """
        super().save(data)
        # Convert to the Langchain format and let the Langchain
        # materializer handle persistence.
        lc_doc = data.to_langchain_format()
        self._langchain_materializer.save(lc_doc)

    def extract_metadata(self, data: Document) -> Dict[str, "MetadataType"]:
        """Extract metadata from the given Llama Index document.

        Args:
            data: The document to extract metadata from.

        Returns:
            The extracted metadata as a dictionary.
        """
        return self._langchain_materializer.extract_metadata(
            data.to_langchain_format()
        )
__init__(self, **kwargs) special

Initializes the llama-index document materializer.

Parameters:

Name Type Description Default
**kwargs Any

Keyword arguments.

{}
Source code in zenml/integrations/llama_index/materializers/document_materializer.py
def __init__(self, **kwargs: Any) -> None:
    """Set up the materializer and its wrapped Langchain counterpart.

    Args:
        **kwargs: Keyword arguments forwarded to both the base
            materializer and the Langchain document materializer.
    """
    super().__init__(**kwargs)
    # The Langchain materializer does the actual (de)serialization work
    # for the Langchain representation of documents.
    self._langchain_materializer = LangchainDocumentMaterializer(**kwargs)
extract_metadata(self, data)

Extract metadata from the given Llama Index document.

Parameters:

Name Type Description Default
data Document

The BaseModel object to extract metadata from.

required

Returns:

Type Description
Dict[str, MetadataType]

The extracted metadata as a dictionary.

Source code in zenml/integrations/llama_index/materializers/document_materializer.py
def extract_metadata(self, data: Document) -> Dict[str, "MetadataType"]:
    """Extract metadata from the given Llama Index document.

    Args:
        data: The document to extract metadata from.

    Returns:
        The extracted metadata as a dictionary.
    """
    # Convert first, then delegate extraction to the Langchain materializer.
    lc_document = data.to_langchain_format()
    return self._langchain_materializer.extract_metadata(lc_document)
load(self, data_type)

Reads a llama-index document from JSON.

Parameters:

Name Type Description Default
data_type Type[llama_index.readers.schema.base.Document]

The type of the data to read.

required

Returns:

Type Description
Document

The data read.

Source code in zenml/integrations/llama_index/materializers/document_materializer.py
def load(self, data_type: Type[Document]) -> Document:
    """Reads a llama-index document from JSON.

    Args:
        data_type: The type of the data to read.

    Returns:
        The data read.
    """
    # Run base-class load hooks; their return value is not the document
    # payload, so it is deliberately ignored (the original bound it to
    # `contents` only to overwrite it on the next line).
    super().load(data_type)
    data_path = os.path.join(self.uri, DEFAULT_FILENAME)
    contents = yaml_utils.read_json(data_path)
    langchain_document = LCDocument.parse_raw(contents)
    return Document.from_langchain_format(langchain_document)
save(self, data)

Serialize a llama-index document as a Langchain document.

Parameters:

Name Type Description Default
data Document

The data to store.

required
Source code in zenml/integrations/llama_index/materializers/document_materializer.py
def save(self, data: Document) -> None:
    """Serialize a llama-index document as a Langchain document.

    Args:
        data: The data to store.
    """
    super().save(data)
    # Persist via the Langchain materializer using the Langchain format.
    self._langchain_materializer.save(data.to_langchain_format())

gpt_index_materializer

Implementation of the llama-index GPT index materializer.

LlamaIndexGPTFaissIndexMaterializer (BaseMaterializer)

Materializer for llama_index GPT faiss indices.

Source code in zenml/integrations/llama_index/materializers/gpt_index_materializer.py
class LlamaIndexGPTFaissIndexMaterializer(BaseMaterializer):
    """Materializer for llama_index GPT faiss indices.

    The serialized index is staged through a local temporary directory,
    since the artifact store URI may not be a local path that
    llama_index can read from or write to directly.
    """

    ASSOCIATED_ARTIFACT_TYPE = ArtifactType.MODEL
    ASSOCIATED_TYPES = (GPTFaissIndex,)

    def load(self, data_type: Type[GPTFaissIndex]) -> GPTFaissIndex:
        """Load a llama-index GPT faiss index from disk.

        Args:
            data_type: The type of the index.

        Returns:
            The index.
        """
        super().load(data_type)
        filepath = os.path.join(self.uri, DEFAULT_FILENAME)
        faiss_filepath = os.path.join(self.uri, DEFAULT_FAISS_FILENAME)

        # Stage both files in a local temporary folder.
        temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
        temp_file = os.path.join(temp_dir, DEFAULT_FILENAME)
        temp_faiss_file = os.path.join(temp_dir, DEFAULT_FAISS_FILENAME)

        # Copy from artifact store to the temporary files.
        fileio.copy(filepath, temp_file)
        fileio.copy(faiss_filepath, temp_faiss_file)

        # Load from the local copies. (Bug fix: the original copied the
        # index into `temp_file` but then loaded from the artifact-store
        # paths, leaving the staged copy unused and failing for
        # non-local artifact stores.)
        index = data_type.load_from_disk(
            save_path=temp_file, faiss_index_save_path=temp_faiss_file
        )

        # Cleanup and return
        fileio.rmtree(temp_dir)
        return cast(GPTFaissIndex, index)

    def save(self, index: GPTFaissIndex) -> None:
        """Save a llama-index GPT faiss index to disk.

        Args:
            index: The index to save.
        """
        super().save(index)
        filepath = os.path.join(self.uri, DEFAULT_FILENAME)
        faiss_filepath = os.path.join(self.uri, DEFAULT_FAISS_FILENAME)

        # NOTE(review): the faiss index is written straight to the
        # artifact store URI; this presumably only works for local
        # artifact stores — confirm.
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".json", delete=False
        ) as f:
            index.save_to_disk(
                save_path=f.name, faiss_index_save_path=faiss_filepath
            )
            # Copy it into the artifact store.
            fileio.copy(f.name, filepath)

        # The context manager already closed the file; just remove it.
        fileio.remove(f.name)
load(self, data_type)

Load a llama-index GPT faiss index from disk.

Parameters:

Name Type Description Default
data_type Type[GPTFaissIndex]

The type of the index.

required

Returns:

Type Description
GPTFaissIndex

The index.

Source code in zenml/integrations/llama_index/materializers/gpt_index_materializer.py
def load(self, data_type: Type[GPTFaissIndex]) -> GPTFaissIndex:
    """Load a llama-index GPT faiss index from disk.

    Args:
        data_type: The type of the index.

    Returns:
        The index.
    """
    super().load(data_type)
    filepath = os.path.join(self.uri, DEFAULT_FILENAME)
    faiss_filepath = os.path.join(self.uri, DEFAULT_FAISS_FILENAME)

    # Stage both files in a local temporary folder, since the artifact
    # store URI may not be a local path llama_index can read.
    temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
    temp_file = os.path.join(temp_dir, DEFAULT_FILENAME)
    temp_faiss_file = os.path.join(temp_dir, DEFAULT_FAISS_FILENAME)

    # Copy from artifact store to the temporary files.
    fileio.copy(filepath, temp_file)
    fileio.copy(faiss_filepath, temp_faiss_file)

    # Load from the local copies. (Bug fix: the original copied into
    # `temp_file` but then loaded from the artifact-store paths,
    # leaving the staged copy unused.)
    index = data_type.load_from_disk(
        save_path=temp_file, faiss_index_save_path=temp_faiss_file
    )

    # Cleanup and return
    fileio.rmtree(temp_dir)
    return cast(GPTFaissIndex, index)
save(self, index)

Save a llama-index GPT faiss index to disk.

Parameters:

Name Type Description Default
index GPTFaissIndex

The index to save.

required
Source code in zenml/integrations/llama_index/materializers/gpt_index_materializer.py
def save(self, index: GPTFaissIndex) -> None:
    """Save a llama-index GPT faiss index to disk.

    Args:
        index: The index to save.
    """
    super().save(index)
    filepath = os.path.join(self.uri, DEFAULT_FILENAME)
    faiss_filepath = os.path.join(self.uri, DEFAULT_FAISS_FILENAME)

    # Serialize to a local temporary file first, then copy it into the
    # artifact store.
    # NOTE(review): the faiss index is written straight to the artifact
    # store URI; this presumably only works for local stores — confirm.
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".json", delete=False
    ) as f:
        index.save_to_disk(
            save_path=f.name, faiss_index_save_path=faiss_filepath
        )
        # Copy it into the artifact store.
        fileio.copy(f.name, filepath)

    # The context manager already closed the file (the original's extra
    # f.close() was a redundant no-op); just remove it.
    fileio.remove(f.name)
LlamaIndexGPTIndexMaterializer (Generic, BaseMaterializer)

Materializer for llama_index GPT indices.

Source code in zenml/integrations/llama_index/materializers/gpt_index_materializer.py
class LlamaIndexGPTIndexMaterializer(Generic[T], BaseMaterializer):
    """Materializer for llama_index GPT indices.

    The serialized index is staged through a local temporary directory,
    since the artifact store URI may not be a local path that
    llama_index can read from or write to directly.
    """

    ASSOCIATED_ARTIFACT_TYPE = ArtifactType.MODEL
    ASSOCIATED_TYPES = (BaseGPTIndex,)

    def load(self, data_type: Type[T]) -> T:
        """Loads a llama-index GPT index from disk.

        Args:
            data_type: The type of the index.

        Returns:
            The index.
        """
        super().load(data_type)
        filepath = os.path.join(self.uri, DEFAULT_FILENAME)

        # Stage the serialized index in a local temporary folder.
        temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
        temp_file = os.path.join(temp_dir, DEFAULT_FILENAME)

        # Copy from artifact store to the temporary file.
        fileio.copy(filepath, temp_file)

        # Load from the local copy. (Bug fix: the original copied into
        # `temp_file` but then loaded from the artifact-store path,
        # leaving the staged copy unused.)
        index = data_type.load_from_disk(save_path=temp_file)
        assert isinstance(index, data_type)

        # Cleanup and return
        fileio.rmtree(temp_dir)
        return index

    def save(self, index: T) -> None:
        """Save a llama-index GPT index to disk.

        Args:
            index: The index to save.
        """
        super().save(index)
        filepath = os.path.join(self.uri, DEFAULT_FILENAME)

        # Serialize to a local temporary file first, then copy it into
        # the artifact store.
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".json", delete=False
        ) as f:
            index.save_to_disk(save_path=f.name)
            # Copy it into the artifact store.
            fileio.copy(f.name, filepath)

        # The context manager already closed the file; just remove it.
        fileio.remove(f.name)
load(self, data_type)

Loads a llama-index GPT index from disk.

Parameters:

Name Type Description Default
data_type Type[T]

The type of the index.

required

Returns:

Type Description
T

The index.

Source code in zenml/integrations/llama_index/materializers/gpt_index_materializer.py
def load(self, data_type: Type[T]) -> T:
    """Loads a llama-index GPT index from disk.

    Args:
        data_type: The type of the index.

    Returns:
        The index.
    """
    super().load(data_type)
    filepath = os.path.join(self.uri, DEFAULT_FILENAME)

    # Stage the serialized index in a local temporary folder, since the
    # artifact store URI may not be a local path llama_index can read.
    temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
    temp_file = os.path.join(temp_dir, DEFAULT_FILENAME)

    # Copy from artifact store to the temporary file.
    fileio.copy(filepath, temp_file)

    # Load from the local copy. (Bug fix: the original copied into
    # `temp_file` but then loaded from the artifact-store path, leaving
    # the staged copy unused.)
    index = data_type.load_from_disk(save_path=temp_file)
    assert isinstance(index, data_type)

    # Cleanup and return
    fileio.rmtree(temp_dir)
    return index
save(self, index)

Save a llama-index GPT index to disk.

Parameters:

Name Type Description Default
index T

The index to save.

required
Source code in zenml/integrations/llama_index/materializers/gpt_index_materializer.py
def save(self, index: T) -> None:
    """Save a llama-index GPT index to disk.

    Args:
        index: The index to save.
    """
    super().save(index)
    filepath = os.path.join(self.uri, DEFAULT_FILENAME)

    # Serialize to a local temporary file first, then copy it into the
    # artifact store.
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".json", delete=False
    ) as f:
        index.save_to_disk(save_path=f.name)
        # Copy it into the artifact store.
        fileio.copy(f.name, filepath)

    # The context manager already closed the file (the original's extra
    # f.close() was a redundant no-op); just remove it.
    fileio.remove(f.name)