Source code for towhee.runtime.hub_ops.ann_insert

# Copyright 2023 Zilliz. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Any
from towhee.runtime.factory import HubOp


class AnnInsert:
    """
    The ANN Insert Operator is used to insert embeddings and create ANN indexes for fast similarity searches.

    Each class attribute is a ``HubOp`` handle for one hub operator in the
    ``ann_insert`` namespace; the string literal following an attribute is
    its documentation.
    """

    faiss_index: HubOp = HubOp('ann_insert.faiss_index')
    """
    Insert data into faiss. Only for local test. If you want to use a vector database in a production environment,
    you can use Milvus(https://github.com/milvus-io/milvus).

    __init__(self, data_dir: str, dimension: int = None):
        data_dir(`str`):
            Path to store data.
        dimension(`int`):
            The dimension of embedding.

    __call__(self, vec: 'ndarray', *args):
        vec(`ndarray`):
            embedding
        *args(`Any`):
            meta data.

    Example:

    .. code-block:: python

        from glob import glob
        from towhee import ops, pipe

        p = (
            pipe.input('file_name')
            .map('file_name', 'img', ops.image_decode.cv2())
            .map('img', 'vec', ops.image_text_embedding.clip(model_name='clip_vit_base_patch32', modality='image'))
            .map('vec', 'vec', ops.towhee.np_normalize())
            .map(('vec', 'file_name'), (), ops.ann_insert.faiss_index('./faiss', 512))
            .output()
        )

        fs = glob('./images/*.jpg')

        for f in fs:
            p(f)

        # Ensure data is written to disk.
        p.flush()
    """

    milvus_client: HubOp = HubOp('ann_insert.milvus_client')
    """
    Insert data into Milvus collections. Please make sure you have
    `created Milvus Collection <https://milvus.io/docs/create_collection.md>`_ before loading the data.

    __init__(self, host: str, port: int, collection_name: str, user: str = None, password: str = None):
        host(`str`):
            The host for Milvus.
        port(`int`):
            The port for Milvus.
        collection_name(`str`):
            The collection name for Milvus.
        user(`str`):
            The user for Zilliz Cloud, defaults to None.
        password(`str`):
            The password for Zilliz Cloud, defaults to None.

    __call__(self, *data) -> 'pymilvus.MutationResult':
        data(`list`):
            The data to insert into milvus.

    Example:

    .. code-block:: python

        import towhee
        from towhee import ops

        p = (
            towhee.pipe.input('vec')
            .map('vec', (), ops.ann_insert.milvus_client(host='127.0.0.1', port=19530, collection_name='test_collection'))
            .output()
        )

        p(vec)
    """

    # NOTE: the attribute is exposed as ``milvus_multi_collections`` but is
    # bound to the hub operator named 'ann_insert.osschat_milvus'.
    milvus_multi_collections: HubOp = HubOp('ann_insert.osschat_milvus')
    """
    `milvus_multi_collections <https://towhee.io/ann-insert/osschat-milvus>`_
    A client that can access multiple collections.

    __init__(self, host: str, port: int, user: str = None, password: str = None):
        host(`str`):
            The host for Milvus.
        port(`int`):
            The port for Milvus.
        user(`str`):
            The user for Zilliz Cloud, defaults to None.
        password(`str`):
            The password for Zilliz Cloud, defaults to None.

    __call__(self, collection_name: str, *data) -> 'pymilvus.MutationResult':
        collection_name(`str`):
            collection_name
        data(`list`):
            The data to insert into milvus.

    Example:

    .. code-block:: python

        from towhee import ops, pipe

        p = (
            pipe.input('collection_name', 'vec')
            .map(('collection_name', 'vec'), (), ops.ann_insert.milvus_multi_collections(host='127.0.0.1', port=19530))
            .output()
        )

        # The pipeline declares two inputs, so both must be supplied.
        p('test_collection', vec)
    """

    def __call__(self, *args: Any, **kwds: Any) -> Any:
        """
        Forward the call to ``HubOp('towhee.ann_insert')``.

        Args:
            *args (`Any`):
                Positional arguments passed through unchanged.
            **kwds (`Any`):
                Keyword arguments passed through unchanged.

        Returns:
            Whatever calling ``HubOp('towhee.ann_insert')`` with these arguments returns.
        """
        # A fresh HubOp handle is built on every call, as in the original.
        return HubOp('towhee.ann_insert')(*args, **kwds)