Milvus 使用文档
目录
1 安装
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
| # 安装 Docker Compose curl -SL https://github.com/docker/compose/releases/download/v2.30.3/docker-compose-linux-x86_64 -o /usr/local/bin/docker-compose sudo chmod +x /usr/local/bin/docker-compose sudo ln -s /usr/local/bin/docker-compose /usr/bin/docker-compose
# 下载 Milvus 配置文件 wget https://github.com/milvus-io/milvus/releases/download/v2.6.9/milvus-standalone-docker-compose.yml -O docker-compose.yml
# 启动容器 sudo docker compose up -d
# 验证容器状态 Creating milvus-etcd ... done Creating milvus-minio ... done Creating milvus-standalone ... done
# 添加密码功能 # 修改配置文件添加: common: security: authorizationEnabled: true
# Python 客户端连接示例 from pymilvus import MilvusClient client = MilvusClient( uri='http://localhost:19530', token="root:Milvus" )
|
2 使用案例
2.1 文档搜索(RAG)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52
| import dashscope from dashscope import TextEmbedding from pymilvus import MilvusClient
dashscope.api_key = '' client = MilvusClient("http://127.0.0.1:19530")
def create_collection(collection_name): if client.has_collection(collection_name): client.drop_collection(collection_name) client.create_collection( collection_name=collection_name, dimension=1024, metric_type="IP", consistency_level="Bounded" )
def emb_text(text): return TextEmbedding.call( model="text-embedding-v4", input=text, dimension=1024 ).output['embeddings']
def insert_data(collection_name, data): return client.insert(collection_name=collection_name, data=data)
def search_data(collection_name): query_vectors = emb_text(["深度学习"])[0]['embedding'] res = client.search( collection_name=collection_name, data=[query_vectors], limit=2, output_fields=["text", "subject"] ) print(res)
if __name__ == '__main__': create_collection("demo_collection") documents = [ "人工智能是计算机科学的一个分支", "机器学习是实现人工智能的重要方法", "深度学习是机器学习的一个子领域" ] data = [ {"id": i, "vector": emb_text(documents)[i]['embedding'], "text": documents[i], "subject": "demo"} for i in range(len(documents)) ] insert_data("demo_collection", data) search_data("demo_collection")
|
2.2 图片检索(以图搜图)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42
| import base64 import os import dashscope from pymilvus import MilvusClient
def image_to_base64(image_path): with open(image_path, "rb") as f: return f"data:image/png;base64,{base64.b64encode(f.read()).decode('utf-8')}"
def emb_image(input): return dashscope.MultiModalEmbedding.call( model="tongyi-embedding-vision-plus", input=input ).output['embeddings'][0]['embedding']
if __name__ == '__main__': client = MilvusClient("http://localhost:19530") client.create_collection( collection_name="image_embeddings", dimension=1152, metric_type="IP" ) for file in os.listdir("data"): if file.endswith(".png"): embedding = emb_image([{"image": image_to_base64(f"data/{file}")}]) client.insert("image_embeddings", { "vector": embedding, "filename": file }) query_embedding = emb_image([{"image": image_to_base64("data/柯基1.png")}]) results = client.search( collection_name="image_embeddings", data=[query_embedding], limit=2, output_fields=["filename"] ) print(results)
|
2.3 以文搜图
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
| def emb_multimodal(input): return dashscope.MultiModalEmbedding.call( model="qwen3-vl-embedding", input=input, dimension=2560 ).output['embeddings'][0]['embedding']
if __name__ == '__main__': text_embedding = emb_multimodal(["柯基犬"]) results = client.search( collection_name="image_embeddings", data=[text_embedding], limit=2, output_fields=["filename"] ) print(results)
|
3 集成
后续更新计划,可参考官方集成文档:
https://docs.llamaindex.org.cn/en/stable/examples/vector_stores/MilvusIndexDemo/
Milvus 提供强大的数据建模功能,支持多种数据类型和属性模型,包括数字、字符、向量、数组、集合和JSON等。