Milvus 使用文档

目录

1 安装

以下为安装 Docker Compose、启动 Milvus、开启密码认证并通过 Python 客户端连接的完整步骤:
# Install Docker Compose (standalone binary). /usr/local/bin is normally
# root-owned, so the download itself needs sudo as well.
sudo curl -SL https://github.com/docker/compose/releases/download/v2.30.3/docker-compose-linux-x86_64 -o /usr/local/bin/docker-compose
sudo chmod +x /usr/local/bin/docker-compose
sudo ln -s /usr/local/bin/docker-compose /usr/bin/docker-compose

# Download the Milvus standalone compose file
wget https://github.com/milvus-io/milvus/releases/download/v2.6.9/milvus-standalone-docker-compose.yml -O docker-compose.yml

# Start the containers. Use the standalone `docker-compose` binary installed
# above; `docker compose` (with a space) only works when the Compose CLI
# plugin is installed.
sudo docker-compose up -d
# Expected output:
#   Creating milvus-etcd ... done
#   Creating milvus-minio ... done
#   Creating milvus-standalone ... done

# Verify container status
sudo docker-compose ps

# Enable password authentication
# Add the following to the Milvus configuration file (nesting matters in YAML):
common:
  security:
    authorizationEnabled: true

# Python 客户端连接示例
from pymilvus import MilvusClient
# Connect to the Milvus server on localhost:19530. The token is passed in
# "user:password" form; "root:Milvus" is presumably the default root account,
# so change it for any non-demo deployment.
client = MilvusClient(uri='http://localhost:19530', token="root:Milvus")

2 使用案例

2.1 文档搜索(RAG)

以下示例使用 DashScope 文本向量模型(text-embedding-v4)生成向量,并写入 Milvus 进行检索:
import dashscope
from dashscope import TextEmbedding
from pymilvus import MilvusClient

# Placeholder: the DashScope API key is left empty in this example; set your
# own key here before running.
dashscope.api_key = ''
# Module-level Milvus client shared by the helper functions below. It connects
# without credentials.
# NOTE(review): if authorizationEnabled is turned on as described in the
# installation section, a token such as "root:Milvus" is presumably required
# here as well - confirm.
client = MilvusClient("http://127.0.0.1:19530")

def create_collection(collection_name):
    """Create a fresh collection named ``collection_name``.

    Any existing collection with the same name is dropped first, so calling
    this discards previously inserted data. The collection is created with
    1024-dimensional vectors, the "IP" metric type and "Bounded" consistency.

    Args:
        collection_name: Name of the collection to (re)create.
    """
    already_exists = client.has_collection(collection_name)
    if already_exists:
        client.drop_collection(collection_name)

    settings = {
        "collection_name": collection_name,
        "dimension": 1024,
        "metric_type": "IP",
        "consistency_level": "Bounded",
    }
    client.create_collection(**settings)

def emb_text(text):
    """Embed ``text`` with the DashScope "text-embedding-v4" model.

    Args:
        text: Input forwarded unchanged as the ``input`` argument of
            ``TextEmbedding.call`` (callers in this file pass a list of
            strings).

    Returns:
        The ``output['embeddings']`` value of the response. Callers in this
        file index it by position and read each item's ``'embedding'`` key.
    """
    response = TextEmbedding.call(
        model="text-embedding-v4",
        input=text,
        dimension=1024,
    )
    return response.output['embeddings']

def insert_data(collection_name, data):
    """Insert ``data`` into ``collection_name`` and return the client's result.

    Args:
        collection_name: Target collection name.
        data: Rows to insert, forwarded unchanged to ``client.insert``.

    Returns:
        Whatever ``client.insert`` returns.
    """
    insert_result = client.insert(collection_name=collection_name, data=data)
    return insert_result

def search_data(collection_name, query_text="深度学习", limit=2):
    """Search ``collection_name`` for the rows closest to ``query_text``.

    The query text is embedded with ``emb_text`` and used as the single query
    vector. The result is printed, as before, and is now also returned so
    that callers can use it programmatically.

    Args:
        collection_name: Collection to search.
        query_text: Text to search for. Defaults to the query that was
            previously hard-coded.
        limit: Maximum number of hits to return. Defaults to the previously
            hard-coded value of 2.

    Returns:
        The value returned by ``client.search``.
    """
    # emb_text takes a list of inputs; there is one input here, so take the
    # first (only) embedding item.
    query_vector = emb_text([query_text])[0]['embedding']
    res = client.search(
        collection_name=collection_name,
        data=[query_vector],
        limit=limit,
        output_fields=["text", "subject"],
    )
    print(res)
    return res

# Usage example
if __name__ == '__main__':
    create_collection("demo_collection")
    documents = [
        "人工智能是计算机科学的一个分支",
        "机器学习是实现人工智能的重要方法",
        "深度学习是机器学习的一个子领域"
    ]
    # Embed all documents in ONE request. Calling emb_text(documents) inside
    # the comprehension would re-embed the whole list once per row.
    embeddings = emb_text(documents)
    data = [
        {"id": i, "vector": embeddings[i]['embedding'],
         "text": document, "subject": "demo"}
        for i, document in enumerate(documents)
    ]
    insert_data("demo_collection", data)
    search_data("demo_collection")

2.2 图片检索(以图搜图)

以下示例使用 DashScope 多模态向量模型(tongyi-embedding-vision-plus)为图片生成向量,并在 Milvus 中实现以图搜图:
import base64
import os
import dashscope
from pymilvus import MilvusClient

def image_to_base64(image_path):
    """Read an image file and return its content as a base64 ``data:`` URI.

    The MIME type is guessed from the file extension. When the extension is
    missing or does not map to an image type, ``image/png`` is used, which is
    the value that was previously hard-coded for every file.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A string of the form ``data:<mime>;base64,<payload>``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    import mimetypes  # local import keeps this snippet self-contained

    guessed_type, _ = mimetypes.guess_type(image_path)
    if guessed_type and guessed_type.startswith("image/"):
        mime_type = guessed_type
    else:
        mime_type = "image/png"

    with open(image_path, "rb") as f:
        encoded = base64.b64encode(f.read()).decode('utf-8')
    return f"data:{mime_type};base64,{encoded}"

def emb_image(input):
    """Embed ``input`` with the DashScope "tongyi-embedding-vision-plus" model.

    Args:
        input: Forwarded unchanged as the ``input`` argument of
            ``dashscope.MultiModalEmbedding.call`` (callers in this file pass
            a one-element list such as ``[{"image": <data URI>}]``).

    Returns:
        The ``'embedding'`` value of the first item in the response's
        ``output['embeddings']`` list.
    """
    response = dashscope.MultiModalEmbedding.call(
        model="tongyi-embedding-vision-plus",
        input=input,
    )
    first_item = response.output['embeddings'][0]
    return first_item['embedding']

# Usage example
if __name__ == '__main__':
    client = MilvusClient("http://localhost:19530")

    # Start from a clean collection so the script can be re-run without
    # schema conflicts or duplicated rows.
    if client.has_collection("image_embeddings"):
        client.drop_collection("image_embeddings")
    client.create_collection(
        collection_name="image_embeddings",
        dimension=1152,
        metric_type="IP",
        # The rows inserted below carry no "id" field, so let Milvus generate
        # the primary keys; with the default auto_id=False the insert fails.
        auto_id=True,
    )

    # Insert image data
    for file in os.listdir("data"):
        if not file.endswith(".png"):
            continue
        embedding = emb_image([{"image": image_to_base64(f"data/{file}")}])
        client.insert("image_embeddings", {
            "vector": embedding,
            "filename": file
        })

    # Search example
    query_embedding = emb_image([{"image": image_to_base64("data/柯基1.png")}])
    results = client.search(
        collection_name="image_embeddings",
        data=[query_embedding],
        limit=2,
        output_fields=["filename"]
    )
    print(results)

2.3 以文搜图

以下示例使用 qwen3-vl-embedding 模型将文本转换为向量,并在图片向量集合中进行检索:
def emb_multimodal(input):
    """Embed ``input`` with the DashScope "qwen3-vl-embedding" model.

    Args:
        input: Forwarded unchanged as the ``input`` argument of
            ``dashscope.MultiModalEmbedding.call``.

    Returns:
        The ``'embedding'`` value of the first item in the response's
        ``output['embeddings']`` list; ``dimension=2560`` is requested.
    """
    response = dashscope.MultiModalEmbedding.call(
        model="qwen3-vl-embedding",
        input=input,
        dimension=2560,
    )
    first_item = response.output['embeddings'][0]
    return first_item['embedding']

# Usage example
if __name__ == '__main__':
    # Text-to-image search.
    # The text is passed as a {"text": ...} item, the same item-dict shape
    # the image example uses for {"image": ...}; a bare string is not a
    # valid input item.
    # NOTE(review): `client` is not created in this snippet - presumably the
    # MilvusClient from the image-search example is reused; confirm.
    # NOTE(review): the "image_embeddings" collection in the image-search
    # example is created with dimension=1152, while emb_multimodal requests
    # dimension=2560. The collection presumably has to be rebuilt with
    # vectors from this same model/dimension before this search can work -
    # confirm.
    text_embedding = emb_multimodal([{"text": "柯基犬"}])
    results = client.search(
        collection_name="image_embeddings",
        data=[text_embedding],
        limit=2,
        output_fields=["filename"]
    )
    print(results)

3 集成

本节内容将在后续更新;在此之前,可参考 LlamaIndex 文档中的 Milvus 集成示例:
https://docs.llamaindex.org.cn/en/stable/examples/vector_stores/MilvusIndexDemo/

Milvus 提供强大的数据建模功能,支持多种数据类型和属性模型,包括数字、字符、向量、数组、集合和JSON等。