快速开始
本指南介绍如何快速创建 Zilliz Cloud 集群并进行 CRUD 操作。
安装 SDK
Zilliz Cloud 兼容各类型的 Milvus SDK 和 RESTful API。您可以直接调用 RESTful API 或安装以下任意 SDK:
创建集群
您可以通过 RESTful API 或 Zilliz Cloud 控制台创建集群。
以下代码示例展示如何通过 RESTful API 创建一个 Free 集群。
curl --request POST \
--url "https://api.cloud.zilliz.com.cn/v2/clusters/createFree" \
--header "Authorization: Bearer ${API_KEY}" \
--header "Accept: application/json" \
--header "Content-Type: application/json" \
--data-raw '{
"clusterName": "Free-01",
"projectId": "proj-xxxxxxxxxxxxxxxxxxxxxx",
"regionId": "gcp-us-west1"
}'
# {
# "code": 0,
# "data": {
# "clusterId": "inxx-xxxxxxxxxxxxxxx",
# "username": "db_xxxxxxxx",
# "password": "*************",
# "prompt": "successfully submitted, cluster is being created. You can access data about the creation progress and status of your cluster by DescribeCluster API. Once the cluster status is RUNNING, you may access your vector database using the SDK with the admin account and the initial password you specified."
# }
# }
以下代码示例展示如何通过 RESTful API 创建一个 Serverless 集群。
curl --request POST \
--url "https://api.cloud.zilliz.com.cn/v2/clusters/createServerless" \
--header "Authorization: Bearer ${API_KEY}" \
--header "Accept: application/json" \
--header "Content-Type: application/json" \
--data-raw '{
"clusterName": "Serverless-05",
"projectId": "proj-xxxxxxxxxxxxxxxxxxxxxxx",
"regionId": "gcp-us-west1"
}'
# {
# "code": 0,
# "data": {
# "clusterId": "inxx-xxxxxxxxxxxxxxx",
# "username": "db_xxxxxxxx",
# "password": "*************",
# "prompt": "successfully submitted, cluster is being created. You can access data about the creation progress and status of your cluster by DescribeCluster API. Once the cluster status is RUNNING, you may access your vector database using the SDK with the admin account and the initial password you specified."
# }
# }
以下代码示例展示如何通过 RESTful API 创建一个集群。
curl --request POST \
--url "https://api.cloud.zilliz.com.cn/v2/clusters/createDedicated" \
--header "Authorization: Bearer ${API_KEY}" \
--header "Accept: application/json" \
--header "Content-Type: application/json" \
--data-raw '{
"clusterName": "Cluster-05",
"projectId": "proj-xxxxxxxxxxxxxxxxxxxxxx",
"regionId": "aws-us-west-2",
"plan": "Standard",
"cuType": "Performance-optimized",
"cuSize": 1
}'
# {
# "code": 0,
# "data": {
# "clusterId": "inxx-xxxxxxxxxxxxxxx",
# "username": "db_admin",
# "password": "*************",
# "prompt": "successfully submitted, cluster is being created. You can access data about the creation progress and status of your cluster by DescribeCluster API. Once the cluster status is RUNNING, you may access your vector database using the SDK with the admin account and the initial password you specified."
# }
# }
在创建 Dedicated 集群之前,请先添加支付方式。
要获取云区域、API 密钥和项目 ID,请参阅Zilliz Cloud 控制台。有关如何创建集群,请参阅创建集群。
集群启动后,系统将提示您一次性下载集群凭证,请将凭证保存在安全位置,以便后续连接集群时使用。
另外,您也可以创建 API 密钥,用以连接集群,无需使用集群凭证。
连接 Zilliz Cloud 集群Milvus
获取集群凭证或 API 密钥后,您可以通过以下示例代码连接到集群。
- Python
- Java
- NodeJS
from pymilvus import MilvusClient, DataType
CLUSTER_ENDPOINT = "YOUR_CLUSTER_ENDPOINT"
TOKEN = "YOUR_CLUSTER_TOKEN"
# 1. Set up a Milvus client
client = MilvusClient(
uri=CLUSTER_ENDPOINT,
token=TOKEN
)
import io.milvus.v2.client.MilvusClientV2;
import io.milvus.v2.client.ConnectConfig;
String CLUSTER_ENDPOINT = "YOUR_CLUSTER_ENDPOINT";
String TOKEN = "YOUR_CLUSTER_TOKEN";
// 1. Connect to Milvus server
ConnectConfig connectConfig = ConnectConfig.builder()
.uri(CLUSTER_ENDPOINT)
.token(TOKEN)
.build();
MilvusClientV2 client = new MilvusClientV2(connectConfig);
const { MilvusClient, DataType, sleep } = require("@zilliz/milvus2-sdk-node")
const address = "YOUR_CLUSTER_ENDPOINT"
const token = "YOUR_CLUSTER_TOKEN"
// 1. Connect to the cluster
const client = new MilvusClient({address, token})
由于编程语言的差异,使用 Java 或 Node.js 时,需将代码置于主函数内。
创建 collection
在 Zilliz Cloud, 您需要将向量数据存储到 collection 中。同一个 collection 中的向量数据具有相同的维度和相似度测量指标。您可以通过以下几种方式创建 collection。
快速创建
要快速创建 collection,您只需指定 collection 名称和向量维度。
- Python
- Java
- NodeJS
- cURL
# 2. Create a collection in quick setup mode
client.create_collection(
collection_name="quick_setup",
dimension=5
)
import io.milvus.v2.service.collection.request.CreateCollectionReq;
// 2. Create a collection in quick setup mode
CreateCollectionReq quickSetupReq = CreateCollectionReq.builder()
.collectionName("quick_setup")
.dimension(5)
.build();
client.createCollection(quickSetupReq);
// 2. Create a collection
await client.createCollection({
collection_name: "quick_setup",
dimension: 5,
});
COLLECTION_NAME="quick_setup"
curl --request POST \
--url "${CLUSTER_ENDPOINT}/v2/vectordb/collections/create" \
--header "Authorization: Bearer ${TOKEN}" \
--header "Accept: application/json" \
--header "Content-Type: application/json" \
-d '{
"collectionName": "quick_setup",
"dimension": 5
}'
# {"code":200,"data":{}}
在以上配置中:
-
主键和向量字段均采用默认名称(即 id 和 vector);
-
默认采用 COSINE 度量类型;
-
主键字段仅接受整数型,且不会自动递增;
-
保留 JSON 字段 $meta 将用于存储未在 schema 中定义的其他字段和字段值。
自定义创建
自定义 collection 的 schema 时,您可以详细定义每个字段的属性,如字段名称、数据类型和其他属性。 通过自定义创建方式,您可以自由定制 schema 和索引参数。
- Python
- Java
- NodeJS
- cURL
# 3. Create a collection in customized setup mode
# 3.1. Create schema
schema = MilvusClient.create_schema(
auto_id=False,
enable_dynamic_field=True,
)
# 3.2. Add fields to schema
schema.add_field(field_name="my_id", datatype=DataType.INT64, is_primary=True)
schema.add_field(field_name="my_vector", datatype=DataType.FLOAT_VECTOR, dim=5)
# 3.3. Prepare index parameters
index_params = client.prepare_index_params()
# 3.4. Add indexes
index_params.add_index(
field_name="my_id"
)
index_params.add_index(
field_name="my_vector",
index_type="AUTOINDEX",
metric_type="IP"
)
# 3.5. Create a collection
client.create_collection(
collection_name="customized_setup",
schema=schema,
index_params=index_params
)
import io.milvus.v2.service.collection.request.AddFieldReq;
import io.milvus.v2.common.DataType;
import io.milvus.v2.common.IndexParam;
import io.milvus.v2.service.collection.request.CreateCollectionReq;
// 3.1 Create schema
CreateCollectionReq.CollectionSchema schema = client.createSchema();
// 3.2 Add fields to schema
AddFieldReq myId = AddFieldReq.builder()
.fieldName("my_id")
.dataType(DataType.Int64)
.isPrimaryKey(true)
.autoID(false)
.build();
schema.addField(myId);
AddFieldReq myVector = AddFieldReq.builder()
.fieldName("my_vector")
.dataType(DataType.FloatVector)
.dimension(5)
.build();
schema.addField(myVector);
// 3.3 Prepare index parameters
IndexParam indexParamForIdField = IndexParam.builder()
.fieldName("my_id")
.indexType(IndexParam.IndexType.STL_SORT)
.build();
IndexParam indexParamForVectorField = IndexParam.builder()
.fieldName("my_vector")
.indexType(IndexParam.IndexType.AUTOINDEX)
.metricType(IndexParam.MetricType.IP)
.build();
List<IndexParam> indexParams = new ArrayList<>();
indexParams.add(indexParamForIdField);
indexParams.add(indexParamForVectorField);
// 3.4 Create a collection with schema and index parameters
CreateCollectionReq customizedSetupReq = CreateCollectionReq.builder()
.collectionName("customized_setup")
.collectionSchema(schema)
.indexParams(indexParams)
.build();
client.createCollection(customizedSetupReq);
// 3. Create a collection in customized setup mode
// 3.1 Define fields
const fields = [
{
name: "my_id",
data_type: DataType.Int64,
is_primary_key: true,
auto_id: false
},
{
name: "my_vector",
data_type: DataType.FloatVector,
dim: 5
},
]
// 3.2 Prepare index parameters
const index_params = [{
field_name: "my_vector",
index_type: "IVF_FLAT",
metric_type: "IP",
params: { nlist: 1024}
}]
// 3.3 Create a collection with fields and index parameters
await client.createCollection({
collection_name: "customized_setup_1",
fields: fields,
index_params: index_params,
})
COLLECTION_NAME="customized_setup"
curl --request POST \
--url "${CLUSTER_ENDPOINT}/v2/vectordb/collections/create" \
--header "Authorization: Bearer ${TOKEN}" \
--header "Accept: application/json" \
--header "Content-Type: application/json" \
--d '{
"collectionName": "custom_setup",
"schema": {
"autoId": false,
"enabledDynamicField": false,
"fields": [
{
"fieldName": "my_id",
"dataType": "Int64",
"isPrimary": true
},
{
"fieldName": "my_vector",
"dataType": "FloatVector",
"elementTypeParams": {
"dim": "5"
}
}
]
}
}'
# {"code":200,"data":{}}
通过以上代码,您可以自由定义 collection 的各项属性,包括 schema 和索引参数等。
-
Schema
Schema 决定了 collection 的结构。除了上述代码添加的预定义字段外,您还可以启用或禁用以下功能:
-
Auto ID
是否自动递增 collection 的主键值。
-
Dynamic Field
是否使用保留 JSON 字段 $meta 来存储在 schema 中未定义的字段和字段值。
有关更多信息,请参阅Schema。
-
-
索引参数
索引参数将定义 Zilliz Cloud 如何处理 collection 中的数据。您可以为字段设置特定的索引类型和度量类型。
-
向量字段可以选择 AUTOINDEX 作为索引类型,并采用 COSINE、L2或 IP 作为度量类型(
metric_type
)。 -
标量字段,如主键字段,整数型使用 TRIE,字符串类型使用 STL_SORT。
有关更多信息,请参阅 AUTOINDEX。
-
通过上述代码创建的 collection 将自动加载(load)。如需管理非自动加载的 collection,请参阅管理 Collection (SDK)。
通过 RESTful API 创建的 collection 会自动完成加载(load)。
插入数据
无论采用何种方式创建,collection 都将被索引并加载(load)完毕。准备就绪后,可以开始插入示例数据。
- Python
- Java
- NodeJS
- cURL
# 4. Insert data into the collection
# 4.1. Prepare data
data=[
{"id": 0, "vector": [0.3580376395471989, -0.6023495712049978, 0.18414012509913835, -0.26286205330961354, 0.9029438446296592], "color": "pink_8682"},
{"id": 1, "vector": [0.19886812562848388, 0.06023560599112088, 0.6976963061752597, 0.2614474506242501, 0.838729485096104], "color": "red_7025"},
{"id": 2, "vector": [0.43742130801983836, -0.5597502546264526, 0.6457887650909682, 0.7894058910881185, 0.20785793220625592], "color": "orange_6781"},
{"id": 3, "vector": [0.3172005263489739, 0.9719044792798428, -0.36981146090600725, -0.4860894583077995, 0.95791889146345], "color": "pink_9298"},
{"id": 4, "vector": [0.4452349528804562, -0.8757026943054742, 0.8220779437047674, 0.46406290649483184, 0.30337481143159106], "color": "red_4794"},
{"id": 5, "vector": [0.985825131989184, -0.8144651566660419, 0.6299267002202009, 0.1206906911183383, -0.1446277761879955], "color": "yellow_4222"},
{"id": 6, "vector": [0.8371977790571115, -0.015764369584852833, -0.31062937026679327, -0.562666951622192, -0.8984947637863987], "color": "red_9392"},
{"id": 7, "vector": [-0.33445148015177995, -0.2567135004164067, 0.8987539745369246, 0.9402995886420709, 0.5378064918413052], "color": "grey_8510"},
{"id": 8, "vector": [0.39524717779832685, 0.4000257286739164, -0.5890507376891594, -0.8650502298996872, -0.6140360785406336], "color": "white_9381"},
{"id": 9, "vector": [0.5718280481994695, 0.24070317428066512, -0.3737913482606834, -0.06726932177492717, -0.6980531615588608], "color": "purple_4976"}
]
# 4.2. Insert data
res = client.insert(
collection_name="quick_setup",
data=data
)
print(res)
# Output
#
# {
# "insert_count": 10,
# "ids": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
# }
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Arrays;
import com.alibaba.fastjson.JSONObject;
import io.milvus.v2.service.vector.request.InsertReq;
import io.milvus.v2.service.vector.response.InsertResp;
// 4. Insert data into the collection
// 4.1. Prepare data
List<JSONObject> insertData = Arrays.asList(
new JSONObject(Map.of("id", 0L, "vector", Arrays.asList(0.3580376395471989f, -0.6023495712049978f, 0.18414012509913835f, -0.26286205330961354f, 0.9029438446296592f), "color", "pink_8682")),
new JSONObject(Map.of("id", 1L, "vector", Arrays.asList(0.19886812562848388f, 0.06023560599112088f, 0.6976963061752597f, 0.2614474506242501f, 0.838729485096104f), "color", "red_7025")),
new JSONObject(Map.of("id", 2L, "vector", Arrays.asList(0.43742130801983836f, -0.5597502546264526f, 0.6457887650909682f, 0.7894058910881185f, 0.20785793220625592f), "color", "orange_6781")),
new JSONObject(Map.of("id", 3L, "vector", Arrays.asList(0.3172005263489739f, 0.9719044792798428f, -0.36981146090600725f, -0.4860894583077995f, 0.95791889146345f), "color", "pink_9298")),
new JSONObject(Map.of("id", 4L, "vector", Arrays.asList(0.4452349528804562f, -0.8757026943054742f, 0.8220779437047674f, 0.46406290649483184f, 0.30337481143159106f), "color", "red_4794")),
new JSONObject(Map.of("id", 5L, "vector", Arrays.asList(0.985825131989184f, -0.8144651566660419f, 0.6299267002202009f, 0.1206906911183383f, -0.1446277761879955f), "color", "yellow_4222")),
new JSONObject(Map.of("id", 6L, "vector", Arrays.asList(0.8371977790571115f, -0.015764369584852833f, -0.31062937026679327f, -0.562666951622192f, -0.8984947637863987f), "color", "red_9392")),
new JSONObject(Map.of("id", 7L, "vector", Arrays.asList(-0.33445148015177995f, -0.2567135004164067f, 0.8987539745369246f, 0.9402995886420709f, 0.5378064918413052f), "color", "grey_8510")),
new JSONObject(Map.of("id", 8L, "vector", Arrays.asList(0.39524717779832685f, 0.4000257286739164f, -0.5890507376891594f, -0.8650502298996872f, -0.6140360785406336f), "color", "white_9381")),
new JSONObject(Map.of("id", 9L, "vector", Arrays.asList(0.5718280481994695f, 0.24070317428066512f, -0.3737913482606834f, -0.06726932177492717f, -0.6980531615588608f), "color", "purple_4976"))
);
// 4.2. Insert data
InsertReq insertReq = InsertReq.builder()
.collectionName("quick_setup")
.data(insertData)
.build();
InsertResp res = client.insert(insertReq);
System.out.println(JSONObject.toJSON(res));
// Output:
// {"insertCnt": 10}
// 4. Insert data into the collection
var data = [
{id: 0, vector: [0.3580376395471989, -0.6023495712049978, 0.18414012509913835, -0.26286205330961354, 0.9029438446296592], color: "pink_8682"},
{id: 1, vector: [0.19886812562848388, 0.06023560599112088, 0.6976963061752597, 0.2614474506242501, 0.838729485096104], color: "red_7025"},
{id: 2, vector: [0.43742130801983836, -0.5597502546264526, 0.6457887650909682, 0.7894058910881185, 0.20785793220625592], color: "orange_6781"},
{id: 3, vector: [0.3172005263489739, 0.9719044792798428, -0.36981146090600725, -0.4860894583077995, 0.95791889146345], color: "pink_9298"},
{id: 4, vector: [0.4452349528804562, -0.8757026943054742, 0.8220779437047674, 0.46406290649483184, 0.30337481143159106], color: "red_4794"},
{id: 5, vector: [0.985825131989184, -0.8144651566660419, 0.6299267002202009, 0.1206906911183383, -0.1446277761879955], color: "yellow_4222"},
{id: 6, vector: [0.8371977790571115, -0.015764369584852833, -0.31062937026679327, -0.562666951622192, -0.8984947637863987], color: "red_9392"},
{id: 7, vector: [-0.33445148015177995, -0.2567135004164067, 0.8987539745369246, 0.9402995886420709, 0.5378064918413052], color: "grey_8510"},
{id: 8, vector: [0.39524717779832685, 0.4000257286739164, -0.5890507376891594, -0.8650502298996872, -0.6140360785406336], color: "white_9381"},
{id: 9, vector: [0.5718280481994695, 0.24070317428066512, -0.3737913482606834, -0.06726932177492717, -0.6980531615588608], color: "purple_4976"}
]
res = await client.insert({
collection_name: "quick_setup",
data: data
})
console.log(res.insert_cnt)
// Output
//
// 10
curl -s --request POST \
--url "${CLUSTER_ENDPOINT}/v2/vectordb/entities/insert" \
--header "Authorization: Bearer ${TOKEN}" \
--header "Accept: application/json" \
--header "Content-Type: application/json" \
--d '{
"collectionName": "quick_setup",
"data": [
{"vector": [0.3847391566891949, -0.5163308707041789, -0.5295937262122905, -0.3592193314357348, 0.9108593166893231], "color": "grey_4070"},
{"vector": [-0.3909198248479646, -0.8726174312444843, 0.4981267572657442, -0.9392508698102204, -0.5470572556090092], "color": "black_3737"},
{"vector": [-0.9098169905660276, -0.9307025336058208, -0.5308685343695865, -0.3852032359431963, -0.8050806646961366], "color": "yellow_7436"},
{"vector": [-0.05064204615748724, 0.6058571389881378, 0.26812302147792155, 0.4862225881265785, -0.27042586524166445], "color": "grey_9883"},
{"vector": [-0.8610792440629793, 0.5278969698864726, 0.09065723848982965, -0.8685651142668274, 0.5912780986996793], "color": "green_8111"},
{"vector": [0.4814454540587043, -0.23573937400668377, -0.14938260011601723, 0.08275006479687019, 0.6726732239961157], "color": "orange_2725"},
{"vector": [0.9763298348098068, 0.5777919290849443, 0.9579310732153326, 0.8951091168874232, 0.46917481926682525], "color": "black_6073"},
{"vector": [0.326134221411539, 0.6870356809753577, 0.7977120714123429, 0.4305198158670587, -0.14894148480426983], "color": "purple_1285"},
{"vector": [0.8709056428858379, 0.021264532993509055, -0.8042932327188321, -0.007299919034885249, 0.14411861700299666], "color": "green_3127"},
{"vector": [-0.8182282159972083, -0.7882247281939101, -0.1870871133115657, 0.07914806834708976, 0.9825978431531959], "color": "blue_6372"}
]
}'
# {
# "code": 200,
# "data": {
# "insertCount": 10,
# "insertIds": [
# "448985546440864743",
# "448985546440864744",
# "448985546440864745",
# "448985546440864746",
# "448985546440864747",
# "448985546440864748",
# "448985546440864749",
# "448985546440864750",
# "448985546440864751",
# "448985546440864752"
# ]
# }
# }
假设您已通过快速创建的方式完成了 collection 创建。通过以上代码:
-
插入的数据为字典列表,每个字典代表一条数据记录,即 entity。
-
每个字典包含一个名为 color 的非 schema 定义字段。
-
每个字典包含与预定义和动态字段相对应的键值。
通过 RESTful API 创建的 collection 启用了 AutoID,因此插入数据时应跳过主键字段。
增加数据量
如果您希望仅基于已插入的 10 个 entity 进行 search 或 query,则可以跳过此步骤。为帮助您更深入了解 Zilliz Cloud 集群或 的搜索性能,可使用以下代码片段向 collection 中随机添加更多 entity。
- Python
- Java
- NodeJS
- cURL
import time
# 5. Insert more data into the collection
# 5.1. Prepare data
colors = ["green", "blue", "yellow", "red", "black", "white", "purple", "pink", "orange", "brown", "grey"]
data = [ {
"id": i,
"vector": [ random.uniform(-1, 1) for _ in range(5) ],
"color": f"{random.choice(colors)}_{str(random.randint(1000, 9999))}"
} for i in range(1000) ]
# 5.2. Insert data
res = client.insert(
collection_name="quick_setup",
data=data[10:]
)
print(res)
# Output
#
# {
# "insert_count": 990
# }
# Wait for a while
time.sleep(5)
// 5. Insert more data for the sake of search
// 5.1 Prepare data
insertData = new ArrayList<>();
List<String> colors = Arrays.asList("green", "blue", "yellow", "red", "black", "white", "purple", "pink", "orange", "brown", "grey");
for (int i = 10; i < 1000; i++) {
Random rand = new Random();
JSONObject row = new JSONObject();
row.put("id", Long.valueOf(i));
row.put("vector", Arrays.asList(rand.nextFloat(), rand.nextFloat(), rand.nextFloat(), rand.nextFloat(), rand.nextFloat()));
row.put("color", colors.get(rand.nextInt(colors.size()-1)) + '_' + rand.nextInt(1000));
insertData.add(row);
}
// 5.2 Insert data
insertReq = InsertReq.builder()
.collectionName("quick_setup")
.data(insertData)
.build();
res = client.insert(insertReq);
System.out.println(JSONObject.toJSON(res));
// Output:
// {"insertCnt": 990}
// 5.3 Wait for a while to ensure data is indexed
Thread.sleep(5000);
// 5. Insert more records
data = []
colors = ["green", "blue", "yellow", "red", "black", "white", "purple", "pink", "orange", "brown", "grey"]
for (i =5; i < 1000; i++) {
vector = [(Math.random() * (0.99 - 0.01) + 0.01), (Math.random() * (0.99 - 0.01) + 0.01), (Math.random() * (0.99 - 0.01) + 0.01), (Math.random() * (0.99 - 0.01) + 0.01), (Math.random() * (0.99 - 0.01) + 0.01)]
color = colors[Math.floor(Math.random() * colors.length)] + "_" + Math.floor(Math.random() * (9999 - 1000) + 1000)
data.push({id: i, vector: vector, color: color})
}
res = await client.insert({
collection_name: "quick_setup",
data: data
})
console.log(res.insert_cnt)
// Output
//
// 995
await sleep(5000)
- Bash Code
- Code for Generating Random Floats
# 7. Insert more fields
for i in {1..10}; do
DATA=$(python random_floats.py)
curl --request POST \
--url "${CLUSTER_ENDPOINT}/v2/vectordb/entities/insert" \
--header "Authorization: Bearer ${TOKEN}" \
--header "Accept: application/json" \
--header "Content-Type: application/json" \
--data-raw "{
\"collectionName\": \"quick_setup\",
\"data\": ${DATA}
}"
sleep 1
done
# The above script inserts 1,000 records in an iteration of 10 times.
# The following is the response of a single request
# {
# "code": 200,
# "data": {
# "insertCount": 100,
# "insertIds": [
# "448985546440864754",
# "448985546440864755",
# "448985546440864756",
# "448985546440864757",
# "448985546440864758",
# "448985546440864759",
# "448985546440864760",
# "448985546440864761",
# "448985546440864762",
# "448985546440864763",
# (there are 90 more insertIds)
# ]
# }
# }
# random_floats.py
import random, json
from sys import argv
if __name__ == '__main__':
data = []
colors = ['red', 'green', 'blue', 'yellow', 'orange', 'purple']
for i in range(100):
data.append({
'vector': [random.uniform(-1, 1) for _ in range(5)],
'color': random.choice(colors) + '_' + str(random.randint(1000, 9999))
})
print(json.dumps(data))
每次调用插入RESTful API,最多可批量插入 100 个 entity。
相似性搜索(search)
您可以基于一条或多条向量 embedding 执行相似性搜索(search)。
插入操作是异步的,立即进行搜索可能得到空结果集。建议插入数据后稍等几秒钟。
Single-vector search
query_vectors
变量是一个列表,包含代表 5 维向量 embedding 的浮点数子列表。
- Python
- Java
- NodeJS
- cURL
# 6.1. Prepare query vectors
query_vectors = [
[0.041732933, 0.013779674, -0.027564144, -0.013061441, 0.009748648]
]
# 6.2. Start search
res = client.search(
collection_name="quick_setup", # target collection
data=query_vectors, # query vectors
limit=3, # number of returned entities
)
print(res)
# Output
#
# [
# [
# {
# "id": 551,
# "distance": 0.08821295201778412,
# "entity": {}
# },
# {
# "id": 296,
# "distance": 0.0800950899720192,
# "entity": {}
# },
# {
# "id": 43,
# "distance": 0.07794742286205292,
# "entity": {}
# }
# ]
# ]
import io.milvus.v2.service.vector.request.SearchReq;
import io.milvus.v2.service.vector.response.SearchResp;
// 6. Search with a single vector
List<List<Float>> singleVectorSearchData = new ArrayList<>();
singleVectorSearchData.add(Arrays.asList(0.041732933f, 0.013779674f, -0.027564144f, -0.013061441f, 0.009748648f));
SearchReq searchReq = SearchReq.builder()
.collectionName("quick_setup")
.data(singleVectorSearchData)
.topK(3)
.build();
SearchResp singleVectorSearchRes = client.search(searchReq);
System.out.println(JSONObject.toJSON(singleVectorSearchRes));
// Output:
// {"searchResults": [[
// {
// "distance": 0.77929854,
// "id": 90,
// "entity": {}
// },
// {
// "distance": 0.76438016,
// "id": 252,
// "entity": {}
// },
// {
// "distance": 0.76274073,
// "id": 727,
// "entity": {}
// }
// ]]}
// 6. Search with a single vector
const query_vector = [0.3580376395471989, -0.6023495712049978, 0.18414012509913835, -0.26286205330961354, 0.9029438446296592]
res = await client.search({
collection_name: "quick_setup",
vectors: query_vector,
limit: 5,
})
console.log(res.results)
// Output
//
// [
// { score: 1, id: '0' },
// { score: 0.749187171459198, id: '160' },
// { score: 0.7374353408813477, id: '109' },
// { score: 0.7352343797683716, id: '120' },
// { score: 0.7103434205055237, id: '721' }
// ]
# 8. Conduct a single vector search
curl --request POST \
--url "${CLUSTER_ENDPOINT}/v2/vectordb/entities/search" \
--header "Authorization: Bearer ${TOKEN}" \
--header "Accept: application/json" \
--header "Content-Type: application/json" \
-d '{
"collectionName": "quick_setup",
"data": [
[0.3847391566891949, -0.5163308707041789, -0.5295937262122905, -0.3592193314357348, 0.9108593166893231]
],
"annsField": "vector",
"limit": 3
}'
# {
# "code": 200,
# "data": [
# {
# "distance": 0,
# "id": 448985546440864743
# },
# {
# "distance": 8.83172,
# "id": 448985546440865160
# },
# {
# "distance": 10.112098,
# "id": 448985546440864927
# }
# ]
# }
输出结果是一个列表,内含三个字典型的子列表,字典中包含返回 entity ID 和距离。
Bulk-vector search
您也可以在 query_vectors
中添加多个向量 embedding 进行 bulk-vector search。
- Python
- Java
- NodeJS
- cURL
# 7. Search with multiple vectors
# 7.1. Prepare query vectors
query_vectors = [
[0.041732933, 0.013779674, -0.027564144, -0.013061441, 0.009748648],
[0.0039737443, 0.003020432, -0.0006188639, 0.03913546, -0.00089768134]
]
# 7.2. Start search
res = client.search(
collection_name="quick_setup",
data=query_vectors,
limit=3,
)
print(res)
# Output
#
# [
# [
# {
# "id": 551,
# "distance": 0.08821295201778412,
# "entity": {}
# },
# {
# "id": 296,
# "distance": 0.0800950899720192,
# "entity": {}
# },
# {
# "id": 43,
# "distance": 0.07794742286205292,
# "entity": {}
# }
# ],
# [
# {
# "id": 730,
# "distance": 0.04431751370429993,
# "entity": {}
# },
# {
# "id": 333,
# "distance": 0.04231833666563034,
# "entity": {}
# },
# {
# "id": 232,
# "distance": 0.04221535101532936,
# "entity": {}
# }
# ]
# ]
// 7. Search with multiple vectors
List<List<Float>> multiVectorSearchData = new ArrayList<>();
multiVectorSearchData.add(Arrays.asList(0.041732933f, 0.013779674f, -0.027564144f, -0.013061441f, 0.009748648f));
multiVectorSearchData.add(Arrays.asList(0.0039737443f, 0.003020432f, -0.0006188639f, 0.03913546f, -0.00089768134f));
searchReq = SearchReq.builder()
.collectionName("quick_setup")
.data(multiVectorSearchData)
.topK(3)
.build();
SearchResp multiVectorSearchRes = client.search(searchReq);
System.out.println(JSONObject.toJSON(multiVectorSearchRes));
// Output:
// {"searchResults": [
// [
// {
// "distance": 0.77929854,
// "id": 90,
// "entity": {}
// },
// {
// "distance": 0.76438016,
// "id": 252,
// "entity": {}
// },
// {
// "distance": 0.76274073,
// "id": 727,
// "entity": {}
// }
// ],
// [
// {
// "distance": 0.96298015,
// "id": 767,
// "entity": {}
// },
// {
// "distance": 0.94215965,
// "id": 140,
// "entity": {}
// },
// {
// "distance": 0.9297105,
// "id": 467,
// "entity": {}
// }
// ]
// ]}
// 7. Search with multiple vectors
const query_vectors = [
[0.3580376395471989, -0.6023495712049978, 0.18414012509913835, -0.26286205330961354, 0.9029438446296592],
[0.19886812562848388, 0.06023560599112088, 0.6976963061752597, 0.2614474506242501, 0.838729485096104]
]
res = await client.search({
collection_name: "quick_setup",
vectors: query_vectors,
limit: 5,
})
console.log(res.results)
// Output
//
// [
// [
// { score: 1, id: '0' },
// { score: 0.749187171459198, id: '160' },
// { score: 0.7374353408813477, id: '109' },
// { score: 0.7352343797683716, id: '120' },
// { score: 0.7103434205055237, id: '721' }
// ],
// [
// { score: 0.9999998807907104, id: '1' },
// { score: 0.983799934387207, id: '247' },
// { score: 0.9833251237869263, id: '851' },
// { score: 0.982724666595459, id: '871' },
// { score: 0.9819263219833374, id: '80' }
// ]
// ]
# 8. Conduct a single vector search
curl --request POST \
--url "${CLUSTER_ENDPOINT}/v2/vectordb/entities/search" \
--header "Authorization: Bearer ${TOKEN}" \
--header "Accept: application/json" \
--header "Content-Type: application/json" \
-d '{
"collectionName": "quick_setup",
"data": [
[0.3847391566891949, -0.5163308707041789, -0.5295937262122905, -0.3592193314357348, 0.9108593166893231],
[0.19886812562848388, 0.06023560599112088, 0.6976963061752597, 0.2614474506242501, 0.838729485096104]
],
"annsField": "vector",
"limit": 3
}'
# {
# "code": 200,
# "data": [
# {
# "distance": 0,
# "id": 448985546440864743
# },
# {
# "distance": 8.83172,
# "id": 448985546440865160
# },
# {
# "distance": 10.112098,
# "id": 448985546440864927
# }
# ]
# }