跳到主要内容

Get 和 Scalar Query

本指南介绍如何通过 ID 获取(get) entity 并执行标量查询(scalar query)。Scalar query 是指根据指定的过滤条件筛选出匹配的 entity。

概览

Scalar query 使用布尔表达式按标量条件过滤 collection 内的 entity。查询结果将返回符合条件的 entity。与基于距离进行向量搜索(search)不同,scalar query 依据特定标准筛选 entity。

在 Zilliz Cloud 上,过滤器(filter)是由字段名称和运算符组成的字符串。本指南包含多个 filter 示例,详细运算符信息请参阅运算符快速参考。

开始前

以下代码示例展示如何连接到 Zilliz Cloud 集群,快速创建 collection,并向其中插入 1000 条随机生成的 entity。

步骤 1:创建 collection

import random, time
from pymilvus import MilvusClient

# Connection settings: replace both placeholders with your cluster's values.
CLUSTER_ENDPOINT = "YOUR_CLUSTER_ENDPOINT"
TOKEN = "YOUR_CLUSTER_TOKEN"

# 1. Connect to the cluster
client = MilvusClient(uri=CLUSTER_ENDPOINT, token=TOKEN)

# 2. Create the "quick_setup" collection with dimension 5
client.create_collection(collection_name="quick_setup", dimension=5)

步骤 2:随机插入 entity

# 3. Build 1000 entities with random vectors and random scalar fields
colors = ["green", "blue", "yellow", "red", "black", "white", "purple", "pink", "orange", "brown", "grey"]
data = []

for i in range(1000):
    # Draw a fresh color and a four-digit tag for every entity.
    current_color = random.choice(colors)
    current_tag = random.randint(1000, 9999)
    data.append({
        "id": i,
        # Five random components in [-1, 1], matching dimension=5.
        "vector": [random.uniform(-1, 1) for _ in range(5)],
        "color": current_color,
        "tag": current_tag,
        # Combined "<color>_<tag>" string, used later by the `like` filter.
        "color_tag": f"{current_color}_{current_tag}",
    })

# Show the first generated entity as a sample.
print(data[0])

# Output
#
# {
# "id": 0,
# "vector": [
# 0.7371107800002366,
# -0.7290389773227746,
# 0.38367002049157417,
# 0.36996000494220627,
# -0.3641898951462792
# ],
# "color": "yellow",
# "tag": 6781,
# "color_tag": "yellow_6781"
# }

# Insert the generated entities into the collection.
res = client.insert(collection_name="quick_setup", data=data)

print(res)

# Output
#
# {
# "insert_count": 1000,
# "ids": [
# 0,
# 1,
# 2,
# 3,
# 4,
# 5,
# 6,
# 7,
# 8,
# 9,
# "(990 more items hidden)"
# ]
# }

步骤 3:创建 partition 并插入更多 entity

# 4. Create partitions and insert more entities
for partition_name in ("partitionA", "partitionB"):
    client.create_partition(
        collection_name="quick_setup",
        partition_name=partition_name,
    )

# Build 500 more entities (IDs 1000-1499) destined for partitionA.
data = []

for i in range(1000, 1500):
    current_color = random.choice(colors)
    # Draw a new tag per entity; without this line every entity would reuse
    # the last tag left over from the previous loop.
    current_tag = random.randint(1000, 9999)
    data.append({
        "id": i,
        "vector": [random.uniform(-1, 1) for _ in range(5)],
        "color": current_color,
        "tag": current_tag,
        "color_tag": f"{current_color}_{current_tag}",
    })

# Insert the batch into partitionA.
res = client.insert(
    collection_name="quick_setup",
    partition_name="partitionA",
    data=data,
)

print(res)

# Output
#
# {
# "insert_count": 500,
# "ids": [
# 1000,
# 1001,
# 1002,
# 1003,
# 1004,
# 1005,
# 1006,
# 1007,
# 1008,
# 1009,
# "(490 more items hidden)"
# ]
# }

# Build 500 more entities (IDs 1500-1999) destined for partitionB.
data = []

for i in range(1500, 2000):
    current_color = random.choice(colors)
    # Draw a new tag per entity; without this line every entity would reuse
    # a stale tag left over from an earlier loop.
    current_tag = random.randint(1000, 9999)
    data.append({
        "id": i,
        "vector": [random.uniform(-1, 1) for _ in range(5)],
        "color": current_color,
        "tag": current_tag,
        "color_tag": f"{current_color}_{current_tag}",
    })

# Insert the batch into partitionB.
res = client.insert(
    collection_name="quick_setup",
    partition_name="partitionB",
    data=data,
)

print(res)

# Output
#
# {
# "insert_count": 500,
# "ids": [
# 1500,
# 1501,
# 1502,
# 1503,
# 1504,
# 1505,
# 1506,
# 1507,
# 1508,
# 1509,
# "(490 more items hidden)"
# ]
# }

按 ID 获取(get)entity

如果您已知 entity ID,可以使用 get() 方法获取对应的 entity 信息:

# 5. Get entities by ID
# Fetch the entities whose IDs are 0, 1 and 2.
res = client.get(collection_name="quick_setup", ids=[0, 1, 2])

print(res)

# Output
#
# [
# {
# "id": 0,
# "vector": [
# 0.7371108,
# -0.72903895,
# 0.38367003,
# 0.36996,
# -0.3641899
# ],
# "color": "yellow",
# "tag": 6781,
# "color_tag": "yellow_6781"
# },
# {
# "id": 1,
# "vector": [
# -0.10924426,
# -0.7659806,
# 0.8613359,
# 0.65219676,
# -0.06385158
# ],
# "color": "pink",
# "tag": 1023,
# "color_tag": "pink_1023"
# },
# {
# "id": 2,
# "vector": [
# 0.402096,
# -0.74742633,
# -0.901683,
# 0.6292514,
# 0.77286446
# ],
# "color": "blue",
# "tag": 3972,
# "color_tag": "blue_3972"
# }
# ]

从 partition 中获取(get) entity

您也可以从指定的 partition 中获取 entity。

# 5. Get entities from partitions
# Restrict the ID lookup to partitionA.
res = client.get(
    collection_name="quick_setup",
    partition_names=["partitionA"],
    ids=[1000, 1001, 1002],
)

print(res)

# Output
#
# [
# {
# "color": "green",
# "tag": 1995,
# "color_tag": "green_1995",
# "id": 1000,
# "vector": [
# 0.7807706,
# 0.8083741,
# 0.17276904,
# -0.8580777,
# 0.024156934
# ]
# },
# {
# "color": "red",
# "tag": 1995,
# "color_tag": "red_1995",
# "id": 1001,
# "vector": [
# 0.065074645,
# -0.44882354,
# -0.29479212,
# -0.19798489,
# -0.77542555
# ]
# },
# {
# "color": "green",
# "tag": 1995,
# "color_tag": "green_1995",
# "id": 1002,
# "vector": [
# 0.027934508,
# -0.44199976,
# -0.40262738,
# -0.041511405,
# 0.024782438
# ]
# }
# ]

使用基本运算符

在本节中,您将找到如何在 scalar query 中使用基本运算符的示例。这些 filter 也可以应用于向量搜索和数据删除。

  • 筛选 tag 在 1000 到 1500 的 entity

    # 6. Use basic operators

    # Return up to 3 entities whose tag is strictly between 1000 and 1500.
    res = client.query(
        collection_name="quick_setup",
        # highlight-start
        filter="1000 < tag < 1500",
        output_fields=["color_tag"],
        # highlight-end
        limit=3,
    )

    print(res)

    # Output
    #
    # [
    # {
    # "id": 1,
    # "color_tag": "pink_1023"
    # },
    # {
    # "id": 41,
    # "color_tag": "red_1483"
    # },
    # {
    # "id": 44,
    # "color_tag": "grey_1146"
    # }
    # ]
  • 筛选 color 为 brown 的 entity

    # Return up to 3 entities whose color equals "brown".
    res = client.query(
        collection_name="quick_setup",
        # highlight-start
        filter='color == "brown"',
        output_fields=["color_tag"],
        # highlight-end
        limit=3,
    )

    print(res)

    # Output
    #
    # [
    # {
    # "color_tag": "brown_5343",
    # "id": 15
    # },
    # {
    # "color_tag": "brown_3167",
    # "id": 27
    # },
    # {
    # "color_tag": "brown_3100",
    # "id": 30
    # }
    # ]
  • 筛选 color 不为 green 和 purple 的 entity

    # Return up to 3 entities whose color is neither "green" nor "purple".
    res = client.query(
        collection_name="quick_setup",
        # highlight-start
        filter='color not in ["green", "purple"]',
        output_fields=["color_tag"],
        # highlight-end
        limit=3,
    )

    print(res)

    # Output
    #
    # [
    # {
    # "color_tag": "yellow_6781",
    # "id": 0
    # },
    # {
    # "color_tag": "pink_1023",
    # "id": 1
    # },
    # {
    # "color_tag": "blue_3972",
    # "id": 2
    # }
    # ]
  • 筛选 color_tag 以 red 开头的 entity

    # Return up to 3 entities whose color_tag starts with "red".
    res = client.query(
        collection_name="quick_setup",
        # highlight-start
        filter='color_tag like "red%"',
        output_fields=["color_tag"],
        # highlight-end
        limit=3,
    )

    print(res)

    # Output
    #
    # [
    # {
    # "color_tag": "red_6443",
    # "id": 17
    # },
    # {
    # "color_tag": "red_1483",
    # "id": 41
    # },
    # {
    # "color_tag": "red_4348",
    # "id": 47
    # }
    # ]
  • 筛选 color 为 red 且 tag 值在 1000 到 1500 的 entity

    # Combine both conditions: color is "red" AND tag is between 1000 and 1500.
    res = client.query(
        collection_name="quick_setup",
        # highlight-start
        filter='(color == "red") and (1000 < tag < 1500)',
        output_fields=["color_tag"],
        # highlight-end
        limit=3,
    )

    print(res)

    # Output
    #
    # [
    # {
    # "color_tag": "red_1483",
    # "id": 41
    # },
    # {
    # "color_tag": "red_1100",
    # "id": 94
    # },
    # {
    # "color_tag": "red_1343",
    # "id": 526
    # }
    # ]

使用高级运算符

在本节中,您将找到如何在 scalar query 中使用高级运算符的示例。这些 filter 也可以应用于向量搜索和数据删除。

统计 entity 数量

  • 统计 collection 中所有的 entity 数量

    # 7. Use advanced operators

    # Count every entity in the collection by requesting count(*) as the
    # only output field.
    res = client.query(
        collection_name="quick_setup",
        # highlight-start
        output_fields=["count(*)"],
        # highlight-end
    )

    print(res)

    # Output
    #
    # [
    # {
    # "count(*)": 2000
    # }
    # ]
  • 统计 collection 中的某个 partition 下的 entity 数量

    # Count only the entities stored in partitionA.
    res = client.query(
        collection_name="quick_setup",
        # highlight-start
        output_fields=["count(*)"],
        partition_names=["partitionA"],
        # highlight-end
    )

    print(res)

    # Output
    #
    # [
    # {
    # "count(*)": 500
    # }
    # ]
  • 统计符合指定标量条件的 entity 数量

    # Count only the entities that satisfy the filter expression.
    res = client.query(
        collection_name="quick_setup",
        # highlight-start
        filter='(color == "red") and (1000 < tag < 1500)',
        output_fields=["count(*)"],
        # highlight-end
    )

    print(res)

    # Output
    #
    # [
    # {
    # "count(*)": 3
    # }
    # ]

运算符快速参考

基本运算符

在 Zilliz Cloud 中,布尔表达式是指由字段和运算符组成的字符串。下表列举了 Zilliz Cloud 支持的基本运算符。

运算符

描述

and (&&)

当两个操作数都为真时返回真

or (||)

当任一操作数为真时返回真

+, -, *, /

加法、减法、乘法和除法

**

幂运算

%

取模

<, >

小于、大于

==, !=

等于、不等于

<=, >=

小于或等于、大于或等于

not

反转给定条件的结果

like

使用通配符运算符比较相似值。

例如,使用 like "prefix%" 匹配以 "prefix" 开头的字符串。

in

测试表达式是否匹配值列表中的任何值。

高级运算符

  • count(*)

    统计 collection 中 entity 的数量。使用 count(*) 作为输出字段可以获取 collection 或 partition 中 entity 的数量。

    📘说明

    count(*) 适用于已加载的 collection。如果要使用该运算符,应将其作为输出的唯一字段。