Use raw query strings as input. Below we use the `~` flag to indicate that the full-text query is optional. We also choose the BM25 scorer and return document scores along with the results.
v.set_filter("(~(@job:engineer))")
v.scorer("BM25").with_scores()
index.query(v)
[{'id': 'user_queries_docs:01JMJJHE28ZW4F33ZNRKXRHYCS',
'score': 1.8181817787737895,
'vector_distance': '0',
'user': 'john',
'credit_score': 'high',
'age': '18',
'job': 'engineer',
'office_location': '-122.4194,37.7749'},
{'id': 'user_queries_docs:01JMJJHE2899024DYPXT6424N9',
'score': 0.0,
'vector_distance': '0',
'user': 'derrick',
'credit_score': 'low',
'age': '14',
'job': 'doctor',
'office_location': '-122.4194,37.7749'},
{'id': 'user_queries_docs:01JMJJPEYCQ89ZQW6QR27J72WT',
'score': 1.8181817787737895,
'vector_distance': '0',
'user': 'john',
'credit_score': 'high',
'age': '18',
'job': 'engineer',
'office_location': '-122.4194,37.7749'},
{'id': 'user_queries_docs:01JMJJPEYD544WB1TKDBJ3Z3J9',
'score': 0.0,
'vector_distance': '0',
'user': 'derrick',
'credit_score': 'low',
'age': '14',
'job': 'doctor',
'office_location': '-122.4194,37.7749'},
{'id': 'user_queries_docs:01JMJJHE28B5R6T00DH37A7KSJ',
'score': 1.8181817787737895,
'vector_distance': '0.109129190445',
'user': 'tyler',
'credit_score': 'high',
'age': '100',
'job': 'engineer',
'office_location': '-122.0839,37.3861'},
{'id': 'user_queries_docs:01JMJJPEYDPF9S5328WHCQN0ND',
'score': 1.8181817787737895,
'vector_distance': '0.109129190445',
'user': 'tyler',
'credit_score': 'high',
'age': '100',
'job': 'engineer',
'office_location': '-122.0839,37.3861'},
{'id': 'user_queries_docs:01JMJJHE28G5F943YGWMB1ZX1V',
'score': 0.0,
'vector_distance': '0.158808946609',
'user': 'tim',
'credit_score': 'high',
'age': '12',
'job': 'dermatologist',
'office_location': '-122.0839,37.3861'},
{'id': 'user_queries_docs:01JMJJPEYDKA9ARKHRK1D7KPXQ',
'score': 0.0,
'vector_distance': '0.158808946609',
'user': 'tim',
'credit_score': 'high',
'age': '12',
'job': 'dermatologist',
'office_location': '-122.0839,37.3861'},
{'id': 'user_queries_docs:01JMJJHE28NR7KF0EZEA433T2J',
'score': 0.0,
'vector_distance': '0.217882037163',
'user': 'taimur',
'credit_score': 'low',
'age': '15',
'job': 'CEO',
'office_location': '-122.0839,37.3861'},
{'id': 'user_queries_docs:01JMJJPEYD9EAVGJ2AZ8K9VX7Q',
'score': 0.0,
'vector_distance': '0.217882037163',
'user': 'taimur',
'credit_score': 'low',
'age': '15',
'job': 'CEO',
'office_location': '-122.0839,37.3861'}]
Geographic Filters
Geographic filters are filters that are applied to geographic fields. They are used to find results that lie within (or outside of) a given radius of a given point. The radius can be specified in kilometers, miles, meters, or feet.
from redisvl.query.filter import Geo, GeoRadius
# within 10 km of San Francisco office
geo_filter = Geo("office_location") == GeoRadius(-122.4194, 37.7749, 10, "km")
v.set_filter(geo_filter)
result_print(index.query(v))
score | vector_distance | user | credit_score | age | job | office_location |
---|
0.4545454446934474 | 0 | john | high | 18 | engineer | -122.4194,37.7749 |
0.4545454446934474 | 0 | derrick | low | 14 | doctor | -122.4194,37.7749 |
0.4545454446934474 | 0 | john | high | 18 | engineer | -122.4194,37.7749 |
0.4545454446934474 | 0 | derrick | low | 14 | doctor | -122.4194,37.7749 |
0.4545454446934474 | 0.266666650772 | nancy | high | 94 | doctor | -122.4194,37.7749 |
0.4545454446934474 | 0.266666650772 | nancy | high | 94 | doctor | -122.4194,37.7749 |
# within 100 km Radius of San Francisco office
geo_filter = Geo("office_location") == GeoRadius(-122.4194, 37.7749, 100, "km")
v.set_filter(geo_filter)
result_print(index.query(v))
score | vector_distance | user | credit_score | age | job | office_location |
---|
0.4545454446934474 | 0 | john | high | 18 | engineer | -122.4194,37.7749 |
0.4545454446934474 | 0 | derrick | low | 14 | doctor | -122.4194,37.7749 |
0.4545454446934474 | 0 | john | high | 18 | engineer | -122.4194,37.7749 |
0.4545454446934474 | 0 | derrick | low | 14 | doctor | -122.4194,37.7749 |
0.4545454446934474 | 0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 |
0.4545454446934474 | 0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 |
0.4545454446934474 | 0.158808946609 | tim | high | 12 | dermatologist | -122.0839,37.3861 |
0.4545454446934474 | 0.158808946609 | tim | high | 12 | dermatologist | -122.0839,37.3861 |
0.4545454446934474 | 0.217882037163 | taimur | low | 15 | CEO | -122.0839,37.3861 |
0.4545454446934474 | 0.217882037163 | taimur | low | 15 | CEO | -122.0839,37.3861 |
# not within 10 km Radius of San Francisco office
geo_filter = Geo("office_location") != GeoRadius(-122.4194, 37.7749, 10, "km")
v.set_filter(geo_filter)
result_print(index.query(v))
score | vector_distance | user | credit_score | age | job | office_location |
---|
0.0 | 0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 |
0.0 | 0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 |
0.0 | 0.158808946609 | tim | high | 12 | dermatologist | -122.0839,37.3861 |
0.0 | 0.158808946609 | tim | high | 12 | dermatologist | -122.0839,37.3861 |
0.0 | 0.217882037163 | taimur | low | 15 | CEO | -122.0839,37.3861 |
0.0 | 0.217882037163 | taimur | low | 15 | CEO | -122.0839,37.3861 |
0.0 | 0.653301358223 | joe | medium | 35 | dentist | -122.0839,37.3861 |
0.0 | 0.653301358223 | joe | medium | 35 | dentist | -122.0839,37.3861 |
Combining Filters
In this example, we will combine a numeric filter with a tag filter. We will search for users that are between the ages of 18 and 100 (inclusive) and have a credit_score of "high".
Intersection ("and")
t = Tag("credit_score") == "high"
low = Num("age") >= 18
high = Num("age") <= 100
combined = t & low & high
v = VectorQuery([0.1, 0.1, 0.5],
"user_embedding",
return_fields=["user", "credit_score", "age", "job", "office_location"],
filter_expression=combined)
result_print(index.query(v))
vector_distance | user | credit_score | age | job | office_location |
---|
0 | john | high | 18 | engineer | -122.4194,37.7749 |
0 | john | high | 18 | engineer | -122.4194,37.7749 |
0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 |
0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 |
0.266666650772 | nancy | high | 94 | doctor | -122.4194,37.7749 |
0.266666650772 | nancy | high | 94 | doctor | -122.4194,37.7749 |
Union ("or")
The union of two queries is the set of all results that are returned by either of the two queries. The union of two queries is performed using the `|` operator.
low = Num("age") < 18
high = Num("age") > 93
combined = low | high
v.set_filter(combined)
result_print(index.query(v))
vector_distance | user | credit_score | age | job | office_location |
---|
0 | derrick | low | 14 | doctor | -122.4194,37.7749 |
0 | derrick | low | 14 | doctor | -122.4194,37.7749 |
0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 |
0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 |
0.158808946609 | tim | high | 12 | dermatologist | -122.0839,37.3861 |
0.158808946609 | tim | high | 12 | dermatologist | -122.0839,37.3861 |
0.217882037163 | taimur | low | 15 | CEO | -122.0839,37.3861 |
0.217882037163 | taimur | low | 15 | CEO | -122.0839,37.3861 |
0.266666650772 | nancy | high | 94 | doctor | -122.4194,37.7749 |
0.266666650772 | nancy | high | 94 | doctor | -122.4194,37.7749 |
Dynamic Combination
There are often situations where you may or may not want to use a filter in a
given query. As shown above, filters will accept the `None` type and revert
to a wildcard filter, essentially returning all results.
The same goes for filter combinations which enables rapid reuse of filters in
requests with different parameters as shown below. This removes the need for
a number of "if-then" conditionals to test for the empty case.
def make_filter(age=None, credit=None, job=None):
    """Build one filter expression from optional age, credit, and job criteria.

    The three clauses are always constructed and AND-ed together. The
    surrounding tutorial states that a clause given ``None`` reverts to a
    wildcard, so callers can omit any argument without branching.

    Args:
        age: Exclusive lower bound compared against the numeric ``age`` field.
        credit: Value compared for equality against the ``credit_score`` tag.
        job: Value applied to the ``job`` text field with the ``%`` operator.

    Returns:
        The intersection (``&``) of the age, credit, and job clauses.
    """
    older_than = Num("age") > age
    has_credit = Tag("credit_score") == credit
    job_matches = Text("job") % job
    return older_than & has_credit & job_matches
# all parameters
combined = make_filter(age=18, credit="high", job="engineer")
v.set_filter(combined)
result_print(index.query(v))
vector_distance | user | credit_score | age | job | office_location |
---|
0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 |
0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 |
# just age and credit_score
combined = make_filter(age=18, credit="high")
v.set_filter(combined)
result_print(index.query(v))
vector_distance | user | credit_score | age | job | office_location |
---|
0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 |
0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 |
0.266666650772 | nancy | high | 94 | doctor | -122.4194,37.7749 |
0.266666650772 | nancy | high | 94 | doctor | -122.4194,37.7749 |
# just age
combined = make_filter(age=18)
v.set_filter(combined)
result_print(index.query(v))
vector_distance | user | credit_score | age | job | office_location |
---|
0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 |
0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 |
0.266666650772 | nancy | high | 94 | doctor | -122.4194,37.7749 |
0.266666650772 | nancy | high | 94 | doctor | -122.4194,37.7749 |
0.653301358223 | joe | medium | 35 | dentist | -122.0839,37.3861 |
0.653301358223 | joe | medium | 35 | dentist | -122.0839,37.3861 |
# no filters
combined = make_filter()
v.set_filter(combined)
result_print(index.query(v))
vector_distance | user | credit_score | age | job | office_location |
---|
0 | john | high | 18 | engineer | -122.4194,37.7749 |
0 | derrick | low | 14 | doctor | -122.4194,37.7749 |
0 | john | high | 18 | engineer | -122.4194,37.7749 |
0 | derrick | low | 14 | doctor | -122.4194,37.7749 |
0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 |
0.109129190445 | tyler | high | 100 | engineer | -122.0839,37.3861 |
0.158808946609 | tim | high | 12 | dermatologist | -122.0839,37.3861 |
0.158808946609 | tim | high | 12 | dermatologist | -122.0839,37.3861 |
0.217882037163 | taimur | low | 15 | CEO | -122.0839,37.3861 |
0.217882037163 | taimur | low | 15 | CEO | -122.0839,37.3861 |
Non-vector Queries
In some cases, you may not want to run a vector query, but just use a `FilterExpression`, similar to a SQL query. The `FilterQuery` class enables this functionality. It is similar to the `VectorQuery` class but solely takes a `FilterExpression`.
from redisvl.query import FilterQuery
has_low_credit = Tag("credit_score") == "low"
# Filter-only query: no vector is supplied, only the tag filter and the fields to return.
filter_query = FilterQuery(
# NOTE(review): "location" is not a field name used anywhere else on this page
# (the geo field is "office_location"), and the printed result table has no
# such column -- confirm whether "office_location" was intended here.
return_fields=["user", "credit_score", "age", "job", "location"],
filter_expression=has_low_credit
)
results = index.query(filter_query)
result_print(results)
user | credit_score | age | job |
---|
derrick | low | 14 | doctor |
taimur | low | 15 | CEO |
derrick | low | 14 | doctor |
taimur | low | 15 | CEO |
Count Queries
In some cases, you may need to use a `FilterExpression` to execute a `CountQuery` that simply returns the number of entities matching the filter. It is similar to the `FilterQuery` class but does not return the values of the underlying data.
from redisvl.query import CountQuery
has_low_credit = Tag("credit_score") == "low"
filter_query = CountQuery(filter_expression=has_low_credit)
count = index.query(filter_query)
print(f"{count} records match the filter expression {str(has_low_credit)} for the given index.")
4 records match the filter expression @credit_score:{low} for the given index.
Range Queries
Range Queries are a useful method to perform a vector search where only results within a vector distance_threshold
are returned. This enables the user to find all records within their dataset that are similar to a query vector where "similar" is defined by a quantitative value.
from redisvl.query import RangeQuery
# Range query: vector search restricted by the distance_threshold given below.
range_query = RangeQuery(
vector=[0.1, 0.1, 0.5],
vector_field_name="user_embedding",
# NOTE(review): "location" is not a field name used anywhere else on this page
# (the geo field is "office_location"), and the printed result table has no
# such column -- confirm whether "office_location" was intended here.
return_fields=["user", "credit_score", "age", "job", "location"],
distance_threshold=0.2
)
# same as the vector query or filter query
results = index.query(range_query)
result_print(results)
vector_distance | user | credit_score | age | job |
---|
0 | john | high | 18 | engineer |
0 | derrick | low | 14 | doctor |
0 | john | high | 18 | engineer |
0 | derrick | low | 14 | doctor |
0.109129190445 | tyler | high | 100 | engineer |
0.109129190445 | tyler | high | 100 | engineer |
0.158808946609 | tim | high | 12 | dermatologist |
0.158808946609 | tim | high | 12 | dermatologist |
We can also change the distance threshold of the query object between uses if we like. Here we will set `distance_threshold=0.1`. This means that the query will return all matches whose vector distance from the query vector is within 0.1. This is a small distance, so we expect to get fewer matches than before.
range_query.set_distance_threshold(0.1)
result_print(index.query(range_query))
vector_distance | user | credit_score | age | job |
---|
0 | john | high | 18 | engineer |
0 | derrick | low | 14 | doctor |
0 | john | high | 18 | engineer |
0 | derrick | low | 14 | doctor |
Range queries can also be used with filters like any other query type. The following limits the results to only include records with a `job` of `engineer` while also being within the vector range (aka distance).
is_engineer = Text("job") == "engineer"
range_query.set_filter(is_engineer)
result_print(index.query(range_query))
vector_distance | user | credit_score | age | job |
---|
0 | john | high | 18 | engineer |
0 | john | high | 18 | engineer |
Advanced Query Modifiers
See all modifier options available on the query API docs: https://redis.io/docs/latest/integrate/redisvl/api/query
# Sort by a different field and change dialect
v = VectorQuery(
vector=[0.1, 0.1, 0.5],
vector_field_name="user_embedding",
return_fields=["user", "credit_score", "age", "job", "office_location"],
num_results=5,
filter_expression=is_engineer
).sort_by("age", asc=False).dialect(3)
result = index.query(v)
result_print(result)
vector_distance | age | user | credit_score | job | office_location |
---|
0.109129190445 | 100 | tyler | high | engineer | -122.0839,37.3861 |
0.109129190445 | 100 | tyler | high | engineer | -122.0839,37.3861 |
0 | 18 | john | high | engineer | -122.4194,37.7749 |
0 | 18 | john | high | engineer | -122.4194,37.7749 |
Raw Redis Query String
Sometimes it's helpful to convert these classes into their raw Redis query strings.
# check out the complex query from above
str(v)
'@job:("engineer")=>[KNN 5 @user_embedding $vector AS vector_distance] RETURN 6 user credit_score age job office_location vector_distance SORTBY age DESC DIALECT 3 LIMIT 0 5'
t = Tag("credit_score") == "high"
str(t)
'@credit_score:{high}'
t = Tag("credit_score") == "high"
low = Num("age") >= 18
high = Num("age") <= 100
combined = t & low & high
str(combined)
'((@credit_score:{high} @age:[18 +inf]) @age:[-inf 100])'
The RedisVL `SearchIndex` class exposes a `search()` method which is a simple wrapper around the `FT.SEARCH` API.
Provide any valid Redis query string.
results = index.search(str(t))
for r in results.docs:
print(r.__dict__)
{'id': 'user_queries_docs:01JMJJHE28G5F943YGWMB1ZX1V', 'payload': None, 'user': 'tim', 'age': '12', 'job': 'dermatologist', 'credit_score': 'high', 'office_location': '-122.0839,37.3861', 'user_embedding': '>>\x00\x00\x00?'}
{'id': 'user_queries_docs:01JMJJHE28ZW4F33ZNRKXRHYCS', 'payload': None, 'user': 'john', 'age': '18', 'job': 'engineer', 'credit_score': 'high', 'office_location': '-122.4194,37.7749', 'user_embedding': '==\x00\x00\x00?'}
{'id': 'user_queries_docs:01JMJJHE28B5R6T00DH37A7KSJ', 'payload': None, 'user': 'tyler', 'age': '100', 'job': 'engineer', 'credit_score': 'high', 'office_location': '-122.0839,37.3861', 'user_embedding': '=>\x00\x00\x00?'}
{'id': 'user_queries_docs:01JMJJHE28EX13NEE7BGBM8FH3', 'payload': None, 'user': 'nancy', 'age': '94', 'job': 'doctor', 'credit_score': 'high', 'office_location': '-122.4194,37.7749', 'user_embedding': '333?=\x00\x00\x00?'}
{'id': 'user_queries_docs:01JMJJPEYCQ89ZQW6QR27J72WT', 'payload': None, 'user': 'john', 'age': '18', 'job': 'engineer', 'credit_score': 'high', 'office_location': '-122.4194,37.7749', 'user_embedding': '==\x00\x00\x00?'}
{'id': 'user_queries_docs:01JMJJPEYDAN0M3V7EQEVPS6HX', 'payload': None, 'user': 'nancy', 'age': '94', 'job': 'doctor', 'credit_score': 'high', 'office_location': '-122.4194,37.7749', 'user_embedding': '333?=\x00\x00\x00?'}
{'id': 'user_queries_docs:01JMJJPEYDPF9S5328WHCQN0ND', 'payload': None, 'user': 'tyler', 'age': '100', 'job': 'engineer', 'credit_score': 'high', 'office_location': '-122.0839,37.3861', 'user_embedding': '=>\x00\x00\x00?'}
{'id': 'user_queries_docs:01JMJJPEYDKA9ARKHRK1D7KPXQ', 'payload': None, 'user': 'tim', 'age': '12', 'job': 'dermatologist', 'credit_score': 'high', 'office_location': '-122.0839,37.3861', 'user_embedding': '>>\x00\x00\x00?'}