You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
38 lines
1.0 KiB
38 lines
1.0 KiB
# app/embedding_cache.py
|
|
import json
|
|
import redis
|
|
|
|
# Shared module-level Redis client used by get_or_encode() (localhost:6379, db 0).
# decode_responses=True makes reads return str rather than bytes, so cached
# values can be handed straight to json.loads().
redis_client = redis.Redis(host="localhost", port=6379, db=0, decode_responses=True)
|
|
|
|
|
|
def get_or_encode(query: str, model) -> list:
    """Return the embedding for ``query``, using Redis as a read-through cache.

    The cache key is ``emb:<query>``, so always call this with the NORMALIZED
    query; two spellings of the same query are cached as separate entries.

    The cache is best-effort: if Redis raises an error, or the stored value is
    not valid JSON, the query is encoded with ``model`` instead of failing.

    Args:
        query: Normalized query text; used verbatim in the cache key.
        model: Object whose ``encode(query)`` result exposes ``tolist()``
            (presumably a NumPy array or tensor -- confirm against callers).

    Returns:
        The embedding, either decoded from the cached JSON or freshly encoded.
    """
    key = f"emb:{query}"

    # Check Redis first. A cache outage must not prevent encoding.
    try:
        cached = redis_client.get(key)
    except redis.RedisError as exc:
        print(f"Embedding cache read failed: {exc}")
        cached = None

    # Only a missing key is a miss; do not rely on truthiness of the payload.
    if cached is not None:
        try:
            embedding = json.loads(cached)
        except ValueError:
            # Corrupt entry (json.JSONDecodeError is a ValueError): fall
            # through so it is re-encoded and overwritten below.
            print(f"Embedding cache entry corrupt: {query}")
        else:
            print(f"Embedding HIT: {query}")
            return embedding

    # Cache miss — encode and store
    print(f"Encoding: {query}")
    embedding = model.encode(query).tolist()

    try:
        redis_client.setex(key, 86400, json.dumps(embedding))  # TTL = 24hrs
    except redis.RedisError as exc:
        # The embedding is still valid; only the caching step failed.
        print(f"Embedding cache write failed: {exc}")
    else:
        print(f"Embedding cached: {query}")

    return embedding
|
|
"""
|
|
|
|
Simple and clean — one function `get_or_encode()` that you call everywhere instead of `model.encode()` directly.
|
|
|
|
How it works:
|
|
|
|
First call → model.encode() runs → stored in Redis
|
|
Second call → returned from Redis → model.encode() never runs
|
|
|
|
""" |