You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

38 lines
1.0 KiB

4 days ago
# app/embedding_cache.py
import json
import redis
# Shared module-level Redis client used by the embedding cache below.
# decode_responses=True asks redis-py to return str values instead of bytes.
redis_client = redis.Redis(
    host="localhost",
    port=6379,
    db=0,
    decode_responses=True,
)
def get_or_encode(query: str, model, *, ttl_seconds: int = 86400) -> list:
    """
    Return the embedding for ``query``, using Redis as a read-through cache.

    The embedding is looked up under the key ``emb:<query>``. On a hit the
    stored JSON is decoded and returned without calling the model. On a miss
    the query is encoded with ``model.encode(query).tolist()`` and the result
    is written back to Redis with an expiry of ``ttl_seconds``.

    The cache is best-effort: if Redis raises, or the cached value cannot be
    decoded as JSON, the query is encoded with the model instead of failing.

    Always call this with the NORMALIZED query.

    Args:
        query: Normalized query text; it is used verbatim in the cache key.
        model: Object whose ``encode(query)`` result exposes ``tolist()``.
        ttl_seconds: Lifetime of a newly cached entry, in seconds.
            Defaults to 86400 (24 hours).

    Returns:
        The embedding as a JSON-serializable list.
    """
    key = f"emb:{query}"
    # NOTE(review): the key does not identify the model, so two different
    # models pointed at the same Redis database would read each other's
    # embeddings. Confirm only one model is ever used with this cache.

    # Check Redis first. A Redis failure is treated as a miss so that an
    # unavailable cache never prevents the embedding from being computed.
    try:
        cached = redis_client.get(key)
    except redis.RedisError as exc:
        print(f"Embedding cache read failed: {exc}")
        cached = None

    if cached is not None:
        try:
            embedding = json.loads(cached)
        except ValueError:
            # json.JSONDecodeError is a ValueError subclass. Fall through so
            # the unreadable entry gets overwritten with a fresh encoding.
            print(f"Embedding cache entry unreadable, re-encoding: {query}")
        else:
            print(f"Embedding HIT: {query}")
            return embedding

    # Cache miss — encode and store
    print(f"Encoding: {query}")
    embedding = model.encode(query).tolist()
    try:
        redis_client.setex(key, ttl_seconds, json.dumps(embedding))
    except redis.RedisError as exc:
        # The embedding is still valid; only the write-back was lost.
        print(f"Embedding cache write failed: {exc}")
    else:
        print(f"Embedding cached: {query}")
    return embedding
""""
Simple and clean: one function, `get_or_encode()`, that you call everywhere instead of calling `model.encode()` directly.
How it works:
First call: model.encode() runs and the result is stored in Redis.
Second call: the embedding is returned from Redis, so model.encode() never runs.
"""