# app/embedding_cache.py
"""Redis-backed cache for query embeddings.

Exposes one function, `get_or_encode()`, meant to be called everywhere
instead of calling `model.encode()` directly.

How it works:
    First call  -> model.encode() runs -> result is stored in Redis
    Second call -> returned from Redis -> model.encode() never runs
"""

import json
import logging

import redis

logger = logging.getLogger(__name__)

# Default lifetime of a cached embedding, in seconds (24 hours).
EMBEDDING_TTL_SECONDS = 86400

redis_client = redis.Redis(host="localhost", port=6379, db=0, decode_responses=True)


def _read_cached_embedding(key: str):
    """Return the decoded embedding stored under *key*, or None on any miss.

    A Redis error, an absent/empty value, and a value that is not valid JSON
    are all reported as a miss so the caller falls back to encoding.
    """
    try:
        raw = redis_client.get(key)
    except redis.RedisError:
        logger.warning("Embedding cache read failed: %s", key, exc_info=True)
        return None

    if not raw:
        return None

    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        # A corrupt entry would otherwise fail every lookup until its TTL
        # expires; treating it as a miss lets the fresh value overwrite it.
        logger.warning("Embedding cache entry is not valid JSON: %s", key)
        return None


def get_or_encode(query: str, model, *, ttl_seconds: int = EMBEDDING_TTL_SECONDS) -> list:
    """Return the embedding for *query*, using Redis as a read-through cache.

    Always call this with the NORMALIZED query: the cache key is built
    directly from the query text, so differently-normalized spellings of the
    same query are cached separately.

    The cache is best-effort. If Redis cannot be read or written, the failure
    is logged and the embedding is still computed and returned.

    Args:
        query: Normalized query text to embed.
        model: Object whose ``encode(query)`` result provides ``.tolist()``.
        ttl_seconds: Lifetime of a newly cached entry, in seconds.

    Returns:
        The embedding as produced by ``model.encode(query).tolist()``, or the
        JSON-decoded copy of it read back from the cache.
    """
    key = f"emb:{query}"

    # Check Redis first.
    cached = _read_cached_embedding(key)
    if cached is not None:
        logger.info("Embedding HIT: %s", query)
        return cached

    # Cache miss: encode, then store for later calls.
    logger.info("Encoding: %s", query)
    embedding = model.encode(query).tolist()

    try:
        redis_client.setex(key, ttl_seconds, json.dumps(embedding))
    except redis.RedisError:
        # Failing to store must not discard an embedding we already computed.
        logger.warning("Embedding cache write failed: %s", key, exc_info=True)
    else:
        logger.info("Embedding cached: %s", query)

    return embedding