Caching
Distributed Caching
Distributed cache invalidation and consistency.
Distributed Caching
When you have multiple servers, caching becomes complicated. Each server has its own local cache, leading to inconsistency. Distributed caching solves this problem.
Cache Invalidation Challenges
The Cache Coherence Problem
Server A Cache: user:123 → {name: "Alice", email: "old@email.com"}
Server B Cache: user:123 → {name: "Alice", email: "old@email.com"}
User updates email via Server A:
Database: user:123 → {name: "Alice", email: "new@email.com"}
Result: Server B still has stale data!

Invalidating Across Nodes
// Bad: Each server invalidates only its local cache
// Every other server keeps serving the stale entry until its own TTL expires.
async function updateUser(userId, data) {
await db.users.update(userId, data);
localCache.delete(`user:${userId}`); // Only clears this server's cache
}
// Better: Invalidate across all servers
// Write the database first, then broadcast an invalidation event so every
// node can drop its local copy of the key.
// NOTE: this relies on the bus delivering the event back to the publishing
// node as well; otherwise the publisher must also clear its own local cache.
async function updateUser(userId, data) {
  await db.users.update(userId, data);

  // Subscribers compare this timestamp against the age of their cached entry.
  const invalidation = {
    key: `user:${userId}`,
    timestamp: Date.now()
  };
  await messageBus.publish('cache:invalidate', invalidation);
}
// All servers listen for invalidation events
// NOTE(review): assumes localCache.getTime() returns the entry's write time;
// for a missing entry (undefined) the comparison is false, so nothing is
// deleted — which is harmless, but confirm getTime's contract.
messageBus.subscribe('cache:invalidate', async (event) => {
// Only invalidate if our cache is older than the update
const cachedTime = localCache.getTime(event.key);
if (cachedTime < event.timestamp) {
localCache.delete(event.key);
}
});Distributed Cache Architectures
1. Centralized Cache (Redis Cluster)
All application servers connect to a central cache cluster.
// Redis Cluster setup (ioredis-style client; each entry is a seed node used
// to discover the rest of the cluster)
const redis = new Redis.Cluster([
{ host: 'cache-1.example.com', port: 6379 },
{ host: 'cache-2.example.com', port: 6379 },
{ host: 'cache-3.example.com', port: 6379 }
], {
enableOfflineQueue: false, // fail fast instead of queueing commands while disconnected
retryDelayOnFailover: 100 // ms to wait before retrying a command after failover
});
// Usage is the same as single Redis
await redis.set('user:123', userData);
await redis.expire('user:123', 3600);Pros:
- Strong consistency guarantees
- Simple application code
- Data persists across server restarts
Cons:
- Network latency for every cache operation
- Single point of failure (mitigated by clustering)
- Bandwidth bottleneck at scale
2. Multi-Level Cache (Local + Distributed)
Combine fast local cache with consistent distributed cache.
class MultiLevelCache {
constructor(localCache, distributedCache) {
this.local = localCache;
this.distributed = distributedCache;
}
async get(key) {
// Check local first (fastest)
let value = await this.local.get(key);
if (value) return value;
// Check distributed cache
value = await this.distributed.get(key);
if (value) {
// Populate local cache for next request
await this.local.set(key, value, { ttl: 60 }); // Short local TTL
}
return value;
}
async set(key, value, options = {}) {
// Set both caches
await Promise.all([
this.local.set(key, value, { ttl: Math.min(options.ttl, 300) }), // Max 5 min local
this.distributed.set(key, value, options)
]);
}
async invalidate(key) {
await Promise.all([
this.local.delete(key),
this.distributed.delete(key)
]);
// Notify other nodes to clear local cache
await this.notifyInvalidation(key);
}
}3. Write-Through Distributed Cache
Every write goes through the cache layer.
// Write-through: every write hits the database first (source of truth), then
// the shared cache, so the cache never holds data the database rejected.
class WriteThroughCache {
constructor(cache, database) {
this.cache = cache;
this.db = database;
}
// Update a row, refresh the shared cache, and broadcast an invalidation.
// SECURITY NOTE(review): `table` is interpolated into the SQL string (bind
// parameters cannot name a table) — it must come from a trusted allow-list,
// never from user input.
// NOTE(review): two concurrent updates can interleave between db.query and
// cache.set, leaving the cache with the older value — confirm whether the
// deployment needs versioning or locking here.
async update(table, id, data) {
const key = `${table}:${id}`;
// Update database first (source of truth)
await this.db.query(`UPDATE ${table} SET ? WHERE id = ?`, [data, id]);
// Update distributed cache
await this.cache.set(key, data);
// Invalidate any local caches
// (broadcastInvalidation is expected to be provided by the deployment)
await this.broadcastInvalidation(key);
return data;
}
// Cache-aside read: cache hit wins; on a miss, load from the database and
// populate the cache. A falsy cached value is treated as a miss.
async get(table, id) {
const key = `${table}:${id}`;
// Try cache first
let data = await this.cache.get(key);
if (data) return data;
// Load from database
data = await this.db.query(`SELECT * FROM ${table} WHERE id = ?`, [id]);
// Populate cache
if (data) {
await this.cache.set(key, data);
}
return data;
}
}Consistency Patterns
Read-After-Write Consistency
class ConsistentCache {
async updateUser(userId, updates) {
const userKey = `user:${userId}`;
// 1. Update database (single transaction)
const updated = await this.db.transaction(async (trx) => {
const user = await trx('users').where({ id: userId }).first();
const merged = { ...user, ...updates };
await trx('users').where({ id: userId }).update(merged);
return merged;
});
// 2. Invalidate all related cache keys
await this.invalidateMultiple([
`user:${userId}`,
`user:${userId}:profile`,
`user:${userId}:settings`
]);
// 3. Write new data to cache (optional - can lazy load)
await this.cache.set(`user:${userId}`, updated);
return updated;
}
async invalidateMultiple(keys) {
// Batch invalidation for efficiency
await Promise.all(keys.map(key => Promise.all([
this.localCache?.delete(key),
this.distributedCache.delete(key),
this.notifyInvalidation(key)
])));
}
}Versioned Cache Keys
Include a version number in cache keys to handle updates.
class VersionedCache {
async getData(type, id) {
// Get current version
const version = await this.distributedCache.get(`version:${type}:${id}`) || 0;
const cacheKey = `${type}:${id}:v${version}`;
let data = await this.localCache.get(cacheKey);
if (data) return data;
data = await this.distributedCache.get(cacheKey);
if (data) {
await this.localCache.set(cacheKey, data);
return data;
}
// Load from database
data = await this.loadFromDatabase(type, id);
// Store with current version
await Promise.all([
this.localCache.set(cacheKey, data),
this.distributedCache.set(cacheKey, data)
]);
return data;
}
async invalidate(type, id) {
// Increment version - old cached data becomes "stale"
const newVersion = await this.distributedCache.incr(`version:${type}:${id}`);
// Notify other nodes
await this.notifyVersionUpdate(type, id, newVersion);
}
}Cache Coordination Mechanisms
Redis Pub/Sub for Invalidation
// Pub/sub-based invalidation: every node publishes deletes and listens for
// deletes from its peers, keeping per-node local caches loosely in sync.
class CacheCoordinator {
constructor(redis, localCache) {
this.redis = redis;
this.localCache = localCache;
// Identify this node so we can ignore our own broadcasts
this.nodeId = process.env.NODE_ID || 'unknown';
// Subscribe to invalidation channel
// NOTE(review): a Redis connection in subscriber mode cannot issue other
// commands such as PUBLISH, yet invalidate() below publishes on this same
// client — use a duplicated connection for publishing. Confirm against the
// Redis client library in use.
this.redis.subscribe('cache:invalidate');
this.redis.on('message', (channel, message) => {
if (channel === 'cache:invalidate') {
this.handleInvalidation(JSON.parse(message));
}
});
}
// Drop the key locally, then broadcast so peers drop it too. sourceNodeId
// defaults to this node so peers can tell who originated the event.
async invalidate(key, sourceNodeId = this.nodeId) {
// Clear local cache
await this.localCache.delete(key);
// Notify all nodes
await this.redis.publish('cache:invalidate', JSON.stringify({
key,
sourceNodeId,
timestamp: Date.now()
}));
}
// Handle a broadcast from a peer. Self-originated events are skipped because
// invalidate() already cleared this node's local cache before publishing.
handleInvalidation(event) {
// Don't process our own invalidations
if (event.sourceNodeId === this.nodeId) return;
// Clear local cache
this.localCache.delete(event.key);
}
}Consistent Hashing for Key Distribution
class ConsistentHashRing {
constructor(nodes = [], replicas = 150) {
this.replicas = replicas;
this.ring = new Map();
this.sortedKeys = [];
nodes.forEach(node => this.addNode(node));
}
addNode(node) {
for (let i = 0; i < this.replicas; i++) {
const key = this.hash(`${node}:${i}`);
this.ring.set(key, node);
this.sortedKeys.push(key);
}
this.sortedKeys.sort();
}
removeNode(node) {
for (let i = 0; i < this.replicas; i++) {
const key = this.hash(`${node}:${i}`);
this.ring.delete(key);
this.sortedKeys = this.sortedKeys.filter(k => k !== key);
}
}
getNode(key) {
if (this.sortedKeys.length === 0) return null;
const hash = this.hash(key);
const index = this.sortedKeys.findIndex(k => k >= hash);
if (index === 0) return this.ring.get(this.sortedKeys[0]);
if (index === -1) return this.ring.get(this.sortedKeys[this.sortedKeys.length - 1]);
return this.ring.get(this.sortedKeys[index]);
}
hash(str) {
let hash = 0;
for (let i = 0; i < str.length; i++) {
const char = str.charCodeAt(i);
hash = ((hash << 5) - hash) + char;
hash = hash & hash; // Convert to 32-bit integer
}
return hash;
}
}Distributed Cache Pitfalls
- Thundering Herd: When a cached entry expires, all nodes rush to refresh it at once
  - Use staggered expiration times
  - Implement request coalescing
- Split Brain: Network partitions cause inconsistent caches
  - Implement health checks and automatic failover
  - Consider eventual consistency models
- Memory Pressure: One overloaded node can affect the entire cluster
  - Monitor memory usage across all nodes
  - Set appropriate eviction policies
When to Use Distributed Caching
Use when:
- You have multiple application servers
- Data changes infrequently but is read often
- You need consistency across nodes
Skip when:
- Single server deployment
- Data changes frequently
- Cache invalidation is more expensive than recomputation