Caching ClamAV scan results by file hash in Node.js
In some upload workflows the same file is uploaded multiple times: a shared document sent by many team members, a standard template, or a logo that every new user submits during onboarding. Scanning an identical file repeatedly wastes CPU and adds latency. A simple cache keyed on the file's SHA-256 hash eliminates redundant scans.
The logic is: compute the hash of the uploaded bytes, check if a verdict for that hash is already in the cache, and return it immediately without invoking pompelmi. On a cache miss, scan normally and store the result.
When caching is safe — and when it is not
Caching scan results is safe under one condition: the cached verdict must be tied to a specific database version. A file that was clean against an old database may be malicious against a newer one once fresh signatures are published.
This means you should never cache Verdict.Clean indefinitely.
Always set a TTL that forces a re-scan within a reasonable window (24–48
hours is a sensible default). When the ClamAV database updates, flush the
cache.
A cached Verdict.Malicious may safely be kept longer — a file that
was malicious will remain malicious. Caching Verdict.ScanError
is discouraged — re-scan on the next attempt in case the error was transient.
Install
npm install pompelmi ioredis
Cache helper
// lib/scanWithCache.js
const { createHash } = require('crypto');
// BUG FIX: createReadStream comes from 'fs', not 'crypto' — requiring it
// from 'crypto' yields undefined and crashes on the first hash attempt.
const { createReadStream } = require('fs');
const { scan, Verdict } = require('pompelmi');
const Redis = require('ioredis');
const redis = new Redis({
  host: process.env.REDIS_HOST || '127.0.0.1',
  port: 6379,
});
const CLEAN_TTL_SECONDS = 60 * 60 * 24; // 24 hours — re-scan if DB might have updated
const MALICIOUS_TTL_SECONDS = 60 * 60 * 24 * 7; // 7 days — malicious stays malicious
/**
 * Compute the SHA-256 of a file as a lowercase hex string.
 * Streams the file so memory use stays flat regardless of file size.
 * @param {string} filePath - path to the file to hash
 * @returns {Promise<string>} hex-encoded SHA-256 digest
 */
async function fileHash(filePath) {
  return new Promise((resolve, reject) => {
    const hash = createHash('sha256');
    const stream = createReadStream(filePath);
    stream.on('data', (chunk) => hash.update(chunk));
    stream.on('end', () => resolve(hash.digest('hex')));
    stream.on('error', reject);
  });
}
/**
 * Scan a file, consulting a Redis verdict cache keyed on the file's SHA-256.
 * Cache hits skip the ClamAV scan entirely. ScanError is never cached, so a
 * transient failure is retried on the next upload of the same bytes.
 * Redis failures are treated as cache misses — a cache outage should degrade
 * to a normal scan, never block the upload pipeline.
 * @param {string} filePath - path to the uploaded file
 * @param {object} [options] - passed through to pompelmi's scan()
 * @returns {Promise<*>} a pompelmi Verdict value
 */
async function scanWithCache(filePath, options = {}) {
  const hash = await fileHash(filePath);
  const cacheKey = `scan:${hash}`;
  // Check cache first (best-effort: fall through to a real scan on error)
  let cached = null;
  try {
    cached = await redis.get(cacheKey);
  } catch (err) {
    console.error('scan cache read failed, falling back to scan:', err);
  }
  if (cached === 'Clean') return Verdict.Clean;
  if (cached === 'Malicious') return Verdict.Malicious;
  // ScanError is never cached — anything else means a cache miss
  const verdict = await scan(filePath, options);
  // Store result (never cache ScanError); a failed write only costs a rescan
  try {
    if (verdict === Verdict.Clean) {
      await redis.set(cacheKey, 'Clean', 'EX', CLEAN_TTL_SECONDS);
    } else if (verdict === Verdict.Malicious) {
      await redis.set(cacheKey, 'Malicious', 'EX', MALICIOUS_TTL_SECONDS);
    }
  } catch (err) {
    console.error('scan cache write failed:', err);
  }
  return verdict;
}
module.exports = { scanWithCache };
Using the cache in an upload endpoint
Drop in scanWithCache anywhere you currently call
scan() directly:
const express = require('express');
const multer = require('multer');
const { Verdict } = require('pompelmi');
const { scanWithCache } = require('./lib/scanWithCache');
const fs = require('fs');
const os = require('os');

const app = express();
// Buffer uploads in the OS temp dir; reject anything over 50 MB up front.
const upload = multer({ dest: os.tmpdir(), limits: { fileSize: 50 * 1024 * 1024 } });

app.post('/upload', upload.single('file'), async (req, res) => {
  if (!req.file) return res.status(400).json({ error: 'No file provided.' });
  const tmpPath = req.file.path;
  try {
    // Verdict comes from the cache when this exact file was seen before.
    const verdict = await scanWithCache(tmpPath);
    switch (verdict) {
      case Verdict.Malicious:
        return res.status(400).json({ error: 'Malware detected. Upload rejected.' });
      case Verdict.ScanError:
        return res.status(422).json({ error: 'Scan incomplete. Upload rejected.' });
      default:
        return res.json({ status: 'ok', name: req.file.originalname });
    }
  } finally {
    // Always remove the temp file, whatever the verdict was.
    if (fs.existsSync(tmpPath)) fs.unlinkSync(tmpPath);
  }
});
Cache invalidation after database updates
When freshclam downloads a new virus database, previously clean files may
now be malicious. Flush all scan: keys from Redis whenever
the database updates:
// flush-scan-cache.js — run after each freshclam update
const Redis = require('ioredis');
const redis = new Redis();

/**
 * Walk every `scan:*` key with SCAN (incremental, so it never blocks Redis
 * the way KEYS would) and delete them in batches, then report the total.
 */
async function flushScanCache() {
  let removed = 0;
  for (let cursor = '0'; ; ) {
    const [nextCursor, batch] = await redis.scan(cursor, 'MATCH', 'scan:*', 'COUNT', 100);
    if (batch.length > 0) {
      await redis.del(...batch);
      removed += batch.length;
    }
    cursor = nextCursor;
    // SCAN signals a complete pass by returning cursor '0'.
    if (cursor === '0') break;
  }
  console.log(`Flushed ${removed} cached scan results.`);
}

flushScanCache().catch(console.error);
Call this script from your freshclam post-update hook or from the Kubernetes CronJob that runs freshclam. See Keeping ClamAV virus definitions current in production for the freshclam cron/CronJob setup.
In-memory alternative (no Redis)
For small deployments or single-process Node.js apps, an in-process cache with a TTL and a size cap avoids the Redis dependency. Results are lost on restart, which is acceptable — files will simply be re-scanned:
// Simple in-memory LRU cache with a fixed TTL and cap
const cache = new Map();
const MAX_TTL = 24 * 60 * 60 * 1000; // 24 h in ms
const MAX_ENTRIES = 10_000;

/**
 * Return the cached verdict for a file hash, or null on a miss or expiry.
 * A hit is re-inserted so Map insertion order tracks recency — this makes
 * eviction in setCached true LRU (the original version never refreshed
 * recency, so eviction was actually FIFO).
 * @param {string} hash - SHA-256 hex digest of the file
 * @returns {*} the cached verdict, or null
 */
function getCached(hash) {
  const entry = cache.get(hash);
  if (!entry) return null;
  if (Date.now() - entry.ts > MAX_TTL) {
    cache.delete(hash);
    return null;
  }
  // Refresh recency: move the entry to the end of the Map's insertion order.
  cache.delete(hash);
  cache.set(hash, entry);
  return entry.verdict;
}

/**
 * Store a verdict, evicting the least-recently-used entry when at capacity.
 * Only evicts when inserting a NEW key — overwriting an existing key does
 * not grow the cache, so evicting there would shrink it needlessly.
 * @param {string} hash - SHA-256 hex digest of the file
 * @param {*} verdict - the scan verdict to cache
 */
function setCached(hash, verdict) {
  if (cache.size >= MAX_ENTRIES && !cache.has(hash)) {
    // Map iterates in insertion order, so the first key is least recent.
    cache.delete(cache.keys().next().value);
  }
  cache.set(hash, { verdict, ts: Date.now() });
}
Next steps
- Want broader performance improvements beyond caching? See Optimising ClamAV scan performance in Node.js.
- Keeping the database current so caches stay valid? See Keeping ClamAV virus definitions current in production.
- Need async scanning instead of synchronous? See Background virus scanning with BullMQ.