Caching ClamAV scan results by file hash in Node.js

In some upload workflows the same file is uploaded multiple times: a shared document sent by many team members, a standard template, or a logo that every new user submits during onboarding. Scanning an identical file repeatedly wastes CPU and adds latency. A simple cache keyed on the file's SHA-256 hash eliminates redundant scans.

The logic is: compute the hash of the uploaded bytes, check if a verdict for that hash is already in the cache, and return it immediately without invoking pompelmi. On a cache miss, scan normally and store the result.

When caching is safe — and when it is not

Caching scan results is safe under one condition: the cached verdict must be tied to a specific database version. A file that was clean against an old database may be malicious against a newer one once fresh signatures are published.

Do not cache Verdict.Clean indefinitely. Always set a TTL that forces a re-scan within a reasonable window (24–48 hours is a sensible default). When the ClamAV database updates, flush the cache.

Caching Verdict.Malicious is generally safe to keep longer — a file that matched a signature will keep matching it — though signatures are occasionally retracted as false positives, so even malicious verdicts should carry a finite TTL rather than living forever. Caching Verdict.ScanError is discouraged — re-scan on the next attempt in case the error was transient.

Install

npm install pompelmi ioredis

Cache helper

// lib/scanWithCache.js
// NOTE: createHash comes from 'crypto', but createReadStream lives in 'fs'.
// Destructuring both from 'crypto' would leave createReadStream undefined
// and crash fileHash at runtime.
const { createHash } = require('crypto');
const { createReadStream } = require('fs');
const { scan, Verdict } = require('pompelmi');
const Redis = require('ioredis');

// Shared Redis client for the verdict cache. Host is configurable via env
// so the same code runs locally and in containers.
const redis = new Redis({
  host: process.env.REDIS_HOST || '127.0.0.1',
  port: 6379,
});

const CLEAN_TTL_SECONDS     = 60 * 60 * 24;  // 24 hours — re-scan if DB might have updated
const MALICIOUS_TTL_SECONDS = 60 * 60 * 24 * 7; // 7 days — malicious stays malicious

/**
 * Compute the SHA-256 hex digest of a file by streaming its contents,
 * so arbitrarily large uploads never need to fit in memory at once.
 *
 * @param {string} filePath - Path to the file on disk.
 * @returns {Promise<string>} Lowercase hex-encoded SHA-256 digest;
 *   rejects if the file cannot be read.
 */
function fileHash(filePath) {
  return new Promise((resolve, reject) => {
    const digest = createHash('sha256');
    createReadStream(filePath)
      .on('data', (part) => digest.update(part))
      .on('error', reject)
      .on('end', () => resolve(digest.digest('hex')));
  });
}

/**
 * Scan a file, short-circuiting through the Redis verdict cache.
 *
 * Computes the file's SHA-256 digest, returns a cached verdict for that
 * digest when one exists, and otherwise runs a real scan and stores any
 * definitive result. ScanError is intentionally never cached so that
 * transient failures are retried on the next upload.
 *
 * @param {string} filePath - Path of the file to scan.
 * @param {object} [options] - Passed through to pompelmi's scan().
 * @returns {Promise<Verdict>} Clean, Malicious, or ScanError.
 */
async function scanWithCache(filePath, options = {}) {
  const digest = await fileHash(filePath);
  const cacheKey = `scan:${digest}`;

  // Any stored value is 'Clean' or 'Malicious' — ScanError is never
  // written, so an unrecognized/missing value simply falls through.
  const stored = await redis.get(cacheKey);
  if (stored === 'Clean') return Verdict.Clean;
  if (stored === 'Malicious') return Verdict.Malicious;

  // Cache miss — invoke the real scanner.
  const verdict = await scan(filePath, options);

  // Persist only definitive verdicts, each with its own TTL.
  switch (verdict) {
    case Verdict.Clean:
      await redis.set(cacheKey, 'Clean', 'EX', CLEAN_TTL_SECONDS);
      break;
    case Verdict.Malicious:
      await redis.set(cacheKey, 'Malicious', 'EX', MALICIOUS_TTL_SECONDS);
      break;
    default:
      // ScanError (or anything unexpected): deliberately not cached.
      break;
  }

  return verdict;
}

module.exports = { scanWithCache };

Using the cache in an upload endpoint

Drop in scanWithCache anywhere you currently call scan() directly:

// Express app wiring: multer stages each upload on disk so the scanner
// can read it by path before we decide whether to accept it.
const express = require('express');
const multer  = require('multer');
const { Verdict } = require('pompelmi');
const { scanWithCache } = require('./lib/scanWithCache');
const fs = require('fs');
const os = require('os');

const app    = express();
// Uploads land in the OS temp dir, capped at 50 MB per file.
const upload = multer({ dest: os.tmpdir(), limits: { fileSize: 50 * 1024 * 1024 } });

// POST /upload — scan the staged file (cache-aware) and accept or reject it.
// The temp file is always removed, whatever the outcome.
app.post('/upload', upload.single('file'), async (req, res) => {
  if (!req.file) return res.status(400).json({ error: 'No file provided.' });

  const tmpPath = req.file.path;
  try {
    const verdict = await scanWithCache(tmpPath);

    if (verdict === Verdict.Malicious) {
      return res.status(400).json({ error: 'Malware detected. Upload rejected.' });
    }
    if (verdict === Verdict.ScanError) {
      return res.status(422).json({ error: 'Scan incomplete. Upload rejected.' });
    }

    return res.json({ status: 'ok', name: req.file.originalname });

  } catch (err) {
    // Express 4 does not forward rejected promises from async handlers;
    // without this catch a hashing/Redis failure would leave the request
    // hanging with no response at all.
    console.error('upload scan failed:', err);
    return res.status(500).json({ error: 'Internal error while scanning upload.' });
  } finally {
    // Best-effort cleanup of the multer temp file.
    if (fs.existsSync(tmpPath)) fs.unlinkSync(tmpPath);
  }
});

Cache invalidation after database updates

When freshclam downloads a new virus database, previously clean files may now be malicious. Flush all scan: keys from Redis whenever the database updates:

// flush-scan-cache.js — run after each freshclam update
const Redis = require('ioredis');
const redis = new Redis();

/**
 * Delete every cached scan verdict (keys matching scan:*).
 *
 * Uses cursor-based SCAN instead of KEYS so the server is never blocked
 * while walking a large keyspace.
 *
 * @returns {Promise<void>}
 */
async function flushScanCache() {
  let cursor = '0';
  let deleted = 0;

  do {
    const [next, keys] = await redis.scan(cursor, 'MATCH', 'scan:*', 'COUNT', 100);
    cursor = next;
    if (keys.length > 0) {
      deleted += keys.length;
      await redis.del(...keys);
    }
  } while (cursor !== '0');

  console.log(`Flushed ${deleted} cached scan results.`);
}

flushScanCache()
  .catch((err) => {
    console.error(err);
    process.exitCode = 1; // let the freshclam hook / CronJob see the failure
  })
  .finally(() => redis.quit()); // close the connection so the process can exit

Call this script from your freshclam post-update hook or from the Kubernetes CronJob that runs freshclam. See Keeping ClamAV virus definitions current in production for the freshclam cron/CronJob setup.

In-memory alternative (no Redis)

For small deployments or single-process Node.js apps, an in-process LRU cache avoids the Redis dependency. Results are lost on restart, which is acceptable — files will simply be re-scanned:

// Simple in-memory LRU cache with a fixed TTL and entry cap.
// A Map iterates in insertion order, so re-inserting an entry on every
// read keeps the least-recently-used entry first — which is exactly the
// one we evict when full. (The original version evicted by insertion
// order only, i.e. FIFO, despite being described as LRU.)
const cache   = new Map();
const MAX_TTL = 24 * 60 * 60 * 1000;  // 24 h in ms
const MAX_ENTRIES = 10_000;

/**
 * Look up a cached verdict by file hash.
 * @param {string} hash - SHA-256 hex digest of the file.
 * @returns {*} The cached verdict, or null on miss/expiry.
 */
function getCached(hash) {
  const entry = cache.get(hash);
  if (!entry) return null;
  if (Date.now() - entry.ts > MAX_TTL) { cache.delete(hash); return null; }
  // Refresh recency: move the entry to the most-recent end of the Map.
  cache.delete(hash);
  cache.set(hash, entry);
  return entry.verdict;
}

/**
 * Store a verdict for a file hash, evicting the least-recently-used
 * entry when the cache is at capacity.
 * @param {string} hash - SHA-256 hex digest of the file.
 * @param {*} verdict - Verdict to cache.
 */
function setCached(hash, verdict) {
  // Only evict when inserting a genuinely NEW key at capacity —
  // overwriting an existing key does not grow the Map, so evicting
  // an unrelated entry in that case would lose data for no reason.
  if (!cache.has(hash) && cache.size >= MAX_ENTRIES) {
    // Evict the least-recently-used entry (first in iteration order).
    cache.delete(cache.keys().next().value);
  }
  cache.delete(hash); // ensure the (re)written entry lands at the recent end
  cache.set(hash, { verdict, ts: Date.now() });
}

Next steps