Caching ClamAV scan results by file hash in Node.js

In some upload workflows the same file is uploaded multiple times: a shared document sent by many team members, a standard template, or a logo that every new user submits during onboarding. Scanning an identical file repeatedly wastes CPU and adds latency. A simple cache keyed on the file's SHA-256 hash eliminates redundant scans.

The logic is: compute the hash of the uploaded bytes, check if a verdict for that hash is already in the cache, and return it immediately without invoking pompelmi. On a cache miss, scan normally and store the result.

When caching is safe — and when it is not

Caching scan results is safe under one condition: the cached verdict must be tied to a specific database version. A file that was clean against an old database may be malicious against a newer one once fresh signatures are published.

Do not cache Verdict.Clean indefinitely. Always set a TTL that forces a re-scan within a reasonable window (24–48 hours is a sensible default). When the ClamAV database updates, flush the cache.

Caching Verdict.Malicious is generally safe to keep longer — a file that matched a signature will keep matching it — though signatures are occasionally retracted as false positives, so even malicious verdicts should carry a finite TTL rather than living forever. Caching Verdict.ScanError is discouraged — re-scan on the next attempt in case the error was transient.

Install

npm install pompelmi ioredis

Cache helper

// lib/scanWithCache.js
// NOTE: createHash comes from 'crypto', but createReadStream lives in 'fs'.
// Destructuring both from 'crypto' would leave createReadStream undefined
// and crash fileHash at runtime.
const { createHash } = require('crypto');
const { createReadStream } = require('fs');
const { scan, Verdict } = require('pompelmi');
const Redis = require('ioredis');

// Shared Redis client for the verdict cache. Host is configurable via env
// so the same code runs locally and in containers.
const redis = new Redis({
  host: process.env.REDIS_HOST || '127.0.0.1',
  port: 6379,
});

const CLEAN_TTL_SECONDS     = 60 * 60 * 24;  // 24 hours — re-scan if DB might have updated
const MALICIOUS_TTL_SECONDS = 60 * 60 * 24 * 7; // 7 days — malicious stays malicious

/**
 * Compute the SHA-256 hex digest of a file by streaming its contents,
 * so arbitrarily large uploads never need to fit in memory at once.
 *
 * @param {string} filePath - Path to the file on disk.
 * @returns {Promise<string>} Lowercase hex-encoded SHA-256 digest;
 *   rejects if the file cannot be read.
 */
function fileHash(filePath) {
  return new Promise((resolve, reject) => {
    const digest = createHash('sha256');
    createReadStream(filePath)
      .on('data', (part) => digest.update(part))
      .on('error', reject)
      .on('end', () => resolve(digest.digest('hex')));
  });
}

/**
 * Scan a file, short-circuiting through the Redis verdict cache.
 *
 * Computes the file's SHA-256 digest, returns a cached verdict for that
 * digest when one exists, and otherwise runs a real scan and stores any
 * definitive result. ScanError is intentionally never cached so that
 * transient failures are retried on the next upload.
 *
 * @param {string} filePath - Path of the file to scan.
 * @param {object} [options] - Passed through to pompelmi's scan().
 * @returns {Promise<Verdict>} Clean, Malicious, or ScanError.
 */
async function scanWithCache(filePath, options = {}) {
  const digest = await fileHash(filePath);
  const cacheKey = `scan:${digest}`;

  // Any stored value is 'Clean' or 'Malicious' — ScanError is never
  // written, so an unrecognized/missing value simply falls through.
  const stored = await redis.get(cacheKey);
  if (stored === 'Clean') return Verdict.Clean;
  if (stored === 'Malicious') return Verdict.Malicious;

  // Cache miss — invoke the real scanner.
  const verdict = await scan(filePath, options);

  // Persist only definitive verdicts, each with its own TTL.
  switch (verdict) {
    case Verdict.Clean:
      await redis.set(cacheKey, 'Clean', 'EX', CLEAN_TTL_SECONDS);
      break;
    case Verdict.Malicious:
      await redis.set(cacheKey, 'Malicious', 'EX', MALICIOUS_TTL_SECONDS);
      break;
    default:
      // ScanError (or anything unexpected): deliberately not cached.
      break;
  }

  return verdict;
}

module.exports = { scanWithCache };

Using the cache in an upload endpoint

Drop in scanWithCache anywhere you currently call scan() directly:

// Express app wiring: multer stages each upload on disk so the scanner
// can read it by path before we decide whether to accept it.
const express = require('express');
const multer  = require('multer');
const { Verdict } = require('pompelmi');
const { scanWithCache } = require('./lib/scanWithCache');
const fs = require('fs');
const os = require('os');

const app    = express();
// Uploads land in the OS temp dir, capped at 50 MB per file.
const upload = multer({ dest: os.tmpdir(), limits: { fileSize: 50 * 1024 * 1024 } });

// POST /upload — scan the staged file (cache-aware) and accept or reject it.
// The temp file is always removed, whatever the outcome.
app.post('/upload', upload.single('file'), async (req, res) => {
  if (!req.file) return res.status(400).json({ error: 'No file provided.' });

  const tmpPath = req.file.path;
  try {
    const verdict = await scanWithCache(tmpPath);

    if (verdict === Verdict.Malicious) {
      return res.status(400).json({ error: 'Malware detected. Upload rejected.' });
    }
    if (verdict === Verdict.ScanError) {
      return res.status(422).json({ error: 'Scan incomplete. Upload rejected.' });
    }

    return res.json({ status: 'ok', name: req.file.originalname });

  } catch (err) {
    // Express 4 does not forward rejected promises from async handlers;
    // without this catch a hashing/Redis failure would leave the request
    // hanging with no response at all.
    console.error('upload scan failed:', err);
    return res.status(500).json({ error: 'Internal error while scanning upload.' });
  } finally {
    // Best-effort cleanup of the multer temp file.
    if (fs.existsSync(tmpPath)) fs.unlinkSync(tmpPath);
  }
});

Cache invalidation after database updates

When freshclam downloads a new virus database, previously clean files may now be malicious. Flush all scan: keys from Redis whenever the database updates:

// flush-scan-cache.js — run after each freshclam update
const Redis = require('ioredis');
const redis = new Redis();

/**
 * Delete every cached scan verdict (keys matching scan:*).
 *
 * Uses cursor-based SCAN instead of KEYS so the server is never blocked
 * while walking a large keyspace.
 *
 * @returns {Promise<void>}
 */
async function flushScanCache() {
  let cursor = '0';
  let deleted = 0;

  do {
    const [next, keys] = await redis.scan(cursor, 'MATCH', 'scan:*', 'COUNT', 100);
    cursor = next;
    if (keys.length > 0) {
      deleted += keys.length;
      await redis.del(...keys);
    }
  } while (cursor !== '0');

  console.log(`Flushed ${deleted} cached scan results.`);
}

flushScanCache()
  .catch((err) => {
    console.error(err);
    process.exitCode = 1; // let the freshclam hook / CronJob see the failure
  })
  .finally(() => redis.quit()); // close the connection so the process can exit

Call this script from your freshclam post-update hook or from the Kubernetes CronJob that runs freshclam. See Keeping ClamAV virus definitions current in production for the freshclam cron/CronJob setup.

In-memory alternative (no Redis)

For small deployments or single-process Node.js apps, an in-process LRU cache avoids the Redis dependency. Results are lost on restart, which is acceptable — files will simply be re-scanned:

// Simple in-memory LRU cache with a fixed TTL and entry cap.
// A Map iterates in insertion order, so re-inserting an entry on every
// read keeps the least-recently-used entry first — which is exactly the
// one we evict when full. (The original version evicted by insertion
// order only, i.e. FIFO, despite being described as LRU.)
const cache   = new Map();
const MAX_TTL = 24 * 60 * 60 * 1000;  // 24 h in ms
const MAX_ENTRIES = 10_000;

/**
 * Look up a cached verdict by file hash.
 * @param {string} hash - SHA-256 hex digest of the file.
 * @returns {*} The cached verdict, or null on miss/expiry.
 */
function getCached(hash) {
  const entry = cache.get(hash);
  if (!entry) return null;
  if (Date.now() - entry.ts > MAX_TTL) { cache.delete(hash); return null; }
  // Refresh recency: move the entry to the most-recent end of the Map.
  cache.delete(hash);
  cache.set(hash, entry);
  return entry.verdict;
}

/**
 * Store a verdict for a file hash, evicting the least-recently-used
 * entry when the cache is at capacity.
 * @param {string} hash - SHA-256 hex digest of the file.
 * @param {*} verdict - Verdict to cache.
 */
function setCached(hash, verdict) {
  // Only evict when inserting a genuinely NEW key at capacity —
  // overwriting an existing key does not grow the Map, so evicting
  // an unrelated entry in that case would lose data for no reason.
  if (!cache.has(hash) && cache.size >= MAX_ENTRIES) {
    // Evict the least-recently-used entry (first in iteration order).
    cache.delete(cache.keys().next().value);
  }
  cache.delete(hash); // ensure the (re)written entry lands at the recent end
  cache.set(hash, { verdict, ts: Date.now() });
}

Next steps