Building a file scanning REST API with Node.js and pompelmi

Instead of embedding antivirus scanning into every application that handles uploads, you can build a single scanning microservice that all your apps call. This keeps ClamAV in one place, makes updates and monitoring centralized, and lets non-Node.js services use the scanner too.

This guide walks through building a minimal but production-ready scanning API: a POST endpoint that accepts a file, scans it, and returns a JSON verdict.

This guide assumes ClamAV is installed. If not, see How to install ClamAV on macOS, Linux and Windows.

Install

# Create the project directory, initialise package.json, and install the
# three packages that server.js requires.
mkdir scan-api && cd scan-api
npm init -y
npm install express multer pompelmi

API design

Method	Path	Description
POST	/scan	Accept a multipart file upload, scan it, return a verdict.
GET	/health	Return 200 if the service and ClamAV are ready, 503 otherwise.

Response format

// Success — file scanned
{
  "verdict":     "clean",         // "clean" | "malicious" | "scan_error"
  "safe":        true,            // boolean shorthand
  "filename":    "report.pdf",
  "size":        48219            // bytes
}

// Error — missing file or server fault
{
  "error": "No file provided."
}

Server implementation

// server.js
'use strict';

const express = require('express');
const multer  = require('multer');
const { scan, Verdict } = require('pompelmi');
const fs = require('fs');
const os = require('os');

const app = express();

// Upload size cap in bytes. MAX_FILE_SIZE is parsed as a base-10 integer;
// anything that is missing, non-numeric, zero or negative falls back to the
// default so multer never receives a meaningless limit.
const DEFAULT_MAX_FILE_SIZE = 100 * 1024 * 1024;  // 100 MB default
const configuredMaxFileSize = Number.parseInt(process.env.MAX_FILE_SIZE, 10);
const maxFileSize = configuredMaxFileSize > 0
  ? configuredMaxFileSize
  : DEFAULT_MAX_FILE_SIZE;

// Uploads are written to the OS temp directory; one file per request.
const upload = multer({
  dest:   os.tmpdir(),
  limits: {
    fileSize: maxFileSize,
    files:    1
  }
});

/**
 * Convert a pompelmi verdict into the string used in JSON responses.
 *
 * @param {*} v - Value to compare against the members of `Verdict`.
 * @returns {string} 'clean', 'malicious' or 'scan_error' for the matching
 *   `Verdict` member; 'unknown' for anything else.
 */
function verdictString(v) {
  switch (v) {
    case Verdict.Clean:
      return 'clean';
    case Verdict.Malicious:
      return 'malicious';
    case Verdict.ScanError:
      return 'scan_error';
    default:
      return 'unknown';
  }
}

/**
 * Best-effort removal of an uploaded temp file.
 *
 * Never rejects: a file that is already gone is ignored, and any other
 * failure is logged so that cleanup problems cannot change the HTTP response.
 *
 * @param {string} filePath - Path of the temp file to delete.
 * @returns {Promise<void>}
 */
async function removeTempFile(filePath) {
  try {
    await fs.promises.unlink(filePath);
  } catch (unlinkErr) {
    if (unlinkErr.code !== 'ENOENT') {
      console.error('Could not remove temp file', filePath, unlinkErr);
    }
  }
}

// POST /scan
// Expects one multipart upload in the "file" field. Replies 400 when no file
// was sent, 500 when scan() rejects, and 200 with the verdict otherwise.
app.post('/scan', upload.single('file'), async (req, res) => {
  if (!req.file) {
    return res.status(400).json({ error: 'No file provided. Send a multipart/form-data request with field name "file".' });
  }

  const { path: tmpPath, originalname, size } = req.file;

  let verdict;
  try {
    verdict = await scan(tmpPath);
  } catch (err) {
    return res.status(500).json({ error: `Scan failed: ${err.message}` });
  } finally {
    // Delete temp file regardless of verdict or failure. This runs on both
    // paths and cannot throw, so a cleanup problem is never reported to the
    // caller as a failed scan.
    await removeTempFile(tmpPath);
  }

  // 200 with verdict, even for malicious files — caller decides what to do
  return res.status(200).json({
    verdict:  verdictString(verdict),
    safe:     verdict === Verdict.Clean,
    filename: originalname,
    size
  });
});

// Error middleware: answer 413 for errors whose code is LIMIT_FILE_SIZE and
// hand every other error on to the next handler.
app.use(function uploadErrorHandler(err, req, res, next) {
  const isTooLarge = err.code === 'LIMIT_FILE_SIZE';

  if (!isTooLarge) {
    next(err);
    return;
  }

  res.status(413).json({ error: 'File exceeds the maximum allowed size.' });
});

// Listen on PORT from the environment, or 4000 when it is unset or empty.
const PORT = process.env.PORT || 4000;
app.listen(PORT, () => {
  console.log('Scan API listening on port', PORT);
});

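The /scan endpoint returns HTTP 200 with a JSON body whenever a scan completes — even for malicious files. This is intentional: the HTTP status code reflects whether the request succeeded (it did — the file was scanned), not whether the file was clean. Callers inspect the verdict field to decide what to do next. Requests that could not be scanned get an error status instead: 400 when no file is attached, 413 when the file exceeds the size limit, and 500 when the scan itself fails.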

Health endpoint

The health endpoint verifies that clamscan is available and the virus database has been downloaded. Load balancers and container orchestrators use this to determine whether the service is ready to accept traffic.

const { execFile } = require('child_process');

// GET /health
// Replies 200 only when `clamscan --version` runs successfully AND its output
// includes a virus-database version; replies 503 otherwise.
app.get('/health', (req, res) => {
  execFile('clamscan', ['--version'], { timeout: 5000 }, (err, stdout) => {
    if (err) {
      return res.status(503).json({
        status:  'unavailable',
        reason:  'clamscan binary not found or failed: ' + err.message
      });
    }

    const version = stdout.trim();   // e.g. "ClamAV 1.3.1/27392/..."

    // With definitions installed the version string has the form
    // "ClamAV <engine>/<database version>/<database date>"; without them only
    // "ClamAV <engine>" is printed. Treat a missing database segment as not ready.
    const hasDatabase = /^ClamAV [^/]+\/\d+/.test(version);
    if (!hasDatabase) {
      return res.status(503).json({
        status:  'unavailable',
        reason:  'ClamAV virus database not found',
        clamav:  version
      });
    }

    res.json({
      status:  'ok',
      clamav:  version
    });
  });
});

Calling the API from another service

curl

# Send the file as multipart field "file" and pipe the JSON reply through jq.
curl -s -F "file=@/path/to/file.pdf" http://localhost:4000/scan | jq .

Node.js with form-data

# node-fetch v3 is ESM-only; the example below loads it with require(), so pin v2.
npm install form-data node-fetch@2

const FormData = require('form-data');
const fetch    = require('node-fetch');
const fs       = require('fs');

/**
 * Upload a file to the scan API and return the parsed verdict.
 *
 * The API base URL is read from the SCAN_API_URL environment variable and
 * defaults to 'http://scan-api:4000'; the request always goes to its /scan path.
 *
 * @param {string} filePath - Path of the file to stream to the API.
 * @returns {Promise<object>} Parsed response body: { verdict, safe, filename, size }.
 * @throws {Error} 'Scan API error: …' when the API answers with a non-2xx
 *   status. The detail is the body's `error` field when the body is JSON and
 *   has one, otherwise the HTTP status text.
 */
async function scanWithApi(filePath) {
  const baseUrl = process.env.SCAN_API_URL || 'http://scan-api:4000';

  const form = new FormData();
  form.append('file', fs.createReadStream(filePath));

  const response = await fetch(new URL('/scan', baseUrl).href, {
    method:  'POST',
    body:    form,
    headers: form.getHeaders()
  });

  if (!response.ok) {
    // Check the status before trusting the body: an error reply (for example
    // from a proxy in front of the API) is not guaranteed to be JSON.
    let detail = response.statusText;
    try {
      const body = await response.json();
      if (body && body.error) {
        detail = body.error;
      }
    } catch (parseErr) {
      // Body was not JSON; the status text is the best description available.
    }
    throw new Error(`Scan API error: ${detail}`);
  }

  return response.json();   // { verdict, safe, filename, size }
}

// Example call. The trailing catch handles a rejected request (network failure
// or an error status from the API) instead of leaving the promise unhandled.
scanWithApi('./uploads/document.docx')
  .then((result) => {
    if (!result.safe) {
      console.error('File rejected:', result.verdict);
    } else {
      console.log('File is safe.');
    }
  })
  .catch((err) => {
    console.error('Scan request failed:', err.message);
    process.exitCode = 1;
  });

Python

import requests

def scan_file(path: str, timeout: float = 120.0) -> dict:
    """Upload the file at ``path`` to the scan API and return the JSON reply.

    Args:
        path: Path of the file to upload; it is opened in binary mode.
        timeout: Seconds passed to ``requests.post`` as its ``timeout``.
            Without one, a stalled scan service would block the caller
            indefinitely.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: From ``raise_for_status()`` on a 4xx/5xx reply.
        requests.Timeout: When the service does not answer within ``timeout``.
    """
    with open(path, 'rb') as f:
        response = requests.post(
            'http://scan-api:4000/scan',
            files={'file': f},
            timeout=timeout,
        )
    response.raise_for_status()
    return response.json()

# Example: scan one file and print the verdict string from the JSON reply.
result = scan_file('./uploads/report.pdf')
print(result['verdict'])   # 'clean', 'malicious', or 'scan_error'

Dockerize the scanning API

# Dockerfile
# Image for the scanning API: the Node.js 20 slim base plus the ClamAV package.
FROM node:20-slim

# Install ClamAV and remove the apt package lists in the same layer.
RUN apt-get update \
 && apt-get install -y --no-install-recommends clamav \
 && rm -rf /var/lib/apt/lists/*

# Download virus definitions at build time
# Mount a persistent volume in production to avoid re-downloading on every deploy
# NOTE: `|| true` lets the build continue when freshclam fails, so an image
# built that way may contain no virus definitions.
RUN freshclam --no-warnings || true

# Copy the package manifests and install non-dev dependencies before copying
# the server source.
WORKDIR /app
COPY package*.json ./
RUN npm ci --omit=dev
COPY server.js .

# server.js reads PORT from the environment; 4000 matches its built-in default.
ENV PORT=4000
EXPOSE 4000
CMD ["node", "server.js"]

# docker-compose.yml — run alongside your main application
version: '3.9'

services:
  app:
    build: ./app
    environment:
      - SCAN_API_URL=http://scan-api:4000
    depends_on:
      # Start the app only once the scanner's healthcheck passes.
      scan-api:
        condition: service_healthy

  scan-api:
    build: ./scan-api
    environment:
      - MAX_FILE_SIZE=104857600   # 100 MB
    healthcheck:
      # node:20-slim ships neither wget nor curl, so probe /health with the
      # Node.js runtime that is already in the image. Exit 0 only on a 2xx reply.
      test: ['CMD', 'node', '-e', "fetch('http://localhost:4000/health').then(r => process.exit(r.ok ? 0 : 1)).catch(() => process.exit(1))"]
      interval: 30s
      start_period: 60s

For a more advanced deployment using a shared clamd daemon instead of per-request clamscan processes, see Running pompelmi with ClamAV in Docker Compose.