Building a file scanning REST API with Node.js and pompelmi
Instead of embedding antivirus scanning into every application that handles uploads, you can build a single scanning microservice that all your apps call. This keeps ClamAV in one place, makes updates and monitoring centralized, and lets non-Node.js services use the scanner too.
This guide walks through building a minimal but production-ready scanning API: a POST endpoint that accepts a file, scans it, and returns a JSON verdict.
Install
# Create the project directory, initialise package.json, and install the server's dependencies.
mkdir scan-api && cd scan-api
npm init -y
npm install express multer pompelmi
API design
| Method | Path | Description |
|---|---|---|
| POST | /scan | Accept a multipart file upload, scan it, return a verdict. |
| GET | /health | Return 200 if the service and ClamAV are ready, 503 otherwise. |
Response format
// Success — file scanned
{
"verdict": "clean", // "clean" | "malicious" | "scan_error"
"safe": true, // boolean shorthand
"filename": "report.pdf",
"size": 48219 // bytes
}
// Error — missing file or server fault
{
"error": "No file provided."
}
Server implementation
// server.js
'use strict';
const express = require('express');
const multer = require('multer');
const { scan, Verdict } = require('pompelmi');
const fs = require('fs');
const os = require('os');
const app = express();
// Multer upload configuration: uploads are written under the OS temp directory
// so the scanner can read each one from disk via req.file.path.
// MAX_FILE_SIZE (bytes, base 10) overrides the size cap; when it is unset,
// non-numeric, or 0, the 100 MB default applies.
const upload = multer({
  dest: os.tmpdir(),
  limits: {
    // Explicit radix so values such as "0x10" are never read as hexadecimal.
    fileSize: Number.parseInt(process.env.MAX_FILE_SIZE, 10) || 100 * 1024 * 1024, // 100 MB default
    files: 1 // at most one file per request
  }
});
/**
 * Convert a pompelmi verdict into the string used in JSON responses.
 *
 * @param {*} v - Value returned by scan(); compared by identity against the Verdict members.
 * @returns {string} 'clean', 'malicious', 'scan_error', or 'unknown' for anything else.
 */
function verdictString(v) {
  switch (v) {
    case Verdict.Clean:
      return 'clean';
    case Verdict.Malicious:
      return 'malicious';
    case Verdict.ScanError:
      return 'scan_error';
    default:
      return 'unknown';
  }
}
// Best-effort removal of a temp upload. A file that is already gone is not an
// error; any other failure is logged so it cannot mask the scan result.
async function removeTempFile(filePath) {
  try {
    await fs.promises.unlink(filePath);
  } catch (err) {
    if (err.code !== 'ENOENT') {
      console.error('Failed to remove temp file', filePath, err);
    }
  }
}

// POST /scan
// Accepts one multipart upload in the "file" field, scans the temp file that
// multer wrote, and answers with a JSON verdict.
//   400 — no file in the request
//   200 — scan finished; body is { verdict, safe, filename, size }
//   500 — scan() threw; body is { error }
app.post('/scan', upload.single('file'), async (req, res) => {
  if (!req.file) {
    return res.status(400).json({ error: 'No file provided. Send a multipart/form-data request with field name "file".' });
  }
  const { path: tmpPath, originalname, size } = req.file;
  let verdict;
  try {
    verdict = await scan(tmpPath);
  } catch (err) {
    return res.status(500).json({ error: 'Scan failed: ' + err.message });
  } finally {
    // Runs on both paths, so the upload never outlives the request and a
    // cleanup failure can no longer turn a finished scan into a 500.
    await removeTempFile(tmpPath);
  }
  // Always 200 once a verdict exists — the caller decides what to do with it.
  return res.status(200).json({
    verdict: verdictString(verdict),
    safe: verdict === Verdict.Clean,
    filename: originalname,
    size
  });
});
// Error-handling middleware: an upload rejected for exceeding the size limit
// becomes a JSON 413; every other error is passed along unchanged.
app.use((err, req, res, next) => {
  const isTooLarge = err.code === 'LIMIT_FILE_SIZE';
  if (!isTooLarge) {
    next(err);
    return;
  }
  return res.status(413).json({ error: 'File exceeds the maximum allowed size.' });
});
// Listen on the port given by the PORT environment variable, or 4000 when it is unset or empty.
const PORT = process.env.PORT || 4000;
app.listen(PORT, function onListening() {
  console.log('Scan API listening on port', PORT);
});
The /scan endpoint returns HTTP 200 with a JSON body whenever a scan completes — even
for malicious files. This is intentional: the HTTP status code reflects whether
the request succeeded (it did — the file was scanned), not whether the
file was clean. Callers inspect the verdict field to decide what
to do next. Other status codes (400, 413, 500) mean no verdict was produced.
Health endpoint
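The health endpoint verifies that the clamscan binary is installed and runnable
by executing clamscan --version, and returns the version string it reports. It does
not by itself confirm that the virus database has been downloaded. Load balancers and
container orchestrators use this to determine whether the service is ready to accept traffic.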
const { execFile } = require('child_process');
// GET /health
// Runs `clamscan --version` with a 5 second timeout. A failure yields a 503
// with the reason; success yields a 200 carrying the trimmed version output.
app.get('/health', (req, res) => {
  const onVersion = (err, stdout) => {
    if (!err) {
      res.json({
        status: 'ok',
        clamav: stdout.trim() // e.g. "ClamAV 1.3.1/27392/..."
      });
      return;
    }
    return res.status(503).json({
      status: 'unavailable',
      reason: 'clamscan binary not found or failed: ' + err.message
    });
  };
  execFile('clamscan', ['--version'], { timeout: 5000 }, onVersion);
});
Calling the API from another service
curl
curl -s -F "file=@/path/to/file.pdf" http://localhost:4000/scan | jq .
Node.js with form-data
npm install form-data node-fetch
const FormData = require('form-data');
const fetch = require('node-fetch');
const fs = require('fs');
/**
 * Upload a file to the scan API and return its verdict.
 *
 * @param {string} filePath - Path of the file to stream to the API.
 * @returns {Promise<object>} The parsed JSON body: { verdict, safe, filename, size }.
 * @throws {Error} 'Scan API error: <detail>' on a non-2xx response. <detail> is the
 *   body's `error` field when the body is JSON and has one, otherwise the HTTP status text.
 */
async function scanWithApi(filePath) {
  const form = new FormData();
  form.append('file', fs.createReadStream(filePath));
  const response = await fetch('http://scan-api:4000/scan', {
    method: 'POST',
    body: form,
    headers: form.getHeaders()
  });
  // Check the status before parsing: an error response may not be JSON
  // (for example an HTML page from a proxy), and a parse failure must not
  // hide the underlying HTTP error.
  if (!response.ok) {
    let detail = response.statusText;
    try {
      const body = await response.json();
      if (body && body.error) {
        detail = body.error;
      }
    } catch (parseErr) {
      // Non-JSON error body — fall back to the status text.
    }
    throw new Error('Scan API error: ' + detail);
  }
  return response.json(); // { verdict, safe, filename, size }
}
// Example usage: scan one file and act on the verdict.
scanWithApi('./uploads/document.docx')
  .then(result => {
    if (!result.safe) {
      console.error('File rejected:', result.verdict);
    } else {
      console.log('File is safe.');
    }
  })
  .catch(err => {
    // A rejected request produced no verdict, so the file must not be treated
    // as safe. Handling it here also avoids an unhandled promise rejection.
    console.error('Scan request failed:', err.message);
  });
Python
import requests
def scan_file(path: str, timeout: float = 300.0) -> dict:
    """Upload a file to the scan API and return the parsed JSON response.

    Args:
        path: Path of the file to upload; it is opened in binary mode.
        timeout: Seconds passed to ``requests.post`` as its timeout. Without
            one, the call can block indefinitely if the scan service stops
            responding.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status
            (raised by ``raise_for_status``).
    """
    with open(path, 'rb') as f:
        response = requests.post(
            'http://scan-api:4000/scan',
            files={'file': f},
            timeout=timeout,
        )
    response.raise_for_status()
    return response.json()


result = scan_file('./uploads/report.pdf')
print(result['verdict'])  # 'clean', 'malicious', or 'scan_error'
Dockerize the scanning API
# Dockerfile
FROM node:20-slim

# Install ClamAV and drop the apt package lists afterwards to keep the layer small
RUN apt-get update \
 && apt-get install -y --no-install-recommends clamav \
 && rm -rf /var/lib/apt/lists/*

# Download virus definitions at build time
# Mount a persistent volume in production to avoid re-downloading on every deploy
# "|| true" lets the build continue even when the download fails
RUN freshclam --no-warnings || true

WORKDIR /app

# Copy the manifests first so the dependency layer is cached until they change
COPY package*.json ./
RUN npm ci --omit=dev
COPY server.js .

ENV PORT=4000
EXPOSE 4000
CMD ["node", "server.js"]
# docker-compose.yml — run alongside your main application
version: '3.9'
services:
  app:
    build: ./app
    environment:
      - SCAN_API_URL=http://scan-api:4000
    depends_on:
      # Start the app only after the scanner's healthcheck passes
      scan-api:
        condition: service_healthy
  scan-api:
    build: ./scan-api
    environment:
      - MAX_FILE_SIZE=104857600 # 100 MB
    healthcheck:
      # The node:20-slim base image does not include wget or curl, so probe
      # /health with Node's built-in fetch; exit 0 only on a 2xx response.
      test: ['CMD', 'node', '-e', "fetch('http://localhost:4000/health').then((r) => process.exit(r.ok ? 0 : 1)).catch(() => process.exit(1))"]
      interval: 30s
      start_period: 60s
For a more advanced deployment using a shared clamd daemon instead
of per-request clamscan processes, see
Running pompelmi with ClamAV in Docker Compose.