Scanning files inside a ZIP archive in Node.js
When a user uploads a ZIP archive, scanning only the outer ZIP file is often insufficient: ClamAV will detect malware it has signatures for inside the archive, but it is limited by its internal recursion and scan-size limits. For applications that need to process the contents of a ZIP — or that store individual files from the archive — the right approach is to extract each entry, scan it individually with pompelmi, and reject the whole batch if any entry is malicious.
Approach
- Accept the ZIP upload via Multer, writing to a temp path.
- Open the ZIP with yauzl (a streaming ZIP reader).
- For each entry: enforce an uncompressed size limit, extract to a unique temp path, scan with pompelmi.
- If any entry returns Verdict.Malicious or Verdict.ScanError, abort and delete all extracted temp files.
- If all entries pass, proceed with storage.
- Always delete both the original ZIP and all extracted temp files in a finally block.
Install
npm install pompelmi yauzl multer express
Scan a ZIP entry by entry
yauzl is the standard streaming ZIP parser for Node.js — it
reads entries without decompressing the entire archive into memory at once.
Wrap its callback API in a Promise for clean async/await usage:
// lib/scanZip.js
const yauzl = require('yauzl');
const { scan, Verdict } = require('pompelmi');
const { createWriteStream, unlinkSync, existsSync } = require('fs');
const { pipeline } = require('stream/promises');
const { join } = require('path');
const { tmpdir } = require('os');
const { randomBytes } = require('crypto');
// Per-entry cap on decompressed output: blocks a single oversized entry from
// exhausting disk space during extraction.
const MAX_UNCOMPRESSED_BYTES = 200 * 1024 * 1024; // 200 MB per entry
// Cap on the number of entries: blocks archives that flood temp storage with
// thousands of small files.
const MAX_ENTRIES = 100;
/**
 * Scan every file entry in a ZIP archive.
 *
 * @param {string} zipPath - Path to the ZIP archive on disk.
 * @returns {Promise<Array<{name: string, verdict: *}>>} One result per file entry.
 * @throws On I/O errors, when the archive exceeds MAX_ENTRIES, when an entry
 *   exceeds MAX_UNCOMPRESSED_BYTES, or when any entry scans as malicious /
 *   fails to scan. Every reject path deletes all temp files extracted so far.
 */
async function scanZipContents(zipPath) {
  return new Promise((resolve, reject) => {
    const results = [];
    const tempFiles = [];

    // Best-effort removal of every extracted temp file. An unlink failure
    // (e.g. the file was already removed) must neither mask the original
    // error nor skip the remaining deletions, so each unlink is guarded.
    const cleanup = () => {
      for (const p of tempFiles) {
        try {
          if (existsSync(p)) unlinkSync(p);
        } catch { /* best effort */ }
      }
    };

    yauzl.open(zipPath, { lazyEntries: true, autoClose: true }, (err, zipfile) => {
      if (err) return reject(err);
      if (zipfile.entryCount > MAX_ENTRIES) {
        zipfile.close();
        return reject(new Error(`Archive contains too many entries (max ${MAX_ENTRIES}).`));
      }
      zipfile.readEntry();
      zipfile.on('entry', async (entry) => {
        // Skip directories (their names end with a slash).
        if (/\/$/.test(entry.fileName)) {
          zipfile.readEntry();
          return;
        }
        // Enforce the per-entry uncompressed size limit before extracting.
        // Fix: earlier entries may already be on disk, so clean them up on
        // this reject path too (the original leaked them here).
        if (entry.uncompressedSize > MAX_UNCOMPRESSED_BYTES) {
          cleanup();
          zipfile.close();
          return reject(new Error(
            `Entry "${entry.fileName}" exceeds the maximum allowed uncompressed size.`
          ));
        }
        // Derive the extension from the basename only, and keep it only when
        // it is a short alphanumeric token. The original split('.').pop() on
        // the full entry name breaks for extensionless entries with a path
        // (e.g. "src/Makefile" -> ext "src/makefile"), yielding a temp path
        // inside a nonexistent directory and failing the batch with ENOENT.
        const base = entry.fileName.split('/').pop() ?? '';
        const dot = base.lastIndexOf('.');
        const rawExt = dot > 0 ? base.slice(dot + 1).toLowerCase() : '';
        const ext = /^[a-z0-9]{1,16}$/.test(rawExt) ? rawExt : 'bin';
        const tmpPath = join(tmpdir(), randomBytes(16).toString('hex') + '.' + ext);
        tempFiles.push(tmpPath);
        try {
          // Extract this entry to its temp file.
          await new Promise((res, rej) => {
            zipfile.openReadStream(entry, (streamErr, readStream) => {
              if (streamErr) return rej(streamErr);
              pipeline(readStream, createWriteStream(tmpPath)).then(res).catch(rej);
            });
          });
          const verdict = await scan(tmpPath);
          results.push({ name: entry.fileName, verdict });
          if (verdict === Verdict.Malicious || verdict === Verdict.ScanError) {
            // Abort the whole batch: remove everything extracted so far.
            cleanup();
            zipfile.close();
            return reject(Object.assign(
              new Error(`Entry "${entry.fileName}" failed scan: ${verdict.description}`),
              { verdict, entry: entry.fileName }
            ));
          }
          zipfile.readEntry();
        } catch (entryErr) {
          cleanup();
          zipfile.close();
          reject(entryErr);
        }
      });
      zipfile.on('end', () => {
        // Every entry scanned clean; the extracted copies are no longer needed.
        cleanup();
        resolve(results);
      });
      zipfile.on('error', (zipErr) => {
        cleanup();
        reject(zipErr);
      });
    });
  });
}
module.exports = { scanZipContents };
Upload endpoint
const express = require('express');
const multer = require('multer');
const { scanZipContents } = require('./lib/scanZip');
const fs = require('fs');
const os = require('os');
const app = express();
// Multer writes each incoming upload to a randomly named file in the OS temp
// dir; the fileSize limit applies to the compressed archive as uploaded.
const upload = multer({
dest: os.tmpdir(),
limits: { fileSize: 50 * 1024 * 1024 }, // 50 MB compressed
});
// POST /upload/zip — accept a single ZIP upload, scan every entry, and return
// a per-entry verdict. The uploaded archive is always removed from the temp
// dir afterwards, whether the scan succeeds or fails.
app.post('/upload/zip', upload.single('file'), async (req, res) => {
  if (!req.file) return res.status(400).json({ error: 'No file provided.' });
  const uploadedPath = req.file.path;
  const extension = req.file.originalname.split('.').pop()?.toLowerCase();
  try {
    if (extension !== 'zip') {
      return res.status(400).json({ error: 'Only ZIP files are accepted.' });
    }
    const scanned = await scanZipContents(uploadedPath);
    const entries = [];
    for (const { name, verdict } of scanned) {
      entries.push({ name, verdict: verdict.description });
    }
    return res.json({ status: 'ok', entries });
  } catch (err) {
    // Rejections raised by the scanner carry a `verdict` property; those are
    // client errors (the upload itself is bad), everything else is a 500.
    const statusCode = err.verdict ? 400 : 500;
    return res.status(statusCode).json({ error: err.message });
  } finally {
    if (fs.existsSync(uploadedPath)) fs.unlinkSync(uploadedPath);
  }
});
Size and entry count limits
Without limits, a malicious ZIP can exhaust your server's disk space or memory during extraction. The library above enforces two:
| Limit | Default | What it prevents |
|---|---|---|
| `MAX_ENTRIES` | 100 entries | Archives with thousands of small files that flood temp storage |
| `MAX_UNCOMPRESSED_BYTES` | 200 MB per entry | Single large entries that exhaust disk space during extraction |
| Multer `fileSize` | 50 MB (compressed) | Oversized ZIP files before they are written to disk |
Next steps
- Worried about ZIP bomb attacks specifically? See Preventing ZIP Bomb attacks in Node.js.
- Handling encrypted archives? See How to handle encrypted/password-protected files during scan.
- Scanning multiple uploads in one request? See Scanning multiple file uploads in a single request.