Scanning files inside a ZIP archive in Node.js

When a user uploads a ZIP archive, scanning only the outer ZIP file is often insufficient: ClamAV will detect malware it has signatures for inside the archive, but it is limited by its internal recursion and scan-size limits. For applications that need to process the contents of a ZIP — or that store individual files from the archive — the right approach is to extract each entry, scan it individually with pompelmi, and reject the whole batch if any entry is malicious.

This is different from ZIP bomb prevention, which protects your scanner from resource exhaustion. This guide is about scanning the legitimate contents of an archive before extracting and storing individual files.

Approach

  1. Accept the ZIP upload via Multer, writing to a temp path.
  2. Open the ZIP with yauzl (a streaming ZIP reader).
  3. For each entry: enforce an uncompressed size limit, extract to a unique temp path, scan with pompelmi.
  4. If any entry returns Verdict.Malicious or Verdict.ScanError, abort and delete all extracted temp files.
  5. If all entries pass, proceed with storage.
  6. Always delete both the original ZIP and all extracted temp files in a finally block.

Install

npm install pompelmi yauzl multer express

Scan a ZIP entry by entry

yauzl is the standard streaming ZIP parser for Node.js — it reads entries without decompressing the entire archive into memory at once. Wrap its callback API in a Promise for clean async/await usage:

// lib/scanZip.js
const yauzl   = require('yauzl');
const { scan, Verdict } = require('pompelmi');
const { createWriteStream, unlinkSync, existsSync } = require('fs');
const { pipeline } = require('stream/promises');
const { join } = require('path');
const { tmpdir } = require('os');
const { randomBytes } = require('crypto');

const MAX_UNCOMPRESSED_BYTES = 200 * 1024 * 1024; // 200 MB per entry
const MAX_ENTRIES = 100; // hard cap on entry count, rejects archives that flood temp storage

/**
 * Scan every file entry in a ZIP archive.
 * Returns an array of { name, verdict } objects.
 * Throws on I/O errors. Rejects immediately if any entry is malicious.
 */
/**
 * Scan every file entry in a ZIP archive.
 *
 * Opens the archive with yauzl in lazy-entry mode, extracts each file
 * entry to a unique temp path, scans it with pompelmi, and collects
 * one { name, verdict } result per entry. All extracted temp files are
 * removed before the promise settles — on success, on a malicious
 * verdict, on a limit violation, and on I/O errors.
 *
 * @param {string} zipPath - Path to the ZIP archive on disk.
 * @returns {Promise<Array<{name: string, verdict: *}>>} Rejects with an
 *   Error (carrying `verdict` and `entry` properties when a scan
 *   failed) if any entry is malicious, exceeds a limit, or cannot be
 *   read.
 */
async function scanZipContents(zipPath) {
  return new Promise((resolve, reject) => {
    const results   = [];
    const tempFiles = [];

    // Delete every temp file extracted so far. Safe to call more than
    // once: each path is checked for existence before unlinking.
    const cleanupTempFiles = () => {
      tempFiles.forEach((p) => { if (existsSync(p)) unlinkSync(p); });
    };

    yauzl.open(zipPath, { lazyEntries: true, autoClose: true }, (err, zipfile) => {
      if (err) return reject(err);

      if (zipfile.entryCount > MAX_ENTRIES) {
        zipfile.close();
        return reject(new Error(`Archive contains too many entries (max ${MAX_ENTRIES}).`));
      }

      zipfile.readEntry();

      zipfile.on('entry', async (entry) => {
        // Directory entries end with '/': nothing to extract or scan.
        if (/\/$/.test(entry.fileName)) {
          zipfile.readEntry();
          return;
        }

        // Enforce the per-entry uncompressed size limit BEFORE extracting.
        if (entry.uncompressedSize > MAX_UNCOMPRESSED_BYTES) {
          // FIX: previously-extracted temp files were leaked on this path.
          cleanupTempFiles();
          zipfile.close();
          return reject(new Error(
            `Entry "${entry.fileName}" exceeds the maximum allowed uncompressed size.`
          ));
        }

        // Random temp name avoids collisions and Zip Slip path traversal;
        // only the extension is taken from the (untrusted) entry name.
        const ext     = entry.fileName.split('.').pop()?.toLowerCase() ?? 'bin';
        const tmpPath = join(tmpdir(), randomBytes(16).toString('hex') + '.' + ext);
        tempFiles.push(tmpPath);

        try {
          // Extract this entry to its temp file via a streaming pipeline.
          await new Promise((res, rej) => {
            zipfile.openReadStream(entry, (streamErr, readStream) => {
              if (streamErr) return rej(streamErr);
              pipeline(readStream, createWriteStream(tmpPath)).then(res).catch(rej);
            });
          });

          const verdict = await scan(tmpPath);
          results.push({ name: entry.fileName, verdict });

          if (verdict === Verdict.Malicious || verdict === Verdict.ScanError) {
            cleanupTempFiles();
            zipfile.close();
            return reject(Object.assign(
              new Error(`Entry "${entry.fileName}" failed scan: ${verdict.description}`),
              { verdict, entry: entry.fileName }
            ));
          }

          // Pull the next entry; 'end' fires once the last one is consumed,
          // so it cannot fire while an entry is still being scanned.
          zipfile.readEntry();

        } catch (entryErr) {
          cleanupTempFiles();
          zipfile.close();
          reject(entryErr);
        }
      });

      zipfile.on('end', () => {
        cleanupTempFiles();
        resolve(results);
      });

      zipfile.on('error', (zipErr) => {
        cleanupTempFiles();
        reject(zipErr);
      });
    });
  });
}

module.exports = { scanZipContents };

Upload endpoint

const express = require('express');
const multer  = require('multer');
const { scanZipContents } = require('./lib/scanZip');
const fs = require('fs');
const os = require('os');

const app    = express();
// Multer writes each upload to a random file under the OS temp dir;
// fileSize caps the COMPRESSED archive size before it reaches disk.
const upload = multer({
  dest:   os.tmpdir(),
  limits: { fileSize: 50 * 1024 * 1024 },  // 50 MB compressed
});

// POST /upload/zip — accept a single ZIP upload, scan every entry, and
// report a per-entry verdict. The uploaded archive is always deleted
// from temp storage, whether the scan succeeds or fails.
app.post('/upload/zip', upload.single('file'), async (req, res) => {
  if (!req.file) {
    return res.status(400).json({ error: 'No file provided.' });
  }

  const uploadedPath = req.file.path;

  try {
    const extension = req.file.originalname.split('.').pop()?.toLowerCase();
    if (extension !== 'zip') {
      return res.status(400).json({ error: 'Only ZIP files are accepted.' });
    }

    const scanned = await scanZipContents(uploadedPath);
    const entries = scanned.map(({ name, verdict }) => ({
      name,
      verdict: verdict.description,
    }));

    return res.json({ status: 'ok', entries });

  } catch (err) {
    // A scan rejection carries a `verdict` property → client error (400);
    // anything else is an internal failure (500).
    return res.status(err.verdict ? 400 : 500).json({ error: err.message });

  } finally {
    if (fs.existsSync(uploadedPath)) fs.unlinkSync(uploadedPath);
  }
});

Size and entry count limits

Without limits, a malicious ZIP can exhaust your server's disk space or memory during extraction. The library above enforces two:

| Limit | Default | What it prevents |
| --- | --- | --- |
| `MAX_ENTRIES` | 100 entries | Archives with thousands of small files that flood temp storage |
| `MAX_UNCOMPRESSED_BYTES` | 200 MB per entry | Single large entries that exhaust disk space during extraction |
| Multer `fileSize` | 50 MB (compressed) | Oversized ZIP files before they are written to disk |
Tune these limits for your use case. A document archive might cap entries at 10 and size at 20 MB. A developer tool accepting build artefacts might allow 500 MB. There is no universal correct value.

Next steps