zip download of single pdfs

2026-05-17 02:57:39 +02:00
parent 13097b73fc
commit 4b0046a943
11 changed files with 272 additions and 27 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,19 @@ All notable changes to `pdf-tools` are documented here.

 The project follows a pragmatic versioning scheme while the app is still below `1.0.0`: minor versions mark coherent user-facing milestones; patch versions mark fixes and small improvements.

+## 0.3.1 — Split ZIP export release
+
+### Added
+
+- Added browser-side ZIP packaging for split results via `src/pdf/pdfZipService.ts`.
+- Added a “Download all as ZIP” link next to the individual single-page split downloads.
+- Added tests for split-result ZIP creation, ZIP entry name sanitization/deduplication, ZIP filename generation, and empty-result handling.
+
+### Changed
+
+- Extended generated-output handling so split downloads can include both individual page files and one ZIP archive, with object URL cleanup handled by `usePdfGeneratedOutputs`.
+- Bumped the app/package version to `0.3.1`.
+
 ## 0.3.0 — Selection workspace and maintenance release

 ### Added
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@

 Current hosted version: <https://pdftools.add-ideas.de>

-Current release: **v0.3.0 — Selection workspace and maintenance release**. See [`CHANGELOG.md`](CHANGELOG.md) for release notes and milestone history.
+Current release: **v0.3.1 — Split ZIP export release**. See [`CHANGELOG.md`](CHANGELOG.md) for release notes and milestone history.

 The app is a static React/Vite single-page application. There is no backend service, no server-side queue, and no server-side document storage. When hosted correctly, the server only delivers HTML, JavaScript, CSS, and static assets; PDF processing happens in the user's browser.

@@ -14,7 +14,7 @@ Many everyday PDF tasks are not full document-authoring tasks. They are page-wor

 - remove pages before sending a document;
 - rotate scanned pages;
- split a PDF into single-page files;
+- split a PDF into single-page files and download them individually or as one ZIP archive;
 - merge another PDF into the current document;
 - extract a subset of pages;
 - reorder pages visually;
@@ -72,6 +72,7 @@ This makes the project especially useful for self-hosted environments, public-se
 - Extract selected pages into a new PDF.
 - Open selected pages as a new active workspace for continued editing.
 - Split the source PDF into single-page PDFs.
+- Download all split results as one ZIP archive.
 - Merge another PDF by replacing, appending, or inserting it into the current workspace.

 ### Keyboard shortcuts
@@ -227,10 +228,10 @@ The application version shown in the header is defined in `src/version.ts`. The
 The current development baseline is:

 ```text
-v0.3.0 — Selection workspace and maintenance release
+v0.3.1 — Split ZIP export release
 ```

-This release preserves the browser-only workspace baseline and adds the first post-refactor feature: opening selected pages as a new active workspace. Workspace state, thumbnail handling, generated download URLs, page-grid components, tests, type-checking, linting, and formatting are separated enough to support additional feature work without turning `App.tsx` back into a monolith.
+This release preserves the browser-only workspace baseline and adds split-result ZIP downloads on top of the selection-workspace feature. Workspace state, thumbnail handling, generated download URLs, page-grid components, tests, type-checking, linting, and formatting are separated enough to support additional feature work without turning `App.tsx` back into a monolith.

 ## Project structure

@@ -239,7 +240,7 @@ src/
  App.tsx                         Main application orchestration and UI wiring
  components/
    ActionDialog.tsx              Reusable confirmation/action dialog
-    ActionsPanel.tsx              Export, extract, and split actions
+    ActionsPanel.tsx              Export, extract, split, and ZIP download actions
    FileLoader.tsx                PDF file loading
    HelpDialog.tsx                In-app tutorial and shortcut reference
    Layout.tsx                    Application shell/header
@@ -249,6 +250,7 @@ src/
  pdf/
    pdfService.ts                 pdf-lib operations: load, merge, split, export
    pdfThumbnailService.ts        pdf.js thumbnail rendering
+    pdfZipService.ts              Browser-side ZIP packaging for split results
    pdfTypes.ts                   PDF-related types
  workspace/
    workspaceCommands.ts          Command model for undo/redo
@@ -306,7 +308,7 @@ src/
 ### Milestone 5: Export and power tools

 - [ ] Basic text extraction.
- [ ] ZIP export for split results.
+- [x] ZIP export for split results.
 - [ ] Optimize/compress MVP.
 - [ ] Carefully scoped encrypted PDF handling.

--- a/package-lock.json
+++ b/package-lock.json
@@ -1,13 +1,14 @@
 {
  "name": "pdf-tools",
-  "version": "0.3.0",
+  "version": "0.3.1",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "pdf-tools",
-      "version": "0.2.2",
+      "version": "0.3.1",
      "dependencies": {
+        "fflate": "^0.8.3",
        "pdf-lib": "^1.17.1",
        "pdfjs-dist": "^5.7.284",
        "react": "^19.2.6",
@@ -2526,6 +2527,12 @@
        }
      }
    },
+    "node_modules/fflate": {
+      "version": "0.8.3",
+      "resolved": "https://registry.npmjs.org/fflate/-/fflate-0.8.3.tgz",
+      "integrity": "sha512-tbZNuJrLwGUp3zshBtdy4W+ORxZuIh8a5ilyIEQDC5rY1f3U20JMry0Ll3WBzU58EZKsEuJFXhb5gwv8CsPvgA==",
+      "license": "MIT"
+    },
    "node_modules/file-entry-cache": {
      "version": "8.0.0",
      "resolved": "https://registry.npmjs.org/file-entry-cache/-/file-entry-cache-8.0.0.tgz",
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
  "name": "pdf-tools",
-  "version": "0.3.0",
+  "version": "0.3.1",
  "private": true,
  "type": "module",
  "scripts": {
@@ -16,6 +16,7 @@
    "check": "npm run typecheck && npm run lint && npm run test && npm run build"
  },
  "dependencies": {
+    "fflate": "^0.8.3",
    "pdf-lib": "^1.17.1",
    "pdfjs-dist": "^5.7.284",
    "react": "^19.2.6",
--- a/src/App.tsx
+++ b/src/App.tsx
@@ -38,6 +38,10 @@ import {
 } from './pdf/pdfService';
 import { usePdfThumbnails } from './pdf/usePdfThumbnails';
 import { usePdfGeneratedOutputs } from './hooks/usePdfGeneratedOutputs';
+import {
+  createSplitResultsZip,
+  createSplitZipFilename,
+} from './pdf/pdfZipService';
 import {
  createSelectionPdfName,
  createSelectionWorkspaceName,
@@ -85,6 +89,7 @@ const App: React.FC = () => {

  const {
    splitDownloads,
+    splitZipDownload,
    subsetDownload,
    exportDownload,
    replaceSplitResults,
@@ -916,7 +921,11 @@ const App: React.FC = () => {
    setIsBusy(true);
    try {
      const result = await splitIntoSinglePages(pdf);
-      replaceSplitResults(result);
+      const zipBlob = await createSplitResultsZip(result);
+      replaceSplitResults(result, {
+        blob: zipBlob,
+        filename: createSplitZipFilename(pdf.name),
+      });
    } catch (e) {
      console.error(e);
      setError('Error while splitting PDF (see console).');
@@ -1242,6 +1251,7 @@ const App: React.FC = () => {
        onOpenSelectionAsWorkspace={handleOpenSelectionAsWorkspace}
        onExportReordered={handleExportReordered}
        splitDownloads={splitDownloads}
+        splitZipDownload={splitZipDownload}
        subsetDownload={subsetDownload}
        exportDownload={exportDownload}
      />
--- a/src/components/ActionsPanel.tsx
+++ b/src/components/ActionsPanel.tsx
@@ -16,6 +16,7 @@ interface ActionsPanelProps {
  onExportReordered: () => void;

  splitDownloads: SplitPdfDownload[];
+  splitZipDownload: PdfDownload | null;
  subsetDownload: PdfDownload | null;
  exportDownload: PdfDownload | null;
 }
@@ -29,6 +30,7 @@ const ActionsPanel: React.FC<ActionsPanelProps> = ({
  onOpenSelectionAsWorkspace,
  onExportReordered,
  splitDownloads,
+  splitZipDownload,
  subsetDownload,
  exportDownload,
 }) => {
@@ -132,7 +134,18 @@ const ActionsPanel: React.FC<ActionsPanelProps> = ({
      {splitDownloads.length > 0 && (
        <div style={{ marginTop: '0.75rem', fontSize: '0.9rem' }}>
          <strong>Single-page PDFs:</strong>
-          <div>
+          {splitZipDownload && (
+            <div style={{ marginTop: '0.25rem' }}>
+              <a
+                className="download-link"
+                href={splitZipDownload.url}
+                download={splitZipDownload.filename}
+              >
+                Download all as ZIP ({splitDownloads.length} files)
+              </a>
+            </div>
+          )}
+          <div style={{ marginTop: '0.25rem' }}>
            {splitDownloads.map((download) => (
              <a
                key={download.id}
--- a/src/components/HelpDialog.tsx
+++ b/src/components/HelpDialog.tsx
@@ -49,11 +49,15 @@ const tutorialSteps = [
    body: 'Extract selected pages when you only need a download. Open the selection as a new workspace when you want to continue working on that subset.',
  },
  {
-    title: '5. Save your workspace or export a PDF',
+    title: '5. Split and download results',
+    body: 'Splitting creates individual one-page PDF downloads and a ZIP archive that contains all generated page files.',
+  },
+  {
+    title: '6. Save your workspace or export a PDF',
    body: 'Saving a workspace keeps the current working state in this browser. Exporting creates a new PDF file for download.',
  },
  {
-    title: '6. Use history deliberately',
+    title: '7. Use history deliberately',
    body: 'Each workspace operation is stored as a command with label and timestamp. Undo and redo walk through that command history.',
  },
 ];
--- a/src/hooks/usePdfGeneratedOutputs.ts
+++ b/src/hooks/usePdfGeneratedOutputs.ts
@@ -11,6 +11,11 @@ export interface SplitPdfDownload extends PdfDownload {
  pageIndex: number;
 }

+/**
+ * A generated file held in memory as a Blob, paired with the filename to
+ * offer for download. `replaceSplitResults` accepts one as the optional ZIP
+ * archive that accompanies the per-page split downloads.
+ */
+export interface PdfBlobResult {
+  /** Binary content of the generated file. */
+  blob: Blob;
+  /** Filename handed to `createDownload` together with the blob. */
+  filename: string;
+}
+
 function revokeDownload(download: PdfDownload | null): void {
  if (download) {
    URL.revokeObjectURL(download.url);
@@ -37,12 +42,17 @@ export function usePdfGeneratedOutputs() {
  const [exportDownload, setExportDownload] = useState<PdfDownload | null>(
    null
  );
+  const [splitZipDownload, setSplitZipDownload] = useState<PdfDownload | null>(
+    null
+  );

  const splitDownloadsRef = useRef<SplitPdfDownload[]>([]);
  const subsetDownloadRef = useRef<PdfDownload | null>(null);
  const exportDownloadRef = useRef<PdfDownload | null>(null);
+  const splitZipDownloadRef = useRef<PdfDownload | null>(null);

-  const replaceSplitResults = useCallback((results: SplitResult[]) => {
+  const replaceSplitResults = useCallback(
+    (results: SplitResult[], zipResult?: PdfBlobResult) => {
      const nextDownloads: SplitPdfDownload[] = results.map((result) => ({
        ...createDownload(
          `split-${result.pageIndex}-${result.filename}`,
@@ -52,15 +62,31 @@ export function usePdfGeneratedOutputs() {
        pageIndex: result.pageIndex,
      }));

+      const nextZipDownload = zipResult
+        ? createDownload('split-zip', zipResult.filename, zipResult.blob)
+        : null;
+
      revokeDownloads(splitDownloadsRef.current);
+      revokeDownload(splitZipDownloadRef.current);
+
      splitDownloadsRef.current = nextDownloads;
+      splitZipDownloadRef.current = nextZipDownload;
+
      setSplitDownloads(nextDownloads);
-  }, []);
+      setSplitZipDownload(nextZipDownload);
+    },
+    []
+  );

  const clearSplitResults = useCallback(() => {
    revokeDownloads(splitDownloadsRef.current);
+    revokeDownload(splitZipDownloadRef.current);
+
    splitDownloadsRef.current = [];
+    splitZipDownloadRef.current = null;
+
    setSplitDownloads([]);
+    setSplitZipDownload(null);
  }, []);

  const replaceSubsetResult = useCallback((blob: Blob, filename: string) => {
@@ -95,14 +121,17 @@ export function usePdfGeneratedOutputs() {
    revokeDownloads(splitDownloadsRef.current);
    revokeDownload(subsetDownloadRef.current);
    revokeDownload(exportDownloadRef.current);
+    revokeDownload(splitZipDownloadRef.current);

    splitDownloadsRef.current = [];
    subsetDownloadRef.current = null;
    exportDownloadRef.current = null;
+    splitZipDownloadRef.current = null;

    setSplitDownloads([]);
    setSubsetDownload(null);
    setExportDownload(null);
+    setSplitZipDownload(null);
  }, []);

  useEffect(() => {
@@ -110,6 +139,7 @@ export function usePdfGeneratedOutputs() {
      revokeDownloads(splitDownloadsRef.current);
      revokeDownload(subsetDownloadRef.current);
      revokeDownload(exportDownloadRef.current);
+      revokeDownload(splitZipDownloadRef.current);
    };
  }, []);

@@ -117,6 +147,7 @@ export function usePdfGeneratedOutputs() {
    splitDownloads,
    subsetDownload,
    exportDownload,
+    splitZipDownload,
    replaceSplitResults,
    clearSplitResults,
    replaceSubsetResult,
--- a/src/pdf/pdfZipService.test.ts
+++ b/src/pdf/pdfZipService.test.ts
@@ -0,0 +1,86 @@
+import { describe, expect, it } from 'vitest';
+import { unzipSync } from 'fflate';
+import { createSplitResultsZip, createSplitZipFilename } from './pdfZipService';
+import type { SplitResult } from './pdfTypes';
+
+/**
+ * Test helper: reads a ZIP blob fully into memory and returns its entries
+ * keyed by entry name.
+ */
+async function unzipBlob(blob: Blob): Promise<Record<string, Uint8Array>> {
+  const zipBytes = new Uint8Array(await blob.arrayBuffer());
+  return unzipSync(zipBytes);
+}
+
+// Tests for the split-result ZIP helpers. Where archive contents matter, the
+// ZIP is built with createSplitResultsZip and read back through unzipBlob.
+describe('pdfZipService', () => {
+  it('creates a ZIP archive from split PDF blobs', async () => {
+    const results: SplitResult[] = [
+      {
+        pageIndex: 0,
+        filename: 'document_page_001.pdf',
+        blob: new Blob([new Uint8Array([1, 2, 3])], {
+          type: 'application/pdf',
+        }),
+      },
+      {
+        pageIndex: 1,
+        filename: 'document_page_002.pdf',
+        blob: new Blob([new Uint8Array([4, 5, 6])], {
+          type: 'application/pdf',
+        }),
+      },
+    ];
+
+    const zipBlob = await createSplitResultsZip(results);
+    const entries = await unzipBlob(zipBlob);
+
+    // Entry order and payload bytes must survive the round trip unchanged.
+    expect(zipBlob.type).toBe('application/zip');
+    expect(Object.keys(entries)).toEqual([
+      'document_page_001.pdf',
+      'document_page_002.pdf',
+    ]);
+    expect(Array.from(entries['document_page_001.pdf'])).toEqual([1, 2, 3]);
+    expect(Array.from(entries['document_page_002.pdf'])).toEqual([4, 5, 6]);
+  });
+
+  it('sanitizes and deduplicates ZIP entry names', async () => {
+    // Two identical names containing a path separator, plus one empty name.
+    const results: SplitResult[] = [
+      {
+        pageIndex: 0,
+        filename: '../page.pdf',
+        blob: new Blob([new Uint8Array([1])], { type: 'application/pdf' }),
+      },
+      {
+        pageIndex: 1,
+        filename: '../page.pdf',
+        blob: new Blob([new Uint8Array([2])], { type: 'application/pdf' }),
+      },
+      {
+        pageIndex: 2,
+        filename: '',
+        blob: new Blob([new Uint8Array([3])], { type: 'application/pdf' }),
+      },
+    ];
+
+    const zipBlob = await createSplitResultsZip(results);
+    const entries = await unzipBlob(zipBlob);
+
+    // Expected: the separator becomes '_', the duplicate gets a '_2' suffix
+    // before its extension, and the empty name falls back to a name derived
+    // from the one-based page number.
+    expect(Object.keys(entries)).toEqual([
+      '.._page.pdf',
+      '.._page_2.pdf',
+      'page_003.pdf',
+    ]);
+  });
+
+  it('creates a readable ZIP filename from the source PDF name', () => {
+    // Only a trailing '.pdf' (any letter case) is stripped; inner dots stay,
+    // and an empty source name yields the 'document' default.
+    expect(createSplitZipFilename('contract.pdf')).toBe(
+      'contract_split_pages.zip'
+    );
+    expect(createSplitZipFilename('contract.final.PDF')).toBe(
+      'contract.final_split_pages.zip'
+    );
+    expect(createSplitZipFilename('')).toBe('document_split_pages.zip');
+  });
+
+  it('rejects empty split results', async () => {
+    await expect(createSplitResultsZip([])).rejects.toThrow(
+      'Cannot create a ZIP archive without split results.'
+    );
+  });
+});
--- a/src/pdf/pdfZipService.ts
+++ b/src/pdf/pdfZipService.ts
@@ -0,0 +1,78 @@
+import { zipSync } from 'fflate';
+import type { SplitResult } from './pdfTypes';
+
+/**
+ * Wraps raw bytes in a Blob with the given MIME type.
+ *
+ * The bytes are copied into a freshly allocated buffer first, so the blob
+ * holds exactly `bytes.byteLength` bytes even when `bytes` is a view into a
+ * larger buffer.
+ */
+function bytesToBlob(bytes: Uint8Array, type: string): Blob {
+  const copy = new Uint8Array(new ArrayBuffer(bytes.byteLength));
+  copy.set(bytes);
+
+  return new Blob([copy.buffer], { type });
+}
+
+/**
+ * Returns `value` without ASCII control characters (code units 0–31 and
+ * 127). All other characters are kept in their original order.
+ */
+function removeControlCharacters(value: string): string {
+  let kept = '';
+
+  // Iterating a string yields whole code points, so surrogate pairs are
+  // appended as a unit and never split.
+  for (const symbol of value) {
+    const unit = symbol.charCodeAt(0);
+    const isControl = unit <= 31 || unit === 127;
+
+    if (!isControl) {
+      kept += symbol;
+    }
+  }
+
+  return kept;
+}
+
+/**
+ * Turns an arbitrary filename into a flat ZIP entry name.
+ *
+ * Control characters are removed, every run of `/` or `\` is replaced by a
+ * single `_` so the entry cannot introduce directories, and surrounding
+ * whitespace is trimmed.
+ *
+ * @param filename - Name proposed for the entry.
+ * @param fallback - Name used when nothing usable remains after cleaning.
+ * @returns The cleaned name, or `fallback` when the cleaned name is empty or
+ *   is one of the dot segments `.` / `..`.
+ */
+function safeZipEntryName(filename: string, fallback: string): string {
+  const cleaned = removeControlCharacters(filename)
+    .replace(/[\\/]+/g, '_')
+    .trim();
+
+  // '.' and '..' contain no separator but still denote the current/parent
+  // directory rather than a file, so they are not usable as entry names.
+  const isDotSegment = cleaned === '.' || cleaned === '..';
+
+  return cleaned.length > 0 && !isDotSegment ? cleaned : fallback;
+}
+
+/**
+ * Reserves an entry name that is not yet present in `usedNames`.
+ *
+ * A free name is returned unchanged. A taken name gets `_2`, `_3`, …
+ * inserted before its extension (the part from the last dot on; a dot at
+ * index 0 does not count as an extension) until an unused name is found.
+ * The returned name is added to `usedNames`, which is mutated in place.
+ */
+function uniqueZipEntryName(filename: string, usedNames: Set<string>): string {
+  let candidate = filename;
+
+  if (usedNames.has(candidate)) {
+    const lastDot = filename.lastIndexOf('.');
+    const splitAtDot = lastDot > 0;
+    const stem = splitAtDot ? filename.slice(0, lastDot) : filename;
+    const suffix = splitAtDot ? filename.slice(lastDot) : '';
+
+    // Numbering starts at 2 because the unsuffixed name is the first use.
+    for (let attempt = 2; usedNames.has(candidate); attempt += 1) {
+      candidate = `${stem}_${attempt}${suffix}`;
+    }
+  }
+
+  usedNames.add(candidate);
+  return candidate;
+}
+
+/**
+ * Builds the download filename for the ZIP archive of split pages.
+ *
+ * @param pdfName - Name of the source PDF; may be empty.
+ * @returns `<base>_split_pages.zip`, where `<base>` is `pdfName` without
+ *   surrounding whitespace and without a trailing `.pdf` (any letter case),
+ *   or `document` when nothing remains.
+ */
+export function createSplitZipFilename(pdfName: string): string {
+  // Trim before stripping so a name with trailing whitespace still loses its
+  // '.pdf' suffix, and again afterwards for whitespace left before the dot.
+  const baseName =
+    pdfName
+      .trim()
+      .replace(/\.pdf$/i, '')
+      .trim() || 'document';
+
+  return `${baseName}_split_pages.zip`;
+}
+
+/**
+ * Packs split-page results into a single ZIP archive.
+ *
+ * Each result becomes one entry. Its name is the sanitized `filename`, or
+ * `page_NNN.pdf` (one-based page number, zero-padded to three digits) when
+ * no usable name remains; repeated names are made unique. Entries keep the
+ * order of `results`.
+ *
+ * @param results - Split results to pack; must not be empty.
+ * @returns A Blob of type `application/zip`.
+ * @throws Error when `results` is empty (surfaced as a rejected promise).
+ */
+export async function createSplitResultsZip(
+  results: SplitResult[]
+): Promise<Blob> {
+  if (results.length === 0) {
+    throw new Error('Cannot create a ZIP archive without split results.');
+  }
+
+  // Names are assigned synchronously and in input order, because
+  // deduplication depends on which names were handed out earlier.
+  const usedNames = new Set<string>();
+  const pending = results.map((result) => {
+    const fallback = `page_${String(result.pageIndex + 1).padStart(3, '0')}.pdf`;
+    const entryName = uniqueZipEntryName(
+      safeZipEntryName(result.filename, fallback),
+      usedNames
+    );
+
+    return { entryName, blob: result.blob };
+  });
+
+  // The blob reads do not depend on each other, so start them together
+  // instead of awaiting one per loop iteration. Promise.all preserves order.
+  const loaded = await Promise.all(
+    pending.map(
+      async ({ entryName, blob }): Promise<[string, Uint8Array]> => [
+        entryName,
+        new Uint8Array(await blob.arrayBuffer()),
+      ]
+    )
+  );
+
+  const entries: Record<string, Uint8Array> = Object.fromEntries(loaded);
+
+  const zippedBytes = zipSync(entries, { level: 6 });
+  return bytesToBlob(zippedBytes, 'application/zip');
+}
--- a/src/version.ts
+++ b/src/version.ts
@@ -1 +1 @@
-export const APP_VERSION = '0.3.0';
+export const APP_VERSION = '0.3.1';