mock server, file and folder management

This commit is contained in:
2026-06-12 02:18:30 +02:00
parent b67c8abdc5
commit f3db5fc5cf
28 changed files with 3049 additions and 6 deletions

View File

@@ -0,0 +1 @@
"""Managed file storage services for Multi Seal Mail."""

View File

@@ -0,0 +1,116 @@
from __future__ import annotations

from dataclasses import dataclass, field
from pathlib import Path
from typing import Protocol

import boto3

from app.settings import settings
class StorageBackendError(RuntimeError):
    """Raised when a storage backend cannot complete a requested operation."""
class StorageBackend(Protocol):
    """Structural interface shared by every object-storage backend.

    Objects are addressed by an opaque string key and handled as whole
    byte strings.
    """

    # Short identifier of the backend implementation (e.g. "local", "s3").
    name: str

    def put_bytes(self, key: str, data: bytes, *, content_type: str | None = None) -> None:
        """Store ``data`` under ``key``; ``content_type`` is an optional hint."""
        ...

    def get_bytes(self, key: str) -> bytes:
        """Return the bytes stored under ``key``."""
        ...

    def delete(self, key: str) -> None:
        """Remove the object stored under ``key``."""
        ...

    def exists(self, key: str) -> bool:
        """Report whether an object is stored under ``key``."""
        ...
@dataclass(slots=True)
class LocalFilesystemStorageBackend:
root: Path
name: str = "local"
def __post_init__(self) -> None:
self.root = self.root.expanduser().resolve()
self.root.mkdir(parents=True, exist_ok=True)
def _path(self, key: str) -> Path:
path = (self.root / key).resolve()
if not path.is_relative_to(self.root):
raise StorageBackendError("Storage key escapes local storage root")
return path
def put_bytes(self, key: str, data: bytes, *, content_type: str | None = None) -> None:
path = self._path(key)
path.parent.mkdir(parents=True, exist_ok=True)
path.write_bytes(data)
def get_bytes(self, key: str) -> bytes:
path = self._path(key)
if not path.exists() or not path.is_file():
raise StorageBackendError("Stored object does not exist")
return path.read_bytes()
def delete(self, key: str) -> None:
path = self._path(key)
if path.exists() and path.is_file():
path.unlink()
def exists(self, key: str) -> bool:
path = self._path(key)
return path.exists() and path.is_file()
@dataclass(slots=True)
class S3StorageBackend:
bucket: str
endpoint_url: str
region_name: str
access_key_id: str
secret_access_key: str
name: str = "s3"
@property
def client(self):
return boto3.client(
"s3",
endpoint_url=self.endpoint_url,
region_name=self.region_name,
aws_access_key_id=self.access_key_id,
aws_secret_access_key=self.secret_access_key,
)
def put_bytes(self, key: str, data: bytes, *, content_type: str | None = None) -> None:
kwargs = {"Bucket": self.bucket, "Key": key, "Body": data}
if content_type:
kwargs["ContentType"] = content_type
self.client.put_object(**kwargs)
def get_bytes(self, key: str) -> bytes:
try:
obj = self.client.get_object(Bucket=self.bucket, Key=key)
return obj["Body"].read()
except Exception as exc: # pragma: no cover - depends on S3 backend
raise StorageBackendError(str(exc)) from exc
def delete(self, key: str) -> None:
self.client.delete_object(Bucket=self.bucket, Key=key)
def exists(self, key: str) -> bool:
try:
self.client.head_object(Bucket=self.bucket, Key=key)
return True
except Exception:
return False
def get_storage_backend() -> StorageBackend:
    """Build the storage backend selected by ``settings.file_storage_backend``.

    "local", "filesystem" and "fs" select the local filesystem backend;
    "s3" and "garage" select the S3 backend, where each file-storage specific
    S3 setting falls back to the general S3 setting when it is empty.

    Raises:
        StorageBackendError: if the configured backend name is not supported.
    """
    choice = settings.file_storage_backend.lower().strip()
    if choice in ("local", "filesystem", "fs"):
        local_root = Path(settings.file_storage_local_root)
        return LocalFilesystemStorageBackend(local_root)
    if choice in ("s3", "garage"):
        s3_options = {
            "bucket": settings.file_storage_s3_bucket or settings.s3_bucket,
            "endpoint_url": settings.file_storage_s3_endpoint_url or settings.s3_endpoint_url,
            "region_name": settings.file_storage_s3_region or settings.s3_region,
            "access_key_id": settings.file_storage_s3_access_key_id or settings.s3_access_key_id,
            "secret_access_key": settings.file_storage_s3_secret_access_key or settings.s3_secret_access_key,
        }
        return S3StorageBackend(**s3_options)
    raise StorageBackendError(f"Unsupported file storage backend: {settings.file_storage_backend}")

View File

@@ -0,0 +1,73 @@
from __future__ import annotations
import re
from pathlib import PurePosixPath
from uuid import uuid4
# Matches runs of characters other than ASCII letters, digits, "_", ".", "@",
# space and "-"; sanitize_filename replaces each run with one underscore.
_SAFE_NAME_RE = re.compile(r"[^A-Za-z0-9_.@ -]+")
class UnsafeFilePathError(ValueError):
    """Raised when a logical path or filename is empty or unsafe."""
def normalize_logical_path(path: str | None, *, fallback_filename: str | None = None) -> str:
    """Return a safe tenant-relative logical path using POSIX separators.

    The logical path is metadata, not a filesystem path. The result never
    starts with a slash and never contains path traversal components. It is
    used for browsing, wildcard matching and attachment rules.

    Backslashes in ``path`` are treated as separators, surrounding whitespace
    of every segment is removed, and empty or ``.`` segments are dropped.
    ``fallback_filename`` is used when ``path`` is empty.

    Raises:
        UnsafeFilePathError: if nothing remains after normalisation or if a
            ``..`` segment is present.
    """
    text = (path or "").replace("\\", "/").strip()
    if not text and fallback_filename:
        text = fallback_filename
    if not text:
        raise UnsafeFilePathError("File path is empty")

    segments: list[str] = []
    # Leading slashes only produce empty segments, which are skipped below.
    for segment in (piece.strip() for piece in text.split("/")):
        if segment in ("", "."):
            continue
        if segment == "..":
            raise UnsafeFilePathError("Path traversal is not allowed")
        segments.append(segment)

    if not segments:
        raise UnsafeFilePathError("File path is empty")
    return "/".join(segments)
def normalize_folder(path: str | None) -> str:
    """Normalise a folder path; the empty string denotes the root folder.

    ``None``, blank strings, bare slashes and paths consisting only of ``.``
    segments (for example ``"."`` or ``"./"``) all mean "root" and yield
    ``""``. Anything else is normalised with ``normalize_logical_path``.

    Raises:
        UnsafeFilePathError: if the path contains a ``..`` segment.
    """
    raw = (path or "").replace("\\", "/").strip().strip("/")
    # normalize_logical_path drops "." segments and would raise "File path is
    # empty" for a path made only of them, so the root case is handled here.
    if all(part.strip() in ("", ".") for part in raw.split("/")):
        return ""
    return normalize_logical_path(raw)
def filename_from_path(path: str) -> str:
    """Return the final component of a POSIX-style ``path``.

    Raises:
        UnsafeFilePathError: if the final component is empty, ``.`` or ``..``.
    """
    final_component = PurePosixPath(path).name
    if final_component and final_component not in (".", ".."):
        return final_component
    raise UnsafeFilePathError("Invalid filename")
def join_folder_filename(folder: str | None, filename: str) -> str:
    """Combine a normalised folder and a sanitised filename into one path.

    When the folder normalises to the root (empty string) only the sanitised
    filename is returned.
    """
    name_part = sanitize_filename(filename)
    folder_part = normalize_folder(folder)
    if not folder_part:
        return name_part
    return f"{folder_part}/{name_part}"
def sanitize_filename(filename: str | None) -> str:
    """Reduce ``filename`` to a single safe path component.

    Only the part after the last slash or backslash is kept, leading and
    trailing dots are removed, every run of characters outside the safe set
    becomes ``_`` and whitespace runs collapse to one space. An empty input
    becomes ``"file"``; if nothing is left after cleaning, a unique
    ``file-<hex>`` name is generated.
    """
    basename = (filename or "file").replace("\\", "/").rsplit("/", 1)[-1].strip()
    basename = basename.strip(".")
    if not basename:
        basename = "file"
    cleaned = _SAFE_NAME_RE.sub("_", basename)
    cleaned = re.sub(r"\s+", " ", cleaned).strip()
    if cleaned:
        return cleaned
    return f"file-{uuid4().hex}"
def safe_storage_component(value: str | None, fallback: str = "file") -> str:
    """Return a sanitised name suitable for embedding in a storage key.

    The value (or ``fallback`` when it is empty) is sanitised like a filename,
    spaces are replaced by underscores and the result is cut to 180
    characters.
    """
    sanitized = sanitize_filename(value or fallback)
    without_spaces = sanitized.replace(" ", "_")
    return without_spaces[:180]

View File

@@ -0,0 +1,750 @@
from __future__ import annotations
import hashlib
import mimetypes
import re
import zipfile
from dataclasses import dataclass
from datetime import datetime, timezone
from io import BytesIO
from pathlib import PurePosixPath
from typing import Any, Iterable
from uuid import uuid4
from sqlalchemy import or_
from sqlalchemy.orm import Session
from app.db.models import (
Campaign,
CampaignAttachmentUse,
CampaignJob,
FileAsset,
FileBlob,
FileFolder,
FileShare,
FileVersion,
Group,
UserGroupMembership,
)
from app.settings import settings
from app.storage.backends import get_storage_backend
from app.storage.paths import filename_from_path, join_folder_filename, normalize_folder, normalize_logical_path, safe_storage_component
class FileStorageError(RuntimeError):
    """Raised by the file services for access, lookup and validation errors."""
@dataclass(slots=True)
class UploadedStoredFile:
asset: FileAsset
version: FileVersion
blob: FileBlob
@dataclass(slots=True)
class ResolvedPattern:
pattern: str
matches: list[FileAsset]
def utcnow() -> datetime:
    """Return the current time as a timezone-aware UTC ``datetime``."""
    return datetime.now(tz=timezone.utc)
def user_group_ids(session: Session, *, tenant_id: str, user_id: str, include_admin_groups: bool = False) -> list[str]:
    """Return the group ids relevant for ``user_id`` within a tenant.

    With ``include_admin_groups`` every group of the tenant is returned
    (ordered by name); otherwise only the groups the user is a member of.
    """
    if not include_admin_groups:
        memberships = (
            session.query(UserGroupMembership)
            .filter(UserGroupMembership.tenant_id == tenant_id, UserGroupMembership.user_id == user_id)
            .all()
        )
        return [membership.group_id for membership in memberships]

    tenant_groups = session.query(Group).filter(Group.tenant_id == tenant_id).order_by(Group.name.asc()).all()
    return [group.id for group in tenant_groups]
def ensure_group_access(session: Session, *, tenant_id: str, group_id: str, user_id: str, is_admin: bool = False) -> None:
    """Verify that the user may use the file space of ``group_id``.

    Admins may access any group of the tenant; other users must be members.

    Raises:
        FileStorageError: if the group is missing, belongs to another tenant,
            or the user is not a member.
    """
    group = session.get(Group, group_id)
    if not group or group.tenant_id != tenant_id:
        raise FileStorageError("Group not found")
    if is_admin:
        return

    membership_query = session.query(UserGroupMembership).filter(
        UserGroupMembership.tenant_id == tenant_id,
        UserGroupMembership.user_id == user_id,
        UserGroupMembership.group_id == group_id,
    )
    if membership_query.one_or_none() is None:
        raise FileStorageError("No access to this group file space")
def _owner_filter(query, owner_type: str, owner_id: str):
    """Restrict a ``FileFolder`` query to folders owned by the given owner.

    Raises:
        FileStorageError: if ``owner_type`` is neither "user" nor "group".
    """
    owner_columns = {
        "user": FileFolder.owner_user_id,
        "group": FileFolder.owner_group_id,
    }
    column = owner_columns.get(owner_type)
    if column is None:
        raise FileStorageError("Unsupported owner type")
    return query.filter(column == owner_id)
def ensure_owner_access(session: Session, *, tenant_id: str, owner_type: str, owner_id: str, user_id: str, is_admin: bool = False) -> None:
    """Verify that the user may act on the file space of the given owner.

    A user file space is accessible to that user and to admins; a group file
    space is checked with ``ensure_group_access``. ``owner_type`` is compared
    case-insensitively with surrounding whitespace removed.

    Raises:
        FileStorageError: if access is denied or the owner type is unknown.
    """
    kind = owner_type.lower().strip()
    if kind == "group":
        ensure_group_access(session, tenant_id=tenant_id, group_id=owner_id, user_id=user_id, is_admin=is_admin)
        return
    if kind != "user":
        raise FileStorageError("Files must be owned by a user or group")
    if not is_admin and owner_id != user_id:
        raise FileStorageError("No access to this user file space")
def create_folder(
    session: Session,
    *,
    tenant_id: str,
    owner_type: str,
    owner_id: str,
    user_id: str,
    path: str,
    is_admin: bool = False,
) -> FileFolder:
    """Create a folder for an owner, reusing an existing row when possible.

    An active folder with the same path is returned unchanged. Otherwise the
    most recently soft-deleted folder with that path is restored. Only when
    neither exists is a new folder inserted and flushed.

    Raises:
        FileStorageError: if access is denied or the path normalises to the
            root folder.
    """
    owner_type = owner_type.lower().strip()
    ensure_owner_access(session, tenant_id=tenant_id, owner_type=owner_type, owner_id=owner_id, user_id=user_id, is_admin=is_admin)
    normalized = normalize_folder(path)
    if not normalized:
        raise FileStorageError("Folder path is required")

    query = session.query(FileFolder).filter(
        FileFolder.tenant_id == tenant_id,
        FileFolder.owner_type == owner_type,
        FileFolder.path == normalized,
    )
    query = _owner_filter(query, owner_type, owner_id)

    # Look for the active row explicitly: where NULLs sit in an ORDER BY is
    # database-specific, so sorting by deleted_at cannot reliably prefer it.
    active = query.filter(FileFolder.deleted_at.is_(None)).first()
    if active is not None:
        return active

    # Every remaining row is soft-deleted, so this ordering has no NULLs.
    deleted = query.order_by(FileFolder.deleted_at.desc()).first()
    if deleted is not None:
        deleted.deleted_at = None
        session.add(deleted)
        return deleted

    folder = FileFolder(
        tenant_id=tenant_id,
        owner_type=owner_type,
        owner_user_id=owner_id if owner_type == "user" else None,
        owner_group_id=owner_id if owner_type == "group" else None,
        path=normalized,
        created_by_user_id=user_id,
        metadata_={},
    )
    session.add(folder)
    session.flush()
    return folder
def list_folders_for_user(
    session: Session,
    *,
    tenant_id: str,
    user_id: str,
    owner_type: str,
    owner_id: str,
    include_deleted: bool = False,
    is_admin: bool = False,
) -> list[FileFolder]:
    """Return the folders of an owner's file space, ordered by path.

    Soft-deleted folders are included only when ``include_deleted`` is set.

    Raises:
        FileStorageError: if the user may not access the owner's file space.
    """
    kind = owner_type.lower().strip()
    ensure_owner_access(session, tenant_id=tenant_id, owner_type=kind, owner_id=owner_id, user_id=user_id, is_admin=is_admin)

    folders = session.query(FileFolder).filter(FileFolder.tenant_id == tenant_id, FileFolder.owner_type == kind)
    folders = _owner_filter(folders, kind, owner_id)
    if include_deleted:
        return folders.order_by(FileFolder.path.asc()).all()
    only_active = folders.filter(FileFolder.deleted_at.is_(None))
    return only_active.order_by(FileFolder.path.asc()).all()
def soft_delete_folder(
    session: Session,
    *,
    tenant_id: str,
    owner_type: str,
    owner_id: str,
    user_id: str,
    path: str,
    recursive: bool = True,
    is_admin: bool = False,
) -> tuple[int, int]:
    """Soft-delete a folder and, when ``recursive``, everything below it.

    Prefix matching escapes LIKE wildcards, so a folder such as ``a_b`` does
    not also match ``axb``. In non-recursive mode the folder must be empty;
    only active (not soft-deleted) sub-folders and files count as content.

    Returns:
        ``(deleted_folder_count, deleted_file_count)``.

    Raises:
        FileStorageError: if access is denied, the path is the root folder,
            or a non-recursive delete targets a non-empty folder.
    """
    owner_type = owner_type.lower().strip()
    ensure_owner_access(session, tenant_id=tenant_id, owner_type=owner_type, owner_id=owner_id, user_id=user_id, is_admin=is_admin)
    normalized = normalize_folder(path)
    if not normalized:
        raise FileStorageError("Folder path is required")
    prefix = f"{normalized}/"
    now = utcnow()

    folder_query = session.query(FileFolder).filter(
        FileFolder.tenant_id == tenant_id,
        FileFolder.owner_type == owner_type,
        FileFolder.deleted_at.is_(None),
    )
    folder_query = _owner_filter(folder_query, owner_type, owner_id)
    # autoescape=True makes "%" and "_" in the folder name match literally.
    folder_below = FileFolder.path.startswith(prefix, autoescape=True)
    active_files_below = _asset_query_for_owner(
        session, tenant_id=tenant_id, owner_type=owner_type, owner_id=owner_id
    ).filter(
        FileAsset.deleted_at.is_(None),
        FileAsset.display_path.startswith(prefix, autoescape=True),
    )

    if recursive:
        folder_query = folder_query.filter(or_(FileFolder.path == normalized, folder_below))
    else:
        child_exists = folder_query.filter(folder_below).first() is not None
        file_exists = active_files_below.first() is not None
        if child_exists or file_exists:
            raise FileStorageError("Folder is not empty")
        folder_query = folder_query.filter(FileFolder.path == normalized)

    folders = folder_query.all()
    for folder in folders:
        folder.deleted_at = now
        session.add(folder)

    assets = active_files_below.all() if recursive else []
    for asset in assets:
        asset.deleted_at = now
        session.add(asset)
    return len(folders), len(assets)
def _asset_query_for_owner(session: Session, *, tenant_id: str, owner_type: str, owner_id: str):
    """Return a ``FileAsset`` query limited to one owner within a tenant.

    Raises:
        FileStorageError: if ``owner_type`` is neither "user" nor "group".
    """
    base = session.query(FileAsset).filter(FileAsset.tenant_id == tenant_id, FileAsset.owner_type == owner_type)
    if owner_type == "group":
        return base.filter(FileAsset.owner_group_id == owner_id)
    if owner_type == "user":
        return base.filter(FileAsset.owner_user_id == owner_id)
    raise FileStorageError("Unsupported owner type")
def _storage_bucket_name() -> str:
    """Return the file-storage bucket, falling back to the general S3 bucket."""
    dedicated_bucket = settings.file_storage_s3_bucket
    if dedicated_bucket:
        return dedicated_bucket
    return settings.s3_bucket
def _storage_backend_name() -> str:
    """Return the configured storage backend name, lower-cased and trimmed."""
    configured = settings.file_storage_backend
    return configured.lower().strip()
def _storage_key(*, tenant_id: str, checksum: str, filename: str) -> str:
    """Build a unique storage key for a new blob.

    The key is made of the tenant id, the first two checksum characters, a
    random hex id and the sanitised filename.
    """
    shard = checksum[:2]
    unique_id = uuid4().hex
    name_part = safe_storage_component(filename)
    return f"tenants/{tenant_id}/files/{shard}/{unique_id}-{name_part}"
def _get_or_create_blob(
    session: Session,
    *,
    tenant_id: str,
    data: bytes,
    filename: str,
    content_type: str | None,
) -> FileBlob:
    """Return a blob row for ``data``, de-duplicating within the tenant.

    If the tenant already has a blob with the same SHA-256 checksum and size,
    its reference count is incremented and it is reused. Otherwise the bytes
    are written to the storage backend and a new blob row is flushed.
    """
    digest = hashlib.sha256(data).hexdigest()
    byte_count = len(data)

    duplicate = (
        session.query(FileBlob)
        .filter(FileBlob.tenant_id == tenant_id, FileBlob.checksum_sha256 == digest, FileBlob.size_bytes == byte_count)
        .one_or_none()
    )
    if duplicate:
        duplicate.ref_count += 1
        session.add(duplicate)
        return duplicate

    # No matching content yet: upload first, then record the blob row.
    key = _storage_key(tenant_id=tenant_id, checksum=digest, filename=filename)
    get_storage_backend().put_bytes(key, data, content_type=content_type)
    new_blob = FileBlob(
        tenant_id=tenant_id,
        storage_backend=_storage_backend_name(),
        storage_bucket=_storage_bucket_name(),
        storage_key=key,
        checksum_sha256=digest,
        size_bytes=byte_count,
        content_type=content_type,
        ref_count=1,
    )
    session.add(new_blob)
    session.flush()
    return new_blob
def create_file_asset(
    session: Session,
    *,
    tenant_id: str,
    owner_type: str,
    owner_id: str,
    user_id: str,
    filename: str,
    data: bytes,
    folder: str | None = None,
    display_path: str | None = None,
    content_type: str | None = None,
    description: str | None = None,
    metadata: dict[str, Any] | None = None,
    campaign_id: str | None = None,
    is_admin: bool = False,
) -> UploadedStoredFile:
    """Store ``data`` as a new file asset with its first version.

    The logical path is ``display_path`` when given, otherwise ``folder``
    joined with the filename. A missing content type is guessed from the
    filename and defaults to ``application/octet-stream``. When
    ``campaign_id`` is given the new file is shared read-only with that
    campaign.

    Raises:
        FileStorageError: if the user may not write to the owner's file space
            or the campaign cannot be found.
    """
    owner_kind = owner_type.lower().strip()
    ensure_owner_access(session, tenant_id=tenant_id, owner_type=owner_kind, owner_id=owner_id, user_id=user_id, is_admin=is_admin)

    safe_filename = filename_from_path(normalize_logical_path(filename, fallback_filename="file"))
    if display_path:
        logical_path = normalize_logical_path(display_path)
    else:
        logical_path = join_folder_filename(folder, safe_filename)
    resolved_content_type = content_type or mimetypes.guess_type(safe_filename)[0] or "application/octet-stream"

    blob = _get_or_create_blob(
        session,
        tenant_id=tenant_id,
        data=data,
        filename=safe_filename,
        content_type=resolved_content_type,
    )

    is_user_owned = owner_kind == "user"
    is_group_owned = owner_kind == "group"
    asset = FileAsset(
        tenant_id=tenant_id,
        owner_type=owner_kind,
        owner_user_id=owner_id if is_user_owned else None,
        owner_group_id=owner_id if is_group_owned else None,
        display_path=logical_path,
        filename=filename_from_path(logical_path),
        description=description,
        created_by_user_id=user_id,
        metadata_=metadata or {},
    )
    session.add(asset)
    session.flush()  # assigns asset.id, needed by the version row

    version = FileVersion(
        tenant_id=tenant_id,
        file_asset_id=asset.id,
        blob_id=blob.id,
        version_number=1,
        filename_at_upload=safe_filename,
        display_path_at_upload=logical_path,
        content_type=resolved_content_type,
        size_bytes=blob.size_bytes,
        checksum_sha256=blob.checksum_sha256,
        created_by_user_id=user_id,
    )
    session.add(version)
    session.flush()  # assigns version.id, needed for the back-reference

    asset.current_version_id = version.id
    session.add(asset)

    if campaign_id:
        share_file(
            session,
            tenant_id=tenant_id,
            asset=asset,
            target_type="campaign",
            target_id=campaign_id,
            permission="read",
            user_id=user_id,
        )
    return UploadedStoredFile(asset=asset, version=version, blob=blob)
def get_asset_for_user(session: Session, *, tenant_id: str, user_id: str, asset_id: str, require_write: bool = False, is_admin: bool = False) -> FileAsset:
    """Load a file asset and verify that the user may access it.

    Access is granted to admins, to the owning user, to members of the owning
    group, and to anyone holding an active share (directly, through a group,
    or tenant-wide). With ``require_write`` only "write" and "manage" shares
    qualify.

    Raises:
        FileStorageError: if the asset is missing, deleted, in another tenant,
            or the user has no sufficient access.
    """
    asset = session.get(FileAsset, asset_id)
    if not asset or asset.tenant_id != tenant_id or asset.deleted_at is not None:
        raise FileStorageError("File not found")
    if is_admin:
        return asset

    group_ids = user_group_ids(session, tenant_id=tenant_id, user_id=user_id)
    if asset.owner_type == "user" and asset.owner_user_id == user_id:
        return asset
    if asset.owner_type == "group" and asset.owner_group_id in group_ids:
        return asset

    accepted_permissions = ["write", "manage"] if require_write else ["read", "write", "manage"]
    addressed_to_user = or_(
        (FileShare.target_type == "user") & (FileShare.target_id == user_id),
        (FileShare.target_type == "group") & (FileShare.target_id.in_(group_ids)),
        (FileShare.target_type == "tenant") & (FileShare.target_id == tenant_id),
    )
    share = (
        session.query(FileShare)
        .filter(
            FileShare.tenant_id == tenant_id,
            FileShare.file_asset_id == asset.id,
            FileShare.revoked_at.is_(None),
            FileShare.permission.in_(accepted_permissions),
            addressed_to_user,
        )
        .first()
    )
    if share:
        return asset
    raise FileStorageError("No access to this file")
def list_assets_for_user(
    session: Session,
    *,
    tenant_id: str,
    user_id: str,
    owner_type: str | None = None,
    owner_id: str | None = None,
    campaign_id: str | None = None,
    path_prefix: str | None = None,
    include_deleted: bool = False,
    is_admin: bool = False,
) -> list[FileAsset]:
    """List file assets of a tenant, optionally narrowed by several filters.

    * ``owner_type`` / ``owner_id`` restrict to one owner's file space.
    * ``campaign_id`` restricts to files actively shared with that campaign.
    * Without ``campaign_id`` and ``owner_type`` a non-admin only sees files
      they own, files of their groups and files actively shared with them.
    * ``path_prefix`` restricts to files below that folder; LIKE wildcards in
      the prefix are escaped so ``a_b`` does not also match ``axb``.

    NOTE(review): when ``owner_type`` or ``campaign_id`` is supplied no
    per-user visibility filter is applied here; callers appear to be expected
    to check access (e.g. via ``ensure_owner_access``) first — confirm.

    Returns:
        Assets ordered by display path, then most recently updated first.
    """
    query = session.query(FileAsset).filter(FileAsset.tenant_id == tenant_id)
    if not include_deleted:
        query = query.filter(FileAsset.deleted_at.is_(None))
    if owner_type:
        query = query.filter(FileAsset.owner_type == owner_type)
    if owner_type == "user" and owner_id:
        query = query.filter(FileAsset.owner_user_id == owner_id)
    if owner_type == "group" and owner_id:
        query = query.filter(FileAsset.owner_group_id == owner_id)

    if campaign_id:
        query = query.join(FileShare, FileShare.file_asset_id == FileAsset.id).filter(
            FileShare.tenant_id == tenant_id,
            FileShare.target_type == "campaign",
            FileShare.target_id == campaign_id,
            FileShare.revoked_at.is_(None),
        )
    elif not is_admin and not owner_type:
        group_ids = user_group_ids(session, tenant_id=tenant_id, user_id=user_id)
        share_is_active = FileShare.revoked_at.is_(None)
        query = query.outerjoin(FileShare, FileShare.file_asset_id == FileAsset.id).filter(
            or_(
                (FileAsset.owner_type == "user") & (FileAsset.owner_user_id == user_id),
                (FileAsset.owner_type == "group") & (FileAsset.owner_group_id.in_(group_ids)),
                share_is_active & (FileShare.target_type == "user") & (FileShare.target_id == user_id),
                share_is_active & (FileShare.target_type == "group") & (FileShare.target_id.in_(group_ids)),
                share_is_active & (FileShare.target_type == "tenant") & (FileShare.target_id == tenant_id),
            )
        )

    if path_prefix:
        prefix = normalize_folder(path_prefix)
        if prefix:
            # autoescape=True makes "%" and "_" in the folder name literal.
            query = query.filter(FileAsset.display_path.startswith(f"{prefix}/", autoescape=True))
    return query.order_by(FileAsset.display_path.asc(), FileAsset.updated_at.desc()).all()
def current_version_and_blob(session: Session, asset: FileAsset) -> tuple[FileVersion, FileBlob]:
    """Load the current version of ``asset`` together with its blob.

    Raises:
        FileStorageError: if the asset has no current version, or the version
            or blob row cannot be found.
    """
    version_id = asset.current_version_id
    if not version_id:
        raise FileStorageError("File has no current version")

    version = session.get(FileVersion, version_id)
    if not version:
        raise FileStorageError("File version not found")

    blob = session.get(FileBlob, version.blob_id)
    if not blob:
        raise FileStorageError("File blob not found")

    return version, blob
def read_asset_bytes(session: Session, asset: FileAsset) -> tuple[bytes, FileVersion, FileBlob]:
    """Read the content of the asset's current version.

    Returns:
        ``(content, version, blob)``; the content is fetched from the
        currently configured storage backend using the blob's storage key.
    """
    version, blob = current_version_and_blob(session, asset)
    content = get_storage_backend().get_bytes(blob.storage_key)
    return content, version, blob
def share_file(
    session: Session,
    *,
    tenant_id: str,
    asset: FileAsset,
    target_type: str,
    target_id: str,
    permission: str,
    user_id: str,
) -> FileShare:
    """Share ``asset`` with a user, group, campaign or the whole tenant.

    ``target_type`` and ``permission`` are compared case-insensitively with
    surrounding whitespace removed. An existing active share for the same
    target has its permission overwritten; otherwise a new share is added.

    Raises:
        FileStorageError: for an unknown target type or permission, or when a
            campaign target is missing or belongs to another tenant.
    """
    target_kind = target_type.lower().strip()
    level = permission.lower().strip()
    if target_kind not in ("user", "group", "campaign", "tenant"):
        raise FileStorageError("Unsupported share target")
    if level not in ("read", "write", "manage"):
        raise FileStorageError("Unsupported file permission")

    if target_kind == "campaign":
        campaign = session.get(Campaign, target_id)
        if not campaign or campaign.tenant_id != tenant_id:
            raise FileStorageError("Campaign not found")

    active_share = (
        session.query(FileShare)
        .filter(
            FileShare.tenant_id == tenant_id,
            FileShare.file_asset_id == asset.id,
            FileShare.target_type == target_kind,
            FileShare.target_id == target_id,
            FileShare.revoked_at.is_(None),
        )
        .one_or_none()
    )
    if not active_share:
        new_share = FileShare(
            tenant_id=tenant_id,
            file_asset_id=asset.id,
            target_type=target_kind,
            target_id=target_id,
            permission=level,
            created_by_user_id=user_id,
        )
        session.add(new_share)
        return new_share

    active_share.permission = level
    session.add(active_share)
    return active_share
def soft_delete_assets(session: Session, assets: Iterable[FileAsset]) -> int:
    """Mark every not-yet-deleted asset as deleted and return how many changed.

    All assets touched by one call share the same deletion timestamp.
    """
    newly_deleted = 0
    deleted_at = utcnow()
    for asset in assets:
        if asset.deleted_at is not None:
            continue  # already soft-deleted; keep its original timestamp
        asset.deleted_at = deleted_at
        session.add(asset)
        newly_deleted += 1
    return newly_deleted
def asset_is_audit_relevant(session: Session, asset: FileAsset) -> bool:
    """Return ``True`` when the asset has at least one "sent" attachment use."""
    sent_use = (
        session.query(CampaignAttachmentUse)
        .filter(CampaignAttachmentUse.file_asset_id == asset.id, CampaignAttachmentUse.use_stage == "sent")
        .first()
    )
    return sent_use is not None
def _normalize_pattern(pattern: str) -> str:
if pattern.strip() in {"", "*"}:
return "*"
return normalize_logical_path(pattern, fallback_filename="*")
def _logical_glob_regex(pattern: str) -> re.Pattern[str]:
    """Compile Multi Seal Mail logical globs.

    `*` and `?` stay within one folder segment. `**` crosses folder
    boundaries, and `**/` also matches the current folder so `**/*.pdf`
    returns direct and nested PDF files.
    """
    normalized = _normalize_pattern(pattern)
    wildcard_regex = {
        "**/": "(?:.*/)?",
        "**": ".*",
        "*": "[^/]*",
        "?": "[^/]",
    }
    fragments = ["^"]
    # The alternation is ordered longest-first, so the split yields the same
    # tokens as a left-to-right scan: "**/" before "**" before "*".
    for chunk in re.split(r"(\*\*/|\*\*|\*|\?)", normalized):
        translated = wildcard_regex.get(chunk)
        if translated is None:
            translated = re.escape(chunk)  # literal text (may be empty)
        fragments.append(translated)
    fragments.append("$")
    return re.compile("".join(fragments))
def _relative_display_path(asset: FileAsset, base_path: str | None) -> str:
    """Return the asset's display path relative to ``base_path``.

    The full normalised path is returned when no base folder is given or when
    the asset does not live below it.
    """
    full_path = normalize_logical_path(asset.display_path)
    base_folder = normalize_folder(base_path)
    if base_folder and full_path.startswith(f"{base_folder}/"):
        return full_path[len(base_folder) + 1 :]
    return full_path
def match_assets(assets: Iterable[FileAsset], pattern: str, *, base_path: str | None = None) -> list[FileAsset]:
    """Return the assets matched by a logical glob ``pattern``.

    When a base path is given, or the pattern contains ``/`` or ``**``, the
    pattern is matched against the path relative to ``base_path``. Otherwise
    it may match either the full display path or the bare filename.
    """
    regex = _logical_glob_regex(pattern)
    normalized_pattern = _normalize_pattern(pattern)
    path_aware = base_path is not None or "/" in normalized_pattern or "**" in normalized_pattern

    def _candidates(asset: FileAsset) -> tuple[str, ...]:
        if path_aware:
            return (_relative_display_path(asset, base_path),)
        return (asset.display_path, asset.filename)

    return [asset for asset in assets if any(regex.match(text) for text in _candidates(asset))]
def resolve_patterns(assets: list[FileAsset], patterns: list[str], *, base_path: str | None = None) -> tuple[list[ResolvedPattern], list[FileAsset]]:
    """Match every pattern against ``assets``.

    Returns:
        ``(resolved, unmatched)`` with one ``ResolvedPattern`` per pattern in
        input order, and the assets that no pattern matched.
    """
    resolved: list[ResolvedPattern] = []
    matched_ids = set()
    for pattern in patterns:
        hits = match_assets(assets, pattern, base_path=base_path)
        resolved.append(ResolvedPattern(pattern=pattern, matches=hits))
        matched_ids.update(asset.id for asset in hits)
    unmatched = [asset for asset in assets if asset.id not in matched_ids]
    return resolved, unmatched
def rename_asset(asset: FileAsset, *, new_path: str) -> None:
    """Move ``asset`` to ``new_path`` and update its filename accordingly.

    The path is normalised first; the filename becomes its last component.
    """
    target_path = normalize_logical_path(new_path)
    asset.display_path = target_path
    asset.filename = filename_from_path(target_path)
def build_rename_preview(asset: FileAsset, *, mode: str, find: str | None = None, replacement: str = "", prefix: str = "", suffix: str = "") -> str:
    """Compute the display path a bulk-rename would give ``asset``.

    Modes:
        * ``"prefix"``  - put ``prefix`` in front of the filename.
        * ``"suffix"``  - insert ``suffix`` before the final extension
          (``report.pdf`` -> ``report_v2.pdf``,
          ``archive.tar.gz`` -> ``archive.tar_v2.gz``).
        * ``"replace"`` - replace every occurrence of ``find`` in the filename
          with ``replacement``; an empty ``find`` leaves the name unchanged.

    The folder part of the path is preserved. The asset is not modified.

    Raises:
        FileStorageError: if ``mode`` is not one of the supported modes.
    """
    path = PurePosixPath(asset.display_path)
    folder = "" if str(path.parent) == "." else str(path.parent)
    name = path.name

    if mode == "prefix":
        next_name = prefix + name
    elif mode == "suffix":
        # stem and suffix are split at the same (last) dot, so they always
        # recombine to the original name; pairing stem with *all* suffixes
        # would duplicate inner extensions such as ".tar".
        parsed = PurePosixPath(name)
        next_name = f"{parsed.stem}{suffix}{parsed.suffix}"
    elif mode == "replace":
        next_name = name.replace(find, replacement) if find else name
    else:
        raise FileStorageError("Unsupported rename mode")
    return f"{folder}/{next_name}" if folder else next_name
def create_zip_bytes(session: Session, assets: Iterable[FileAsset]) -> bytes:
    """Pack the current content of ``assets`` into a deflated ZIP archive.

    Each asset is stored under its display path. The finished archive is
    returned as bytes.
    """
    output = BytesIO()
    with zipfile.ZipFile(output, mode="w", compression=zipfile.ZIP_DEFLATED) as archive:
        for asset in assets:
            content = read_asset_bytes(session, asset)[0]
            archive.writestr(asset.display_path, content)
    # getvalue() returns the whole buffer regardless of the stream position.
    return output.getvalue()
def extract_zip_upload(
    session: Session,
    *,
    tenant_id: str,
    owner_type: str,
    owner_id: str,
    user_id: str,
    zip_data: bytes,
    folder: str | None,
    campaign_id: str | None,
    is_admin: bool = False,
    max_files: int = 1000,
    max_total_bytes: int = 250 * 1024 * 1024,
) -> list[UploadedStoredFile]:
    """Extract a ZIP upload into individual file assets below ``folder``.

    The whole archive is validated (member count, declared total size and
    member paths) before the first member is stored, so a rejected archive
    does not leave partially extracted files in the storage backend.

    Raises:
        FileStorageError: if the archive has more than ``max_files`` members,
            declares more than ``max_total_bytes`` of content, contains an
            invalid member, or a member cannot be stored.
        UnsafeFilePathError: if a member name is empty or contains ``..``
            (raised by ``normalize_logical_path``).
    """
    uploaded: list[UploadedStoredFile] = []
    base_folder = normalize_folder(folder)
    with zipfile.ZipFile(BytesIO(zip_data)) as archive:
        infos = [info for info in archive.infolist() if not info.is_dir()]
        if len(infos) > max_files:
            raise FileStorageError(f"ZIP contains too many files (limit {max_files})")

        # Pass 1: validate every member without touching storage.
        total = 0
        planned: list[tuple[zipfile.ZipInfo, str]] = []
        for info in infos:
            if info.file_size < 0:
                raise FileStorageError("Invalid ZIP member")
            total += info.file_size
            if total > max_total_bytes:
                raise FileStorageError("ZIP is too large after extraction")
            planned.append((info, normalize_logical_path(info.filename)))

        # Pass 2: the archive is acceptable, store the members in order.
        for info, inner_path in planned:
            target_path = f"{base_folder}/{inner_path}" if base_folder else inner_path
            uploaded.append(
                create_file_asset(
                    session,
                    tenant_id=tenant_id,
                    owner_type=owner_type,
                    owner_id=owner_id,
                    user_id=user_id,
                    filename=filename_from_path(inner_path),
                    data=archive.read(info),
                    display_path=target_path,
                    content_type=mimetypes.guess_type(inner_path)[0] or "application/octet-stream",
                    campaign_id=campaign_id,
                    is_admin=is_admin,
                )
            )
    return uploaded
def _candidate_match_keys(raw_match: str) -> set[str]:
cleaned = raw_match.replace("\\", "/").strip().strip("/")
result = {cleaned}
if cleaned:
result.add(PurePosixPath(cleaned).name)
return {item for item in result if item}
def record_campaign_attachment_uses_for_job(session: Session, job: CampaignJob, *, stage: str = "built") -> None:
    """Create best-effort immutable file-use records for matched managed files.

    Existing attachment resolution is still filesystem/path based. This bridge
    records uses when a resolved attachment match can be tied to a managed file
    by logical path or filename among files shared with the campaign.

    A match is tied to a file deterministically: an exact logical-path match
    wins over a filename-only match. Files without a readable current version
    are skipped instead of aborting the job. A use already recorded for the
    same job, file version, filename and stage is not duplicated.
    """
    attachments = job.resolved_attachments or []
    if not isinstance(attachments, list):
        return

    assets = list_assets_for_user(
        session,
        tenant_id=job.tenant_id,
        user_id="",
        campaign_id=job.campaign_id,
        is_admin=True,
    )
    # Separate indexes so a filename can never shadow another file's exact
    # path and the lookup order below is fixed rather than set-ordered.
    by_path: dict[str, FileAsset] = {}
    by_name: dict[str, FileAsset] = {}
    for asset in assets:
        by_path[asset.display_path.strip("/")] = asset
        by_name[asset.filename] = asset

    for attachment in attachments:
        if not isinstance(attachment, dict):
            continue
        matches = attachment.get("matches")
        if not isinstance(matches, list):
            continue
        for raw in matches:
            if not isinstance(raw, str):
                continue
            cleaned = raw.replace("\\", "/").strip().strip("/")
            if not cleaned:
                continue
            asset = by_path.get(cleaned) or by_name.get(PurePosixPath(cleaned).name)
            if not asset:
                continue
            try:
                version, blob = current_version_and_blob(session, asset)
            except FileStorageError:
                # Best-effort: a file without a usable version cannot be
                # recorded, but must not break recording for the others.
                continue
            exists = (
                session.query(CampaignAttachmentUse)
                .filter(
                    CampaignAttachmentUse.campaign_job_id == job.id,
                    CampaignAttachmentUse.file_version_id == version.id,
                    CampaignAttachmentUse.filename_used == asset.filename,
                    CampaignAttachmentUse.use_stage == stage,
                )
                .one_or_none()
            )
            if exists:
                continue
            session.add(
                CampaignAttachmentUse(
                    tenant_id=job.tenant_id,
                    campaign_id=job.campaign_id,
                    campaign_version_id=job.campaign_version_id,
                    campaign_job_id=job.id,
                    entry_index=job.entry_index,
                    entry_id=job.entry_id,
                    file_asset_id=asset.id,
                    file_version_id=version.id,
                    file_blob_id=blob.id,
                    filename_used=asset.filename,
                    checksum_sha256=blob.checksum_sha256,
                    size_bytes=blob.size_bytes,
                    content_type=blob.content_type,
                    use_stage=stage,
                )
            )
def mark_job_attachment_uses_sent(session: Session, job: CampaignJob) -> None:
    """Record a "sent" attachment use for every "built" use of ``job``.

    The "built" records are (re)created first. Each one is then copied into a
    "sent" record stamped with the current UTC time, unless a "sent" record
    for the same job and file version already exists.
    """
    record_campaign_attachment_uses_for_job(session, job, stage="built")
    sent_at = utcnow()

    built_uses = (
        session.query(CampaignAttachmentUse)
        .filter(
            CampaignAttachmentUse.tenant_id == job.tenant_id,
            CampaignAttachmentUse.campaign_job_id == job.id,
            CampaignAttachmentUse.use_stage == "built",
        )
        .all()
    )

    # Columns carried over unchanged from the "built" record.
    copied_fields = (
        "tenant_id",
        "campaign_id",
        "campaign_version_id",
        "campaign_job_id",
        "entry_index",
        "entry_id",
        "file_asset_id",
        "file_version_id",
        "file_blob_id",
        "filename_used",
        "checksum_sha256",
        "size_bytes",
        "content_type",
    )
    for built in built_uses:
        already_sent = (
            session.query(CampaignAttachmentUse)
            .filter(
                CampaignAttachmentUse.campaign_job_id == job.id,
                CampaignAttachmentUse.file_version_id == built.file_version_id,
                CampaignAttachmentUse.use_stage == "sent",
            )
            .one_or_none()
        )
        if already_sent:
            continue
        carried_over = {name: getattr(built, name) for name in copied_fields}
        session.add(CampaignAttachmentUse(**carried_over, use_stage="sent", used_at=sent_at))