# Listing metadata from the code viewer: 751 lines, 27 KiB, Python.
from __future__ import annotations
|
|
|
|
import hashlib
|
|
import mimetypes
|
|
import re
|
|
import zipfile
|
|
from dataclasses import dataclass
|
|
from datetime import datetime, timezone
|
|
from io import BytesIO
|
|
from pathlib import PurePosixPath
|
|
from typing import Any, Iterable
|
|
from uuid import uuid4
|
|
|
|
from sqlalchemy import or_
|
|
from sqlalchemy.orm import Session
|
|
|
|
from app.db.models import (
|
|
Campaign,
|
|
CampaignAttachmentUse,
|
|
CampaignJob,
|
|
FileAsset,
|
|
FileBlob,
|
|
FileFolder,
|
|
FileShare,
|
|
FileVersion,
|
|
Group,
|
|
UserGroupMembership,
|
|
)
|
|
from app.settings import settings
|
|
from app.storage.backends import get_storage_backend
|
|
from app.storage.paths import filename_from_path, join_folder_filename, normalize_folder, normalize_logical_path, safe_storage_component
|
|
|
|
|
|
class FileStorageError(RuntimeError):
    """Error raised by the file-storage helpers in this module.

    The message carries the human-readable reason, for example a failed
    access check, a missing record or an unsupported argument value.
    """
|
|
|
|
|
|
@dataclass(slots=True)
|
|
class UploadedStoredFile:
|
|
asset: FileAsset
|
|
version: FileVersion
|
|
blob: FileBlob
|
|
|
|
|
|
@dataclass(slots=True)
|
|
class ResolvedPattern:
|
|
pattern: str
|
|
matches: list[FileAsset]
|
|
|
|
|
|
def utcnow() -> datetime:
    """Return the current time as a timezone-aware UTC ``datetime``."""
    return datetime.now(tz=timezone.utc)
|
|
|
|
|
|
def user_group_ids(session: Session, *, tenant_id: str, user_id: str, include_admin_groups: bool = False) -> list[str]:
    """Return the group ids relevant to a user within a tenant.

    With ``include_admin_groups`` set, the ids of every group of the tenant
    are returned, ordered by group name, and ``user_id`` is not consulted.
    Otherwise only the groups for which the user has a membership row in the
    tenant are returned.
    """
    if not include_admin_groups:
        memberships = (
            session.query(UserGroupMembership)
            .filter(
                UserGroupMembership.tenant_id == tenant_id,
                UserGroupMembership.user_id == user_id,
            )
            .all()
        )
        return [membership.group_id for membership in memberships]

    tenant_groups = (
        session.query(Group)
        .filter(Group.tenant_id == tenant_id)
        .order_by(Group.name.asc())
        .all()
    )
    return [group.id for group in tenant_groups]
|
|
|
|
|
|
def ensure_group_access(session: Session, *, tenant_id: str, group_id: str, user_id: str, is_admin: bool = False) -> None:
    """Verify that a user may use a group's file space.

    The group must exist and belong to ``tenant_id``. Admins pass once the
    group is known to exist; everyone else needs a membership row for the
    group in that tenant.

    Raises:
        FileStorageError: If the group is missing or belongs to another
            tenant, or if a non-admin user is not a member of it.
    """
    group = session.get(Group, group_id)
    if not group or group.tenant_id != tenant_id:
        raise FileStorageError("Group not found")

    if not is_admin:
        membership_query = session.query(UserGroupMembership).filter(
            UserGroupMembership.tenant_id == tenant_id,
            UserGroupMembership.user_id == user_id,
            UserGroupMembership.group_id == group_id,
        )
        if membership_query.one_or_none() is None:
            raise FileStorageError("No access to this group file space")
|
|
|
|
|
|
|
|
|
|
def _owner_filter(query, owner_type: str, owner_id: str):
    """Restrict a ``FileFolder`` query to the folders of one owner.

    ``owner_type`` selects which owner column is compared with ``owner_id``:
    ``"user"`` uses ``owner_user_id`` and ``"group"`` uses ``owner_group_id``.

    Raises:
        FileStorageError: For any other owner type.
    """
    if owner_type not in ("user", "group"):
        raise FileStorageError("Unsupported owner type")
    owner_column = FileFolder.owner_user_id if owner_type == "user" else FileFolder.owner_group_id
    return query.filter(owner_column == owner_id)
|
|
|
|
|
|
def ensure_owner_access(session: Session, *, tenant_id: str, owner_type: str, owner_id: str, user_id: str, is_admin: bool = False) -> None:
    """Verify that a user may act on the file space of the given owner.

    ``owner_type`` is compared case-insensitively with surrounding whitespace
    ignored. A user file space is accessible to its owner and to admins; a
    group file space is checked through ``ensure_group_access``.

    Raises:
        FileStorageError: If access is denied or the owner type is neither
            ``"user"`` nor ``"group"``.
    """
    kind = owner_type.lower().strip()
    if kind == "group":
        ensure_group_access(session, tenant_id=tenant_id, group_id=owner_id, user_id=user_id, is_admin=is_admin)
    elif kind == "user":
        if not is_admin and owner_id != user_id:
            raise FileStorageError("No access to this user file space")
    else:
        raise FileStorageError("Files must be owned by a user or group")
|
|
|
|
|
|
def create_folder(
    session: Session,
    *,
    tenant_id: str,
    owner_type: str,
    owner_id: str,
    user_id: str,
    path: str,
    is_admin: bool = False,
) -> FileFolder:
    """Create a folder for an owner, reusing or restoring an existing row.

    Args:
        session: Active SQLAlchemy session; nothing is committed here.
        tenant_id: Tenant the folder belongs to.
        owner_type: ``"user"`` or ``"group"`` (case and surrounding
            whitespace are ignored).
        owner_id: Id of the owning user or group.
        user_id: Acting user; checked by ``ensure_owner_access`` and stored
            as the creator of a newly created folder.
        path: Folder path, normalized with ``normalize_folder``.
        is_admin: Forwarded to ``ensure_owner_access``.

    Returns:
        The folder row for the path: an existing live row, a soft-deleted
        row that was restored, or a newly created and flushed row.

    Raises:
        FileStorageError: If access is denied, the owner type is
            unsupported, or the normalized path is empty.
    """
    owner_type = owner_type.lower().strip()
    ensure_owner_access(session, tenant_id=tenant_id, owner_type=owner_type, owner_id=owner_id, user_id=user_id, is_admin=is_admin)
    normalized = normalize_folder(path)
    if not normalized:
        raise FileStorageError("Folder path is required")

    query = session.query(FileFolder).filter(
        FileFolder.tenant_id == tenant_id,
        FileFolder.owner_type == owner_type,
        FileFolder.path == normalized,
    )
    query = _owner_filter(query, owner_type, owner_id)

    # Look for a live row explicitly instead of sorting on the nullable
    # ``deleted_at`` column: where NULLs sort varies by database backend, so
    # an ORDER BY could surface a soft-deleted row ahead of a live one.
    active = query.filter(FileFolder.deleted_at.is_(None)).first()
    if active:
        return active

    # No live row exists, so any remaining match is soft-deleted: restore it.
    deleted = query.order_by(FileFolder.deleted_at.asc()).first()
    if deleted:
        deleted.deleted_at = None
        session.add(deleted)
        return deleted

    folder = FileFolder(
        tenant_id=tenant_id,
        owner_type=owner_type,
        owner_user_id=owner_id if owner_type == "user" else None,
        owner_group_id=owner_id if owner_type == "group" else None,
        path=normalized,
        created_by_user_id=user_id,
        metadata_={},
    )
    session.add(folder)
    # Flush so the new row is written before it is handed back.
    session.flush()
    return folder
|
|
|
|
|
|
def list_folders_for_user(
    session: Session,
    *,
    tenant_id: str,
    user_id: str,
    owner_type: str,
    owner_id: str,
    include_deleted: bool = False,
    is_admin: bool = False,
) -> list[FileFolder]:
    """List the folders of one owner's file space, ordered by path.

    Access is verified with ``ensure_owner_access`` first. Soft-deleted
    folders are left out unless ``include_deleted`` is true.

    Raises:
        FileStorageError: If access is denied or the owner type is
            unsupported.
    """
    owner_type = owner_type.lower().strip()
    ensure_owner_access(session, tenant_id=tenant_id, owner_type=owner_type, owner_id=owner_id, user_id=user_id, is_admin=is_admin)

    base_criteria = (
        FileFolder.tenant_id == tenant_id,
        FileFolder.owner_type == owner_type,
    )
    folders = _owner_filter(session.query(FileFolder).filter(*base_criteria), owner_type, owner_id)
    if not include_deleted:
        folders = folders.filter(FileFolder.deleted_at.is_(None))
    return folders.order_by(FileFolder.path.asc()).all()
|
|
|
|
|
|
def soft_delete_folder(
    session: Session,
    *,
    tenant_id: str,
    owner_type: str,
    owner_id: str,
    user_id: str,
    path: str,
    recursive: bool = True,
    is_admin: bool = False,
) -> tuple[int, int]:
    """Soft-delete a folder and, when recursive, everything below it.

    Args:
        session: Active SQLAlchemy session; nothing is committed here.
        tenant_id: Tenant the folder belongs to.
        owner_type: ``"user"`` or ``"group"`` (case and surrounding
            whitespace are ignored).
        owner_id: Id of the owning user or group.
        user_id: Acting user, checked by ``ensure_owner_access``.
        path: Folder path, normalized with ``normalize_folder``.
        recursive: When true, sub-folders and contained files are deleted
            as well. When false, the folder must not contain any live
            sub-folder or live file.
        is_admin: Forwarded to ``ensure_owner_access``.

    Returns:
        ``(folders_deleted, files_deleted)``.

    Raises:
        FileStorageError: If access is denied, the path is empty, or a
            non-recursive delete targets a folder that is not empty.
    """
    owner_type = owner_type.lower().strip()
    ensure_owner_access(session, tenant_id=tenant_id, owner_type=owner_type, owner_id=owner_id, user_id=user_id, is_admin=is_admin)
    normalized = normalize_folder(path)
    if not normalized:
        raise FileStorageError("Folder path is required")
    prefix = f"{normalized}/"
    now = utcnow()

    # ``autoescape`` makes "%" and "_" inside the folder name match
    # literally; a plain LIKE would treat them as wildcards and could reach
    # into unrelated folders (e.g. "a_b/" also matching "axb/").
    folder_below = FileFolder.path.startswith(prefix, autoescape=True)
    asset_below = FileAsset.display_path.startswith(prefix, autoescape=True)

    folder_query = session.query(FileFolder).filter(
        FileFolder.tenant_id == tenant_id,
        FileFolder.owner_type == owner_type,
        FileFolder.deleted_at.is_(None),
    )
    folder_query = _owner_filter(folder_query, owner_type, owner_id)
    live_assets = _asset_query_for_owner(session, tenant_id=tenant_id, owner_type=owner_type, owner_id=owner_id).filter(
        FileAsset.deleted_at.is_(None),
        asset_below,
    )

    if recursive:
        folder_query = folder_query.filter(or_(FileFolder.path == normalized, folder_below))
    else:
        child_exists = folder_query.filter(folder_below).first() is not None
        # Only live files keep a folder non-empty; soft-deleted files must
        # not block the removal of their former folder.
        file_exists = live_assets.first() is not None
        if child_exists or file_exists:
            raise FileStorageError("Folder is not empty")
        folder_query = folder_query.filter(FileFolder.path == normalized)

    folders = folder_query.all()
    for folder in folders:
        folder.deleted_at = now
        session.add(folder)

    assets = live_assets.all() if recursive else []
    for asset in assets:
        asset.deleted_at = now
        session.add(asset)

    return len(folders), len(assets)
|
|
|
|
|
|
def _asset_query_for_owner(session: Session, *, tenant_id: str, owner_type: str, owner_id: str):
    """Build a ``FileAsset`` query limited to one owner within a tenant.

    The query does not filter on ``deleted_at``; callers add that themselves.

    Raises:
        FileStorageError: If ``owner_type`` is neither ``"user"`` nor
            ``"group"``.
    """
    scoped = session.query(FileAsset).filter(
        FileAsset.tenant_id == tenant_id,
        FileAsset.owner_type == owner_type,
    )
    if owner_type not in ("user", "group"):
        raise FileStorageError("Unsupported owner type")
    owner_column = FileAsset.owner_user_id if owner_type == "user" else FileAsset.owner_group_id
    return scoped.filter(owner_column == owner_id)
|
|
|
|
def _storage_bucket_name() -> str:
    """Return the bucket name recorded on new blobs.

    ``settings.file_storage_s3_bucket`` wins when it is set to a truthy
    value; otherwise ``settings.s3_bucket`` is used.
    """
    dedicated_bucket = settings.file_storage_s3_bucket
    return dedicated_bucket if dedicated_bucket else settings.s3_bucket
|
|
|
|
|
|
def _storage_backend_name() -> str:
    """Return ``settings.file_storage_backend`` lower-cased and stripped."""
    configured = settings.file_storage_backend
    return configured.lower().strip()
|
|
|
|
|
|
def _storage_key(*, tenant_id: str, checksum: str, filename: str) -> str:
    """Build a fresh storage key for a blob.

    The key lives under ``tenants/<tenant_id>/files/``, is sharded by the
    first two characters of the checksum, and ends with a random hex id
    followed by the filename passed through ``safe_storage_component``.
    Because of the random id, two calls never return the same key.
    """
    shard = checksum[:2]
    unique_id = uuid4().hex
    safe_name = safe_storage_component(filename)
    return f"tenants/{tenant_id}/files/{shard}/{unique_id}-{safe_name}"
|
|
|
|
|
|
def _get_or_create_blob(
    session: Session,
    *,
    tenant_id: str,
    data: bytes,
    filename: str,
    content_type: str | None,
) -> FileBlob:
    """Return the blob row for ``data``, storing the bytes only when new.

    Blobs are looked up per tenant by SHA-256 checksum and size. When a
    matching row exists its ``ref_count`` is incremented and nothing is
    written to the storage backend. Otherwise the bytes are written under a
    new storage key and a blob row with ``ref_count=1`` is added and flushed.
    """
    digest = hashlib.sha256(data).hexdigest()
    byte_count = len(data)

    lookup = session.query(FileBlob).filter(
        FileBlob.tenant_id == tenant_id,
        FileBlob.checksum_sha256 == digest,
        FileBlob.size_bytes == byte_count,
    )
    existing = lookup.one_or_none()
    if existing:
        # Same content already stored for this tenant: reuse it.
        existing.ref_count += 1
        session.add(existing)
        return existing

    key = _storage_key(tenant_id=tenant_id, checksum=digest, filename=filename)
    get_storage_backend().put_bytes(key, data, content_type=content_type)

    created = FileBlob(
        tenant_id=tenant_id,
        storage_backend=_storage_backend_name(),
        storage_bucket=_storage_bucket_name(),
        storage_key=key,
        checksum_sha256=digest,
        size_bytes=byte_count,
        content_type=content_type,
        ref_count=1,
    )
    session.add(created)
    session.flush()
    return created
|
|
|
|
|
|
def create_file_asset(
    session: Session,
    *,
    tenant_id: str,
    owner_type: str,
    owner_id: str,
    user_id: str,
    filename: str,
    data: bytes,
    folder: str | None = None,
    display_path: str | None = None,
    content_type: str | None = None,
    description: str | None = None,
    metadata: dict[str, Any] | None = None,
    campaign_id: str | None = None,
    is_admin: bool = False,
) -> UploadedStoredFile:
    """Store ``data`` as a new file asset with its first version.

    After the owner access check the function resolves the logical path
    (``display_path`` when given, otherwise ``folder`` joined with the
    sanitized filename), guesses the content type from the filename when
    none is supplied, obtains the blob via ``_get_or_create_blob``, and adds
    the asset and version rows (version number 1). When ``campaign_id`` is
    given the asset is additionally shared read-only with that campaign.

    Returns:
        The asset, version and blob wrapped in ``UploadedStoredFile``.

    Raises:
        FileStorageError: If access is denied, the owner type is
            unsupported, or sharing with the campaign fails.
    """
    owner_type = owner_type.lower().strip()
    ensure_owner_access(session, tenant_id=tenant_id, owner_type=owner_type, owner_id=owner_id, user_id=user_id, is_admin=is_admin)

    safe_filename = filename_from_path(normalize_logical_path(filename, fallback_filename="file"))
    if display_path:
        logical_path = normalize_logical_path(display_path)
    else:
        logical_path = join_folder_filename(folder, safe_filename)
    content_type = content_type or mimetypes.guess_type(safe_filename)[0] or "application/octet-stream"

    blob = _get_or_create_blob(session, tenant_id=tenant_id, data=data, filename=safe_filename, content_type=content_type)

    owner_user_id = owner_id if owner_type == "user" else None
    owner_group_id = owner_id if owner_type == "group" else None
    asset = FileAsset(
        tenant_id=tenant_id,
        owner_type=owner_type,
        owner_user_id=owner_user_id,
        owner_group_id=owner_group_id,
        display_path=logical_path,
        filename=filename_from_path(logical_path),
        description=description,
        created_by_user_id=user_id,
        metadata_=metadata or {},
    )
    session.add(asset)
    # Flush before building the version, which references ``asset.id``.
    session.flush()

    version = FileVersion(
        tenant_id=tenant_id,
        file_asset_id=asset.id,
        blob_id=blob.id,
        version_number=1,
        filename_at_upload=safe_filename,
        display_path_at_upload=logical_path,
        content_type=content_type,
        size_bytes=blob.size_bytes,
        checksum_sha256=blob.checksum_sha256,
        created_by_user_id=user_id,
    )
    session.add(version)
    # Flush again before pointing the asset at ``version.id``.
    session.flush()

    asset.current_version_id = version.id
    session.add(asset)

    if campaign_id:
        share_file(session, tenant_id=tenant_id, asset=asset, target_type="campaign", target_id=campaign_id, permission="read", user_id=user_id)

    return UploadedStoredFile(asset=asset, version=version, blob=blob)
|
|
|
|
|
|
def get_asset_for_user(session: Session, *, tenant_id: str, user_id: str, asset_id: str, require_write: bool = False, is_admin: bool = False) -> FileAsset:
    """Load a file asset and verify that the user may access it.

    The asset must exist, belong to ``tenant_id`` and not be soft-deleted.
    Access is then granted, in this order, to admins, to the owning user or
    members of the owning group, and to anyone covered by an unrevoked share
    aimed at the user, one of the user's groups, or the whole tenant. With
    ``require_write`` only ``write`` and ``manage`` shares count.

    Raises:
        FileStorageError: If the asset is missing or the user has no access.
    """
    asset = session.get(FileAsset, asset_id)
    if not asset or asset.tenant_id != tenant_id or asset.deleted_at is not None:
        raise FileStorageError("File not found")
    if is_admin:
        return asset

    group_ids = user_group_ids(session, tenant_id=tenant_id, user_id=user_id)
    if asset.owner_type == "user" and asset.owner_user_id == user_id:
        return asset
    if asset.owner_type == "group" and asset.owner_group_id in group_ids:
        return asset

    accepted_permissions = ["write", "manage"] if require_write else ["read", "write", "manage"]
    aimed_at_user = (FileShare.target_type == "user") & (FileShare.target_id == user_id)
    aimed_at_group = (FileShare.target_type == "group") & (FileShare.target_id.in_(group_ids))
    aimed_at_tenant = (FileShare.target_type == "tenant") & (FileShare.target_id == tenant_id)

    share = (
        session.query(FileShare)
        .filter(
            FileShare.tenant_id == tenant_id,
            FileShare.file_asset_id == asset.id,
            FileShare.revoked_at.is_(None),
            FileShare.permission.in_(accepted_permissions),
            or_(aimed_at_user, aimed_at_group, aimed_at_tenant),
        )
        .first()
    )
    if share:
        return asset
    raise FileStorageError("No access to this file")
|
|
|
|
|
|
def list_assets_for_user(
    session: Session,
    *,
    tenant_id: str,
    user_id: str,
    owner_type: str | None = None,
    owner_id: str | None = None,
    campaign_id: str | None = None,
    path_prefix: str | None = None,
    include_deleted: bool = False,
    is_admin: bool = False,
) -> list[FileAsset]:
    """List file assets of a tenant, optionally narrowed by several filters.

    Filters are applied as follows:

    * soft-deleted assets are dropped unless ``include_deleted`` is true;
    * ``owner_type`` narrows by owner type, and ``owner_id`` additionally
      narrows to one owner for the ``"user"`` and ``"group"`` types;
    * ``campaign_id`` keeps only assets with an unrevoked campaign share;
    * otherwise, for non-admin callers that gave no ``owner_type``, only
      assets the user owns, assets owned by one of the user's groups, and
      assets with an unrevoked share aimed at the user, one of those groups
      or the whole tenant are kept;
    * ``path_prefix`` keeps only assets whose display path lies below that
      folder.

    Returns:
        Matching assets ordered by display path, then by ``updated_at``
        descending.
    """
    query = session.query(FileAsset).filter(FileAsset.tenant_id == tenant_id)
    if not include_deleted:
        query = query.filter(FileAsset.deleted_at.is_(None))
    if owner_type:
        query = query.filter(FileAsset.owner_type == owner_type)
    if owner_type == "user" and owner_id:
        query = query.filter(FileAsset.owner_user_id == owner_id)
    if owner_type == "group" and owner_id:
        query = query.filter(FileAsset.owner_group_id == owner_id)

    if campaign_id:
        query = query.join(FileShare, FileShare.file_asset_id == FileAsset.id).filter(
            FileShare.tenant_id == tenant_id,
            FileShare.target_type == "campaign",
            FileShare.target_id == campaign_id,
            FileShare.revoked_at.is_(None),
        )
    # NOTE(review): the visibility filter below is skipped whenever
    # ``owner_type`` or ``campaign_id`` is given, so this function relies on
    # the caller having verified access in those cases - confirm all callers.
    elif not is_admin and not owner_type:
        group_ids = user_group_ids(session, tenant_id=tenant_id, user_id=user_id)
        share_is_active = FileShare.revoked_at.is_(None)
        query = query.outerjoin(FileShare, FileShare.file_asset_id == FileAsset.id).filter(
            or_(
                (FileAsset.owner_type == "user") & (FileAsset.owner_user_id == user_id),
                (FileAsset.owner_type == "group") & (FileAsset.owner_group_id.in_(group_ids)),
                share_is_active & (FileShare.target_type == "user") & (FileShare.target_id == user_id),
                share_is_active & (FileShare.target_type == "group") & (FileShare.target_id.in_(group_ids)),
                share_is_active & (FileShare.target_type == "tenant") & (FileShare.target_id == tenant_id),
            )
        )

    if path_prefix:
        prefix = normalize_folder(path_prefix)
        if prefix:
            # ``autoescape`` makes "%" and "_" in the folder name match
            # literally; a plain LIKE would treat them as wildcards and
            # return files from unrelated folders.
            query = query.filter(FileAsset.display_path.startswith(f"{prefix}/", autoescape=True))

    return query.order_by(FileAsset.display_path.asc(), FileAsset.updated_at.desc()).all()
|
|
|
|
|
|
def current_version_and_blob(session: Session, asset: FileAsset) -> tuple[FileVersion, FileBlob]:
    """Load the current version of an asset together with its blob.

    Raises:
        FileStorageError: If the asset has no current version id, or the
            version row or its blob row cannot be loaded.
    """
    version_id = asset.current_version_id
    if not version_id:
        raise FileStorageError("File has no current version")

    version = session.get(FileVersion, version_id)
    if not version:
        raise FileStorageError("File version not found")

    blob = session.get(FileBlob, version.blob_id)
    if not blob:
        raise FileStorageError("File blob not found")

    return version, blob
|
|
|
|
|
|
def read_asset_bytes(session: Session, asset: FileAsset) -> tuple[bytes, FileVersion, FileBlob]:
    """Read the bytes of an asset's current version from the storage backend.

    Returns:
        ``(data, version, blob)`` for the asset's current version.

    Raises:
        FileStorageError: Propagated from ``current_version_and_blob`` when
            the version or blob cannot be resolved.
    """
    version, blob = current_version_and_blob(session, asset)
    data = get_storage_backend().get_bytes(blob.storage_key)
    return data, version, blob
|
|
|
|
|
|
def share_file(
    session: Session,
    *,
    tenant_id: str,
    asset: FileAsset,
    target_type: str,
    target_id: str,
    permission: str,
    user_id: str,
) -> FileShare:
    """Share an asset with a target, or update the permission of a live share.

    ``target_type`` and ``permission`` are compared case-insensitively with
    surrounding whitespace ignored. Campaign targets must exist in the
    tenant. When an unrevoked share for the same asset and target already
    exists, its permission is overwritten and that row is returned;
    otherwise a new share row is added to the session (not flushed).

    Raises:
        FileStorageError: For an unsupported target type or permission, or
            when the campaign target does not exist in the tenant.
    """
    target_type = target_type.lower().strip()
    permission = permission.lower().strip()

    if target_type not in ("user", "group", "campaign", "tenant"):
        raise FileStorageError("Unsupported share target")
    if permission not in ("read", "write", "manage"):
        raise FileStorageError("Unsupported file permission")

    if target_type == "campaign":
        campaign = session.get(Campaign, target_id)
        if not campaign or campaign.tenant_id != tenant_id:
            raise FileStorageError("Campaign not found")

    live_share_query = session.query(FileShare).filter(
        FileShare.tenant_id == tenant_id,
        FileShare.file_asset_id == asset.id,
        FileShare.target_type == target_type,
        FileShare.target_id == target_id,
        FileShare.revoked_at.is_(None),
    )
    share = live_share_query.one_or_none()
    if share:
        share.permission = permission
    else:
        share = FileShare(
            tenant_id=tenant_id,
            file_asset_id=asset.id,
            target_type=target_type,
            target_id=target_id,
            permission=permission,
            created_by_user_id=user_id,
        )
    session.add(share)
    return share
|
|
|
|
|
|
def soft_delete_assets(session: Session, assets: Iterable[FileAsset]) -> int:
    """Mark every not-yet-deleted asset as deleted and return how many changed.

    All affected assets receive the same timestamp. Assets that already have
    a ``deleted_at`` value are left untouched and are not counted.
    """
    stamp = utcnow()
    deleted = 0
    for item in assets:
        if item.deleted_at is not None:
            continue
        item.deleted_at = stamp
        session.add(item)
        deleted += 1
    return deleted
|
|
|
|
|
|
def asset_is_audit_relevant(session: Session, asset: FileAsset) -> bool:
    """Return whether the asset has at least one attachment use in stage ``sent``."""
    sent_use = (
        session.query(CampaignAttachmentUse)
        .filter(
            CampaignAttachmentUse.file_asset_id == asset.id,
            CampaignAttachmentUse.use_stage == "sent",
        )
        .first()
    )
    return sent_use is not None
|
|
|
|
|
|
def _normalize_pattern(pattern: str) -> str:
    """Normalize a glob pattern; blank and ``*`` patterns both become ``*``.

    Every other pattern is passed through ``normalize_logical_path`` with
    ``*`` as the fallback filename.
    """
    stripped = pattern.strip()
    if stripped == "" or stripped == "*":
        return "*"
    return normalize_logical_path(pattern, fallback_filename="*")
|
|
|
|
|
|
def _logical_glob_regex(pattern: str) -> re.Pattern[str]:
    """Compile a logical glob pattern into an anchored regular expression.

    Translation rules, after the pattern has been normalized:

    * ``**/`` matches any number of leading folders, including none, so
      ``**/*.pdf`` finds PDF files directly in the folder and nested ones;
    * ``**`` elsewhere matches any characters, folder separators included;
    * ``*`` matches any run of characters within one folder segment;
    * ``?`` matches exactly one character within a folder segment;
    * every other character matches itself literally.
    """
    glob = _normalize_pattern(pattern)
    parts = ["^"]
    position = 0
    end = len(glob)
    while position < end:
        if glob.startswith("**/", position):
            parts.append("(?:.*/)?")
            position += 3
        elif glob.startswith("**", position):
            parts.append(".*")
            position += 2
        elif glob[position] == "*":
            parts.append("[^/]*")
            position += 1
        elif glob[position] == "?":
            parts.append("[^/]")
            position += 1
        else:
            parts.append(re.escape(glob[position]))
            position += 1
    parts.append("$")
    return re.compile("".join(parts))
|
|
|
|
|
|
def _relative_display_path(asset: FileAsset, base_path: str | None) -> str:
    """Return the asset's normalized display path relative to ``base_path``.

    When ``base_path`` normalizes to an empty folder, or the asset does not
    live below it, the full normalized display path is returned unchanged.
    """
    full_path = normalize_logical_path(asset.display_path)
    base_folder = normalize_folder(base_path)
    return full_path.removeprefix(f"{base_folder}/") if base_folder else full_path
|
|
|
|
|
|
def match_assets(assets: Iterable[FileAsset], pattern: str, *, base_path: str | None = None) -> list[FileAsset]:
    """Return the assets matched by a logical glob pattern, in input order.

    The pattern has "path context" when ``base_path`` is given or when the
    normalized pattern contains ``/`` or ``**``. With path context, only the
    display path relative to ``base_path`` is tested. Without it, an asset
    matches when either its display path or its filename matches.
    """
    regex = _logical_glob_regex(pattern)
    normalized = _normalize_pattern(pattern)
    path_aware = base_path is not None or "/" in normalized or "**" in normalized

    def candidates_for(asset: FileAsset) -> list[str]:
        if path_aware:
            return [_relative_display_path(asset, base_path)]
        return [asset.display_path, asset.filename]

    return [
        asset
        for asset in assets
        if any(regex.match(candidate) for candidate in candidates_for(asset))
    ]
|
|
|
|
|
|
def resolve_patterns(assets: list[FileAsset], patterns: list[str], *, base_path: str | None = None) -> tuple[list[ResolvedPattern], list[FileAsset]]:
    """Match every pattern against the assets and report leftovers.

    Returns:
        A pair ``(resolved, unmatched)``: one ``ResolvedPattern`` per input
        pattern, in order, and the assets that no pattern matched.
    """
    resolved: list[ResolvedPattern] = []
    matched_ids = set()
    for pattern in patterns:
        hits = match_assets(assets, pattern, base_path=base_path)
        resolved.append(ResolvedPattern(pattern=pattern, matches=hits))
        matched_ids.update(hit.id for hit in hits)

    unmatched = [asset for asset in assets if asset.id not in matched_ids]
    return resolved, unmatched
|
|
|
|
|
|
def rename_asset(asset: FileAsset, *, new_path: str) -> None:
    """Move an asset to ``new_path`` by updating its path and filename in place.

    The path is normalized with ``normalize_logical_path`` and the filename
    is derived from the normalized path. The asset is only mutated; adding
    it to a session is left to the caller.
    """
    target_path = normalize_logical_path(new_path)
    asset.display_path = target_path
    asset.filename = filename_from_path(target_path)
|
|
|
|
|
|
def build_rename_preview(asset: FileAsset, *, mode: str, find: str | None = None, replacement: str = "", prefix: str = "", suffix: str = "") -> str:
    """Compute the display path an asset would get from a bulk rename.

    Only the filename changes; the folder part of the display path is kept.

    Args:
        asset: Asset whose ``display_path`` is used as the starting point.
        mode: ``"prefix"`` prepends ``prefix`` to the filename, ``"suffix"``
            inserts ``suffix`` before the final extension, ``"replace"``
            replaces every occurrence of ``find`` in the filename with
            ``replacement`` (the name is left unchanged when ``find`` is
            empty or ``None``).
        find: Text to search for in ``"replace"`` mode.
        replacement: Replacement text in ``"replace"`` mode.
        prefix: Text prepended in ``"prefix"`` mode.
        suffix: Text inserted in ``"suffix"`` mode.

    Returns:
        The resulting display path. The asset itself is not modified.

    Raises:
        FileStorageError: If ``mode`` is not one of the supported modes.
    """
    path = PurePosixPath(asset.display_path)
    parent = str(path.parent)
    folder = "" if parent == "." else parent
    name = path.name

    if mode == "prefix":
        next_name = prefix + name
    elif mode == "suffix":
        # ``stem`` drops only the final extension, so it must be paired with
        # ``suffix`` (the final extension). Pairing it with all suffixes
        # duplicated inner extensions: "a.tar.gz" became "a.tar<x>.tar.gz".
        next_name = f"{path.stem}{suffix}{path.suffix}"
    elif mode == "replace":
        next_name = name.replace(find, replacement) if find else name
    else:
        raise FileStorageError("Unsupported rename mode")

    return f"{folder}/{next_name}" if folder else next_name
|
|
|
|
|
|
def create_zip_bytes(session: Session, assets: Iterable[FileAsset]) -> bytes:
    """Pack the current version of each asset into an in-memory ZIP archive.

    Every asset is stored under its ``display_path`` using DEFLATE
    compression, and the finished archive is returned as bytes.

    Raises:
        FileStorageError: Propagated from ``read_asset_bytes`` when an asset
            has no readable current version.
    """
    output = BytesIO()
    with zipfile.ZipFile(output, mode="w", compression=zipfile.ZIP_DEFLATED) as archive:
        for asset in assets:
            payload = read_asset_bytes(session, asset)[0]
            archive.writestr(asset.display_path, payload)
    # ``getvalue`` returns the whole buffer regardless of the stream position.
    return output.getvalue()
|
|
|
|
|
|
def extract_zip_upload(
    session: Session,
    *,
    tenant_id: str,
    owner_type: str,
    owner_id: str,
    user_id: str,
    zip_data: bytes,
    folder: str | None,
    campaign_id: str | None,
    is_admin: bool = False,
    max_files: int = 1000,
    max_total_bytes: int = 250 * 1024 * 1024,
) -> list[UploadedStoredFile]:
    """Extract a ZIP upload and store each member as its own file asset.

    Directory entries are skipped. Each file member keeps its normalized
    path inside the archive, placed below ``folder`` when one is given, and
    is stored through ``create_file_asset``.

    Args:
        session: Active SQLAlchemy session; nothing is committed here.
        tenant_id: Tenant that receives the files.
        owner_type: Owner type forwarded to ``create_file_asset``.
        owner_id: Owner id forwarded to ``create_file_asset``.
        user_id: Acting user forwarded to ``create_file_asset``.
        zip_data: Raw bytes of the uploaded archive.
        folder: Optional target folder for the extracted files.
        campaign_id: Optional campaign every extracted file is shared with.
        is_admin: Forwarded to ``create_file_asset``.
        max_files: Maximum number of file members accepted.
        max_total_bytes: Maximum sum of the members' declared sizes.

    Returns:
        One ``UploadedStoredFile`` per extracted member, in archive order.

    Raises:
        FileStorageError: If the archive exceeds a limit or has an invalid
            member, or if storing a member fails.
    """
    base_folder = normalize_folder(folder)
    uploaded: list[UploadedStoredFile] = []

    with zipfile.ZipFile(BytesIO(zip_data)) as archive:
        infos = [info for info in archive.infolist() if not info.is_dir()]
        if len(infos) > max_files:
            raise FileStorageError(f"ZIP contains too many files (limit {max_files})")

        # Validate the whole archive before storing anything. Checking the
        # running total while uploading would leave the members that came
        # before the limit already written when the error is raised.
        total = 0
        for info in infos:
            if info.file_size < 0:
                raise FileStorageError("Invalid ZIP member")
            total += info.file_size
            if total > max_total_bytes:
                raise FileStorageError("ZIP is too large after extraction")

        for info in infos:
            inner_path = normalize_logical_path(info.filename)
            target_path = f"{base_folder}/{inner_path}" if base_folder else inner_path
            data = archive.read(info)
            uploaded.append(
                create_file_asset(
                    session,
                    tenant_id=tenant_id,
                    owner_type=owner_type,
                    owner_id=owner_id,
                    user_id=user_id,
                    filename=filename_from_path(inner_path),
                    data=data,
                    display_path=target_path,
                    content_type=mimetypes.guess_type(inner_path)[0] or "application/octet-stream",
                    campaign_id=campaign_id,
                    is_admin=is_admin,
                )
            )

    return uploaded
|
|
|
|
|
|
def _candidate_match_keys(raw_match: str) -> set[str]:
    """Return the lookup keys for a resolved attachment match.

    Backslashes are converted to forward slashes, then surrounding
    whitespace and slashes are removed. The result holds the cleaned path
    and its final path component, without empty strings.
    """
    cleaned = raw_match.replace("\\", "/").strip().strip("/")
    if not cleaned:
        return set()
    basename = PurePosixPath(cleaned).name
    return {key for key in (cleaned, basename) if key}
|
|
|
|
|
|
def record_campaign_attachment_uses_for_job(session: Session, job: CampaignJob, *, stage: str = "built") -> None:
    """Create best-effort file-use records for a job's matched managed files.

    Attachment resolution is path based, so each string in an attachment's
    ``matches`` list is tied back to a managed file shared with the job's
    campaign. Lookup precedence is deterministic: an exact display-path
    match wins over a filename match, and the full cleaned path is tried
    before its basename. A use record is added only when none exists yet for
    the same job, file version, filename and stage.

    Args:
        session: Active SQLAlchemy session; nothing is committed here.
        job: Campaign job whose ``resolved_attachments`` are inspected.
        stage: Value stored in ``use_stage`` of the created records.

    Raises:
        FileStorageError: Propagated from ``current_version_and_blob`` when
            a matched asset has no resolvable current version.
    """
    attachments = job.resolved_attachments or []
    if not isinstance(attachments, list):
        return

    # ``is_admin=True`` with an empty user id: the lookup is scoped by the
    # campaign share rather than by a particular user.
    assets = list_assets_for_user(
        session,
        tenant_id=job.tenant_id,
        user_id="",
        campaign_id=job.campaign_id,
        is_admin=True,
    )

    # Paths and filenames are indexed separately so a filename key can never
    # overwrite the exact display-path entry of another asset.
    by_path: dict[str, FileAsset] = {}
    by_name: dict[str, FileAsset] = {}
    for asset in assets:
        by_path[asset.display_path.strip("/")] = asset
        by_name[asset.filename] = asset

    for attachment in attachments:
        if not isinstance(attachment, dict):
            continue
        matches = attachment.get("matches")
        if not isinstance(matches, list):
            continue
        for raw in matches:
            if not isinstance(raw, str):
                continue

            # The candidate keys arrive as a set; order them longest first
            # so the full path is tried before its basename instead of
            # depending on arbitrary set iteration order.
            keys = sorted(_candidate_match_keys(raw), key=lambda key: (-len(key), key))
            asset = next((by_path[key] for key in keys if key in by_path), None)
            if asset is None:
                asset = next((by_name[key] for key in keys if key in by_name), None)
            if not asset:
                continue

            version, blob = current_version_and_blob(session, asset)
            # ``first()`` tolerates duplicate rows; ``one_or_none()`` would
            # raise on them and abort the recording.
            already_recorded = (
                session.query(CampaignAttachmentUse)
                .filter(
                    CampaignAttachmentUse.campaign_job_id == job.id,
                    CampaignAttachmentUse.file_version_id == version.id,
                    CampaignAttachmentUse.filename_used == asset.filename,
                    CampaignAttachmentUse.use_stage == stage,
                )
                .first()
            )
            if already_recorded:
                continue

            session.add(
                CampaignAttachmentUse(
                    tenant_id=job.tenant_id,
                    campaign_id=job.campaign_id,
                    campaign_version_id=job.campaign_version_id,
                    campaign_job_id=job.id,
                    entry_index=job.entry_index,
                    entry_id=job.entry_id,
                    file_asset_id=asset.id,
                    file_version_id=version.id,
                    file_blob_id=blob.id,
                    filename_used=asset.filename,
                    checksum_sha256=blob.checksum_sha256,
                    size_bytes=blob.size_bytes,
                    content_type=blob.content_type,
                    use_stage=stage,
                )
            )
|
|
|
|
|
|
def mark_job_attachment_uses_sent(session: Session, job: CampaignJob) -> None:
    """Add a ``sent`` use record for each ``built`` use record of a job.

    The ``built`` records are first brought up to date through
    ``record_campaign_attachment_uses_for_job``. Each one is then copied
    into a new record with stage ``sent`` and a shared ``used_at``
    timestamp, unless the job already has a ``sent`` record for the same
    file version.
    """
    record_campaign_attachment_uses_for_job(session, job, stage="built")
    sent_at = utcnow()

    # Columns copied unchanged from the "built" record to the "sent" record.
    copied_fields = (
        "tenant_id",
        "campaign_id",
        "campaign_version_id",
        "campaign_job_id",
        "entry_index",
        "entry_id",
        "file_asset_id",
        "file_version_id",
        "file_blob_id",
        "filename_used",
        "checksum_sha256",
        "size_bytes",
        "content_type",
    )

    built_uses = (
        session.query(CampaignAttachmentUse)
        .filter(
            CampaignAttachmentUse.tenant_id == job.tenant_id,
            CampaignAttachmentUse.campaign_job_id == job.id,
            CampaignAttachmentUse.use_stage == "built",
        )
        .all()
    )
    for built in built_uses:
        existing_sent = (
            session.query(CampaignAttachmentUse)
            .filter(
                CampaignAttachmentUse.campaign_job_id == job.id,
                CampaignAttachmentUse.file_version_id == built.file_version_id,
                CampaignAttachmentUse.use_stage == "sent",
            )
            .one_or_none()
        )
        if existing_sent:
            continue
        values = {field: getattr(built, field) for field in copied_fields}
        session.add(CampaignAttachmentUse(**values, use_stage="sent", used_at=sent_at))
|