attachment backend use

This commit is contained in:
2026-06-13 04:14:10 +02:00
parent 36e9211ee6
commit fe5ac084b7
11 changed files with 696 additions and 145 deletions

View File

@@ -0,0 +1,293 @@
from __future__ import annotations
import copy
import json
import shutil
import tempfile
from contextlib import contextmanager
from dataclasses import asdict, dataclass
from pathlib import Path, PurePosixPath
from typing import Any, Iterator
from sqlalchemy.orm import Session
from app.db.models import FileAsset
from app.storage.files import current_version_and_blob, list_assets_for_user, read_asset_bytes
from app.storage.paths import normalize_folder, normalize_logical_path, safe_storage_component
# Prefix that marks a base-path "source" value as a managed reference.
# parse_managed_source() accepts values shaped like "managed:<user|group>:<owner id>".
MANAGED_SOURCE_PREFIX = "managed:"
@dataclass(frozen=True, slots=True)
class ManagedAttachmentFile:
local_path: str
asset_id: str
version_id: str
blob_id: str
display_path: str
relative_path: str
filename: str
owner_type: str
owner_id: str
checksum_sha256: str
size_bytes: int
content_type: str | None
def as_dict(self) -> dict[str, Any]:
payload = asdict(self)
payload.pop("local_path", None)
return payload
@dataclass(slots=True)
class PreparedCampaignSnapshot:
path: Path
raw_json: dict[str, Any]
managed_files_by_local_path: dict[str, ManagedAttachmentFile]
shared_assets: list[FileAsset]
def parse_managed_source(value: object) -> tuple[str, str] | None:
    """Split a managed source reference into ``(owner_type, owner_id)``.

    Returns ``None`` unless *value* is a string that starts with the managed
    prefix, has three colon-separated parts, names the owner type ``user`` or
    ``group``, and carries a non-blank owner id. The owner id is returned
    stripped of surrounding whitespace and may itself contain colons.
    """
    if not isinstance(value, str):
        return None
    if not value.startswith(MANAGED_SOURCE_PREFIX):
        return None
    pieces = value.split(":", 2)
    if len(pieces) != 3:
        return None
    _, owner_type, raw_owner_id = pieces
    owner_id = raw_owner_id.strip()
    if owner_type not in {"user", "group"} or not owner_id:
        return None
    return owner_type, owner_id
def _asset_owner_id(asset: FileAsset) -> str | None:
if asset.owner_type == "user":
return asset.owner_user_id
if asset.owner_type == "group":
return asset.owner_group_id
return None
def _relative_asset_path(asset: FileAsset, logical_root: str) -> str | None:
    """Express the asset's display path relative to *logical_root*.

    With an empty (normalized) root the whole normalized display path is
    returned. Otherwise the path must start with ``"<root>/"``; the remainder
    after that prefix is returned, and ``None`` signals that the asset does
    not live below the root.
    """
    full_path = normalize_logical_path(asset.display_path)
    folder = normalize_folder(logical_root)
    if not folder:
        return full_path
    folder_prefix = f"{folder}/"
    if full_path.startswith(folder_prefix):
        return full_path.removeprefix(folder_prefix)
    return None
def _safe_local_target(root: Path, relative_path: str) -> Path:
    """Resolve *relative_path* beneath *root*, refusing anything that escapes it.

    Raises:
        ValueError: if the normalized path has no segments, contains an
            empty, ``.`` or ``..`` segment, or resolves to a location that is
            not inside the resolved *root*.
    """
    segments = PurePosixPath(normalize_logical_path(relative_path)).parts
    forbidden = {"", ".", ".."}
    if not segments or not forbidden.isdisjoint(segments):
        raise ValueError(f"Unsafe managed attachment path: {relative_path!r}")
    candidate = root.joinpath(*segments).resolve()
    # Second line of defence: compare fully resolved paths.
    if not candidate.is_relative_to(root.resolve()):
        raise ValueError(f"Managed attachment path escapes materialization root: {relative_path!r}")
    return candidate
def _iter_rule_dicts(attachments: dict[str, Any], raw_json: dict[str, Any]):
global_rules = attachments.get("global")
if isinstance(global_rules, list):
for rule in global_rules:
if isinstance(rule, dict):
yield rule
entries = raw_json.get("entries")
inline = entries.get("inline") if isinstance(entries, dict) else None
if isinstance(inline, list):
for entry in inline:
if not isinstance(entry, dict):
continue
rules = entry.get("attachments")
if isinstance(rules, list):
for rule in rules:
if isinstance(rule, dict):
yield rule
def _selected_base_path(
rule: dict[str, Any],
prepared_by_id: dict[str, tuple[str, str]],
prepared_by_old_path: dict[str, list[tuple[str, str]]],
first_prepared: tuple[str, str] | None,
) -> tuple[str, str] | None:
base_path_id = str(rule.get("base_path_id") or "").strip()
if base_path_id and base_path_id in prepared_by_id:
return prepared_by_id[base_path_id]
base_dir = str(rule.get("base_dir") or ".").strip() or "."
candidates = prepared_by_old_path.get(base_dir)
if candidates:
return candidates[0]
if base_dir in {"", "."}:
return first_prepared
return None
def prepare_campaign_snapshot(
    session: Session,
    *,
    tenant_id: str,
    campaign_id: str,
    raw_json: dict[str, Any],
    destination: Path,
    include_bytes: bool,
) -> PreparedCampaignSnapshot:
    """Write a file-oriented copy of a campaign with managed attachments on disk.

    The mailer's attachment resolver works on files, so every managed base
    path in the campaign JSON is materialized into its own directory below
    ``<destination>/managed-attachments`` and a deep copy of the JSON is
    rewritten to point at those directories. The rewritten JSON is saved as
    ``<destination>/campaign.json``.

    Args:
        session: Database session handed to the storage helpers.
        tenant_id: Tenant whose assets are listed.
        campaign_id: Campaign whose shared assets are listed.
        raw_json: Campaign JSON; never mutated. A non-dict is treated as ``{}``.
        destination: Directory receiving the snapshot; created if missing.
        include_bytes: When true the file contents are written; otherwise
            empty placeholder files are created.

    Returns:
        The snapshot path, the rewritten JSON, a manifest mapping each
        resolved local file path to its exact asset/version/blob identity,
        and the list of shared assets that was consulted.

    Raises:
        ValueError: propagated from ``_safe_local_target`` for unsafe paths.
    """
    destination = destination.expanduser().resolve()
    destination.mkdir(parents=True, exist_ok=True)
    materialized_root = destination / "managed-attachments"
    materialized_root.mkdir(parents=True, exist_ok=True)

    # Work on a private copy so the caller's JSON stays untouched.
    prepared_json = copy.deepcopy(raw_json) if isinstance(raw_json, dict) else {}
    attachments = prepared_json.get("attachments")
    if not isinstance(attachments, dict):
        attachments = prepared_json["attachments"] = {}
    configured_paths = attachments.get("base_paths")
    if not isinstance(configured_paths, list):
        configured_paths = []

    shared_assets = list_assets_for_user(
        session,
        tenant_id=tenant_id,
        user_id="",
        campaign_id=campaign_id,
        is_admin=True,
    )

    manifest: dict[str, ManagedAttachmentFile] = {}
    roots_by_id: dict[str, tuple[str, str]] = {}
    roots_by_old_path: dict[str, list[tuple[str, str]]] = {}
    first_root: tuple[str, str] | None = None

    for position, base_path in enumerate(configured_paths, start=1):
        if not isinstance(base_path, dict):
            continue
        managed = parse_managed_source(base_path.get("source"))
        if managed is None:
            continue  # not a managed base path; leave it as configured
        owner_type, owner_id = managed

        old_path = str(base_path.get("path") or ".").strip() or "."
        logical_root = "" if old_path in {"", ".", "/"} else normalize_folder(old_path)
        base_path_id = str(base_path.get("id") or f"base-path-{position}")

        # The position prefix keeps directories distinct even for equal ids.
        local_root = materialized_root / f"{position:03d}-{safe_storage_component(base_path_id)}"
        local_root.mkdir(parents=True, exist_ok=True)
        local_root_string = str(local_root.resolve())

        prepared = (base_path_id, local_root_string)
        roots_by_id[base_path_id] = prepared
        roots_by_old_path.setdefault(old_path, []).append(prepared)
        if first_root is None:
            first_root = prepared
        base_path["path"] = local_root_string

        for asset in shared_assets:
            owned_by_source = asset.owner_type == owner_type and _asset_owner_id(asset) == owner_id
            if not owned_by_source:
                continue
            relative_path = _relative_asset_path(asset, logical_root)
            if not relative_path:
                continue  # asset lies outside this base path
            target = _safe_local_target(local_root, relative_path)
            target.parent.mkdir(parents=True, exist_ok=True)
            if include_bytes:
                data, version, blob = read_asset_bytes(session, asset)
                target.write_bytes(data)
            else:
                version, blob = current_version_and_blob(session, asset)
                target.touch()
            local_key = str(target.resolve())
            manifest[local_key] = ManagedAttachmentFile(
                local_path=local_key,
                asset_id=asset.id,
                version_id=version.id,
                blob_id=blob.id,
                display_path=asset.display_path,
                relative_path=normalize_logical_path(relative_path),
                filename=asset.filename,
                owner_type=asset.owner_type,
                owner_id=owner_id,
                checksum_sha256=blob.checksum_sha256,
                size_bytes=blob.size_bytes,
                content_type=blob.content_type,
            )

    # Point every rule that resolves to a managed base path at its directory.
    for rule in _iter_rule_dicts(attachments, prepared_json):
        chosen = _selected_base_path(rule, roots_by_id, roots_by_old_path, first_root)
        if chosen is None:
            continue
        rule["base_path_id"], rule["base_dir"] = chosen

    if first_root is not None:
        attachments["base_path"] = first_root[1]

    snapshot_path = destination / "campaign.json"
    serialized = json.dumps(prepared_json, ensure_ascii=False, indent=2)
    snapshot_path.write_text(serialized, encoding="utf-8")
    return PreparedCampaignSnapshot(
        path=snapshot_path,
        raw_json=prepared_json,
        managed_files_by_local_path=manifest,
        shared_assets=shared_assets,
    )
@contextmanager
def prepared_campaign_snapshot(
    session: Session,
    *,
    tenant_id: str,
    campaign_id: str,
    raw_json: dict[str, Any],
    include_bytes: bool,
    prefix: str = "multimailer-managed-campaign-",
) -> Iterator[PreparedCampaignSnapshot]:
    """Yield a campaign snapshot that lives in a throw-away temp directory.

    A fresh directory is created with ``tempfile.mkdtemp(prefix=prefix)``, the
    snapshot is prepared inside it, and the directory is removed on exit
    (removal errors are ignored), whether or not the body raised.
    """
    scratch_dir = Path(tempfile.mkdtemp(prefix=prefix))
    try:
        snapshot = prepare_campaign_snapshot(
            session,
            tenant_id=tenant_id,
            campaign_id=campaign_id,
            raw_json=raw_json,
            destination=scratch_dir,
            include_bytes=include_bytes,
        )
        yield snapshot
    finally:
        shutil.rmtree(scratch_dir, ignore_errors=True)
def managed_match_payloads(
    matches: list[str],
    manifest: dict[str, ManagedAttachmentFile],
) -> list[dict[str, Any]]:
    """Translate matched file paths into managed-file payload dicts.

    Each match is resolved to an absolute path string and looked up in
    *manifest*; matches without a manifest entry are dropped. Order follows
    *matches*.
    """
    resolved_keys = (str(Path(match).resolve()) for match in matches)
    found = (manifest.get(key) for key in resolved_keys)
    return [entry.as_dict() for entry in found if entry is not None]
def annotate_built_messages_with_managed_files(
    built_messages: list[Any],
    manifest: dict[str, ManagedAttachmentFile],
) -> None:
    """Attach exact managed-file identities to built attachment summaries.

    For every attachment of every built message's ``draft`` the attribute
    ``managed_matches`` is set to the manifest payloads of its ``matches``.
    Messages without a draft, and drafts whose ``attachments`` attribute is
    missing or ``None``, are skipped.

    Args:
        built_messages: Objects that may expose a ``draft`` attribute.
        manifest: Managed files keyed by resolved local path string.
    """
    for built in built_messages:
        draft = getattr(built, "draft", None)
        if draft is None:
            continue
        # ``attachments`` may be absent or explicitly None; treat both as empty,
        # mirroring the handling of ``matches`` below.
        for attachment in getattr(draft, "attachments", None) or []:
            matches = list(getattr(attachment, "matches", None) or [])
            attachment.managed_matches = managed_match_payloads(matches, manifest)

View File

@@ -4,7 +4,7 @@ from pathlib import PurePosixPath
from sqlalchemy.orm import Session
from app.db.models import CampaignAttachmentUse, CampaignJob, FileAsset
from app.db.models import CampaignAttachmentUse, CampaignJob, FileAsset, FileBlob, FileVersion
from app.storage.common import utcnow
from app.storage.files import current_version_and_blob, list_assets_for_user
@@ -17,17 +17,60 @@ def _candidate_match_keys(raw_match: str) -> set[str]:
return {item for item in result if item}
def record_campaign_attachment_uses_for_job(session: Session, job: CampaignJob, *, stage: str = "built") -> None:
"""Create best-effort immutable file-use records for matched managed files.
def _add_use(
    session: Session,
    job: CampaignJob,
    *,
    asset: FileAsset,
    version: FileVersion,
    blob: FileBlob,
    filename_used: str,
    stage: str,
) -> None:
    """Add a ``CampaignAttachmentUse`` row unless an equivalent one exists.

    A use counts as already recorded when a row with the same job, file
    version, ``filename_used`` and stage is found. The new row copies the
    tenant/campaign/entry identifiers from *job* and the checksum, size and
    content type from *blob*. The row is only added to the session; nothing
    is committed here.
    """
    # ``first()`` instead of ``one_or_none()``: this is a pure existence check
    # and must not raise MultipleResultsFound when duplicates already exist.
    already_recorded = (
        session.query(CampaignAttachmentUse)
        .filter(
            CampaignAttachmentUse.campaign_job_id == job.id,
            CampaignAttachmentUse.file_version_id == version.id,
            CampaignAttachmentUse.filename_used == filename_used,
            CampaignAttachmentUse.use_stage == stage,
        )
        .first()
    )
    if already_recorded is not None:
        return
    session.add(
        CampaignAttachmentUse(
            tenant_id=job.tenant_id,
            campaign_id=job.campaign_id,
            campaign_version_id=job.campaign_version_id,
            campaign_job_id=job.id,
            entry_index=job.entry_index,
            entry_id=job.entry_id,
            file_asset_id=asset.id,
            file_version_id=version.id,
            file_blob_id=blob.id,
            filename_used=filename_used,
            checksum_sha256=blob.checksum_sha256,
            size_bytes=blob.size_bytes,
            content_type=blob.content_type,
            use_stage=stage,
        )
    )
Existing attachment resolution is still filesystem/path based. This bridge
records uses when a resolved attachment match can be tied to a managed file
by logical path or filename among files shared with the campaign.
def record_campaign_attachment_uses_for_job(session: Session, job: CampaignJob, *, stage: str = "built") -> None:
"""Record immutable managed file versions used by a built/sent job.
New builds carry exact managed asset/version/blob IDs. Filename matching is
retained only as a compatibility fallback for jobs created before managed
attachment materialization was introduced.
"""
attachments = job.resolved_attachments or []
if not isinstance(attachments, list):
return
assets = list_assets_for_user(
session,
tenant_id=job.tenant_id,
@@ -35,12 +78,40 @@ def record_campaign_attachment_uses_for_job(session: Session, job: CampaignJob,
campaign_id=job.campaign_id,
is_admin=True,
)
assets_by_id = {asset.id: asset for asset in assets}
for attachment in attachments:
if not isinstance(attachment, dict):
continue
managed_matches = attachment.get("managed_matches")
if isinstance(managed_matches, list):
for item in managed_matches:
if not isinstance(item, dict):
continue
asset = assets_by_id.get(str(item.get("asset_id") or ""))
version = session.get(FileVersion, str(item.get("version_id") or ""))
blob = session.get(FileBlob, str(item.get("blob_id") or ""))
if not asset or not version or not blob:
continue
if version.file_asset_id != asset.id or version.blob_id != blob.id:
continue
_add_use(
session,
job,
asset=asset,
version=version,
blob=blob,
filename_used=str(item.get("filename") or asset.filename),
stage=stage,
)
# Compatibility fallback for older job snapshots without managed_matches.
by_key: dict[str, FileAsset] = {}
for asset in assets:
by_key[asset.display_path.strip("/")] = asset
by_key[asset.filename] = asset
by_key.setdefault(asset.filename, asset)
for attachment in attachments:
if not isinstance(attachment, dict):
if not isinstance(attachment, dict) or attachment.get("managed_matches"):
continue
matches = attachment.get("matches") if isinstance(attachment.get("matches"), list) else []
for raw in matches:
@@ -50,35 +121,14 @@ def record_campaign_attachment_uses_for_job(session: Session, job: CampaignJob,
if not asset:
continue
version, blob = current_version_and_blob(session, asset)
exists = (
session.query(CampaignAttachmentUse)
.filter(
CampaignAttachmentUse.campaign_job_id == job.id,
CampaignAttachmentUse.file_version_id == version.id,
CampaignAttachmentUse.filename_used == asset.filename,
CampaignAttachmentUse.use_stage == stage,
)
.one_or_none()
)
if exists:
continue
session.add(
CampaignAttachmentUse(
tenant_id=job.tenant_id,
campaign_id=job.campaign_id,
campaign_version_id=job.campaign_version_id,
campaign_job_id=job.id,
entry_index=job.entry_index,
entry_id=job.entry_id,
file_asset_id=asset.id,
file_version_id=version.id,
file_blob_id=blob.id,
filename_used=asset.filename,
checksum_sha256=blob.checksum_sha256,
size_bytes=blob.size_bytes,
content_type=blob.content_type,
use_stage=stage,
)
_add_use(
session,
job,
asset=asset,
version=version,
blob=blob,
filename_used=asset.filename,
stage=stage,
)