"""Materialize managed campaign attachments into a temporary, file-oriented snapshot.

Base paths whose ``source`` is ``managed:<user|group>:<owner id>`` are written
into an isolated directory tree, and a copy of the campaign JSON is rewritten
to point at that tree. A manifest keyed by resolved local path keeps the exact
asset/version/blob identity of every materialized file.
"""

from __future__ import annotations

import copy
import json
import shutil
import tempfile
from contextlib import contextmanager
from dataclasses import asdict, dataclass
from pathlib import Path, PurePosixPath
from typing import Any, Iterator

from sqlalchemy.orm import Session

from app.db.models import FileAsset
from app.storage.files import current_version_and_blob, list_assets_for_user, read_asset_bytes
from app.storage.paths import normalize_folder, normalize_logical_path, safe_storage_component

MANAGED_SOURCE_PREFIX = "managed:"


@dataclass(frozen=True, slots=True)
class ManagedAttachmentFile:
    """Identity and blob metadata of one managed file materialized on disk."""

    # Resolved absolute path of the materialized file; also the manifest key.
    local_path: str
    asset_id: str
    version_id: str
    blob_id: str
    # Logical path as stored on the asset (not normalized here).
    display_path: str
    # Normalized path of the file relative to its base path's logical root.
    relative_path: str
    filename: str
    owner_type: str
    owner_id: str
    checksum_sha256: str
    size_bytes: int
    content_type: str | None

    def as_dict(self) -> dict[str, Any]:
        """Return all fields as a dict, without the temporary ``local_path``."""
        payload = asdict(self)
        payload.pop("local_path", None)
        return payload


@dataclass(slots=True)
class PreparedCampaignSnapshot:
    """Result of :func:`prepare_campaign_snapshot`."""

    # Location of the written ``campaign.json``.
    path: Path
    # The rewritten deep copy of the campaign JSON (same content as ``path``).
    raw_json: dict[str, Any]
    # Manifest keyed by the resolved local path of each materialized file.
    managed_files_by_local_path: dict[str, ManagedAttachmentFile]
    # Every asset returned by the asset listing, whether materialized or not.
    shared_assets: list[FileAsset]


def parse_managed_source(value: object) -> tuple[str, str] | None:
    """Parse a ``managed:<owner type>:<owner id>`` source string.

    Returns:
        ``(owner_type, owner_id)`` with the owner id stripped of surrounding
        whitespace, or ``None`` when ``value`` is not a string, lacks the
        managed prefix, names an owner type other than ``user``/``group``, or
        has a blank owner id.
    """
    if not isinstance(value, str) or not value.startswith(MANAGED_SOURCE_PREFIX):
        return None
    # maxsplit=2 keeps any further colons inside the owner id.
    parts = value.split(":", 2)
    if len(parts) != 3 or parts[1] not in {"user", "group"} or not parts[2].strip():
        return None
    return parts[1], parts[2].strip()


def _asset_owner_id(asset: FileAsset) -> str | None:
    """Return the owner id matching the asset's owner type, or ``None`` for other types."""
    if asset.owner_type == "user":
        return asset.owner_user_id
    if asset.owner_type == "group":
        return asset.owner_group_id
    return None


def _relative_asset_path(asset: FileAsset, logical_root: str) -> str | None:
    """Return the asset's normalized display path relative to ``logical_root``.

    With an empty (normalized) root the whole display path is returned. An
    asset that does not live strictly below the root yields ``None``.
    """
    display_path = normalize_logical_path(asset.display_path)
    root = normalize_folder(logical_root)
    if not root:
        return display_path
    # The trailing slash prevents "docs" from matching "docs-old/file".
    prefix = f"{root}/"
    if not display_path.startswith(prefix):
        return None
    return display_path[len(prefix) :]


def _safe_local_target(root: Path, relative_path: str) -> Path:
    """Resolve ``relative_path`` below ``root`` and refuse anything that escapes it.

    Raises:
        ValueError: If the path is empty, contains ``.``/``..`` components, or
            resolves to a location outside ``root``.
    """
    parts = PurePosixPath(normalize_logical_path(relative_path)).parts
    if not parts or any(part in {"", ".", ".."} for part in parts):
        raise ValueError(f"Unsafe managed attachment path: {relative_path!r}")
    target = root.joinpath(*parts).resolve()
    resolved_root = root.resolve()
    # Second line of defence: catches absolute components and symlink escapes.
    if not target.is_relative_to(resolved_root):
        raise ValueError(f"Managed attachment path escapes materialization root: {relative_path!r}")
    return target


def _iter_rule_dicts(attachments: dict[str, Any], raw_json: dict[str, Any]) -> Iterator[dict[str, Any]]:
    """Yield every attachment rule dict: global rules first, then per-entry rules.

    Global rules come from ``attachments["global"]``; per-entry rules come from
    ``raw_json["entries"]["inline"][*]["attachments"]``. Values of an
    unexpected type are skipped silently.
    """
    global_rules = attachments.get("global")
    if isinstance(global_rules, list):
        for rule in global_rules:
            if isinstance(rule, dict):
                yield rule

    entries = raw_json.get("entries")
    inline = entries.get("inline") if isinstance(entries, dict) else None
    if isinstance(inline, list):
        for entry in inline:
            if not isinstance(entry, dict):
                continue
            rules = entry.get("attachments")
            if isinstance(rules, list):
                for rule in rules:
                    if isinstance(rule, dict):
                        yield rule


def _selected_base_path(
    rule: dict[str, Any],
    prepared_by_id: dict[str, tuple[str, str]],
    prepared_by_old_path: dict[str, list[tuple[str, str]]],
    first_prepared: tuple[str, str] | None,
) -> tuple[str, str] | None:
    """Pick the prepared ``(base_path_id, local_root)`` pair a rule refers to.

    Lookup order: the rule's explicit ``base_path_id``; then its ``base_dir``
    matched against the original base path strings (first match wins); then,
    for the default ``"."`` directory only, the first prepared base path.
    Returns ``None`` when nothing matches.
    """
    base_path_id = str(rule.get("base_path_id") or "").strip()
    if base_path_id and base_path_id in prepared_by_id:
        return prepared_by_id[base_path_id]

    # Missing or blank base_dir collapses to ".", so it is never empty below.
    base_dir = str(rule.get("base_dir") or ".").strip() or "."
    candidates = prepared_by_old_path.get(base_dir)
    if candidates:
        return candidates[0]
    if base_dir == ".":
        return first_prepared
    return None


def prepare_campaign_snapshot(
    session: Session,
    *,
    tenant_id: str,
    campaign_id: str,
    raw_json: dict[str, Any],
    destination: Path,
    include_bytes: bool,
) -> PreparedCampaignSnapshot:
    """Create a temporary file-oriented campaign snapshot for managed attachments.

    The existing mailer resolver deliberately remains file-oriented. Managed
    campaign-shared file versions are materialized into an isolated tree and a
    copied campaign JSON is rewritten to point to those directories. The
    returned manifest preserves exact asset/version/blob identity so build and
    audit code never has to guess by filename.

    Args:
        session: Database session passed through to the storage helpers.
        tenant_id: Tenant whose assets are listed.
        campaign_id: Campaign whose shared assets are listed.
        raw_json: Campaign definition; never mutated (a deep copy is rewritten).
            A non-dict value is treated as an empty campaign.
        destination: Directory that receives ``campaign.json`` and the
            ``managed-attachments`` tree; created if missing.
        include_bytes: When true, file contents are written; otherwise empty
            placeholder files are created at the same paths.

    Returns:
        The snapshot path, rewritten JSON, manifest and listed assets.

    Raises:
        ValueError: If an asset's relative path is unsafe or would escape its
            materialization directory.
    """
    destination = destination.expanduser().resolve()
    destination.mkdir(parents=True, exist_ok=True)
    materialized_root = destination / "managed-attachments"
    materialized_root.mkdir(parents=True, exist_ok=True)

    prepared_json = copy.deepcopy(raw_json if isinstance(raw_json, dict) else {})
    attachments = prepared_json.get("attachments")
    if not isinstance(attachments, dict):
        attachments = {}
        prepared_json["attachments"] = attachments
    base_paths = attachments.get("base_paths")
    if not isinstance(base_paths, list):
        base_paths = []

    # NOTE(review): listing with an empty user id and is_admin=True presumably
    # returns every campaign-shared asset regardless of viewer — confirm.
    shared_assets = list_assets_for_user(
        session,
        tenant_id=tenant_id,
        user_id="",
        campaign_id=campaign_id,
        is_admin=True,
    )

    manifest: dict[str, ManagedAttachmentFile] = {}
    prepared_by_id: dict[str, tuple[str, str]] = {}
    prepared_by_old_path: dict[str, list[tuple[str, str]]] = {}
    first_prepared: tuple[str, str] | None = None

    for index, item in enumerate(base_paths):
        if not isinstance(item, dict):
            continue
        parsed_source = parse_managed_source(item.get("source"))
        if parsed_source is None:
            # Not a managed base path: left untouched in the copied JSON.
            continue
        owner_type, owner_id = parsed_source

        old_path = str(item.get("path") or ".").strip() or "."
        logical_root = "" if old_path in {"", ".", "/"} else normalize_folder(old_path)
        base_path_id = str(item.get("id") or f"base-path-{index + 1}")

        # The positional prefix keeps directories unique even if ids collide
        # after sanitization.
        local_root = materialized_root / f"{index + 1:03d}-{safe_storage_component(base_path_id)}"
        local_root.mkdir(parents=True, exist_ok=True)
        local_root_string = str(local_root.resolve())

        prepared = (base_path_id, local_root_string)
        prepared_by_id[base_path_id] = prepared
        prepared_by_old_path.setdefault(old_path, []).append(prepared)
        if first_prepared is None:
            first_prepared = prepared
        item["path"] = local_root_string

        for asset in shared_assets:
            if asset.owner_type != owner_type or _asset_owner_id(asset) != owner_id:
                continue
            relative_path = _relative_asset_path(asset, logical_root)
            if not relative_path:
                continue
            target = _safe_local_target(local_root, relative_path)
            target.parent.mkdir(parents=True, exist_ok=True)
            if include_bytes:
                data, version, blob = read_asset_bytes(session, asset)
                target.write_bytes(data)
            else:
                version, blob = current_version_and_blob(session, asset)
                target.touch()
            local_key = str(target.resolve())
            manifest[local_key] = ManagedAttachmentFile(
                local_path=local_key,
                asset_id=asset.id,
                version_id=version.id,
                blob_id=blob.id,
                display_path=asset.display_path,
                relative_path=normalize_logical_path(relative_path),
                filename=asset.filename,
                owner_type=asset.owner_type,
                owner_id=owner_id,
                checksum_sha256=blob.checksum_sha256,
                size_bytes=blob.size_bytes,
                content_type=blob.content_type,
            )

    # Point every rule that resolves to a managed base path at its local tree.
    for rule in _iter_rule_dicts(attachments, prepared_json):
        selected = _selected_base_path(rule, prepared_by_id, prepared_by_old_path, first_prepared)
        if selected is None:
            continue
        base_path_id, local_root_string = selected
        rule["base_path_id"] = base_path_id
        rule["base_dir"] = local_root_string

    if first_prepared is not None:
        attachments["base_path"] = first_prepared[1]

    snapshot_path = destination / "campaign.json"
    snapshot_path.write_text(json.dumps(prepared_json, ensure_ascii=False, indent=2), encoding="utf-8")
    return PreparedCampaignSnapshot(
        path=snapshot_path,
        raw_json=prepared_json,
        managed_files_by_local_path=manifest,
        shared_assets=shared_assets,
    )


@contextmanager
def prepared_campaign_snapshot(
    session: Session,
    *,
    tenant_id: str,
    campaign_id: str,
    raw_json: dict[str, Any],
    include_bytes: bool,
    prefix: str = "multimailer-managed-campaign-",
) -> Iterator[PreparedCampaignSnapshot]:
    """Yield a snapshot prepared in a fresh temporary directory.

    The directory is created with ``tempfile.mkdtemp(prefix=prefix)`` and is
    removed on exit (errors during removal are ignored), including when
    preparation or the ``with`` body raises. Paths in the yielded snapshot are
    therefore invalid once the context exits.
    """
    temp_dir = Path(tempfile.mkdtemp(prefix=prefix))
    try:
        yield prepare_campaign_snapshot(
            session,
            tenant_id=tenant_id,
            campaign_id=campaign_id,
            raw_json=raw_json,
            destination=temp_dir,
            include_bytes=include_bytes,
        )
    finally:
        shutil.rmtree(temp_dir, ignore_errors=True)


def managed_match_payloads(
    matches: list[str],
    manifest: dict[str, ManagedAttachmentFile],
) -> list[dict[str, Any]]:
    """Return the public payload of every match that is a known managed file.

    Each match is resolved to an absolute path before the manifest lookup;
    matches not present in the manifest are dropped. Input order is kept.
    """
    payloads: list[dict[str, Any]] = []
    for match in matches:
        item = manifest.get(str(Path(match).resolve()))
        if item is not None:
            payloads.append(item.as_dict())
    return payloads


def public_attachment_summary_payload(value: Any) -> dict[str, Any]:
    """Return an attachment summary without temporary materialization paths.

    Managed builds use isolated local directories internally. Queue, review and
    audit payloads must expose the stable managed paths and immutable IDs
    instead of those temporary paths. Legacy filesystem attachments remain
    unchanged for backwards compatibility.

    Args:
        value: An object exposing ``model_dump`` or a plain dict. Anything else
            produces an empty dict. Dict input is deep-copied, not mutated.

    Returns:
        The payload unchanged when it has no non-empty ``managed_matches``
        list; otherwise a payload whose ``matches`` are logical paths,
        ``base_path`` is ``None`` and ``directory`` is the base path name (or
        ``"managed"`` when that is missing).
    """
    if hasattr(value, "model_dump"):
        payload = value.model_dump(mode="json")
    elif isinstance(value, dict):
        payload = copy.deepcopy(value)
    else:
        return {}

    managed_matches = payload.get("managed_matches")
    if not isinstance(managed_matches, list) or not managed_matches:
        return payload

    logical_matches: list[str] = []
    for item in managed_matches:
        if not isinstance(item, dict):
            continue
        # Prefer the most specific stable name that is available.
        display_path = str(item.get("display_path") or item.get("relative_path") or item.get("filename") or "").strip()
        if display_path:
            logical_matches.append(display_path)
    payload["matches"] = logical_matches
    # These values point into a deleted temporary materialization directory.
    # The named source plus managed match metadata are the stable references.
    payload["base_path"] = None
    payload["directory"] = payload.get("base_path_name") or "managed"
    return payload


def annotate_built_messages_with_managed_files(
    built_messages: list[Any],
    manifest: dict[str, ManagedAttachmentFile],
) -> None:
    """Attach exact managed-file identities to built attachment summaries.

    For every attachment of every message draft, ``managed_matches`` is set to
    the manifest payloads of its ``matches``. Messages without a draft, and
    drafts whose ``attachments`` attribute is missing or ``None``, are skipped.
    """
    for built in built_messages:
        draft = getattr(built, "draft", None)
        # ``or []`` also covers a draft of None and an attachments value of
        # None, which would otherwise raise TypeError when iterated.
        attachments = getattr(draft, "attachments", None) or []
        for attachment in attachments:
            matches = list(getattr(attachment, "matches", []) or [])
            attachment.managed_matches = managed_match_payloads(matches, manifest)