v0.3.1 with Saxon fully working

This commit is contained in:
2026-06-05 03:19:04 +02:00
parent fc828363f0
commit 71f2f3d44b
51 changed files with 12683 additions and 1 deletions

20
src/transform/hash.ts Normal file
View File

@@ -0,0 +1,20 @@
/**
 * Computes the SHA-256 digest of `text` and returns it as a lowercase
 * hexadecimal string (two digits per byte).
 *
 * The text is encoded with `TextEncoder` before hashing. Web Crypto is looked
 * up on `globalThis` instead of `window`, so the function also works where no
 * `window` global exists (Web Workers, Node.js, test runners) rather than
 * failing with a ReferenceError.
 *
 * @param text - The string to hash.
 * @returns The hex digest, or the result of `createFallbackHash(text)` when
 *   `crypto.subtle` is not available in the current environment.
 */
export async function createSha256Hash(text: string): Promise<string> {
  const subtle = globalThis.crypto?.subtle;
  if (!subtle) {
    // No Web Crypto here: fall back to the non-cryptographic hash.
    return createFallbackHash(text);
  }
  const digest = await subtle.digest('SHA-256', new TextEncoder().encode(text));
  return Array.from(new Uint8Array(digest), (byte) =>
    byte.toString(16).padStart(2, '0')
  ).join('');
}
/**
 * Non-cryptographic stand-in used when SHA-256 is unavailable.
 *
 * Folds the UTF-16 code units of `text` into a 32-bit rolling hash
 * (previous value times 31 plus the code unit, wrapped to a signed 32-bit
 * integer) and renders the absolute value in hexadecimal.
 *
 * @param text - The string to hash.
 * @returns A string of the form `fallback-<hex>`.
 */
function createFallbackHash(text: string): string {
  const codeUnits = Array.from({ length: text.length }, (_, position) =>
    text.charCodeAt(position)
  );
  // Math.imul(x, 31) equals (x << 5) - x under 32-bit wrap-around.
  const rolled = codeUnits.reduce(
    (accumulator, codeUnit) => (Math.imul(accumulator, 31) + codeUnit) | 0,
    0
  );
  return `fallback-${Math.abs(rolled).toString(16)}`;
}

View File

@@ -0,0 +1,69 @@
import { createSha256Hash } from './hash';
import { serializeResultDocument } from './serialization';
import type {
TransformEngine,
TransformRequest,
TransformResult,
TransformationRun,
} from './transformTypes';
import { parseXmlDocument } from '../validation/xmlValidation';
import { validateXslt } from '../validation/xsltValidation';
import { hasErrors } from '../validation/validationTypes';
/**
 * Transform engine backed by the browser's built-in `XSLTProcessor`
 * (advertised here as supporting XSLT 1.0 only).
 *
 * `transform` never rejects for stylesheet or processor failures: like the
 * SaxonJS engine, it reports them as an `error` diagnostic with an empty
 * `output`, so callers handle both engines the same way.
 */
export const nativeXsltEngine: TransformEngine = {
  id: 'native-xsltprocessor',
  label: 'Native browser XSLTProcessor',
  supportsXsltVersions: ['1.0'],
  /**
   * Validates both inputs, then runs the stylesheet against the XML input.
   *
   * @param request - XML text, XSLT text and the requested engine id.
   * @returns The serialized result plus all collected diagnostics. `output`
   *   is `''` when validation reports errors or the processor fails.
   */
  async transform(request: TransformRequest): Promise<TransformResult> {
    const xmlParse = parseXmlDocument(request.xmlText, 'XML input');
    const xsltDiagnostics = validateXslt(request.xsltText, request.engine);
    const diagnostics = [...xmlParse.diagnostics, ...xsltDiagnostics];
    if (hasErrors(diagnostics)) {
      // Do not hand invalid documents to the processor.
      return {
        output: '',
        diagnostics,
        engine: request.engine,
        transformedAt: new Date().toISOString(),
      };
    }
    try {
      const stylesheetParse = parseXmlDocument(
        request.xsltText,
        'XSLT stylesheet'
      );
      const processor = new XSLTProcessor();
      processor.importStylesheet(stylesheetParse.document);
      const resultDocument = processor.transformToDocument(xmlParse.document);
      if (!resultDocument) {
        // NOTE(review): some browsers appear to signal a failed transformation
        // by returning no document instead of throwing; treat it as an error
        // rather than passing null to the serializer.
        throw new Error('The browser XSLTProcessor produced no result document.');
      }
      return {
        output: serializeResultDocument(resultDocument),
        diagnostics,
        engine: request.engine,
        transformedAt: new Date().toISOString(),
      };
    } catch (error) {
      return {
        output: '',
        diagnostics: [
          ...diagnostics,
          {
            severity: 'error',
            source: 'Native XSLTProcessor',
            message:
              error instanceof Error
                ? error.message
                : 'Native XSLT transformation failed.',
          },
        ],
        engine: request.engine,
        transformedAt: new Date().toISOString(),
      };
    }
  },
};
/**
 * Builds the record describing one transformation: which engine was
 * requested, when the record was created, and hashes of the XML input, the
 * stylesheet and the produced output.
 *
 * @param request - The request that was transformed.
 * @param output - The text produced by the transformation.
 * @returns The run record; `outputLength` is `output.length`.
 */
export async function createTransformationRun(
  request: TransformRequest,
  output: string
): Promise<TransformationRun> {
  // Start all three digests before awaiting so they run concurrently.
  const xmlInputDigest = createSha256Hash(request.xmlText);
  const xsltDigest = createSha256Hash(request.xsltText);
  const outputDigest = createSha256Hash(output);
  const digests = await Promise.all([xmlInputDigest, xsltDigest, outputDigest]);

  return {
    engine: request.engine,
    transformedAt: new Date().toISOString(),
    xmlInputHash: digests[0],
    xsltHash: digests[1],
    outputHash: digests[2],
    outputLength: output.length,
  };
}

View File

@@ -0,0 +1,309 @@
/**
 * Loosely typed node of a SaxonJS stylesheet-export (SEF) tree as handled in
 * this module. Any other string key may be present with an unknown value.
 */
type SefNode = Record<string, unknown> & {
// Node name; this module compares it to 'package' and feeds it into the checksum.
N?: string;
// Child nodes, walked recursively by the helpers in this module.
C?: SefNode[];
// Read from the compiler's principal result before conversion to JSON.
firstChild?: unknown;
// Back-reference assigned by addParentPointers.
parentNode?: SefNode;
};
/**
 * The single method of a SaxonJS XdmMap instance that this module calls.
 * NOTE(review): presumably inserts the entry into the map in place — confirm
 * against SaxonJS.
 */
type SaxonXdmMap = {
inSituPut: (key: unknown, value: unknown[]) => void;
};
// Root-relative URL of the SaxonJS browser bundle that is injected as a <script> element.
const SAXON_SCRIPT_URL = '/vendor/saxon/SaxonJS2.js';
// Shared promise for the script load so concurrent callers reuse one load; null when no load has been recorded.
let saxonLoadPromise: Promise<void> | null = null;
/**
 * The members of the `window.SaxonJS` global that this module uses.
 * Every member is optional because the code checks for its presence at
 * runtime and throws a descriptive error when one is missing.
 */
type SaxonPrivateRuntime = {
// Platform adapter; this module asks it for the 'compiler' resource and uses it as a fallback XML parser.
getPlatform?: () => {
resource?: (name: string) => unknown;
parseXmlFromString?: (text: string) => unknown;
};
// Called on the compile options before internalTransform; its return value is what gets passed on.
checkOptions?: (options: Record<string, unknown>) => Record<string, unknown>;
// Runs a stylesheet against a source. Returns nothing; this module reads the result from the checked options.
internalTransform?: (
stylesheetInternal: unknown,
source: unknown,
checkedOptions: Record<string, unknown>
) => void;
// Asynchronous resource loader; this module calls it with `text` and `type: 'xml'` to parse XML.
getResource?: (options: {
file?: string;
location?: string;
text?: string;
type?: 'xml' | 'json' | string;
}) => Promise<unknown>;
XPath?: {
// Converts the compiled SEF XML node to its JSON form.
sefToJSON?: (node: unknown, keepDebug: boolean) => unknown;
};
XS?: {
QName?: {
// Builds a QName value; used here as the key for the 'staticParameters' entry.
fromParts: (prefix: string, uri: string, local: string) => unknown;
};
};
// Constructor for the maps passed as stylesheet parameters.
XdmMap?: new () => SaxonXdmMap;
// Transform entry point, called here with 'async' execution.
transform?: (
options: Record<string, unknown>,
execution?: 'sync' | 'async'
) => Promise<{ principalResult?: unknown }>;
};
/**
 * Resolves to the SaxonJS global once its script has been loaded.
 *
 * @returns `window.SaxonJS`, typed as the subset of APIs this module uses.
 * @throws Error when the global is still missing after the load completed.
 */
async function getSaxon(): Promise<SaxonPrivateRuntime> {
  await ensureSaxonLoaded();

  const runtime = window.SaxonJS as SaxonPrivateRuntime | undefined;
  if (runtime) {
    return runtime;
  }
  throw new Error('window.SaxonJS is not loaded.');
}
/**
 * Makes sure the SaxonJS bundle at `SAXON_SCRIPT_URL` has been loaded.
 *
 * Returns immediately when `window.SaxonJS` already exists. Otherwise it
 * reuses a matching `<script>` element already in the document, or injects a
 * new one, and waits for its `load` event. Concurrent callers share one load
 * through `saxonLoadPromise`.
 *
 * A failed load is not cached: the failed element is removed and the shared
 * promise is cleared, so a later call can retry instead of receiving the same
 * rejection forever.
 *
 * @throws Error when the script fails to load.
 */
async function ensureSaxonLoaded(): Promise<void> {
  if (window.SaxonJS) return;

  const pending = (saxonLoadPromise ??= new Promise<void>((resolve, reject) => {
    const watch = (script: HTMLScriptElement): void => {
      script.addEventListener('load', () => resolve(), { once: true });
      script.addEventListener(
        'error',
        () => {
          // Remove the dead element so a retry injects a fresh one instead of
          // waiting on events that will never fire again.
          script.remove();
          reject(new Error('Failed to load SaxonJS2.js.'));
        },
        { once: true }
      );
    };

    const existingScript = document.querySelector<HTMLScriptElement>(
      `script[src="${SAXON_SCRIPT_URL}"]`
    );
    if (existingScript) {
      // NOTE(review): if this element finished loading (or failing) before the
      // listeners are attached, neither event fires and the promise stays
      // pending — confirm whether the page ever embeds the script itself.
      watch(existingScript);
      return;
    }

    const script = document.createElement('script');
    script.src = SAXON_SCRIPT_URL;
    script.async = true;
    watch(script);
    document.head.appendChild(script);
  }));

  try {
    await pending;
  } catch (error) {
    // Only clear the cache if it still holds the load that just failed; a
    // newer retry started in the meantime must be left alone.
    if (saxonLoadPromise === pending) {
      saxonLoadPromise = null;
    }
    throw error;
  }
}
/**
 * Sets `parentNode` on every descendant of `node` (reached through the `C`
 * child arrays) to the node that lists it as a child. The root itself is
 * left untouched.
 *
 * @param node - Root of the tree to annotate; mutated in place.
 */
function addParentPointers(node: SefNode): void {
  const pending: SefNode[] = [node];
  for (
    let current = pending.pop();
    current !== undefined;
    current = pending.pop()
  ) {
    for (const child of current.C ?? []) {
      child.parentNode = current;
      pending.push(child);
    }
  }
}
/**
 * Computes a hash over the whole SEF tree and stores it on the root node, as
 * a hexadecimal string, under the key `String.fromCharCode(931)` (the Greek
 * capital sigma). Mutates `sef` in place.
 *
 * NOTE(review): presumably this has to match the checksum SaxonJS itself
 * computes for SEF files — confirm before changing any arithmetic or the
 * traversal order.
 */
function checksum(sef: SefNode): void {
// Hashes a string: start from `seed << 8`, then for each UTF-16 code unit shift left by one and add it.
function hashString(value: string, seed: number): number {
let current = seed << 8;
for (let index = 0; index < value.length; index += 1) {
current = (current << 1) + value.charCodeAt(index);
}
return current;
}
// Hashes a (name, namespace URI) pair by XOR-ing the two string hashes taken with the same seed.
function hashPair(name: string, uri: string, seed: number): number {
return hashString(name, seed) ^ hashString(uri, seed);
}
let hash = 0;
// Incremented once per visited node; used as the seed for that node's hashes.
let counter = 0;
// Pre-order traversal: node name first, then its properties, then its children.
function visit(node: SefNode): void {
// The node name is hashed with the pre-increment counter value.
hash ^= hashPair(
String(node.N ?? ''),
'http://ns.saxonica.com/xslt/export',
counter++
);
// Every own enumerable property except the name, the children and the checksum
// key itself contributes its key and its stringified value, seeded with the
// already-incremented counter.
for (const [key, value] of Object.entries(node)) {
if (key !== 'N' && key !== 'C' && key !== String.fromCharCode(931)) {
hash ^= hashPair(key, '', counter);
hash ^= hashString(String(value), counter);
}
}
node.C?.forEach((child) => visit(child));
// Flip the lowest bit after the node's children have been visited.
hash ^= 1;
}
visit(sef);
// `hash` is a signed 32-bit value after the XORs; map negatives to the unsigned range before formatting.
sef[String.fromCharCode(931)] = (
hash < 0 ? 4294967295 + hash + 1 : hash
).toString(16);
}
/**
 * Picks the node to use from a compiler result: the first element when the
 * result is an array, otherwise the result itself.
 *
 * @param principalResult - Value produced by the compiler run.
 * @returns The selected value, typed as a SEF node.
 * @throws Error when the selected value is falsy (for example an empty
 *   array, `null` or `undefined`).
 */
function getFirstPrincipalNode(principalResult: unknown): SefNode {
  let candidate: unknown = principalResult;
  if (Array.isArray(principalResult)) {
    candidate = principalResult[0];
  }
  if (candidate) {
    return candidate as SefNode;
  }
  throw new Error('The SaxonJS compiler returned no principal result.');
}
/**
 * Returns the absolute URL used as the base URI for stylesheets that exist
 * only in memory: a fixed path resolved against the current page location.
 */
function getSyntheticStylesheetBaseUri(): string {
  const syntheticPath = '/__xsl-tools__/in-memory-stylesheet.xsl';
  const resolved = new URL(syntheticPath, window.location.href);
  return resolved.toString();
}
/**
 * Turns a SaxonJS principal result into text.
 *
 * - strings are returned unchanged;
 * - documents and elements are serialized with `XMLSerializer`;
 * - document fragments are cloned into a detached `<div>` whose `innerHTML`
 *   is returned;
 * - anything else goes through `String(...)`, with `null`/`undefined`
 *   becoming the empty string.
 *
 * @param value - The result value to serialize.
 */
function serializeSaxonResult(value: unknown): string {
  if (typeof value === 'string') return value;

  // XML documents are Document instances as well, so one check covers both.
  if (value instanceof Document || value instanceof Element) {
    return new XMLSerializer().serializeToString(value);
  }

  if (value instanceof DocumentFragment) {
    const host = document.createElement('div');
    // Clone so the caller's fragment keeps its children.
    for (const child of Array.from(value.childNodes)) {
      host.append(child.cloneNode(true));
    }
    return host.innerHTML;
  }

  return String(value ?? '');
}
/**
 * Parses `xmlText` into a SaxonJS document object and tags it with the
 * synthetic in-memory base URI (`_saxonBaseUri` and `_saxonDocUri`).
 *
 * `saxon.getResource({ text, type: 'xml' })` is tried first. When it is
 * missing, fails, or yields no object, the platform adapter's
 * `parseXmlFromString` is used instead.
 *
 * @param saxon - The SaxonJS runtime.
 * @param xmlText - XML source text to parse.
 * @returns The parsed document object with the base-URI properties set.
 * @throws Error when no parser produced a document. If `getResource` failed,
 *   its failure reason is included in the message instead of being reported
 *   as "APIs not available".
 */
async function parseXmlForSaxon(
  saxon: SaxonPrivateRuntime,
  xmlText: string
): Promise<Record<string, unknown>> {
  const baseUri = getSyntheticStylesheetBaseUri();
  const withBaseUri = (
    parsed: Record<string, unknown>
  ): Record<string, unknown> => {
    parsed._saxonBaseUri = baseUri;
    parsed._saxonDocUri = baseUri;
    return parsed;
  };

  let getResourceFailure: unknown;
  if (saxon.getResource) {
    try {
      const parsed = await saxon.getResource({ text: xmlText, type: 'xml' });
      if (typeof parsed === 'object' && parsed !== null) {
        return withBaseUri(parsed as Record<string, unknown>);
      }
    } catch (error) {
      // Some browser/runtime combinations only support parsing through the
      // platform adapter, so keep going; remember the reason in case the
      // fallback cannot help either.
      getResourceFailure = error;
    }
  }

  const parsed = saxon.getPlatform?.().parseXmlFromString?.(xmlText) as
    | Record<string, unknown>
    | undefined;
  if (parsed) {
    return withBaseUri(parsed);
  }

  if (getResourceFailure !== undefined) {
    const reason =
      getResourceFailure instanceof Error
        ? getResourceFailure.message
        : String(getResourceFailure);
    throw new Error(`Could not parse XML for SaxonJS: ${reason}`);
  }
  throw new Error(
    'Could not parse XML for SaxonJS: neither getResource({ text }) nor platform.parseXmlFromString(...) is available.'
  );
}
/**
 * Compiles XSLT source text to a SEF JSON string in the browser by running
 * the compiler stylesheet bundled with SaxonJS against the XSLT source.
 *
 * @param xsltText - The XSLT stylesheet source.
 * @param compilerOptions - Extra entries merged into the `Q{}options`
 *   template parameter; they are spread after the default `noXPath: false`
 *   and can therefore override it.
 * @returns The compiled stylesheet as a JSON string, including the checksum
 *   added by `checksum`.
 * @throws Error when SaxonJS cannot be loaded, when any of the required
 *   SaxonJS APIs or the 'compiler' resource is missing, or when the compiler
 *   returns no principal result.
 */
export async function compileXsltTextToSefJson(
xsltText: string,
compilerOptions: Record<string, unknown> = {}
): Promise<string> {
const saxon = await getSaxon();
// Verify up front that every API used below exists, so failures carry a specific message.
const platform = saxon.getPlatform?.();
if (!platform?.resource) {
throw new Error('SaxonJS platform.resource(...) is not available.');
}
if (!saxon.checkOptions || !saxon.internalTransform) {
throw new Error(
'SaxonJS internal compiler APIs are not available: checkOptions/internalTransform missing.'
);
}
if (!saxon.XPath?.sefToJSON) {
throw new Error('SaxonJS.XPath.sefToJSON(...) is not available.');
}
if (!saxon.XdmMap || !saxon.XS?.QName?.fromParts) {
throw new Error('SaxonJS XdmMap/QName APIs are not available.');
}
// The compiler is itself a SEF tree whose root node must be named 'package'.
const compiler = platform.resource('compiler') as SefNode | undefined;
if (!compiler || compiler.N !== 'package') {
throw new Error(
'SaxonJS compiler resource is not available. You are probably loading SaxonJS2.rt.js, which contains only the runtime. Use the full SaxonJS2.js browser file for dynamic XSLT-to-SEF compilation.'
);
}
// Mutates the shared compiler tree: every descendant gets a parentNode reference.
addParentPointers(compiler);
// Stylesheet parameters: a map holding one entry, 'staticParameters', whose value is an empty map.
const stylesheetParams = new saxon.XdmMap();
const staticParameters = new saxon.XdmMap();
stylesheetParams.inSituPut(
saxon.XS.QName.fromParts('', '', 'staticParameters'),
[staticParameters]
);
// The XSLT text is the *source document* of the compiler run.
const source = await parseXmlForSaxon(saxon, xsltText);
const options = {
destination: 'application',
initialMode: 'compile-complete',
isDynamicStylesheet: true,
templateParams: {
'Q{}stylesheet-base-uri': getSyntheticStylesheetBaseUri(),
'Q{}options': {
noXPath: false,
...compilerOptions,
},
},
stylesheetParams,
stylesheetInternal: compiler,
outputProperties: {},
};
// Forward the compiler's own 'relocatable' flag when it is set.
if (compiler.relocatable === 'true') {
(options as Record<string, unknown>).isRelocatableStylesheet = true;
}
const checkedOptions = saxon.checkOptions(options);
// internalTransform returns nothing; the result is read from checkedOptions afterwards.
saxon.internalTransform(compiler, source, checkedOptions);
const sefXml = getFirstPrincipalNode(checkedOptions.principalResult);
// Convert the first child when there is one, otherwise the node itself; keepDebug is false.
const sefJson = saxon.XPath.sefToJSON(
sefXml.firstChild ?? sefXml,
false
) as SefNode;
// Add the checksum property to the root before serializing.
checksum(sefJson);
return JSON.stringify(sefJson);
}
/**
 * Transforms `xmlText` with the stylesheet in `xsltText` using SaxonJS: the
 * stylesheet is first compiled to SEF JSON, then handed to
 * `SaxonJS.transform` together with the source text.
 *
 * @param xmlText - XML source document text.
 * @param xsltText - XSLT stylesheet source text.
 * @returns The principal result of the transformation as a string.
 * @throws Error when SaxonJS or its `transform` API is unavailable, or when
 *   compilation or transformation fails.
 */
export async function transformXmlWithDynamicSaxon(
  xmlText: string,
  xsltText: string
): Promise<string> {
  const saxon = await getSaxon();
  if (!saxon.transform) {
    throw new Error('SaxonJS transform API is not available.');
  }

  const compiledStylesheet = await compileXsltTextToSefJson(xsltText);
  const transformOptions = {
    stylesheetText: compiledStylesheet,
    sourceText: xmlText,
    destination: 'serialized',
  };
  const outcome = await saxon.transform(transformOptions, 'async');
  return serializeSaxonResult(outcome.principalResult);
}

View File

@@ -0,0 +1,44 @@
import type {
TransformEngine,
TransformRequest,
TransformResult,
} from './transformTypes';
import { transformXmlWithDynamicSaxon } from './saxonJsDynamicCompiler';
/**
 * Transform engine that compiles the stylesheet at runtime with SaxonJS
 * (advertised here for XSLT 2.0 and 3.0).
 */
export const saxonJsDynamicEngine: TransformEngine = {
  id: 'saxon-js-dynamic',
  label: 'SaxonJS 2 dynamic XSLT 3.0',
  supportsXsltVersions: ['2.0', '3.0'],
  /**
   * Runs the transformation and never rejects: any failure is converted into
   * a single `error` diagnostic with an empty `output`.
   *
   * @param request - XML text, XSLT text and the requested engine id.
   */
  async transform(request: TransformRequest): Promise<TransformResult> {
    let output = '';
    const diagnostics: TransformResult['diagnostics'] = [];

    try {
      output = await transformXmlWithDynamicSaxon(
        request.xmlText,
        request.xsltText
      );
    } catch (error) {
      const message =
        error instanceof Error
          ? error.message
          : 'Dynamic SaxonJS transformation failed.';
      diagnostics.push({
        severity: 'error',
        source: 'SaxonJS dynamic compiler',
        message,
      });
    }

    // The timestamp is taken after the attempt finished, on both paths.
    return {
      output,
      diagnostics,
      engine: request.engine,
      transformedAt: new Date().toISOString(),
    };
  },
};

View File

@@ -0,0 +1,37 @@
/**
 * Shape of the object returned by `SaxonJS.transform` as typed by this
 * project. All members are optional.
 */
export interface SaxonJsTransformResult {
// Main output of the transformation; its concrete type is not fixed by this declaration.
principalResult?: unknown;
// NOTE(review): presumably secondary results keyed by URI — confirm against the SaxonJS documentation.
resultDocuments?: Record<string, unknown>;
stylesheetInternal?: unknown;
masterDocument?: Document;
}
/**
 * Typing of the `SaxonJS` global as this project declares it on `window`.
 */
export interface SaxonJsRuntime {
/**
 * Runs a transformation. The stylesheet and the source can each be given in
 * one of several forms; the declared return type allows either a direct
 * result or a promise of one.
 */
transform(
options: {
stylesheetText?: string;
stylesheetLocation?: string;
stylesheetInternal?: unknown;
sourceText?: string;
sourceNode?: Node;
destination?: 'serialized' | 'document' | 'raw' | 'application';
logLevel?: number;
},
execution?: 'sync' | 'async'
): SaxonJsTransformResult | Promise<SaxonJsTransformResult>;
// Optional serializer that turns a value into a string using the given output method.
serialize?: (
value: unknown,
options?: {
method?: 'xml' | 'html' | 'text' | 'json' | 'adaptive';
indent?: boolean;
}
) => string;
}
// Global augmentation: `window.SaxonJS` is optional because it is undefined until the SaxonJS script has been loaded.
declare global {
interface Window {
SaxonJS?: SaxonJsRuntime;
}
}
export {};

View File

@@ -0,0 +1,11 @@
import { prettyPrintXml } from '../validation/xmlValidation';
/**
 * Serializes a result document to text, pretty-printed when possible.
 *
 * @param result - The document to serialize.
 * @returns The pretty-printed XML, or the raw serialization when
 *   pretty-printing throws.
 */
export function serializeResultDocument(result: Document): string {
  const rawXml = new XMLSerializer().serializeToString(result);
  let formatted = rawXml;
  try {
    formatted = prettyPrintXml(rawXml);
  } catch {
    // Pretty-printing is best-effort; keep the raw serialization.
  }
  return formatted;
}

View File

@@ -0,0 +1,77 @@
import { parseXmlDocument } from '../validation/xmlValidation';
/**
 * One heuristic guess linking an element of the transformation output to the
 * source XML and to a stylesheet template.
 */
export interface ApproximateTraceItem {
// Path of the output element, built from local names with a bracketed index per step.
resultPath: string;
// XPath that selects source elements by the same local name, when a local name could be derived.
likelySourcePath?: string;
// The `match="..."` text of the first template whose pattern equals, or ends with `/` plus, the local name.
likelyTemplate?: string;
// 'medium' when a template was found, otherwise 'low'.
confidence: 'low' | 'medium';
}
/**
 * Produces a rough, name-based trace for the first 20 elements of the
 * transformation output.
 *
 * For each output element the local name is used to guess a source XPath
 * (`//*[local-name()='…']`) and to pick the first stylesheet template whose
 * `match` pattern is that name or ends with `/` followed by it.
 *
 * @param sourceXml - The XML input text.
 * @param xslt - The XSLT stylesheet text.
 * @param outputXml - The transformation output text.
 * @returns The trace items, or an empty array when any of the three texts
 *   yields parse diagnostics or the output has no root element.
 */
export function createApproximateTrace(
  sourceXml: string,
  xslt: string,
  outputXml: string
): ApproximateTraceItem[] {
  const outputParse = parseXmlDocument(outputXml, 'XML output');
  const xsltParse = parseXmlDocument(xslt, 'XSLT stylesheet');
  const sourceParse = parseXmlDocument(sourceXml, 'XML input');

  const anyDiagnostics = [outputParse, xsltParse, sourceParse].some(
    (parse) => parse.diagnostics.length > 0
  );
  if (anyDiagnostics) {
    return [];
  }

  // Collect the non-empty match patterns of all xsl:template elements.
  const matchPatterns: string[] = [];
  const templateElements = xsltParse.document.getElementsByTagNameNS(
    'http://www.w3.org/1999/XSL/Transform',
    'template'
  );
  for (const templateElement of Array.from(templateElements)) {
    const pattern = templateElement.getAttribute('match');
    if (pattern) {
      matchPatterns.push(pattern);
    }
  }

  const resultRoot = outputParse.document.documentElement;
  if (!resultRoot) return [];

  return walkElementPaths(resultRoot)
    .slice(0, 20)
    .map((path): ApproximateTraceItem => {
      // Last path step without its trailing "[n]" index.
      const lastStep = path.split('/').pop();
      const localName = lastStep?.replace(/\[\d+\]$/, '');

      const likelyTemplate = localName
        ? matchPatterns.find(
            (pattern) =>
              pattern === localName || pattern.endsWith(`/${localName}`)
          )
        : undefined;

      let likelySourcePath: string | undefined;
      if (localName) {
        likelySourcePath = `//*[local-name()='${localName}']`;
      }

      if (likelyTemplate) {
        return {
          resultPath: path,
          likelySourcePath,
          likelyTemplate: `match="${likelyTemplate}"`,
          confidence: 'medium',
        };
      }
      return {
        resultPath: path,
        likelySourcePath,
        likelyTemplate: undefined,
        confidence: 'low',
      };
    });
}
/**
 * Lists a path for `root` and every descendant element, in document order
 * (parent before children).
 *
 * Each step has the form `localName[n]`, where `n` is the element's 1-based
 * position among its siblings with the same local name. This is the meaning
 * of a positional predicate in XPath, so `/r[1]/b[1]` denotes the first `b`
 * child of `r` even when other elements precede it.
 *
 * @param root - The element to start from.
 * @returns The paths, starting with `/<root local name>[1]`.
 */
function walkElementPaths(root: Element): string[] {
  const paths: string[] = [];
  const visit = (element: Element, path: string): void => {
    paths.push(path);
    // Count siblings per local name so the index matches XPath semantics.
    const seenPerName = new Map<string, number>();
    for (const child of Array.from(element.children)) {
      const position = (seenPerName.get(child.localName) ?? 0) + 1;
      seenPerName.set(child.localName, position);
      visit(child, `${path}/${child.localName}[${position}]`);
    }
  };
  visit(root, `/${root.localName}[1]`);
  return paths;
}

View File

@@ -0,0 +1,27 @@
import { nativeXsltEngine } from './nativeXsltEngine';
import { saxonJsDynamicEngine } from './saxonJsDynamicEngine';
import type {
TransformEngine,
TransformEngineId,
TransformRequest,
TransformResult,
} from './transformTypes';
// Registry of all transform engines, keyed by engine id. The insertion order here is the order of `Object.values(engines)`.
const engines: Record<TransformEngineId, TransformEngine> = {
'saxon-js-dynamic': saxonJsDynamicEngine,
'native-xsltprocessor': nativeXsltEngine,
};
/**
 * Looks up the engine registered under `id`.
 *
 * @param id - The engine id.
 * @returns The registered engine, or the SaxonJS dynamic engine when `id` is
 *   not a registered key. Only own keys of the registry count, so a runtime
 *   value such as `'constructor'` (for example from persisted settings) falls
 *   back to the default instead of returning an inherited object member.
 */
export function getTransformEngine(id: TransformEngineId): TransformEngine {
  return Object.prototype.hasOwnProperty.call(engines, id)
    ? engines[id]
    : saxonJsDynamicEngine;
}
/**
 * Runs `request` with the engine named in `request.engine`.
 *
 * @param request - XML text, XSLT text and the engine id to use.
 * @returns The result produced by the selected engine.
 */
export async function runTransformation(
  request: TransformRequest
): Promise<TransformResult> {
  return getTransformEngine(request.engine).transform(request);
}
// All registered engines as an array, in registry order.
export const availableTransformEngines = Object.values(engines);

View File

@@ -0,0 +1,32 @@
import type { DiagnosticMessage } from '../validation/validationTypes';
// Identifiers of the available transform engines; each matches the `id` of one engine object.
export type TransformEngineId = 'native-xsltprocessor' | 'saxon-js-dynamic';
/** Input of a single transformation. */
export interface TransformRequest {
// Source XML document as text.
xmlText: string;
// XSLT stylesheet as text.
xsltText: string;
// Engine that should perform the transformation.
engine: TransformEngineId;
}
/** Outcome of a single transformation as returned by an engine. */
export interface TransformResult {
// Transformation output as text; the engines return '' when they report an error.
output: string;
// Messages collected while validating and transforming.
diagnostics: DiagnosticMessage[];
// Engine id copied from the request.
engine: TransformEngineId;
// Timestamp string; the engines fill it with `new Date().toISOString()`.
transformedAt: string;
}
/** Record describing one transformation by hashes of its inputs and output. */
export interface TransformationRun {
// Engine id copied from the request.
engine: TransformEngineId;
// Timestamp string taken when the record is created.
transformedAt: string;
// Hash of the XML input text.
xmlInputHash: string;
// Hash of the XSLT stylesheet text.
xsltHash: string;
// Hash of the output text.
outputHash: string;
// `length` of the output string.
outputLength: number;
}
/** Contract every transform engine implements. */
export interface TransformEngine {
// Registry key of the engine.
id: TransformEngineId;
// Human-readable engine name.
label: string;
// XSLT versions the engine declares support for, e.g. '1.0'.
supportsXsltVersions: string[];
// Runs the transformation described by `request`.
transform(request: TransformRequest): Promise<TransformResult>;
}