Auto-commit 2026-04-29 16:31

This commit is contained in:
2026-04-29 16:31:27 -04:00
parent e8687bb6b2
commit 0495ee5bd2
19691 changed files with 3272886 additions and 138 deletions

41
node_modules/@nodable/entities/README.md generated vendored Normal file
View File

@@ -0,0 +1,41 @@
# @nodable/entities
Fast, zero-dependency XML/HTML entity encoder and decoder for Node.js.
## Install
```bash
npm install @nodable/entities
```
## Quick start
```js
import { EntityEncoder, EntityDecoder, ALL_ENTITIES } from '@nodable/entities';
// Encode: plain text → entity references
const enc = new EntityEncoder();
enc.encode('Hello © 2024 & <stuff>');
// → 'Hello &copy; 2024 &amp; &lt;stuff&gt;'
// Decode: entity references → plain text
const dec = new EntityDecoder({ namedEntities: ALL_ENTITIES });
dec.decode('Hello &copy; 2024 &amp; &lt;stuff&gt;');
// → 'Hello © 2024 & <stuff>'
```
## Performance
| | encode | decode |
|---|---|---|
| `entities` (npm) | 3.65 M req/s | 1.76 M req/s |
| `@nodable/entities` | 3.33 M req/s | **5.19 M req/s** |
## Documentation
- [EntityEncoder](docs/EntityEncoder.md) — options, API, recipes
- [EntityDecoder](docs/EntityDecoder.md) — options, API, security limits, entity sets
## License
MIT

54
node_modules/@nodable/entities/package.json generated vendored Normal file
View File

@@ -0,0 +1,54 @@
{
"name": "@nodable/entities",
"version": "2.1.0",
"description": "Entity parser for XML, HTML, External entities with security and NCR control",
"main": "./src/index.js",
"type": "module",
"sideEffects": false,
"types": "./src/index.d.ts",
"scripts": {
"test": "node --experimental-vm-modules node_modules/.bin/jest",
"test:watch": "node --experimental-vm-modules node_modules/.bin/jest --watch",
"test:coverage": "node --experimental-vm-modules node_modules/.bin/jest --coverage",
"lint": "eslint src/ test/"
},
"files": [
"src",
"README.md"
],
"repository": {
"type": "git",
"url": "git+https://github.com/nodable/val-parsers.git"
},
"keywords": [
"fast",
"xml",
"html",
"entity",
"encode",
"decode",
"ncr",
"security",
"performance"
],
"author": "Amit Gupta (https://solothought.com)",
"license": "MIT",
"publishConfig": {
"access": "public"
},
"devDependencies": {
"jest": "^29.7.0"
},
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/nodable"
}
],
"jest": {
"testMatch": [
"**/?(*.)+(spec|test).[jt]s?(x)",
"**/*_spec.[jt]s?(x)"
]
}
}

543
node_modules/@nodable/entities/src/EntityDecoder.js generated vendored Normal file
View File

@@ -0,0 +1,543 @@
// ---------------------------------------------------------------------------
// Built-in named entity map (name → replacement string)
// No regex, no {regex,val} objects — just flat key/value pairs.
// ---------------------------------------------------------------------------
import { XML as DEFAULT_XML_ENTITIES } from "./entities.js"
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
// Characters that may never appear anywhere in a registered entity name.
const SPECIAL_CHARS = new Set('!?\\\\/[]$%{}^&*()<>|+');

/**
 * Check an entity name for forbidden characters and hand it back untouched.
 *
 * A leading '#' is rejected (it would collide with numeric references), as is
 * any character listed in SPECIAL_CHARS at any position. A '#' that is not the
 * first character is accepted.
 *
 * @param {string} name — entity name without the surrounding '&' and ';'
 * @returns {string} the same name, unchanged
 * @throws {Error} when the name starts with '#' or contains a special character
 */
function validateEntityName(name) {
  // The leading-'#' rule takes precedence over the per-character scan.
  const offending = name[0] === '#'
    ? '#'
    : [...name].find((ch) => SPECIAL_CHARS.has(ch));

  if (offending !== undefined) {
    throw new Error(`[EntityReplacer] Invalid character '${offending}' in entity name: "${name}"`);
  }
  return name;
}
/**
 * Flatten any number of entity maps into one prototype-less name → string map.
 *
 * Each entry of each map may be either
 *   - a plain string:                     { amp: '&' }
 *   - a legacy wrapper with a `val` field: { lt: { regex: /.../, val: '<' } }
 *
 * Only string replacements are kept. Wrappers whose `val` is a function (or
 * any other non-string), and entries of any other shape, are dropped. Later
 * maps override earlier ones for the same name; falsy maps are ignored.
 *
 * Note: this function performs no filtering on the replacement text itself
 * (for example, values containing '&' are copied as-is).
 *
 * @param {...object} maps
 * @returns {Record<string, string>}
 */
function mergeEntityMaps(...maps) {
  const merged = Object.create(null);

  for (const source of maps) {
    if (!source) continue;

    for (const [name, entry] of Object.entries(source)) {
      // Unwrap legacy { regex|regx, val } objects; plain strings pass through.
      const text = typeof entry === 'string'
        ? entry
        : (entry && typeof entry === 'object' && entry.val !== undefined ? entry.val : undefined);

      if (typeof text === 'string') {
        merged[name] = text;
      }
    }
  }

  return merged;
}
// ---------------------------------------------------------------------------
// applyLimitsTo helpers
// ---------------------------------------------------------------------------
const LIMIT_TIER_EXTERNAL = 'external'; // input/runtime + persistent external maps
const LIMIT_TIER_BASE = 'base';         // DEFAULT_XML_ENTITIES + namedEntities (system) maps
const LIMIT_TIER_ALL = 'all';           // every entity regardless of tier

/**
 * Normalise the `applyLimitsTo` option into a Set of tier names.
 *
 * - an array is used verbatim (an empty array yields an empty Set)
 * - 'base' and 'all' map to a single-element Set
 * - everything else — 'external', a missing value, or an unrecognised
 *   value — falls back to the 'external' tier
 *
 * @param {string|string[]|undefined} raw
 * @returns {Set<string>}
 */
function parseLimitTiers(raw) {
  if (Array.isArray(raw)) return new Set(raw);

  switch (raw) {
    case LIMIT_TIER_ALL:
    case LIMIT_TIER_BASE:
      return new Set([raw]);
    default:
      // Covers 'external', falsy input and unknown values alike.
      return new Set([LIMIT_TIER_EXTERNAL]);
  }
}
// ---------------------------------------------------------------------------
// NCR (Numeric Character Reference) classification
// ---------------------------------------------------------------------------
// Severity order — higher number = stricter action.
// Used to enforce minimum action levels for specific codepoint ranges.
const NCR_LEVEL = Object.freeze({ allow: 0, leave: 1, remove: 2, throw: 3 });

// XML 1.0 §2.2: allowed chars are #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
// Restricted C0: U+0001–U+001F excluding U+0009, U+000A, U+000D
const XML10_ALLOWED_C0 = new Set([0x09, 0x0A, 0x0D]);

/**
 * Parse the `ncr` constructor option into flat, hot-path-friendly fields.
 *
 * Action names are resolved with an own-property lookup on NCR_LEVEL, so a
 * name that only exists on Object.prototype (e.g. 'constructor', 'toString')
 * is treated like any other unknown name and falls back to the default
 * instead of producing a non-numeric level.
 *
 * @param {object|undefined} ncr
 * @param {number} [ncr.xmlVersion] — exactly 1.1 selects XML 1.1; anything else means 1.0
 * @param {string} [ncr.onNCR]      — 'allow' | 'leave' | 'remove' | 'throw' (default 'allow')
 * @param {string} [ncr.nullNCR]    — action for U+0000 (default 'remove'); never weaker than 'remove'
 * @returns {{ xmlVersion: number, onLevel: number, nullLevel: number }}
 */
function parseNCRConfig(ncr) {
  if (!ncr) {
    return { xmlVersion: 1.0, onLevel: NCR_LEVEL.allow, nullLevel: NCR_LEVEL.remove };
  }

  // Resolve an action name to its numeric level, ignoring inherited properties.
  const levelOf = (name, fallback) =>
    typeof name === 'string' && Object.prototype.hasOwnProperty.call(NCR_LEVEL, name)
      ? NCR_LEVEL[name]
      : fallback;

  const xmlVersion = ncr.xmlVersion === 1.1 ? 1.1 : 1.0;
  const onLevel = levelOf(ncr.onNCR, NCR_LEVEL.allow);
  // 'allow' and 'leave' are not meaningful for null — clamp to at least 'remove'.
  const nullLevel = Math.max(levelOf(ncr.nullNCR, NCR_LEVEL.remove), NCR_LEVEL.remove);

  return { xmlVersion, onLevel, nullLevel };
}
// ---------------------------------------------------------------------------
// EntityReplacer
// ---------------------------------------------------------------------------
/**
* Single-pass, zero-regex entity replacer for XML/HTML content.
*
* Algorithm: scan the string once for '&', read to ';', resolve via map
* or direct codepoint conversion, build output chunks, join once at the end.
*
* Entity lookup priority (highest → lowest):
* 1. input / runtime (DOCTYPE entities for current document)
* 2. persistent external (survive across documents)
* 3. base named map (DEFAULT_XML_ENTITIES + user-supplied namedEntities)
*
* Both input and external resolve as the 'external' tier for limit purposes.
* Base map entities resolve as the 'base' tier.
*
* Numeric / hex references (&#NNN; / &#xHH;) are resolved directly via
* String.fromCodePoint() — no map needed. They count as 'base' tier.
*
* @example
* const replacer = new EntityReplacer({ namedEntities: COMMON_HTML });
* replacer.setExternalEntities({ brand: 'Acme' });
*
* const instance = replacer.reset();
* instance.addInputEntities({ version: '1.0' });
* instance.encode('&brand; v&version; &lt;'); // 'Acme v1.0 <'
*/
export default class EntityDecoder {
/**
* @param {object} [options]
* @param {object|null} [options.namedEntities] — extra named entities merged into base map
* @param {object} [options.limit] — security limits
* @param {number} [options.limit.maxTotalExpansions=0] — 0 = unlimited
* @param {number} [options.limit.maxExpandedLength=0] — 0 = unlimited
* @param {'external'|'base'|'all'|string[]} [options.limit.applyLimitsTo='external']
* Which entity tiers count against the security limits:
* - 'external' (default) — only input/runtime + persistent external entities
* - 'base' — only DEFAULT_XML_ENTITIES + namedEntities
* - 'all' — every entity regardless of tier
* - string[] — explicit combination, e.g. ['external', 'base']
* @param {((resolved: string, original: string) => string)|null} [options.postCheck=null]
* @param {string[]} [options.remove=[]] — entity names (e.g. ['nbsp', '#13']) to delete (replace with empty string)
* @param {string[]} [options.leave=[]] — entity names to keep as literal (unchanged in output)
* @param {object} [options.ncr] — Numeric Character Reference controls
* @param {1.0|1.1} [options.ncr.xmlVersion=1.0]
* XML version governing which codepoint ranges are restricted:
* - 1.0 — C0 controls U+0001U+001F (except U+0009/000A/000D) are prohibited
* - 1.1 — C0 controls are allowed when written as NCRs; C1 (U+007FU+009F) decoded as-is
* @param {'allow'|'leave'|'remove'|'throw'} [options.ncr.onNCR='allow']
* Base action for numeric references. Severity order: allow < leave < remove < throw.
* For codepoint ranges that carry a minimum level (surrogates → remove, XML 1.0 C0 → remove),
* the effective action is max(onNCR, rangeMinimum).
* @param {'remove'|'throw'} [options.ncr.nullNCR='remove']
* Action for U+0000 (null). 'allow' and 'leave' are clamped to 'remove' since null is never safe.
*/
constructor(options = {}) {
this._limit = options.limit || {};
this._maxTotalExpansions = this._limit.maxTotalExpansions || 0;
this._maxExpandedLength = this._limit.maxExpandedLength || 0;
this._postCheck = typeof options.postCheck === 'function' ? options.postCheck : r => r;
this._limitTiers = parseLimitTiers(this._limit.applyLimitsTo ?? LIMIT_TIER_EXTERNAL);
this._numericAllowed = options.numericAllowed ?? true;
// Base map: DEFAULT_XML_ENTITIES + user-supplied extras. Immutable after construction.
this._baseMap = mergeEntityMaps(DEFAULT_XML_ENTITIES, options.namedEntities || null);
// Persistent external entities — survive across documents.
// Stored as a separate map so reset() never touches them.
/** @type {Record<string, string>} */
this._externalMap = Object.create(null);
// Input / runtime entities — current document only, wiped on reset().
/** @type {Record<string, string>} */
this._inputMap = Object.create(null);
// Per-document counters
this._totalExpansions = 0;
this._expandedLength = 0;
// --- New: remove / leave sets ---
/** @type {Set<string>} */
this._removeSet = new Set(options.remove && Array.isArray(options.remove) ? options.remove : []);
/** @type {Set<string>} */
this._leaveSet = new Set(options.leave && Array.isArray(options.leave) ? options.leave : []);
// --- NCR config (parsed into flat fields for hot-path speed) ---
const ncrCfg = parseNCRConfig(options.ncr);
this._ncrXmlVersion = ncrCfg.xmlVersion;
this._ncrOnLevel = ncrCfg.onLevel;
this._ncrNullLevel = ncrCfg.nullLevel;
}
// -------------------------------------------------------------------------
// Persistent external entity registration
// -------------------------------------------------------------------------
/**
* Replace the full set of persistent external entities.
* All keys are validated — throws on invalid characters.
* @param {Record<string, string | { regex?: RegExp, val: string }>} map
*/
setExternalEntities(map) {
if (map) {
for (const key of Object.keys(map)) {
validateEntityName(key);
}
}
this._externalMap = mergeEntityMaps(map);
}
/**
* Add a single persistent external entity.
* @param {string} key
* @param {string} value
*/
addExternalEntity(key, value) {
validateEntityName(key);
if (typeof value === 'string' && value.indexOf('&') === -1) {
this._externalMap[key] = value;
}
}
// -------------------------------------------------------------------------
// Input / runtime entity registration (per document)
// -------------------------------------------------------------------------
/**
* Inject DOCTYPE entities for the current document.
* Also resets per-document expansion counters.
* @param {Record<string, string | { regx?: RegExp, regex?: RegExp, val: string }>} map
*/
addInputEntities(map) {
this._totalExpansions = 0;
this._expandedLength = 0;
this._inputMap = mergeEntityMaps(map);
}
// -------------------------------------------------------------------------
// Per-document reset
// -------------------------------------------------------------------------
/**
* Wipe input/runtime entities and reset counters.
* Call this before processing each new document.
* @returns {this}
*/
reset() {
this._inputMap = Object.create(null);
this._totalExpansions = 0;
this._expandedLength = 0;
return this;
}
// -------------------------------------------------------------------------
// XML version (can be set after construction, e.g. once parser reads <?xml?>)
// -------------------------------------------------------------------------
/**
* Update the XML version used for NCR classification.
* Call this as soon as the document's `<?xml version="...">` declaration is parsed.
* @param {1.0|1.1|number} version
*/
setXmlVersion(version) {
this._ncrXmlVersion = version === 1.1 ? 1.1 : 1.0;
}
// -------------------------------------------------------------------------
// Primary API
// -------------------------------------------------------------------------
/**
* Replace all entity references in `str` in a single pass.
*
* @param {string} str
* @returns {string}
*/
decode(str) {
if (typeof str !== 'string' || str.length === 0) return str;
//TODO: check if needed
//if (str.indexOf('&') === -1) return str; // fast path — no entities at all
const original = str;
const chunks = [];
const len = str.length;
let last = 0; // start of next unprocessed literal chunk
let i = 0;
const limitExpansions = this._maxTotalExpansions > 0;
const limitLength = this._maxExpandedLength > 0;
const checkLimits = limitExpansions || limitLength;
while (i < len) {
// Scan forward to next '&'
if (str.charCodeAt(i) !== 38 /* '&' */) { i++; continue; }
// --- Found '&' at position i ---
// Scan forward to ';'
let j = i + 1;
while (j < len && str.charCodeAt(j) !== 59 /* ';' */ && (j - i) <= 32) j++;
if (j >= len || str.charCodeAt(j) !== 59) {
// No closing ';' within window — treat '&' as literal
i++;
continue;
}
// Raw token between '&' and ';' (exclusive)
const token = str.slice(i + 1, j);
if (token.length === 0) { i++; continue; }
let replacement;
let tier; // which limit tier this entity belongs to
if (this._removeSet.has(token)) {
// Remove entity: replace with empty string
replacement = '';
// If entity was unknown (replacement undefined), we still need a tier for limits.
// Treat as external tier because it's user-directed removal of an unknown reference.
if (tier === undefined) {
tier = LIMIT_TIER_EXTERNAL;
}
} else if (this._leaveSet.has(token)) {
// Do not replace — keep original &token; as literal
i++;
continue;
} else if (token.charCodeAt(0) === 35 /* '#' */) {
// ---- Numeric / NCR reference ----
// NCR classification always runs first — prohibited codepoints must be
// caught regardless of numericAllowed.
const ncrResult = this._resolveNCR(token);
if (ncrResult === undefined) {
// 'leave' action — keep original &token; as-is
i++;
continue;
}
replacement = ncrResult; // '' for remove, char string for allow
tier = LIMIT_TIER_BASE;
} else {
// ---- Named reference ----
const resolved = this._resolveName(token);
replacement = resolved?.value;
tier = resolved?.tier;
}
if (replacement === undefined) {
// Unknown entity — leave as-is, advance past '&' only
i++;
continue;
}
// Flush literal chunk before this entity
if (i > last) chunks.push(str.slice(last, i));
chunks.push(replacement);
last = j + 1; // skip past ';'
i = last;
// Apply expansion limits only if this tier is being tracked
if (checkLimits && this._tierCounts(tier)) {
if (limitExpansions) {
this._totalExpansions++;
if (this._totalExpansions > this._maxTotalExpansions) {
throw new Error(
`[EntityReplacer] Entity expansion count limit exceeded: ` +
`${this._totalExpansions} > ${this._maxTotalExpansions}`
);
}
}
if (limitLength) {
// delta: replacement.length minus the raw &token; length (token.length + 2 for '&' and ';')
const delta = replacement.length - (token.length + 2);
if (delta > 0) {
this._expandedLength += delta;
if (this._expandedLength > this._maxExpandedLength) {
throw new Error(
`[EntityReplacer] Expanded content length limit exceeded: ` +
`${this._expandedLength} > ${this._maxExpandedLength}`
);
}
}
}
}
}
// Flush trailing literal
if (last < len) chunks.push(str.slice(last));
// If nothing was replaced, chunks is empty — return original
const result = chunks.length === 0 ? str : chunks.join('');
return this._postCheck(result, original);
}
// -------------------------------------------------------------------------
// Private: limit tier check
// -------------------------------------------------------------------------
/**
* Returns true if a resolved entity of the given tier should count
* against the expansion/length limits.
* @param {string} tier — LIMIT_TIER_EXTERNAL | LIMIT_TIER_BASE
* @returns {boolean}
*/
_tierCounts(tier) {
if (this._limitTiers.has(LIMIT_TIER_ALL)) return true;
return this._limitTiers.has(tier);
}
// -------------------------------------------------------------------------
// Private: entity resolution
// -------------------------------------------------------------------------
/**
* Resolve a named entity token (without & and ;).
* Priority: inputMap > externalMap > baseMap
* Returns the resolved value tagged with its limit tier.
*
* @param {string} name
* @returns {{ value: string, tier: string }|undefined}
*/
_resolveName(name) {
// input and external both count as 'external' tier for limit purposes —
// they are injected at runtime and are the untrusted surface.
if (name in this._inputMap) return { value: this._inputMap[name], tier: LIMIT_TIER_EXTERNAL };
if (name in this._externalMap) return { value: this._externalMap[name], tier: LIMIT_TIER_EXTERNAL };
if (name in this._baseMap) return { value: this._baseMap[name], tier: LIMIT_TIER_BASE };
return undefined;
}
/**
* Classify a codepoint and return the minimum action level that must be applied.
* Returns -1 when no minimum is imposed (normal allow path).
*
* Ranges checked (in priority order):
* 1. U+0000 — null, governed by nullNCR (always ≥ remove)
* 2. U+D800U+DFFF — surrogates, always prohibited (min: remove)
* 3. U+0001U+001F \ {0x09,0x0A,0x0D} — XML 1.0 restricted C0 (min: remove)
* (skipped in XML 1.1 — C0 controls are allowed when written as NCRs)
*
* @param {number} cp — codepoint
* @returns {number} — minimum NCR_LEVEL value, or -1 for no restriction
*/
_classifyNCR(cp) {
// 1. Null
if (cp === 0) return this._ncrNullLevel;
// 2. Surrogates — always prohibited, minimum 'remove'
if (cp >= 0xD800 && cp <= 0xDFFF) return NCR_LEVEL.remove;
// 3. XML 1.0 restricted C0 controls
if (this._ncrXmlVersion === 1.0) {
if (cp >= 0x01 && cp <= 0x1F && !XML10_ALLOWED_C0.has(cp)) return NCR_LEVEL.remove;
}
return -1; // no restriction
}
/**
* Execute a resolved NCR action.
*
* @param {number} action — NCR_LEVEL value
* @param {string} token — raw token (e.g. '#38') for error messages
* @param {number} cp — codepoint, used only for error messages
* @returns {string|undefined}
* - decoded character string → 'allow'
* - '' → 'remove'
* - undefined → 'leave' (caller must skip past '&' only)
* - throws Error → 'throw'
*/
_applyNCRAction(action, token, cp) {
switch (action) {
case NCR_LEVEL.allow: return String.fromCodePoint(cp);
case NCR_LEVEL.remove: return '';
case NCR_LEVEL.leave: return undefined; // signal: keep literal
case NCR_LEVEL.throw:
throw new Error(
`[EntityDecoder] Prohibited numeric character reference ` +
`&${token}; (U+${cp.toString(16).toUpperCase().padStart(4, '0')})`
);
default: return String.fromCodePoint(cp);
}
}
/**
* Full NCR resolution pipeline for a numeric token.
*
* Steps:
* 1. Parse the codepoint (decimal or hex).
* 2. Validate the raw codepoint range (NaN, <0, >0x10FFFF).
* 3. If numericAllowed is false and no minimum restriction applies → leave as-is.
* 4. Classify the codepoint to find the minimum required action level.
* 5. Resolve effective action = max(onNCR, minimum).
* 6. Apply and return.
*
* @param {string} token — e.g. '#38', '#x26', '#X26'
* @returns {string|undefined}
* - string (incl. '') — replacement ('' = remove)
* - undefined — leave original &token; as-is
*/
_resolveNCR(token) {
// Step 1: parse codepoint
const second = token.charCodeAt(1);
let cp;
if (second === 120 /* x */ || second === 88 /* X */) {
cp = parseInt(token.slice(2), 16);
} else {
cp = parseInt(token.slice(1), 10);
}
// Step 2: out-of-range → leave as-is unconditionally
if (Number.isNaN(cp) || cp < 0 || cp > 0x10FFFF) return undefined;
// Step 3: classify to get minimum action level
const minimum = this._classifyNCR(cp);
// Step 4: if numericAllowed is false and no hard minimum → leave
if (!this._numericAllowed && minimum < NCR_LEVEL.remove) return undefined;
// Step 5: effective action = max(configured onNCR, range minimum)
const effective = minimum === -1
? this._ncrOnLevel
: Math.max(this._ncrOnLevel, minimum);
// Step 6: apply
return this._applyNCRAction(effective, token, cp);
}
}

194
node_modules/@nodable/entities/src/EntityEncoder.js generated vendored Normal file
View File

@@ -0,0 +1,194 @@
// EntityEncoder.js
import { trie1, trie2, trie3 } from './entityTries.js';
// Replacement strings indexed by char code — direct array access, no hashing
const XML_UNSAFE_REPLACEMENT = new Array(128);
XML_UNSAFE_REPLACEMENT[38] = '&amp;'; // &
XML_UNSAFE_REPLACEMENT[60] = '&lt;'; // <
XML_UNSAFE_REPLACEMENT[62] = '&gt;'; // >
XML_UNSAFE_REPLACEMENT[34] = '&quot;'; // "
XML_UNSAFE_REPLACEMENT[39] = '&apos;'; // '
// Typed bitmask for O(1) "is this ASCII code XML-unsafe?" check
const IS_XML_UNSAFE = new Uint8Array(128);
IS_XML_UNSAFE[38] = 1;
IS_XML_UNSAFE[60] = 1;
IS_XML_UNSAFE[62] = 1;
IS_XML_UNSAFE[34] = 1;
IS_XML_UNSAFE[39] = 1;
// Fast pre-scan: bail out immediately if nothing needs encoding
const NEEDS_PROCESSING = /[&<>"'\u0080-\uFFFF]/;
export default class EntityEncoder {
constructor(options = {}) {
this.encodeXmlSafe = options.encodeXmlSafe !== false;
this.encodeAllNamed = options.encodeAllNamed !== false;
this.maxReplacements = options.maxReplacements || 0;
this.replacementsCount = 0;
}
encode(str) {
if (typeof str !== 'string' || str.length === 0) return str;
if (!NEEDS_PROCESSING.test(str)) return str;
const maxRep = this.maxReplacements;
if (maxRep > 0 && this.replacementsCount >= maxRep) return str;
// Hoist to locals — avoids `this` property lookup inside the hot loop
const encodeXmlSafe = this.encodeXmlSafe;
const encodeAllNamed = this.encodeAllNamed;
const len = str.length;
let result = '';
let last = 0;
let i = 0;
let limitReached = false;
// ── Main loop: runs to len-2 so trie3 never needs a bounds check ────────
// The last 2 characters are handled by the tail block below.
const mainEnd = len - 2; // i <= mainEnd guarantees i+1 and i+2 are valid
while (i <= mainEnd && !limitReached) {
const c0 = str.charCodeAt(i);
// ── ASCII branch ───────────────────────────────────────────────────
if (c0 < 128) {
if (encodeXmlSafe && IS_XML_UNSAFE[c0] === 1) {
result += str.substring(last, i) + XML_UNSAFE_REPLACEMENT[c0];
last = ++i;
if (maxRep > 0) {
this.replacementsCount++;
if (this.replacementsCount >= maxRep) {
limitReached = true;
break;
}
}
} else {
// Bulk-skip: advance to the next interesting position without
// touching the outer loop overhead on every safe character
i++;
while (i <= mainEnd && !limitReached) {
const c = str.charCodeAt(i);
if (c >= 128 || (encodeXmlSafe && IS_XML_UNSAFE[c] === 1)) break;
i++;
}
}
continue;
}
// ── Non-ASCII: integer-keyed trie lookup ───────────────────────────
// No bounds checks needed for c1/c2 because i <= mainEnd guarantees
// i+1 and i+2 are both within the string.
let matchedEntity = null;
let advance = 1;
// Try 3-char match first (longest wins)
const mid3 = trie3.get(c0);
if (mid3 !== undefined) {
const c1 = str.charCodeAt(i + 1);
const inner3 = mid3.get(c1);
if (inner3 !== undefined) {
const c2 = str.charCodeAt(i + 2);
const candidate = inner3.get(c2);
if (candidate !== undefined) { matchedEntity = candidate; advance = 3; }
}
}
// Try 2-char match
if (matchedEntity === null) {
const inner2 = trie2.get(c0);
if (inner2 !== undefined) {
const c1 = str.charCodeAt(i + 1);
const candidate = inner2.get(c1);
if (candidate !== undefined) { matchedEntity = candidate; advance = 2; }
}
}
// Try 1-char match
if (matchedEntity === null && encodeAllNamed) {
const candidate = trie1.get(c0);
if (candidate !== undefined) { matchedEntity = candidate; }
}
if (matchedEntity !== null) {
result += str.substring(last, i) + matchedEntity;
i += advance;
last = i;
if (maxRep > 0) {
this.replacementsCount++;
if (this.replacementsCount >= maxRep) {
limitReached = true;
break;
}
}
} else {
i++;
}
}
// ── Tail: handle the last 1-2 characters (no 3-char match possible) ────
while (i < len && !limitReached) {
const c0 = str.charCodeAt(i);
if (c0 < 128) {
if (encodeXmlSafe && IS_XML_UNSAFE[c0] === 1) {
result += str.substring(last, i) + XML_UNSAFE_REPLACEMENT[c0];
last = ++i;
if (maxRep > 0) {
this.replacementsCount++;
if (this.replacementsCount >= maxRep) {
limitReached = true;
break;
}
}
} else {
i++;
}
continue;
}
// Non-ASCII tail — only 2-char and 1-char matches are possible here
let matchedEntity = null;
let advance = 1;
if (i + 1 < len) {
const inner2 = trie2.get(c0);
if (inner2 !== undefined) {
const c1 = str.charCodeAt(i + 1);
const candidate = inner2.get(c1);
if (candidate !== undefined) { matchedEntity = candidate; advance = 2; }
}
}
if (matchedEntity === null && encodeAllNamed) {
const candidate = trie1.get(c0);
if (candidate !== undefined) { matchedEntity = candidate; }
}
if (matchedEntity !== null) {
result += str.substring(last, i) + matchedEntity;
i += advance;
last = i;
if (maxRep > 0) {
this.replacementsCount++;
if (this.replacementsCount >= maxRep) {
limitReached = true;
break;
}
}
} else {
i++;
}
}
// ── Flush any remaining literal suffix ────────────────────────────────
if (last < len) result += str.substring(last);
return result;
}
reset() {
this.replacementsCount = 0;
}
}

1177
node_modules/@nodable/entities/src/entities.js generated vendored Normal file

File diff suppressed because it is too large Load Diff

49
node_modules/@nodable/entities/src/entityTries.js generated vendored Normal file
View File

@@ -0,0 +1,49 @@
// entityTries.js
// Builds integer-keyed tries so the encoder never allocates a string object
// during lookup — every key is a plain charCode number.
//
// trie1: Map<code0, entity>
// trie2: Map<code0, Map<code1, entity>>
// trie3: Map<code0, Map<code1, Map<code2, entity>>>
import { ALL_ENTITIES } from './entities.js';
// Reverse map: character sequence → "&name;".
// When several names share a character sequence, the one listed last in
// ALL_ENTITIES wins.
const CHAR_TO_ENTITY = new Map(
  Object.entries(ALL_ENTITIES).map(([name, chars]) => [chars, `&${name};`])
);

export const trie1 = new Map(); // code0 → entity string
export const trie2 = new Map(); // code0 → Map(code1 → entity string)
export const trie3 = new Map(); // code0 → Map(code1 → Map(code2 → entity string))

// Fetch the nested Map stored under `code`, creating it on first use.
const childOf = (parent, code) => {
  let child = parent.get(code);
  if (child === undefined) {
    child = new Map();
    parent.set(code, child);
  }
  return child;
};

// Each sequence goes into the trie matching its length in UTF-16 code units.
// The three tries are independent, so a short sequence never clobbers a
// longer one that starts with the same code unit.
for (const [chars, entity] of CHAR_TO_ENTITY) {
  switch (chars.length) {
    case 1:
      trie1.set(chars.charCodeAt(0), entity);
      break;
    case 2:
      childOf(trie2, chars.charCodeAt(0)).set(chars.charCodeAt(1), entity);
      break;
    case 3:
      childOf(childOf(trie3, chars.charCodeAt(0)), chars.charCodeAt(1))
        .set(chars.charCodeAt(2), entity);
      break;
    default:
      // Any other length is not indexed. The original note here states that
      // HTML5 has no named entity whose character sequence exceeds 3 chars.
      break;
  }
}

264
node_modules/@nodable/entities/src/index.d.ts generated vendored Normal file
View File

@@ -0,0 +1,264 @@
// ---------------------------------------------------------------------------
// @nodable/entities — TypeScript declarations
// ---------------------------------------------------------------------------
/**
* A function-based entity replacement value (used for numeric refs).
*
* NOTE(review): the decoder's map merge keeps only string `val`s, so a
* function value appears to be ignored at decode time — confirm before
* relying on it.
*/
export type EntityValFn = (match: string, captured: string, ...rest: unknown[]) => string;
// ---------------------------------------------------------------------------
// Encoder options
// ---------------------------------------------------------------------------
export interface EntityEncoderOptions {
/**
* Whether to encode XML-unsafe characters: `&`, `<`, `>`, `"`, `'`.
* Only an explicit `false` disables it.
* @default true
*/
encodeXmlSafe?: boolean;
/**
* Whether to encode non-ASCII characters (e.g. `é` → `&eacute;`) using the
* built-in named entity trie.
* Only an explicit `false` disables it.
* @default true
*/
encodeAllNamed?: boolean;
/**
* Maximum number of replacements performed **cumulatively** across all
* `encode()` calls. `0` means unlimited.
*
* Use `reset()` to reset the internal counter.
* @default 0
*/
maxReplacements?: number;
}
// ---------------------------------------------------------------------------
// EntityEncoder class
// ---------------------------------------------------------------------------
/**
* High-performance encoder that replaces characters with XML/HTML entities.
*
* - Escapes XML-unsafe characters (`&`, `<`, `>`, `"`, `'`) when `encodeXmlSafe` is true.
* - Replaces non-ASCII characters (e.g. `é`, `©`) with named entities using
* a compact trie-based lookup when `encodeAllNamed` is true.
* - Supports a cumulative replacement limit (`maxReplacements`) that persists
* across multiple `encode()` calls until `reset()` is called.
*
* @example
* const encoder = new EntityEncoder({ encodeXmlSafe: true, encodeAllNamed: true });
* encoder.encode('<foo>'); // "&lt;foo&gt;"
* encoder.encode('© 2025'); // "&copy; 2025"
*
* // With limit
* const limited = new EntityEncoder({ maxReplacements: 2 });
* limited.encode('<>&'); // "&lt;&gt;&" (third replacement omitted)
* limited.reset(); // reset counter
*/
export class EntityEncoder {
constructor(options?: EntityEncoderOptions);
/**
* Encode a string by replacing XML-unsafe characters and (optionally)
* non-ASCII characters with named entities.
*
* If `maxReplacements` is set and the cumulative limit has been reached,
* the input string is returned unchanged. If the limit is reached part-way
* through a string, the remainder of that string is copied verbatim.
*
* @returns Encoded string (may be identical to input if no replacements needed
* or the limit has been exhausted).
*/
encode(str: string): string;
/**
* Reset the internal replacement counter.
* Does **not** change `encodeXmlSafe`, `encodeAllNamed`, or `maxReplacements`.
*/
reset(): void;
}
// ---------------------------------------------------------------------------
// Constructor options for EntityDecoder (existing)
// ---------------------------------------------------------------------------
/**
* Controls which entity categories count toward the expansion limits.
*
* - `'external'` — only untrusted / injected entities (default)
* - `'base'` — only built-in XML entities + user-supplied `namedEntities`
* - `'all'` — all entities regardless of tier
* - `string[]` — explicit combination, e.g. `['external', 'base']`
*/
export type ApplyLimitsTo = 'external' | 'base' | 'all' | Array<'external' | 'base'>;
/**
* Security limits applied to entity expansion while decoding.
* Supplied through the `limit` field of `EntityDecoderOptions`.
*/
export interface EntityDecoderLimitOptions {
/**
* Maximum number of entity references expanded **per document**.
* `0` means unlimited.
* @default 0
*/
maxTotalExpansions?: number;
/**
* Maximum number of characters **added** by entity expansion per document.
* `0` means unlimited.
* @default 0
*/
maxExpandedLength?: number;
/**
* Which entity tiers count toward the expansion limits.
*
* - `'external'` (default) — only input/runtime + persistent external entities
* - `'base'` — only built-in XML + `namedEntities`
* - `'all'` — every entity regardless of tier
* - array form — explicit combination of tiers, e.g. `['external', 'base']`
*
* @default 'external'
*/
applyLimitsTo?: ApplyLimitsTo;
}
/**
* Numeric character reference (NCR) policy for the decoder.
* Supplied through the `ncr` field of `EntityDecoderOptions`.
*/
export interface EntityDecoderNCROptions {
/**
* XML version used for NCR classification.
* @default 1.0
*/
xmlVersion?: 1.0 | 1.1;
/**
* Base action for all numeric references.
* @default 'allow'
*/
onNCR?: 'allow' | 'leave' | 'remove' | 'throw';
/**
* Action for null NCR (U+0000).
* @default 'remove'
*/
nullNCR?: 'remove' | 'throw';
}
/**
* Constructor options accepted by `EntityDecoder`. Every field is optional.
*/
export interface EntityDecoderOptions {
/**
* Extra named entities merged into the **base map** (trusted, counts as `'base'` tier).
* These are combined with the built-in XML entities (`lt`, `gt`, `quot`, `apos`).
* Values containing `&` are silently skipped to prevent recursive expansion.
*
* Each value is either a replacement string or a `{ regex, val }` object.
*
* NOTE(review): `amp` is not listed among the built-in entities above —
* confirm whether it is handled separately by the implementation.
*
* @default null
*/
namedEntities?: Record<string, string | { regex: RegExp; val: string | EntityValFn }> | null;
/**
* Hook called once on the fully decoded string (after all replacements).
*
* - Receives `(resolved, original)` and **must return a string**.
* - To reject expansion, return `original`.
* - To sanitize, return a cleaned version of `resolved`.
*
* @example
* postCheck: (resolved, original) =>
* /<[a-z]/i.test(resolved) ? original : resolved
*/
postCheck?: ((resolved: string, original: string) => string) | null;
/**
* Whether numeric character references (`&#NNN;`, `&#xHH;`) are allowed.
*
* NOTE(review): this overlaps with `ncr.onNCR`; which of the two takes
* precedence is not visible from this declaration — confirm.
*
* @default true
*/
numericAllowed?: boolean;
/**
* Array of entity names or numeric references to leave unexpanded.
* @default []
*/
leave?: string[];
/**
* Array of entity names or numeric references to remove.
* @default []
*/
remove?: string[];
/**
* Security limits for entity expansion.
*/
limit?: EntityDecoderLimitOptions;
/**
* Numeric Character Reference (NCR) policy.
*/
ncr?: EntityDecoderNCROptions;
}
// ---------------------------------------------------------------------------
// EntityDecoder class (default export)
// ---------------------------------------------------------------------------
/**
 * Single-pass, zero-regex entity decoder for XML/HTML content.
 *
 * ## Entity lookup priority (highest → lowest)
 * 1. **input / runtime** — injected via `addInputEntities()` (DOCTYPE per document)
 * 2. **persistent external** — set via `setExternalEntities()` / `addExternalEntity()`
 * 3. **base map** — built-in XML entities + user-supplied `namedEntities`
 *
 * Numeric references (`&#NNN;`, `&#xHH;`) are resolved directly and count as the `'base'` tier.
 *
 * The package entry point (`src/index.js`) re-exports this class under the
 * name `EntityDecoder`, so prefer the named import:
 * `import { EntityDecoder } from '@nodable/entities'`.
 * The default-export declaration is kept for backward compatibility.
 *
 * @example
 * const decoder = new EntityDecoder({
 *   namedEntities: COMMON_HTML,
 *   limit: { maxTotalExpansions: 100 }
 * });
 * decoder.setExternalEntities({ brand: 'Acme' });
 *
 * decoder.addInputEntities({ version: '1.0' });
 * decoder.decode('&brand; v&version; &lt;'); // 'Acme v1.0 <'
 *
 * decoder.reset(); // clears input entities + counters, keeps external entities
 */
export default class EntityDecoder {
  /**
   * Create a decoder.
   *
   * @param options Optional decoder configuration.
   */
  constructor(options?: EntityDecoderOptions);

  /**
   * Set the persistent external entities (lookup priority 2).
   * These survive `reset()`.
   *
   * NOTE(review): the name suggests this replaces any previously set map
   * rather than merging into it — confirm against the implementation.
   *
   * @param map Entity name → replacement string or `{ regex, val }` object.
   */
  setExternalEntities(
    map: Record<string, string | { regex: RegExp; val: string | EntityValFn }>
  ): void;

  /**
   * Add a single persistent external entity (lookup priority 2).
   *
   * @param key Entity name.
   * @param value Replacement text.
   */
  addExternalEntity(key: string, value: string): void;

  /**
   * Inject input / runtime entities (lookup priority 1), e.g. those declared
   * in a document's DOCTYPE. They are cleared by `reset()`.
   *
   * The object form accepts the pattern under either `regx` or `regex`.
   * NOTE(review): the `regx` spelling is presumably kept for compatibility
   * with callers that already emit that key — confirm before removing it.
   *
   * @param map Entity name → replacement string or pattern/value object.
   */
  addInputEntities(
    map: Record<
      string,
      | string
      | { regx: RegExp; val: string | EntityValFn }
      | { regex: RegExp; val: string | EntityValFn }
    >
  ): void;

  /**
   * Clear input entities and expansion counters; external entities are kept.
   *
   * @returns This decoder, so calls can be chained.
   */
  reset(): this;

  /**
   * Decode the entity references in a string.
   *
   * @param str Text containing entity references.
   * @returns The decoded text.
   */
  decode(str: string): string;
}

// Named export matching what the runtime entry point actually provides
// (`export { default as EntityDecoder } from './EntityDecoder.js'`).
export { EntityDecoder };
// ---------------------------------------------------------------------------
// Named entity group exports (for use with `namedEntities` option)
//
// Each constant is declared as a plain name → replacement-string map and can
// be passed as the `namedEntities` decoder option.
// NOTE(review): the contents of each group are presumably what its identifier
// suggests (e.g. GREEK, CURRENCY); the data itself lives in `src/entities.js`,
// not in this declaration file — confirm there.
// ---------------------------------------------------------------------------
export const COMMON_HTML: Record<string, string>;
export const ALL_ENTITIES: Record<string, string>;
export const XML: Record<string, string>;
export const BASIC_LATIN: Record<string, string>;
export const LATIN_ACCENTS: Record<string, string>;
export const LATIN_EXTENDED: Record<string, string>;
export const GREEK: Record<string, string>;
export const CYRILLIC: Record<string, string>;
export const MATH: Record<string, string>;
export const MATH_ADVANCED: Record<string, string>;
export const ARROWS: Record<string, string>;
export const SHAPES: Record<string, string>;
export const PUNCTUATION: Record<string, string>;
export const CURRENCY: Record<string, string>;
export const FRACTIONS: Record<string, string>;
export const MISC_SYMBOLS: Record<string, string>;

29
node_modules/@nodable/entities/src/index.js generated vendored Normal file
View File

@@ -0,0 +1,29 @@
/**
 * @nodable/entities
 *
 * Standalone, zero-dependency XML/HTML entity replacement.
 *
 * Public entry point: re-exports the decoder, the named entity group maps,
 * and the encoder from their implementation modules.
 */

// `EntityDecoder` is exposed both by name and as the module's default export.
// The package's type declarations declare it with `export default class`, so
// without the default re-export a default import would type-check but fail
// when the module is linked.
export { default as EntityDecoder, default } from './EntityDecoder.js';

// Named entity groups, intended for the decoder's `namedEntities` option.
export {
  COMMON_HTML,
  XML,
  ALL_ENTITIES,
  ARROWS,
  BASIC_LATIN,
  CURRENCY,
  MATH,
  MATH_ADVANCED,
  CYRILLIC,
  FRACTIONS,
  GREEK,
  LATIN_ACCENTS,
  LATIN_EXTENDED,
  MISC_SYMBOLS,
  PUNCTUATION,
  SHAPES,
} from './entities.js';

export { default as EntityEncoder } from './EntityEncoder.js';