Auto-commit 2026-04-29 16:31
This commit is contained in:
41
node_modules/@nodable/entities/README.md
generated
vendored
Normal file
41
node_modules/@nodable/entities/README.md
generated
vendored
Normal file
@@ -0,0 +1,41 @@
|
||||
# @nodable/entities
|
||||
|
||||
Fast, zero-dependency XML/HTML entity encoder and decoder for Node.js.
|
||||
|
||||
## Install
|
||||
|
||||
```bash
|
||||
npm install @nodable/entities
|
||||
```
|
||||
|
||||
## Quick start
|
||||
|
||||
```js
|
||||
import { EntityEncoder, EntityDecoder, ALL_ENTITIES } from '@nodable/entities';
|
||||
|
||||
// Encode: plain text → entity references
|
||||
const enc = new EntityEncoder();
|
||||
enc.encode('Hello © 2024 & <stuff>');
|
||||
// → 'Hello &copy; 2024 &amp; &lt;stuff&gt;'
|
||||
|
||||
// Decode: entity references → plain text
|
||||
const dec = new EntityDecoder({ namedEntities: ALL_ENTITIES });
|
||||
dec.decode('Hello &copy; 2024 &amp; &lt;stuff&gt;');
|
||||
// → 'Hello © 2024 & <stuff>'
|
||||
```
|
||||
|
||||
## Performance
|
||||
|
||||
| | encode | decode |
|
||||
|---|---|---|
|
||||
| `entities` (npm) | 3.65 M req/s | 1.76 M req/s |
|
||||
| `@nodable/entities` | 3.33 M req/s | **5.19 M req/s** |
|
||||
|
||||
## Documentation
|
||||
|
||||
- [EntityEncoder](docs/EntityEncoder.md) — options, API, recipes
|
||||
- [EntityDecoder](docs/EntityDecoder.md) — options, API, security limits, entity sets
|
||||
|
||||
## License
|
||||
|
||||
MIT
|
||||
54
node_modules/@nodable/entities/package.json
generated
vendored
Normal file
54
node_modules/@nodable/entities/package.json
generated
vendored
Normal file
@@ -0,0 +1,54 @@
|
||||
{
|
||||
"name": "@nodable/entities",
|
||||
"version": "2.1.0",
|
||||
"description": "Entity parser for XML, HTML, External entites with security and NCR control",
|
||||
"main": "./src/index.js",
|
||||
"type": "module",
|
||||
"sideEffects": false,
|
||||
"types": "./src/index.d.ts",
|
||||
"scripts": {
|
||||
"test": "node --experimental-vm-modules node_modules/.bin/jest",
|
||||
"test:watch": "node --experimental-vm-modules node_modules/.bin/jest --watch",
|
||||
"test:coverage": "node --experimental-vm-modules node_modules/.bin/jest --coverage",
|
||||
"lint": "eslint src/ test/"
|
||||
},
|
||||
"files": [
|
||||
"src",
|
||||
"README.md"
|
||||
],
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "git+https://github.com/nodable/val-parsers.git"
|
||||
},
|
||||
"keywords": [
|
||||
"fast",
|
||||
"xml",
|
||||
"html",
|
||||
"entity",
|
||||
"encode",
|
||||
"decode",
|
||||
"ncr",
|
||||
"security",
|
||||
"performance"
|
||||
],
|
||||
"author": "Amit Gupta (https://solothought.com)",
|
||||
"license": "MIT",
|
||||
"publishConfig": {
|
||||
"access": "public"
|
||||
},
|
||||
"devDependencies": {
|
||||
"jest": "^29.7.0"
|
||||
},
|
||||
"funding": [
|
||||
{
|
||||
"type": "github",
|
||||
"url": "https://github.com/sponsors/nodable"
|
||||
}
|
||||
],
|
||||
"jest": {
|
||||
"testMatch": [
|
||||
"**/?(*.)+(spec|test).[jt]s?(x)",
|
||||
"**/*_spec.[jt]s?(x)"
|
||||
]
|
||||
}
|
||||
}
|
||||
543
node_modules/@nodable/entities/src/EntityDecoder.js
generated
vendored
Normal file
543
node_modules/@nodable/entities/src/EntityDecoder.js
generated
vendored
Normal file
@@ -0,0 +1,543 @@
|
||||
// ---------------------------------------------------------------------------
|
||||
// Built-in named entity map (name → replacement string)
|
||||
// No regex, no {regex,val} objects — just flat key/value pairs.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
import { XML as DEFAULT_XML_ENTITIES } from "./entities.js"
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Characters that are rejected anywhere inside an entity name.
const SPECIAL_CHARS = new Set('!?\\\\/[]$%{}^&*()<>|+');

/**
 * Guard used before registering an entity name.
 *
 * A name is rejected when it starts with '#' (that prefix is reserved for
 * numeric references) or when any of its characters is in SPECIAL_CHARS.
 *
 * @param {string} name — candidate entity name, without '&' and ';'
 * @returns {string} the same name, untouched, when it is acceptable
 * @throws {Error} naming the first offending character
 */
function validateEntityName(name) {
  if (name[0] === '#') {
    throw new Error(`[EntityReplacer] Invalid character '#' in entity name: "${name}"`);
  }

  // Spread iterates by code point, matching a for…of over the string.
  const offending = [...name].find((ch) => SPECIAL_CHARS.has(ch));
  if (offending !== undefined) {
    throw new Error(`[EntityReplacer] Invalid character '${offending}' in entity name: "${name}"`);
  }

  return name;
}
|
||||
|
||||
/**
 * Merge one or more entity maps into a flat name→string map.
 *
 * Each entry may be either:
 *   - a plain string value:                 { amp: '&' }
 *   - a legacy {regex,val} / {regx,val}:    { lt: { regex: /.../, val: '<' } }
 *     (only the string `val` is kept; the regex is ignored)
 *
 * Entries whose value (or `val`) is not a string — functions, numbers,
 * null, objects without a string `val` — are silently dropped.
 *
 * Values are copied verbatim; in particular, values containing '&' are NOT
 * filtered here. Later maps override earlier ones on key collisions, and
 * falsy maps (null / undefined) are ignored.
 *
 * @param {...object} maps
 * @returns {Record<string, string>} a prototype-less object, so lookups
 *   never see inherited keys
 */
function mergeEntityMaps(...maps) {
  const merged = Object.create(null);

  for (const map of maps) {
    if (!map) continue;

    for (const [name, raw] of Object.entries(map)) {
      // Legacy wrapper objects carry the replacement in `.val`.
      const value = raw !== null && typeof raw === 'object' ? raw.val : raw;
      if (typeof value === 'string') {
        merged[name] = value;
      }
    }
  }

  return merged;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// applyLimitsTo helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Tier labels used to decide which resolved entities count against limits.
const LIMIT_TIER_EXTERNAL = 'external'; // input/runtime + persistent external maps
const LIMIT_TIER_BASE = 'base';         // DEFAULT_XML_ENTITIES + namedEntities (system) maps
const LIMIT_TIER_ALL = 'all';           // every entity regardless of tier

/**
 * Normalise the `applyLimitsTo` option into a Set of tier labels.
 *
 *   - an array            → a Set of exactly its elements
 *   - 'all' or 'base'     → a single-element Set with that label
 *   - anything else       → Set { 'external' } (including undefined, '',
 *                           'external' itself and unrecognised values)
 *
 * @param {string|string[]|undefined} raw
 * @returns {Set<string>}
 */
function parseLimitTiers(raw) {
  if (Array.isArray(raw)) {
    return new Set(raw);
  }

  switch (raw) {
    case LIMIT_TIER_ALL:
    case LIMIT_TIER_BASE:
      return new Set([raw]);
    default:
      // Missing, explicit 'external', or unknown: fall back to the external tier.
      return new Set([LIMIT_TIER_EXTERNAL]);
  }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// NCR (Numeric Character Reference) classification
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Severity order — higher number = stricter action.
// Used to enforce minimum action levels for specific codepoint ranges.
const NCR_LEVEL = Object.freeze({ allow: 0, leave: 1, remove: 2, throw: 3 });

// XML 1.0 §2.2: allowed chars are #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
// Restricted C0: U+0001–U+001F excluding U+0009, U+000A, U+000D
const XML10_ALLOWED_C0 = new Set([0x09, 0x0A, 0x0D]);

/**
 * Parse the `ncr` constructor option into flat, hot-path-friendly fields.
 *
 * Action names are looked up as OWN properties of NCR_LEVEL only. A plain
 * `NCR_LEVEL[name]` would also resolve inherited keys such as 'constructor'
 * or 'toString' to functions, which turns the level into NaN after
 * Math.max() and defeats the "null is at least 'remove'" clamp.
 *
 * @param {object|undefined} ncr
 * @param {1.0|1.1} [ncr.xmlVersion] — anything other than the number 1.1 is treated as 1.0
 * @param {string} [ncr.onNCR] — 'allow' | 'leave' | 'remove' | 'throw'; unknown → 'allow'
 * @param {string} [ncr.nullNCR] — unknown → 'remove'; 'allow'/'leave' are raised to 'remove'
 * @returns {{ xmlVersion: number, onLevel: number, nullLevel: number }}
 */
function parseNCRConfig(ncr) {
  if (!ncr) {
    return { xmlVersion: 1.0, onLevel: NCR_LEVEL.allow, nullLevel: NCR_LEVEL.remove };
  }

  const levelOf = (name, fallback) =>
    typeof name === 'string' && Object.prototype.hasOwnProperty.call(NCR_LEVEL, name)
      ? NCR_LEVEL[name]
      : fallback;

  const xmlVersion = ncr.xmlVersion === 1.1 ? 1.1 : 1.0;
  const onLevel = levelOf(ncr.onNCR, NCR_LEVEL.allow);
  // 'allow' / 'leave' are not meaningful for null — clamp to at least 'remove'.
  const nullLevel = Math.max(levelOf(ncr.nullNCR, NCR_LEVEL.remove), NCR_LEVEL.remove);

  return { xmlVersion, onLevel, nullLevel };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// EntityReplacer
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Single-pass, zero-regex entity decoder for XML/HTML content.
 *
 * Algorithm: scan the string once for '&', read to ';', resolve via map
 * or direct codepoint conversion, build output chunks, join once at the end.
 * Replacement text is never re-scanned, so expansions cannot nest.
 *
 * Entity lookup priority (highest → lowest):
 *   1. input / runtime   (DOCTYPE entities for the current document)
 *   2. persistent external (survive across documents)
 *   3. base named map    (DEFAULT_XML_ENTITIES + user-supplied namedEntities)
 *
 * Both input and external resolve as the 'external' tier for limit purposes.
 * Base map entities resolve as the 'base' tier.
 *
 * Numeric / hex references (&#NNN; / &#xHH;) are resolved directly via
 * String.fromCodePoint() — no map needed. They count as 'base' tier.
 *
 * @example
 * const decoder = new EntityDecoder({ namedEntities: COMMON_HTML });
 * decoder.setExternalEntities({ brand: 'Acme' });
 *
 * const instance = decoder.reset();
 * instance.addInputEntities({ version: '1.0' });
 * instance.decode('&brand; v&version; &lt;'); // 'Acme v1.0 <'
 */
export default class EntityDecoder {
  /**
   * @param {object} [options]
   * @param {object|null} [options.namedEntities] — extra named entities merged into base map
   * @param {object} [options.limit] — security limits
   * @param {number} [options.limit.maxTotalExpansions=0] — 0 = unlimited
   * @param {number} [options.limit.maxExpandedLength=0] — 0 = unlimited
   * @param {'external'|'base'|'all'|string[]} [options.limit.applyLimitsTo='external']
   *   Which entity tiers count against the security limits:
   *   - 'external' (default) — only input/runtime + persistent external entities
   *   - 'base'               — only DEFAULT_XML_ENTITIES + namedEntities
   *   - 'all'                — every entity regardless of tier
   *   - string[]             — explicit combination, e.g. ['external', 'base']
   * @param {((resolved: string, original: string) => string)|null} [options.postCheck=null]
   *   Called with the decoded string and the original input; its return value
   *   is what decode() returns.
   * @param {string[]} [options.remove=[]] — entity names (e.g. ['nbsp', '#13']) to delete (replace with empty string)
   * @param {string[]} [options.leave=[]] — entity names to keep as literal (unchanged in output)
   * @param {boolean} [options.numericAllowed=true]
   *   When false, numeric references are left as literal text unless their
   *   codepoint falls in a restricted range (null, surrogates, XML 1.0 C0),
   *   in which case the restricted-range action still applies.
   * @param {object} [options.ncr] — Numeric Character Reference controls
   * @param {1.0|1.1} [options.ncr.xmlVersion=1.0]
   *   XML version governing which codepoint ranges are restricted:
   *   - 1.0 — C0 controls U+0001–U+001F (except U+0009/000A/000D) are prohibited
   *   - 1.1 — C0 controls are allowed when written as NCRs
   * @param {'allow'|'leave'|'remove'|'throw'} [options.ncr.onNCR='allow']
   *   Base action for numeric references. Severity order: allow < leave < remove < throw.
   *   For codepoint ranges that carry a minimum level (surrogates → remove, XML 1.0 C0 → remove),
   *   the effective action is max(onNCR, rangeMinimum).
   * @param {'remove'|'throw'} [options.ncr.nullNCR='remove']
   *   Action for U+0000 (null). 'allow' and 'leave' are clamped to 'remove'.
   */
  constructor(options = {}) {
    // A default parameter only covers `undefined`; tolerate an explicit null too.
    const opts = options ?? {};

    this._limit = opts.limit || {};
    this._maxTotalExpansions = this._limit.maxTotalExpansions || 0;
    this._maxExpandedLength = this._limit.maxExpandedLength || 0;
    this._postCheck = typeof opts.postCheck === 'function' ? opts.postCheck : r => r;
    this._limitTiers = parseLimitTiers(this._limit.applyLimitsTo ?? LIMIT_TIER_EXTERNAL);
    this._numericAllowed = opts.numericAllowed ?? true;

    // Base map: DEFAULT_XML_ENTITIES + user-supplied extras. Not modified after construction.
    this._baseMap = mergeEntityMaps(DEFAULT_XML_ENTITIES, opts.namedEntities || null);

    // Persistent external entities — survive across documents.
    // Stored as a separate map so reset() never touches them.
    /** @type {Record<string, string>} */
    this._externalMap = Object.create(null);

    // Input / runtime entities — current document only, wiped on reset().
    /** @type {Record<string, string>} */
    this._inputMap = Object.create(null);

    // Per-document counters
    this._totalExpansions = 0;
    this._expandedLength = 0;

    // Tokens (text between '&' and ';') to delete or to keep verbatim.
    /** @type {Set<string>} */
    this._removeSet = new Set(Array.isArray(opts.remove) ? opts.remove : []);
    /** @type {Set<string>} */
    this._leaveSet = new Set(Array.isArray(opts.leave) ? opts.leave : []);

    // NCR config, parsed into flat fields for hot-path speed.
    const ncrCfg = parseNCRConfig(opts.ncr);
    this._ncrXmlVersion = ncrCfg.xmlVersion;
    this._ncrOnLevel = ncrCfg.onLevel;
    this._ncrNullLevel = ncrCfg.nullLevel;
  }

  // -------------------------------------------------------------------------
  // Persistent external entity registration
  // -------------------------------------------------------------------------

  /**
   * Replace the full set of persistent external entities.
   * All keys are validated — throws on invalid characters, in which case the
   * previous set is left untouched. A falsy `map` clears the set.
   * @param {Record<string, string | { regex?: RegExp, val: string }>} map
   */
  setExternalEntities(map) {
    if (map) {
      for (const key of Object.keys(map)) {
        validateEntityName(key);
      }
    }
    this._externalMap = mergeEntityMaps(map);
  }

  /**
   * Add a single persistent external entity.
   * The value is stored only when it is a string that contains no '&';
   * any other value is silently ignored.
   * @param {string} key
   * @param {string} value
   * @throws {Error} when `key` contains invalid characters
   */
  addExternalEntity(key, value) {
    validateEntityName(key);
    if (typeof value === 'string' && value.indexOf('&') === -1) {
      this._externalMap[key] = value;
    }
  }

  // -------------------------------------------------------------------------
  // Input / runtime entity registration (per document)
  // -------------------------------------------------------------------------

  /**
   * Inject DOCTYPE entities for the current document, replacing any
   * previously injected input entities.
   * Also resets per-document expansion counters.
   * Note: unlike setExternalEntities(), keys are not validated here.
   * @param {Record<string, string | { regx?: RegExp, regex?: RegExp, val: string }>} map
   */
  addInputEntities(map) {
    this._totalExpansions = 0;
    this._expandedLength = 0;
    this._inputMap = mergeEntityMaps(map);
  }

  // -------------------------------------------------------------------------
  // Per-document reset
  // -------------------------------------------------------------------------

  /**
   * Wipe input/runtime entities and reset counters.
   * Call this before processing each new document.
   * @returns {this}
   */
  reset() {
    this._inputMap = Object.create(null);
    this._totalExpansions = 0;
    this._expandedLength = 0;
    return this;
  }

  // -------------------------------------------------------------------------
  // XML version (can be set after construction, e.g. once parser reads <?xml?>)
  // -------------------------------------------------------------------------

  /**
   * Update the XML version used for NCR classification.
   * Only the number 1.1 selects XML 1.1; every other value selects 1.0.
   * @param {1.0|1.1|number} version
   */
  setXmlVersion(version) {
    this._ncrXmlVersion = version === 1.1 ? 1.1 : 1.0;
  }

  // -------------------------------------------------------------------------
  // Primary API
  // -------------------------------------------------------------------------

  /**
   * Replace all entity references in `str` in a single pass.
   *
   * Non-string and empty inputs are returned as-is (postCheck is not called).
   * Unknown or malformed references are left in the output unchanged.
   *
   * @param {string} str
   * @returns {string} the value returned by postCheck(decoded, original)
   * @throws {Error} when an expansion limit is exceeded, or when an NCR
   *   resolves to the 'throw' action
   */
  decode(str) {
    if (typeof str !== 'string' || str.length === 0) return str;
    // TODO: check whether an early `indexOf('&') === -1` fast path is worthwhile.

    const original = str;
    const chunks = [];
    const len = str.length;
    let last = 0; // start of next unprocessed literal chunk
    let i = 0;

    const limitExpansions = this._maxTotalExpansions > 0;
    const limitLength = this._maxExpandedLength > 0;
    const checkLimits = limitExpansions || limitLength;

    while (i < len) {
      // Scan forward to next '&'
      if (str.charCodeAt(i) !== 38 /* '&' */) { i++; continue; }

      // Found '&' at position i — look for the closing ';' within a bounded window.
      let j = i + 1;
      while (j < len && str.charCodeAt(j) !== 59 /* ';' */ && (j - i) <= 32) j++;

      if (j >= len || str.charCodeAt(j) !== 59) {
        // No closing ';' within window — treat '&' as literal
        i++;
        continue;
      }

      // Raw token between '&' and ';' (exclusive)
      const token = str.slice(i + 1, j);
      if (token.length === 0) { i++; continue; }

      let replacement;
      let tier; // which limit tier this entity belongs to

      if (this._removeSet.has(token)) {
        // User-directed removal: drop the reference. It is counted as
        // 'external' tier for limit purposes, whether or not the name is known.
        replacement = '';
        tier = LIMIT_TIER_EXTERNAL;
      } else if (this._leaveSet.has(token)) {
        // Do not replace — keep original &token; as literal
        i++;
        continue;
      } else if (token.charCodeAt(0) === 35 /* '#' */) {
        // Numeric reference. Classification of prohibited codepoints happens
        // inside _resolveNCR regardless of numericAllowed.
        const ncrResult = this._resolveNCR(token);
        if (ncrResult === undefined) {
          // 'leave' action or malformed reference — keep original &token; as-is
          i++;
          continue;
        }
        replacement = ncrResult; // '' for remove, char string for allow
        tier = LIMIT_TIER_BASE;
      } else {
        // Named reference
        const resolved = this._resolveName(token);
        replacement = resolved?.value;
        tier = resolved?.tier;
      }

      if (replacement === undefined) {
        // Unknown entity — leave as-is, advance past '&' only
        i++;
        continue;
      }

      // Flush literal chunk before this entity
      if (i > last) chunks.push(str.slice(last, i));
      chunks.push(replacement);
      last = j + 1; // skip past ';'
      i = last;

      // Apply expansion limits only if this tier is being tracked
      if (checkLimits && this._tierCounts(tier)) {
        if (limitExpansions) {
          this._totalExpansions++;
          if (this._totalExpansions > this._maxTotalExpansions) {
            throw new Error(
              `[EntityReplacer] Entity expansion count limit exceeded: ` +
              `${this._totalExpansions} > ${this._maxTotalExpansions}`
            );
          }
        }
        if (limitLength) {
          // delta: replacement.length minus the raw &token; length (token.length + 2 for '&' and ';')
          const delta = replacement.length - (token.length + 2);
          if (delta > 0) {
            this._expandedLength += delta;
            if (this._expandedLength > this._maxExpandedLength) {
              throw new Error(
                `[EntityReplacer] Expanded content length limit exceeded: ` +
                `${this._expandedLength} > ${this._maxExpandedLength}`
              );
            }
          }
        }
      }
    }

    // Flush trailing literal
    if (last < len) chunks.push(str.slice(last));

    // If nothing was replaced, chunks holds at most the whole input.
    const result = chunks.length === 0 ? str : chunks.join('');

    return this._postCheck(result, original);
  }

  // -------------------------------------------------------------------------
  // Private: limit tier check
  // -------------------------------------------------------------------------

  /**
   * Returns true if a resolved entity of the given tier should count
   * against the expansion/length limits.
   * @param {string} tier — LIMIT_TIER_EXTERNAL | LIMIT_TIER_BASE
   * @returns {boolean}
   */
  _tierCounts(tier) {
    if (this._limitTiers.has(LIMIT_TIER_ALL)) return true;
    return this._limitTiers.has(tier);
  }

  // -------------------------------------------------------------------------
  // Private: entity resolution
  // -------------------------------------------------------------------------

  /**
   * Resolve a named entity token (without & and ;).
   * Priority: inputMap > externalMap > baseMap
   * Returns the resolved value tagged with its limit tier.
   *
   * @param {string} name
   * @returns {{ value: string, tier: string }|undefined} undefined when the
   *   name is in none of the three maps
   */
  _resolveName(name) {
    // input and external both count as 'external' tier for limit purposes —
    // they are injected at runtime and are the untrusted surface.
    if (name in this._inputMap) return { value: this._inputMap[name], tier: LIMIT_TIER_EXTERNAL };
    if (name in this._externalMap) return { value: this._externalMap[name], tier: LIMIT_TIER_EXTERNAL };
    if (name in this._baseMap) return { value: this._baseMap[name], tier: LIMIT_TIER_BASE };
    return undefined;
  }

  /**
   * Classify a codepoint and return the minimum action level that must be applied.
   * Returns -1 when no minimum is imposed (normal allow path).
   *
   * Ranges checked (in priority order):
   *   1. U+0000                           — null, governed by nullNCR
   *   2. U+D800–U+DFFF                    — surrogates (min: remove)
   *   3. U+0001–U+001F \ {0x09,0x0A,0x0D} — XML 1.0 restricted C0 (min: remove);
   *      skipped when the XML version is 1.1
   *
   * @param {number} cp — codepoint
   * @returns {number} — minimum NCR_LEVEL value, or -1 for no restriction
   */
  _classifyNCR(cp) {
    // 1. Null
    if (cp === 0) return this._ncrNullLevel;

    // 2. Surrogates — minimum 'remove'
    if (cp >= 0xD800 && cp <= 0xDFFF) return NCR_LEVEL.remove;

    // 3. XML 1.0 restricted C0 controls
    if (this._ncrXmlVersion === 1.0) {
      if (cp >= 0x01 && cp <= 0x1F && !XML10_ALLOWED_C0.has(cp)) return NCR_LEVEL.remove;
    }

    return -1; // no restriction
  }

  /**
   * Execute a resolved NCR action.
   *
   * @param {number} action — NCR_LEVEL value
   * @param {string} token  — raw token (e.g. '#38') for error messages
   * @param {number} cp     — codepoint
   * @returns {string|undefined}
   *   - decoded character string — 'allow' (also the fallback for unrecognised levels)
   *   - ''                       — 'remove'
   *   - undefined                — 'leave' (caller must skip past '&' only)
   * @throws {Error} for the 'throw' action
   */
  _applyNCRAction(action, token, cp) {
    switch (action) {
      case NCR_LEVEL.allow: return String.fromCodePoint(cp);
      case NCR_LEVEL.remove: return '';
      case NCR_LEVEL.leave: return undefined; // signal: keep literal
      case NCR_LEVEL.throw:
        throw new Error(
          `[EntityDecoder] Prohibited numeric character reference ` +
          `&${token}; (U+${cp.toString(16).toUpperCase().padStart(4, '0')})`
        );
      default: return String.fromCodePoint(cp);
    }
  }

  /**
   * Strictly parse the digits of a numeric reference token.
   *
   * Unlike parseInt(), this rejects signs, whitespace, empty digit strings
   * and trailing garbage, so '#60script' or '#x26zz' are not accepted.
   *
   * @param {string} token — e.g. '#38', '#x26', '#X26' (leading '#' included)
   * @returns {number} the codepoint, or -1 when the token is malformed or
   *   the value exceeds U+10FFFF
   */
  _parseNCRCodePoint(token) {
    const marker = token.charCodeAt(1);
    const isHex = marker === 120 /* x */ || marker === 88 /* X */;
    const radix = isHex ? 16 : 10;
    const start = isHex ? 2 : 1;
    const end = token.length;
    if (start >= end) return -1; // '#' or '#x' with no digits

    let cp = 0;
    for (let k = start; k < end; k++) {
      const c = token.charCodeAt(k);
      let digit;
      if (c >= 48 && c <= 57) digit = c - 48;                 // 0-9
      else if (isHex && c >= 97 && c <= 102) digit = c - 87;  // a-f
      else if (isHex && c >= 65 && c <= 70) digit = c - 55;   // A-F
      else return -1;

      cp = cp * radix + digit;
      if (cp > 0x10FFFF) return -1;
    }
    return cp;
  }

  /**
   * Full NCR resolution pipeline for a numeric token.
   *
   * Steps:
   *   1. Strictly parse the codepoint (decimal or hex); malformed or
   *      out-of-range (> U+10FFFF) → leave as-is.
   *   2. Classify the codepoint to find the minimum required action level.
   *   3. If numericAllowed is false and no minimum restriction applies → leave as-is.
   *   4. Resolve effective action = max(onNCR, minimum).
   *   5. Apply and return.
   *
   * @param {string} token — e.g. '#38', '#x26', '#X26'
   * @returns {string|undefined}
   *   - string (incl. '') — replacement ('' = remove)
   *   - undefined         — leave original &token; as-is
   */
  _resolveNCR(token) {
    // Step 1: parse; anything malformed or out of range is left untouched.
    const cp = this._parseNCRCodePoint(token);
    if (cp < 0) return undefined;

    // Step 2: classify to get minimum action level
    const minimum = this._classifyNCR(cp);

    // Step 3: numericAllowed is false and no hard minimum → leave
    if (!this._numericAllowed && minimum < NCR_LEVEL.remove) return undefined;

    // Step 4: effective action = max(configured onNCR, range minimum)
    const effective = minimum === -1
      ? this._ncrOnLevel
      : Math.max(this._ncrOnLevel, minimum);

    // Step 5: apply
    return this._applyNCRAction(effective, token, cp);
  }
}
|
||||
194
node_modules/@nodable/entities/src/EntityEncoder.js
generated
vendored
Normal file
194
node_modules/@nodable/entities/src/EntityEncoder.js
generated
vendored
Normal file
@@ -0,0 +1,194 @@
|
||||
// EntityEncoder.js
|
||||
import { trie1, trie2, trie3 } from './entityTries.js';
|
||||
|
||||
// Replacement strings indexed by char code — direct array access, no hashing
|
||||
const XML_UNSAFE_REPLACEMENT = new Array(128);
|
||||
XML_UNSAFE_REPLACEMENT[38] = '&'; // &
|
||||
XML_UNSAFE_REPLACEMENT[60] = '<'; // <
|
||||
XML_UNSAFE_REPLACEMENT[62] = '>'; // >
|
||||
XML_UNSAFE_REPLACEMENT[34] = '"'; // "
|
||||
XML_UNSAFE_REPLACEMENT[39] = '''; // '
|
||||
|
||||
// Typed bitmask for O(1) "is this ASCII code XML-unsafe?" check
|
||||
const IS_XML_UNSAFE = new Uint8Array(128);
|
||||
IS_XML_UNSAFE[38] = 1;
|
||||
IS_XML_UNSAFE[60] = 1;
|
||||
IS_XML_UNSAFE[62] = 1;
|
||||
IS_XML_UNSAFE[34] = 1;
|
||||
IS_XML_UNSAFE[39] = 1;
|
||||
|
||||
// Fast pre-scan: bail out immediately if nothing needs encoding
|
||||
const NEEDS_PROCESSING = /[&<>"'\u0080-\uFFFF]/;
|
||||
|
||||
export default class EntityEncoder {
|
||||
constructor(options = {}) {
|
||||
this.encodeXmlSafe = options.encodeXmlSafe !== false;
|
||||
this.encodeAllNamed = options.encodeAllNamed !== false;
|
||||
this.maxReplacements = options.maxReplacements || 0;
|
||||
this.replacementsCount = 0;
|
||||
}
|
||||
|
||||
encode(str) {
|
||||
if (typeof str !== 'string' || str.length === 0) return str;
|
||||
if (!NEEDS_PROCESSING.test(str)) return str;
|
||||
|
||||
const maxRep = this.maxReplacements;
|
||||
if (maxRep > 0 && this.replacementsCount >= maxRep) return str;
|
||||
|
||||
// Hoist to locals — avoids `this` property lookup inside the hot loop
|
||||
const encodeXmlSafe = this.encodeXmlSafe;
|
||||
const encodeAllNamed = this.encodeAllNamed;
|
||||
|
||||
const len = str.length;
|
||||
|
||||
let result = '';
|
||||
let last = 0;
|
||||
let i = 0;
|
||||
let limitReached = false;
|
||||
|
||||
// ── Main loop: runs to len-2 so trie3 never needs a bounds check ────────
|
||||
// The last 2 characters are handled by the tail block below.
|
||||
const mainEnd = len - 2; // i <= mainEnd guarantees i+1 and i+2 are valid
|
||||
|
||||
while (i <= mainEnd && !limitReached) {
|
||||
const c0 = str.charCodeAt(i);
|
||||
|
||||
// ── ASCII branch ───────────────────────────────────────────────────
|
||||
if (c0 < 128) {
|
||||
if (encodeXmlSafe && IS_XML_UNSAFE[c0] === 1) {
|
||||
result += str.substring(last, i) + XML_UNSAFE_REPLACEMENT[c0];
|
||||
last = ++i;
|
||||
if (maxRep > 0) {
|
||||
this.replacementsCount++;
|
||||
if (this.replacementsCount >= maxRep) {
|
||||
limitReached = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Bulk-skip: advance to the next interesting position without
|
||||
// touching the outer loop overhead on every safe character
|
||||
i++;
|
||||
while (i <= mainEnd && !limitReached) {
|
||||
const c = str.charCodeAt(i);
|
||||
if (c >= 128 || (encodeXmlSafe && IS_XML_UNSAFE[c] === 1)) break;
|
||||
i++;
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// ── Non-ASCII: integer-keyed trie lookup ───────────────────────────
|
||||
// No bounds checks needed for c1/c2 because i <= mainEnd guarantees
|
||||
// i+1 and i+2 are both within the string.
|
||||
let matchedEntity = null;
|
||||
let advance = 1;
|
||||
|
||||
// Try 3-char match first (longest wins)
|
||||
const mid3 = trie3.get(c0);
|
||||
if (mid3 !== undefined) {
|
||||
const c1 = str.charCodeAt(i + 1);
|
||||
const inner3 = mid3.get(c1);
|
||||
if (inner3 !== undefined) {
|
||||
const c2 = str.charCodeAt(i + 2);
|
||||
const candidate = inner3.get(c2);
|
||||
if (candidate !== undefined) { matchedEntity = candidate; advance = 3; }
|
||||
}
|
||||
}
|
||||
|
||||
// Try 2-char match
|
||||
if (matchedEntity === null) {
|
||||
const inner2 = trie2.get(c0);
|
||||
if (inner2 !== undefined) {
|
||||
const c1 = str.charCodeAt(i + 1);
|
||||
const candidate = inner2.get(c1);
|
||||
if (candidate !== undefined) { matchedEntity = candidate; advance = 2; }
|
||||
}
|
||||
}
|
||||
|
||||
// Try 1-char match
|
||||
if (matchedEntity === null && encodeAllNamed) {
|
||||
const candidate = trie1.get(c0);
|
||||
if (candidate !== undefined) { matchedEntity = candidate; }
|
||||
}
|
||||
|
||||
if (matchedEntity !== null) {
|
||||
result += str.substring(last, i) + matchedEntity;
|
||||
i += advance;
|
||||
last = i;
|
||||
if (maxRep > 0) {
|
||||
this.replacementsCount++;
|
||||
if (this.replacementsCount >= maxRep) {
|
||||
limitReached = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
// ── Tail: handle the last 1-2 characters (no 3-char match possible) ────
|
||||
while (i < len && !limitReached) {
|
||||
const c0 = str.charCodeAt(i);
|
||||
|
||||
if (c0 < 128) {
|
||||
if (encodeXmlSafe && IS_XML_UNSAFE[c0] === 1) {
|
||||
result += str.substring(last, i) + XML_UNSAFE_REPLACEMENT[c0];
|
||||
last = ++i;
|
||||
if (maxRep > 0) {
|
||||
this.replacementsCount++;
|
||||
if (this.replacementsCount >= maxRep) {
|
||||
limitReached = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
i++;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// Non-ASCII tail — only 2-char and 1-char matches are possible here
|
||||
let matchedEntity = null;
|
||||
let advance = 1;
|
||||
|
||||
if (i + 1 < len) {
|
||||
const inner2 = trie2.get(c0);
|
||||
if (inner2 !== undefined) {
|
||||
const c1 = str.charCodeAt(i + 1);
|
||||
const candidate = inner2.get(c1);
|
||||
if (candidate !== undefined) { matchedEntity = candidate; advance = 2; }
|
||||
}
|
||||
}
|
||||
|
||||
if (matchedEntity === null && encodeAllNamed) {
|
||||
const candidate = trie1.get(c0);
|
||||
if (candidate !== undefined) { matchedEntity = candidate; }
|
||||
}
|
||||
|
||||
if (matchedEntity !== null) {
|
||||
result += str.substring(last, i) + matchedEntity;
|
||||
i += advance;
|
||||
last = i;
|
||||
if (maxRep > 0) {
|
||||
this.replacementsCount++;
|
||||
if (this.replacementsCount >= maxRep) {
|
||||
limitReached = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
// ── Flush any remaining literal suffix ────────────────────────────────
|
||||
if (last < len) result += str.substring(last);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
 * Reset the cumulative replacement counter to zero.
 *
 * `encode()` increments `replacementsCount` for every substitution while a
 * replacement limit is active and stops once the limit is reached; calling
 * this method lets subsequent `encode()` calls start counting from zero again.
 */
reset() {
|
||||
this.replacementsCount = 0;
|
||||
}
|
||||
}
|
||||
1177
node_modules/@nodable/entities/src/entities.js
generated
vendored
Normal file
1177
node_modules/@nodable/entities/src/entities.js
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
49
node_modules/@nodable/entities/src/entityTries.js
generated
vendored
Normal file
49
node_modules/@nodable/entities/src/entityTries.js
generated
vendored
Normal file
@@ -0,0 +1,49 @@
|
||||
// entityTries.js
|
||||
// Builds integer-keyed tries so the encoder never allocates a string object
|
||||
// during lookup — every key is a plain charCode number.
|
||||
//
|
||||
// trie1: Map<code0, entity>
|
||||
// trie2: Map<code0, Map<code1, entity>>
|
||||
// trie3: Map<code0, Map<code1, Map<code2, entity>>>
|
||||
|
||||
import { ALL_ENTITIES } from './entities.js';
|
||||
|
||||
// Invert ALL_ENTITIES: key = literal character sequence, value = "&name;".
// When several names share one sequence, the name visited last is retained.
const CHAR_TO_ENTITY = new Map();
for (const name of Object.keys(ALL_ENTITIES)) {
  CHAR_TO_ENTITY.set(ALL_ENTITIES[name], `&${name};`);
}

export const trie1 = new Map(); // code0 → entity string
export const trie2 = new Map(); // code0 → Map(code1 → entity string)
export const trie3 = new Map(); // code0 → Map(code1 → Map(code2 → entity string))

// Return the nested Map stored under `code` in `level`, creating it on first use.
const childOf = (level, code) => {
  let next = level.get(code);
  if (next === undefined) {
    next = new Map();
    level.set(code, next);
  }
  return next;
};

// Index every sequence by its UTF-16 code units, one trie per sequence length.
CHAR_TO_ENTITY.forEach((entity, chars) => {
  switch (chars.length) {
    case 1:
      // A 1-char key may share its first code unit with longer sequences.
      // That is not a conflict: each length lives in its own trie, and the
      // encoder consults trie1 only after the longer tries have missed.
      trie1.set(chars.charCodeAt(0), entity);
      break;

    case 2:
      childOf(trie2, chars.charCodeAt(0)).set(chars.charCodeAt(1), entity);
      break;

    case 3:
      childOf(childOf(trie3, chars.charCodeAt(0)), chars.charCodeAt(1))
        .set(chars.charCodeAt(2), entity);
      break;

    default:
      // Other lengths are not indexed.
      // HTML5 has no named entity whose character sequence is longer than 3 chars
      break;
  }
});
|
||||
264
node_modules/@nodable/entities/src/index.d.ts
generated
vendored
Normal file
264
node_modules/@nodable/entities/src/index.d.ts
generated
vendored
Normal file
@@ -0,0 +1,264 @@
|
||||
// ---------------------------------------------------------------------------
|
||||
// @nodable/entities — TypeScript declarations
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** A function-based entity replacement value (used for numeric refs). */
|
||||
export type EntityValFn = (match: string, captured: string, ...rest: unknown[]) => string;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Encoder options
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export interface EntityEncoderOptions {
|
||||
/**
|
||||
* Whether to encode XML unsafe characters: `&`, `<`, `>`, `"`, `'`.
|
||||
* @default true
|
||||
*/
|
||||
encodeXmlSafe?: boolean;
|
||||
|
||||
/**
|
||||
* Whether to encode non‑ASCII characters (e.g. `é` → `&eacute;`) using the
|
||||
* built‑in named entity trie.
|
||||
* @default true
|
||||
*/
|
||||
encodeAllNamed?: boolean;
|
||||
|
||||
/**
|
||||
* Maximum number of replacements performed **cumulatively** across all
|
||||
* `encode()` calls. `0` means unlimited.
|
||||
*
|
||||
* Use `reset()` to reset the internal counter.
|
||||
* @default 0
|
||||
*/
|
||||
maxReplacements?: number;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// EntityEncoder class
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* High‑performance encoder that replaces characters with XML/HTML entities.
|
||||
*
|
||||
* - Escapes XML unsafe characters (`&`, `<`, `>`, `"`, `'`) when `encodeXmlSafe` is true.
|
||||
* - Replaces non‑ASCII characters (e.g. `é`, `©`) with named entities using
|
||||
* a compact trie‑based lookup when `encodeAllNamed` is true.
|
||||
* - Supports a cumulative replacement limit (`maxReplacements`) that persists
|
||||
* across multiple `encode()` calls until `reset()` is called.
|
||||
*
|
||||
* @example
|
||||
* const encoder = new EntityEncoder({ encodeXmlSafe: true, encodeAllNamed: true });
|
||||
* encoder.encode('<foo>'); // "&lt;foo&gt;"
|
||||
* encoder.encode('© 2025'); // "&copy; 2025"
|
||||
*
|
||||
* // With limit
|
||||
* const limited = new EntityEncoder({ maxReplacements: 2 });
|
||||
* limited.encode('<>&'); // "&lt;&gt;&" (third replacement omitted)
|
||||
* limited.reset(); // reset counter
|
||||
*/
|
||||
export class EntityEncoder {
|
||||
constructor(options?: EntityEncoderOptions);
|
||||
|
||||
/**
|
||||
* Encode a string by replacing XML‑unsafe characters and (optionally)
|
||||
* non‑ASCII characters with named entities.
|
||||
*
|
||||
* If `maxReplacements` is set and the cumulative limit has been reached,
|
||||
* the input string is returned unchanged.
|
||||
*
|
||||
* @returns Encoded string (may be identical to input if no replacements needed
|
||||
* or the limit has been exhausted).
|
||||
*/
|
||||
encode(str: string): string;
|
||||
|
||||
/**
|
||||
* Reset the internal replacement counter.
|
||||
* Does **not** change `encodeXmlSafe`, `encodeAllNamed`, or `maxReplacements`.
|
||||
*/
|
||||
reset(): void;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Constructor options for EntityDecoder (existing)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Controls which entity categories count toward the expansion limits.
|
||||
*
|
||||
* - `'external'` — only untrusted / injected entities (default)
|
||||
* - `'base'` — only built‑in XML entities + user‑supplied `namedEntities`
|
||||
* - `'all'` — all entities regardless of tier
|
||||
* - `string[]` — explicit combination, e.g. `['external', 'base']`
|
||||
*/
|
||||
export type ApplyLimitsTo = 'external' | 'base' | 'all' | Array<'external' | 'base'>;
|
||||
|
||||
export interface EntityDecoderLimitOptions {
|
||||
/**
|
||||
* Maximum number of entity references expanded **per document**.
|
||||
* `0` means unlimited.
|
||||
* @default 0
|
||||
*/
|
||||
maxTotalExpansions?: number;
|
||||
|
||||
/**
|
||||
* Maximum number of characters **added** by entity expansion per document.
|
||||
* `0` means unlimited.
|
||||
* @default 0
|
||||
*/
|
||||
maxExpandedLength?: number;
|
||||
|
||||
/**
|
||||
* Which entity tiers count toward the expansion limits.
|
||||
*
|
||||
* - `'external'` (default) – only input/runtime + persistent external entities
|
||||
* - `'base'` – only built‑in XML + `namedEntities`
|
||||
* - `'all'` – every entity regardless of tier
|
||||
* - `string[]` – explicit combination, e.g. `['external', 'base']`
|
||||
*
|
||||
* @default 'external'
|
||||
*/
|
||||
applyLimitsTo?: ApplyLimitsTo;
|
||||
}
|
||||
|
||||
export interface EntityDecoderNCROptions {
|
||||
/**
|
||||
* XML version used for NCR classification.
|
||||
* @default 1.0
|
||||
*/
|
||||
xmlVersion?: 1.0 | 1.1;
|
||||
|
||||
/**
|
||||
* Base action for all numeric references.
|
||||
* @default 'allow'
|
||||
*/
|
||||
onNCR?: 'allow' | 'leave' | 'remove' | 'throw';
|
||||
|
||||
/**
|
||||
* Action for null NCR (U+0000).
|
||||
* @default 'remove'
|
||||
*/
|
||||
nullNCR?: 'remove' | 'throw';
|
||||
}
|
||||
|
||||
export interface EntityDecoderOptions {
|
||||
/**
|
||||
* Extra named entities merged into the **base map** (trusted, counts as `'base'` tier).
|
||||
* These are combined with the built‑in XML entities (`lt`, `gt`, `quot`, `apos`).
|
||||
* Values containing `&` are silently skipped to prevent recursive expansion.
|
||||
*
|
||||
* @default null
|
||||
*/
|
||||
namedEntities?: Record<string, string | { regex: RegExp; val: string | EntityValFn }> | null;
|
||||
|
||||
|
||||
/**
|
||||
* Hook called once on the fully decoded string (after all replacements).
|
||||
*
|
||||
* - Receives `(resolved, original)` and **must return a string**.
|
||||
* - To reject expansion, return `original`.
|
||||
* - To sanitize, return a cleaned version of `resolved`.
|
||||
*
|
||||
* @example
|
||||
* postCheck: (resolved, original) =>
|
||||
* /<[a-z]/i.test(resolved) ? original : resolved
|
||||
*/
|
||||
postCheck?: ((resolved: string, original: string) => string) | null;
|
||||
|
||||
/**
|
||||
* Whether numeric character references (`&#NNN;`, `&#xHH;`) are allowed.
|
||||
* @default true
|
||||
*/
|
||||
numericAllowed?: boolean;
|
||||
|
||||
/**
|
||||
* Array of entity names or numeric references to leave unexpanded.
|
||||
* @default []
|
||||
*/
|
||||
leave?: string[];
|
||||
|
||||
/**
|
||||
* Array of entity names or numeric references to remove.
|
||||
* @default []
|
||||
*/
|
||||
remove?: string[];
|
||||
|
||||
/**
|
||||
* Security limits for entity expansion.
|
||||
*/
|
||||
limit?: EntityDecoderLimitOptions;
|
||||
|
||||
/**
|
||||
* Numeric Character Reference (NCR) policy.
|
||||
*/
|
||||
ncr?: EntityDecoderNCROptions;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// EntityDecoder class (default export)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Single‑pass, zero‑regex entity decoder for XML/HTML content.
 *
 * Declared as a **named** export to match the runtime entry point, which
 * re-exports the class as `EntityDecoder` and provides no default export:
 *
 *     import { EntityDecoder } from '@nodable/entities';
 *
 * ## Entity lookup priority (highest → lowest)
 * 1. **input / runtime** – injected via `addInputEntities()` (DOCTYPE per document)
 * 2. **persistent external** – set via `setExternalEntities()` / `addExternalEntity()`
 * 3. **base map** – built‑in XML entities + user‑supplied `namedEntities`
 *
 * Numeric references (`&#NNN;`, `&#xHH;`) are resolved directly and count as the `'base'` tier.
 *
 * @example
 * const decoder = new EntityDecoder({
 *   namedEntities: COMMON_HTML,
 *   limit: { maxTotalExpansions: 100 }
 * });
 * decoder.setExternalEntities({ brand: 'Acme' });
 *
 * decoder.addInputEntities({ version: '1.0' });
 * decoder.decode('&brand; v&version; &lt;'); // 'Acme v1.0 <'
 *
 * decoder.reset(); // clears input entities + counters, keeps external entities
 */
export class EntityDecoder {
  /**
   * @param options Decoder configuration; every field is optional.
   */
  constructor(options?: EntityDecoderOptions);

  /**
   * Set the persistent external entities (second lookup tier).
   *
   * @param map Entity name → replacement string, or a `{ regex, val }` descriptor.
   */
  setExternalEntities(
    map: Record<string, string | { regex: RegExp; val: string | EntityValFn }>
  ): void;

  /**
   * Add a single persistent external entity.
   *
   * @param key Entity name.
   * @param value Replacement text.
   */
  addExternalEntity(key: string, value: string): void;

  /**
   * Inject input / runtime entities (highest lookup tier), e.g. those declared
   * in a document's DOCTYPE.
   *
   * @param map Entity name → replacement string or descriptor. Both the `regex`
   *   and the `regx` key spellings are accepted by this signature
   *   (NOTE(review): `regx` is presumably kept for compatibility — confirm
   *   against the implementation).
   */
  addInputEntities(
    map: Record<
      string,
      | string
      | { regx: RegExp; val: string | EntityValFn }
      | { regex: RegExp; val: string | EntityValFn }
    >
  ): void;

  /**
   * Clear input entities and counters; persistent external entities are kept.
   *
   * @returns The decoder instance, allowing call chaining.
   */
  reset(): this;

  /**
   * Decode the entity references found in `str`.
   *
   * @param str Text that may contain named or numeric entity references.
   * @returns The decoded text.
   */
  decode(str: string): string;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Named entity group exports (for use with `namedEntities` option)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export const COMMON_HTML: Record<string, string>;
|
||||
export const ALL_ENTITIES: Record<string, string>;
|
||||
export const XML: Record<string, string>;
|
||||
export const BASIC_LATIN: Record<string, string>;
|
||||
export const LATIN_ACCENTS: Record<string, string>;
|
||||
export const LATIN_EXTENDED: Record<string, string>;
|
||||
export const GREEK: Record<string, string>;
|
||||
export const CYRILLIC: Record<string, string>;
|
||||
export const MATH: Record<string, string>;
|
||||
export const MATH_ADVANCED: Record<string, string>;
|
||||
export const ARROWS: Record<string, string>;
|
||||
export const SHAPES: Record<string, string>;
|
||||
export const PUNCTUATION: Record<string, string>;
|
||||
export const CURRENCY: Record<string, string>;
|
||||
export const FRACTIONS: Record<string, string>;
|
||||
export const MISC_SYMBOLS: Record<string, string>;
|
||||
29
node_modules/@nodable/entities/src/index.js
generated
vendored
Normal file
29
node_modules/@nodable/entities/src/index.js
generated
vendored
Normal file
@@ -0,0 +1,29 @@
|
||||
/**
|
||||
* @nodable/entities
|
||||
*
|
||||
* Standalone, zero-dependency XML/HTML entity replacement.
|
||||
*
|
||||
|
||||
*/
|
||||
|
||||
export { default as EntityDecoder } from './EntityDecoder.js';
|
||||
export {
|
||||
COMMON_HTML,
|
||||
XML,
|
||||
ALL_ENTITIES,
|
||||
ARROWS,
|
||||
BASIC_LATIN,
|
||||
CURRENCY,
|
||||
MATH,
|
||||
MATH_ADVANCED,
|
||||
CYRILLIC,
|
||||
FRACTIONS,
|
||||
GREEK,
|
||||
LATIN_ACCENTS,
|
||||
LATIN_EXTENDED,
|
||||
MISC_SYMBOLS,
|
||||
PUNCTUATION,
|
||||
SHAPES,
|
||||
} from './entities.js';
|
||||
|
||||
export { default as EntityEncoder } from './EntityEncoder.js';
|
||||
Reference in New Issue
Block a user