#!/usr/bin/env node
// convert-hljs-to-json.mjs
//
// Executes a highlight.js language module inside a `vm` sandbox with a small
// stub `hljs` object, captures the language definition it produces, converts
// RegExp values to their source strings, and writes the result as JSON.
//
// Usage:
//   node convert-hljs-to-json.mjs input.js output.json [--id mirc] [--name "mIRC"] [--aliases mrc,mircs]

import fs from 'fs';
import vm from 'vm';
import path from 'path';

const argv = process.argv.slice(2);
if (argv.length < 2) {
  console.error('Usage: node convert-hljs-to-json.mjs input.js output.json [--id id] [--name "Name"] [--aliases a,b,c]');
  process.exit(1);
}

const inFile = argv[0];
const outFile = argv[1];

/**
 * Read the value following a command-line flag.
 *
 * @param {string} flag - Flag to look for, e.g. `--id`.
 * @returns {string|null} The argument right after the flag, or null when the
 *   flag is absent or has no (non-empty) value after it.
 */
function readOpt(flag) {
  const i = argv.indexOf(flag);
  return (i !== -1 && argv[i + 1]) ? argv[i + 1] : null;
}

const optId = readOpt('--id');
const optName = readOpt('--name');
const optAliasesS = readOpt('--aliases');
const optAliases = optAliasesS
  ? optAliasesS.split(',').map((s) => s.trim()).filter(Boolean)
  : null;

// --- Minimal hljs stub with common modes & helpers

/**
 * Build the stub `hljs` object handed to the language module.
 *
 * It exposes a few common modes, `inherit`, and a `registerLanguage` that
 * records the most recent registration; that record is readable through the
 * non-enumerable `__captured` getter.
 *
 * @returns {object} The stub object.
 */
function makeHLJS() {
  const BACKSLASH_ESCAPE = { begin: /\\[\s\S]/ };
  const APOS_STRING_MODE = {
    className: 'string',
    begin: /'/,
    end: /'/,
    illegal: /\n/,
    contains: [BACKSLASH_ESCAPE],
  };
  const QUOTE_STRING_MODE = {
    className: 'string',
    begin: /"/,
    end: /"/,
    illegal: /\n/,
    contains: [BACKSLASH_ESCAPE],
  };
  const NUMBER_MODE = {
    className: 'number',
    begin: /\b\d+(?:\.\d+)?(?:e[+-]?\d+)?\b/,
    relevance: 0,
  };

  // Shallow copy of `obj` with any number of override objects applied in order.
  function inherit(obj, ...props) {
    return Object.assign({}, obj, ...props);
  }

  let captured = null;

  // Accepts either a definition object or a factory that receives the stub.
  function registerLanguage(name, defOrFn) {
    let def = defOrFn;
    if (typeof defOrFn === 'function') def = defOrFn(hljs);
    captured = { name, def };
  }

  const hljs = {
    BACKSLASH_ESCAPE,
    APOS_STRING_MODE,
    QUOTE_STRING_MODE,
    NUMBER_MODE,
    inherit,
    registerLanguage,
  };
  Object.defineProperty(hljs, '__captured', {
    get() {
      return captured;
    },
  });
  return hljs;
}

// --- Load & execute the language module in a sandbox

/**
 * Run a language module in a `vm` context and extract its definition.
 *
 * The definition is taken, in order of preference, from: the object returned
 * by a function export (called with the stub hljs), a non-empty object export,
 * or the last `hljs.registerLanguage(...)` call made by the module.
 *
 * @param {string} modulePath - Path of the language module to load.
 * @returns {Promise<{def: object, nameHint: (string|null)}>} The definition and,
 *   when it came from `registerLanguage`, the registered name.
 * @throws {Error} If the module throws while executing or no definition object
 *   can be obtained.
 */
async function loadLanguageDef(modulePath) {
  let src = await fs.promises.readFile(modulePath, 'utf8');

  // Simple ESM default export shims
  if (/\bexport\s+default\s+function\b/.test(src)) {
    src = src.replace(/\bexport\s+default\s+function\b/, 'module.exports = function');
  } else if (/\bexport\s+default\b/.test(src)) {
    src = src.replace(/\bexport\s+default\b/, 'module.exports =');
  }

  const hljs = makeHLJS();
  const sandbox = {
    module: { exports: {} },
    exports: {},
    hljs,
    require: () => {
      throw new Error('require() disabled in sandbox');
    },
    __dirname: path.dirname(modulePath),
    __filename: path.resolve(modulePath),
    console,
  };
  vm.createContext(sandbox, { name: 'hljs-lang-sandbox' });

  try {
    vm.runInContext(src, sandbox, {
      filename: modulePath,
      displayErrors: true,
      timeout: 5000,
    });
  } catch (e) {
    // Keep the original error reachable for debugging.
    throw new Error(`Execution error in ${modulePath}: ${e.message}`, { cause: e });
  }

  // Get a definition
  const exported = sandbox.module.exports;
  let def = null;
  let nameHint = null;

  if (typeof exported === 'function') {
    const maybe = exported(sandbox.hljs);
    if (maybe && typeof maybe === 'object') def = maybe;
  } else if (
    exported &&
    typeof exported === 'object' &&
    // The untouched default `{}` is not a definition; skipping it lets the
    // registerLanguage capture below be used.
    Object.keys(exported).length > 0
  ) {
    def = exported;
  }

  const captured = sandbox.hljs.__captured;
  if (!def && captured) {
    def = captured.def;
    nameHint = captured.name || null;
  }

  if (!def || typeof def !== 'object') {
    throw new Error('Could not obtain a language definition object from the module.');
  }
  return { def, nameHint };
}

// --- Helpers: robust RegExp detection across VM realms
const objectToString = Object.prototype.toString;

/**
 * Realm-independent RegExp check (values created inside the vm context are
 * not `instanceof` this realm's RegExp).
 *
 * @param {*} x - Value to test.
 * @returns {boolean} True when `x` is a RegExp.
 */
function isRegExp(x) {
  return objectToString.call(x) === '[object RegExp]';
}

/**
 * Deep-copy a value, replacing every RegExp with its `source` string.
 *
 * @param {*} obj - Value to convert.
 * @param {Set<object>} [ancestors] - Objects on the current recursion path;
 *   used internally for cycle detection.
 * @returns {*} A copy built from plain objects and arrays; primitives (and
 *   functions) are returned unchanged.
 * @throws {Error} If an object contains itself, directly or indirectly.
 */
function toPlain(obj, ancestors = new Set()) {
  if (obj == null) return obj;
  if (isRegExp(obj)) return obj.source;
  if (typeof obj !== 'object') return obj;

  // Only the current path is tracked, so an object shared by two siblings is
  // still copied twice rather than rejected.
  if (ancestors.has(obj)) {
    throw new Error('Cyclic reference in language definition; it cannot be represented as JSON.');
  }
  ancestors.add(obj);

  let out;
  if (Array.isArray(obj)) {
    out = obj.map((item) => toPlain(item, ancestors));
  } else {
    out = {};
    for (const [k, v] of Object.entries(obj)) {
      out[k] = toPlain(v, ancestors);
    }
  }

  ancestors.delete(obj);
  return out;
}

// Class names rewritten during normalization.
const CLASS_NAME_MAP = new Map([['code', 'meta']]);

/**
 * Normalize class names in place throughout a mode tree.
 *
 * A node with `scope` but no `className` gets `className` set from `scope`
 * (and `scope` removed); the resulting class name is then rewritten through
 * CLASS_NAME_MAP. Recurses into `contains`, `variants` and `starts`.
 *
 * @param {*} node - Mode object or array of modes; other values are ignored.
 * @returns {void}
 */
function normalizeClassNames(node) {
  if (!node || typeof node !== 'object') return;
  if (Array.isArray(node)) {
    node.forEach((child) => normalizeClassNames(child));
    return;
  }

  // Move `scope` over first so the mapping below applies to it as well.
  if (!node.className && node.scope) {
    node.className = node.scope;
    delete node.scope;
  }
  if (CLASS_NAME_MAP.has(node.className)) {
    node.className = CLASS_NAME_MAP.get(node.className);
  }

  for (const key of ['contains', 'variants', 'starts']) {
    if (node[key]) normalizeClassNames(node[key]);
  }
}

/**
 * Flatten a keywords value into a single space-separated string.
 *
 * @param {*} kw - String, array of words, or object whose values are strings
 *   or arrays of words.
 * @returns {*} The flattened string; falsy input is returned unchanged. For
 *   objects, the `$pattern` entry is skipped because it is a lexeme pattern,
 *   not a keyword.
 */
function normalizeKeywords(kw) {
  if (!kw) return kw;
  if (typeof kw === 'string') return kw;
  if (Array.isArray(kw)) return kw.join(' ');
  if (typeof kw === 'object') {
    return Object.entries(kw)
      .filter(([category]) => category !== '$pattern')
      .map(([, v]) => (Array.isArray(v) ? v.join(' ') : String(v)))
      .join(' ')
      .trim();
  }
  return String(kw);
}

/**
 * Build the JSON-ready output object from a raw language definition.
 *
 * @param {object} def - Raw definition (may contain RegExp values).
 * @param {string|null} idOpt - `--id` value, if given.
 * @param {string|null} nameOpt - `--name` value; wins over the definition's
 *   own name and over `nameHint`.
 * @param {string|null} nameHint - Name from `registerLanguage`, used last.
 * @param {string[]|null} aliasesOpt - `--aliases` value; wins over the
 *   definition's aliases.
 * @returns {object} Object holding name/id/aliases plus the whitelisted
 *   top-level fields of the definition.
 */
function finalize(def, idOpt, nameOpt, nameHint, aliasesOpt) {
  const plain = toPlain(def);
  normalizeClassNames(plain);

  // Keep keywords.$pattern as `lexemes` (unless one is already set) so it is
  // not lost when the keywords object is flattened to a string.
  const kw = plain.keywords;
  if (
    kw &&
    typeof kw === 'object' &&
    !Array.isArray(kw) &&
    kw.$pattern != null &&
    plain.lexemes == null
  ) {
    plain.lexemes = kw.$pattern;
  }
  if (plain.keywords) plain.keywords = normalizeKeywords(plain.keywords);

  const out = {};
  if (nameOpt) out.name = nameOpt;
  else if (plain.name) out.name = plain.name;
  else if (nameHint) out.name = nameHint;

  if (idOpt) out.id = idOpt;
  if (aliasesOpt) out.aliases = aliasesOpt;

  // Copy common top-level fields
  const copyKeys = new Set([
    'aliases', 'case_insensitive', 'keywords', 'contains', 'illegal', 'variants',
    'begin', 'end', 'className', 'relevance', 'lexemes', 'match', 'starts',
  ]);
  for (const [k, v] of Object.entries(plain)) {
    if (k in out) continue; // command-line values take precedence
    if (copyKeys.has(k)) out[k] = v;
  }

  if (out.aliases && !Array.isArray(out.aliases)) {
    out.aliases = String(out.aliases).split(',').map((s) => s.trim()).filter(Boolean);
  }
  return out;
}

// Entry point: convert inFile and write outFile; exit with status 2 on failure.
(async () => {
  try {
    const { def, nameHint } = await loadLanguageDef(inFile);
    const result = finalize(def, optId, optName, nameHint, optAliases);
    await fs.promises.writeFile(outFile, JSON.stringify(result, null, 2), 'utf8');

    const details = [];
    if (result.id) details.push(`id: ${result.id}`);
    if (result.name) details.push(`name: ${result.name}`);
    const suffix = details.length > 0 ? ` (${details.join(', ')})` : '';
    console.log(`Wrote ${path.basename(outFile)}${suffix}`);
  } catch (e) {
    console.error(e.message);
    process.exit(2);
  }
})();