#!/usr/bin/env node
// convert-hljs-to-json.mjs
//
// Usage:
//   node convert-hljs-to-json.mjs input.js output.json [--id mirc] [--name "mIRC"] [--aliases mrc,mircs]

import fs from 'fs';
import path from 'path';
import { types as utilTypes } from 'util';
import vm from 'vm';

// Command-line arguments after `node <script>`: the first two are positional
// (<input.js> <output.json>); optional `--flag value` pairs may follow.
const argv = process.argv.slice(2);
if (argv.length < 2) {
  console.error('Usage: node convert-hljs-to-json.mjs input.js output.json [--id id] [--name "Name"] [--aliases a,b,c]');
  process.exit(1);
}
const inFile = argv[0];
const outFile = argv[1];

/**
 * Look up the value that follows a command-line flag.
 *
 * @param {string} flag - Flag to search for, e.g. `'--id'`.
 * @param {string[]} [args=argv] - Argument list to search; defaults to this
 *   script's own arguments.
 * @returns {string|null} The token right after the first occurrence of `flag`,
 *   or `null` when the flag is absent, is the last token, or is followed by an
 *   empty token or by another `--flag`.
 */
function readOpt(flag, args = argv) {
  const flagIndex = args.indexOf(flag);
  if (flagIndex === -1) return null;

  const value = args[flagIndex + 1];
  // A following `--something` is the next flag, not a value for this one.
  if (!value || value.startsWith('--')) return null;
  return value;
}
// Optional overrides supplied on the command line (null when a flag is absent).
const optId = readOpt('--id');
const optName = readOpt('--name');
const optAliasesS = readOpt('--aliases');
// Comma-separated alias list -> trimmed, non-empty entries.
const optAliases = optAliasesS
  ? optAliasesS.split(',').map((s) => s.trim()).filter(Boolean)
  : null;

// --- Minimal hljs stub with common modes & helpers
/**
 * Build a small stand-in for the highlight.js API object that a language
 * module receives, so the module can be evaluated without highlight.js itself.
 *
 * The stub exposes a handful of shared modes, regex-source constants and
 * helpers modelled on highlight.js's built-ins, plus `registerLanguage`, which
 * records the most recent registration instead of installing a grammar.
 * `COMMENT` is a simplified version: it only fills in `className`, `begin`,
 * `end` and an empty `contains` before applying the caller's options.
 *
 * @returns {object} The stub. Its non-enumerable `__captured` getter yields
 *   `{ name, def }` for the last `registerLanguage` call, or `null` if there
 *   has been none.
 */
function makeHLJS() {
  // Regex sources (strings) that language definitions commonly interpolate.
  const IDENT_RE = '[a-zA-Z]\\w*';
  const UNDERSCORE_IDENT_RE = '[a-zA-Z_]\\w*';
  const NUMBER_RE = '\\b\\d+(\\.\\d+)?';
  const C_NUMBER_RE = '(-?)(\\b0[xX][a-fA-F0-9]+|(\\b\\d+(\\.\\d*)?|\\.\\d+)([eE][-+]?\\d+)?)';

  const BACKSLASH_ESCAPE = { begin: /\\[\s\S]/ };

  const APOS_STRING_MODE = {
    className: 'string',
    begin: /'/, end: /'/, illegal: /\n/,
    contains: [BACKSLASH_ESCAPE]
  };

  const QUOTE_STRING_MODE = {
    className: 'string',
    begin: /"/, end: /"/, illegal: /\n/,
    contains: [BACKSLASH_ESCAPE]
  };

  const NUMBER_MODE = {
    className: 'number',
    begin: /\b\d+(?:\.\d+)?(?:e[+-]?\d+)?\b/,
    relevance: 0
  };

  const C_NUMBER_MODE = { className: 'number', begin: C_NUMBER_RE, relevance: 0 };
  const TITLE_MODE = { className: 'title', begin: IDENT_RE, relevance: 0 };
  const UNDERSCORE_TITLE_MODE = { className: 'title', begin: UNDERSCORE_IDENT_RE, relevance: 0 };

  // Shallow-merge any number of override objects onto a copy of `obj`
  // (later objects win); `obj` itself is never mutated.
  function inherit(obj, ...props) { return Object.assign({}, obj, ...props); }

  // Build a fresh comment mode for the given delimiters.
  function COMMENT(begin, end, modeOptions = {}) {
    return inherit({ className: 'comment', begin, end, contains: [] }, modeOptions);
  }

  const C_LINE_COMMENT_MODE = COMMENT('//', '$');
  const C_BLOCK_COMMENT_MODE = COMMENT('/\\*', '\\*/');
  const HASH_COMMENT_MODE = COMMENT('#', '$');

  let captured = null;
  function registerLanguage(name, defOrFn) {
    let def = defOrFn;
    // Language modules normally pass a factory that takes the hljs object.
    if (typeof defOrFn === 'function') def = defOrFn(hljs);
    captured = { name, def };
  }

  const hljs = {
    IDENT_RE,
    UNDERSCORE_IDENT_RE,
    NUMBER_RE,
    C_NUMBER_RE,
    BACKSLASH_ESCAPE,
    APOS_STRING_MODE,
    QUOTE_STRING_MODE,
    NUMBER_MODE,
    C_NUMBER_MODE,
    TITLE_MODE,
    UNDERSCORE_TITLE_MODE,
    C_LINE_COMMENT_MODE,
    C_BLOCK_COMMENT_MODE,
    HASH_COMMENT_MODE,
    COMMENT,
    inherit,
    registerLanguage
  };
  // Getter (not a data property) so it always reflects the latest registration.
  Object.defineProperty(hljs, '__captured', { get() { return captured; } });
  return hljs;
}

// --- Load & execute the language module in a sandbox
/**
 * Read a highlight.js language module, evaluate it in a `vm` context and
 * extract the language definition object it provides.
 *
 * A definition is looked for in this order:
 *   1. `module.exports` is a function: it is called with the hljs stub and an
 *      object return value is used.
 *   2. `module.exports` is an object with at least one own enumerable key
 *      (`exports` is the same object, as in CommonJS).
 *   3. The last `hljs.registerLanguage(name, def)` call recorded by the stub;
 *      its `name` becomes `nameHint`.
 *
 * NOTE: Node's `vm` module is not a security boundary; only run trusted input.
 *
 * @param {string} modulePath - Path of the language module to evaluate.
 * @param {object} [hljs=makeHLJS()] - hljs stand-in handed to the module. Its
 *   `__captured` property, if any, supplies the `registerLanguage` fallback.
 * @returns {Promise<{def: object, nameHint: (string|null)}>}
 * @throws {Error} If the file cannot be read, evaluation fails, or no
 *   definition object is found.
 */
async function loadLanguageDef(modulePath, hljs = makeHLJS()) {
  let src = await fs.promises.readFile(modulePath, 'utf8');

  // Simple ESM default export shim: the first `export default` becomes a
  // CommonJS assignment (this also covers `export default function ...`).
  src = src.replace(/\bexport\s+default\b/, 'module.exports =');

  const absolutePath = path.resolve(modulePath);
  const sandboxModule = { exports: {} };
  const sandbox = {
    module: sandboxModule,
    // Same object as module.exports so `exports.x = ...` is not lost.
    exports: sandboxModule.exports,
    hljs,
    require: () => { throw new Error('require() disabled in sandbox'); },
    __dirname: path.dirname(absolutePath),
    __filename: absolutePath,
    console
  };

  vm.createContext(sandbox, { name: 'hljs-lang-sandbox' });
  try {
    // The timeout covers only this top-level evaluation, not the later call
    // of an exported factory function.
    vm.runInContext(src, sandbox, {
      filename: modulePath,
      displayErrors: true,
      timeout: 5000
    });
  } catch (e) {
    throw new Error(`Execution error in ${modulePath}: ${e.message}`, { cause: e });
  }

  // Get a definition
  const exported = sandbox.module.exports;
  let def = null;
  let nameHint = null;

  if (typeof exported === 'function') {
    const maybe = exported(hljs);
    if (maybe && typeof maybe === 'object') def = maybe;
  } else if (exported && typeof exported === 'object' && Object.keys(exported).length > 0) {
    // An untouched (empty) exports object is not a definition; it must not
    // shadow a language registered through hljs.registerLanguage().
    def = exported;
  }

  if (!def) {
    // Read after the export was evaluated/called so late registrations count.
    const captured = hljs.__captured;
    if (captured && captured.def && typeof captured.def === 'object') {
      def = captured.def;
      nameHint = captured.name || null;
    }
  }

  if (!def || typeof def !== 'object') {
    throw new Error('Could not obtain a language definition object from the module.');
  }
  return { def, nameHint };
}

// --- Helpers: robust RegExp detection across VM realms
/**
 * Report whether a value is a regular-expression object.
 *
 * `instanceof RegExp` fails for regexes created inside a `vm` context, and the
 * `Object.prototype.toString` tag can be spoofed or masked through
 * `Symbol.toStringTag`, so Node's built-in type check is used instead.
 *
 * @param {*} x - Value to test.
 * @returns {boolean} `true` only for RegExp objects.
 */
function isRegExp(x) {
  return utilTypes.isRegExp(x);
}

// Deep convert RegExp to strings
/**
 * Produce a deep copy of `obj` in which every RegExp is replaced by its
 * `source` string (flags are not kept). Arrays and objects are copied
 * recursively; every other value, including `null`, `undefined` and
 * functions, is returned unchanged.
 *
 * @param {*} obj - Value to convert; it is not mutated.
 * @returns {*} The converted copy.
 * @throws {Error} If an object or array contains itself, directly or
 *   indirectly; such a structure cannot be copied or written as JSON.
 */
function toPlain(obj) {
  // Containers on the current recursion path. Entries are removed on the way
  // back up so a value that is merely shared (not cyclic) is still copied.
  const ancestors = new Set();

  const convert = (value) => {
    if (value == null) return value;
    if (isRegExp(value)) return value.source;
    if (typeof value !== 'object') return value;

    if (ancestors.has(value)) {
      throw new Error('Circular reference in language definition; cannot convert it to JSON.');
    }
    ancestors.add(value);

    let copy;
    if (Array.isArray(value)) {
      copy = value.map((item) => convert(item));
    } else {
      copy = {};
      for (const [key, child] of Object.entries(value)) {
        copy[key] = convert(child);
      }
    }

    ancestors.delete(value);
    return copy;
  };

  return convert(obj);
}

// Normalize classes & structure
// Class names rewritten on output. A Map is used so that inherited object
// keys such as 'constructor' can never be mistaken for a rename entry.
const CLASS_NAME_RENAMES = new Map([['code', 'meta']]);

/**
 * Normalize the class naming of a mode tree in place.
 *
 * For every object node: a `scope` is moved to `className` when no
 * `className` is set, then `className` is rewritten according to
 * `CLASS_NAME_RENAMES`. The walk descends into `contains`, `variants` and
 * `starts`; arrays are processed element by element and non-object values
 * (such as the string `'self'`) are ignored.
 *
 * @param {*} node - Mode object, array of modes, or any other value.
 * @returns {void}
 */
function normalizeClassNames(node) {
  if (!node || typeof node !== 'object') return;
  if (Array.isArray(node)) {
    for (const child of node) normalizeClassNames(child);
    return;
  }

  // Promote `scope` first so a promoted name goes through the rename too.
  if (!node.className && node.scope) {
    node.className = node.scope;
    delete node.scope;
  }

  if (CLASS_NAME_RENAMES.has(node.className)) {
    node.className = CLASS_NAME_RENAMES.get(node.className);
  }

  for (const key of ['contains', 'variants', 'starts']) {
    if (node[key]) normalizeClassNames(node[key]);
  }
}

/**
 * Flatten a highlight.js `keywords` value into one space-separated string.
 *
 * - falsy values are returned unchanged;
 * - a string is returned as is;
 * - an array is joined with spaces;
 * - an object has the values of all its groups joined with spaces. Keys that
 *   start with `$` (such as `$pattern`, which holds a regex rather than
 *   words) and `null`/`undefined`/empty groups are skipped.
 *
 * @param {*} kw - Keywords in any of the forms above.
 * @returns {*} The flattened string, or `kw` itself when it is falsy.
 */
function normalizeKeywords(kw) {
  if (!kw) return kw;
  if (typeof kw === 'string') return kw;
  if (Array.isArray(kw)) return kw.join(' ');
  if (typeof kw === 'object') {
    return Object.entries(kw)
      .filter(([group, words]) => !group.startsWith('$') && words != null)
      .map(([, words]) => (Array.isArray(words) ? words.join(' ') : String(words)))
      .filter(Boolean)
      .join(' ')
      .trim();
  }
  return String(kw);
}

/**
 * Turn a raw language definition into the plain object that is written out.
 *
 * The definition is deep-copied with regexes converted to strings, class
 * names are normalized, and the top-level `keywords` are flattened to a
 * string. The result starts with `name`, `id` and `aliases` (when available)
 * followed by a fixed set of top-level fields copied from the definition.
 *
 * NOTE(review): only the top-level `keywords` are flattened; `keywords` on
 * nested modes are copied as they are. Confirm that is what consumers expect.
 *
 * @param {object} def - Language definition; not mutated.
 * @param {string|null} idOpt - Value for `id`; omitted when falsy.
 * @param {string|null} nameOpt - Preferred `name`; falls back to `def.name`,
 *   then to `nameHint`.
 * @param {string|null} nameHint - Name recorded when the language was
 *   registered, used as the last resort.
 * @param {string[]|null} aliasesOpt - Aliases that replace the definition's
 *   own `aliases` when given.
 * @returns {object} The output object.
 */
function finalize(def, idOpt, nameOpt, nameHint, aliasesOpt) {
  const plain = toPlain(def);
  normalizeClassNames(plain);
  if (plain.keywords) plain.keywords = normalizeKeywords(plain.keywords);

  const out = {};
  const name = nameOpt || plain.name || nameHint;
  if (name) out.name = name;
  if (idOpt) out.id = idOpt;
  if (aliasesOpt) out.aliases = aliasesOpt;

  // Copy common top-level fields
  const copyKeys = new Set([
    'aliases', 'case_insensitive', 'keywords', 'contains', 'illegal', 'variants',
    'begin', 'end', 'className', 'relevance', 'lexemes', 'match', 'starts'
  ]);
  for (const [key, value] of Object.entries(plain)) {
    // Values set from the command line above take precedence.
    if (Object.prototype.hasOwnProperty.call(out, key)) continue;
    if (copyKeys.has(key)) out[key] = value;
  }

  // A definition may give aliases as a single comma-separated string.
  if (out.aliases && !Array.isArray(out.aliases)) {
    out.aliases = String(out.aliases).split(',').map((s) => s.trim()).filter(Boolean);
  }
  return out;
}

// Entry point: load the definition, convert it and write the JSON file.
// Exits with status 2 if any step fails.
(async () => {
  try {
    const { def, nameHint } = await loadLanguageDef(inFile);
    const result = finalize(def, optId, optName, nameHint, optAliases);
    await fs.promises.writeFile(outFile, JSON.stringify(result, null, 2), 'utf8');

    // Summary suffix such as " (id: mirc, name: mIRC)"; empty when neither is set.
    const details = [];
    if (result.id) details.push(`id: ${result.id}`);
    if (result.name) details.push(`name: ${result.name}`);
    const suffix = details.length > 0 ? ` (${details.join(', ')})` : '';
    console.log(`Wrote ${path.basename(outFile)}${suffix}`);
  } catch (e) {
    // A rejection is not guaranteed to be an Error instance.
    console.error(e instanceof Error ? e.message : String(e));
    process.exit(2);
  }
})();
// Raw Paste