import { tokenType } from './cst.js'; import { Lexer } from './lexer.js'; function includesToken(list, type) { for (let i = 0; i < list.length; ++i) if (list[i].type === type) return true; return false; } function findNonEmptyIndex(list) { for (let i = 0; i < list.length; ++i) { switch (list[i].type) { case 'space': case 'comment': case 'newline': break; default: return i; } } return -1; } function isFlowToken(token) { switch (token?.type) { case 'alias': case 'scalar': case 'single-quoted-scalar': case 'double-quoted-scalar': case 'flow-collection': return true; default: return false; } } function getPrevProps(parent) { switch (parent.type) { case 'document': return parent.start; case 'block-map': { const it = parent.items[parent.items.length - 1]; return it.sep ?? it.start; } case 'block-seq': return parent.items[parent.items.length - 1].start; /* istanbul ignore next should not happen */ default: return []; } } /** Note: May modify input array */ function getFirstKeyStartProps(prev) { if (prev.length === 0) return []; let i = prev.length; loop: while (--i >= 0) { switch (prev[i].type) { case 'doc-start': case 'explicit-key-ind': case 'map-value-ind': case 'seq-item-ind': case 'newline': break loop; } } while (prev[++i]?.type === 'space') { /* loop */ } return prev.splice(i, prev.length); } function fixFlowSeqItems(fc) { if (fc.start.type === 'flow-seq-start') { for (const it of fc.items) { if (it.sep && !it.value && !includesToken(it.start, 'explicit-key-ind') && !includesToken(it.sep, 'map-value-ind')) { if (it.key) it.value = it.key; delete it.key; if (isFlowToken(it.value)) { if (it.value.end) Array.prototype.push.apply(it.value.end, it.sep); else it.value.end = it.sep; } else Array.prototype.push.apply(it.start, it.sep); delete it.sep; } } } } /** * A YAML concrete syntax tree (CST) parser * * ```ts * const src: string = ... * for (const token of new Parser().parse(src)) { * // token: Token * } * ``` * * To use the parser with a user-provided lexer: * * ```ts * function* parse(source: string, lexer: Lexer) { * const parser = new Parser() * for (const lexeme of lexer.lex(source)) * yield* parser.next(lexeme) * yield* parser.end() * } * * const src: string = ... * const lexer = new Lexer() * for (const token of parse(src, lexer)) { * // token: Token * } * ``` */ class Parser { /** * @param onNewLine - If defined, called separately with the start position of * each new line (in `parse()`, including the start of input). */ constructor(onNewLine) { /** If true, space and sequence indicators count as indentation */ this.atNewLine = true; /** If true, next token is a scalar value */ this.atScalar = false; /** Current indentation level */ this.indent = 0; /** Current offset since the start of parsing */ this.offset = 0; /** On the same line with a block map key */ this.onKeyLine = false; /** Top indicates the node that's currently being built */ this.stack = []; /** The source of the current token, set in parse() */ this.source = ''; /** The type of the current token, set in parse() */ this.type = ''; // Must be defined after `next()` this.lexer = new Lexer(); this.onNewLine = onNewLine; } /** * Parse `source` as a YAML stream. * If `incomplete`, a part of the last line may be left as a buffer for the next call. * * Errors are not thrown, but yielded as `{ type: 'error', message }` tokens. * * @returns A generator of tokens representing each directive, document, and other structure. */ *parse(source, incomplete = false) { if (this.onNewLine && this.offset === 0) this.onNewLine(0); for (const lexeme of this.lexer.lex(source, incomplete)) yield* this.next(lexeme); if (!incomplete) yield* this.end(); } /** * Advance the parser by the `source` of one lexical token. */ *next(source) { this.source = source; if (this.atScalar) { this.atScalar = false; yield* this.step(); this.offset += source.length; return; } const type = tokenType(source); if (!type) { const message = `Not a YAML token: ${source}`; yield* this.pop({ type: 'error', offset: this.offset, message, source }); this.offset += source.length; } else if (type === 'scalar') { this.atNewLine = false; this.atScalar = true; this.type = 'scalar'; } else { this.type = type; yield* this.step(); switch (type) { case 'newline': this.atNewLine = true; this.indent = 0; if (this.onNewLine) this.onNewLine(this.offset + source.length); break; case 'space': if (this.atNewLine && source[0] === ' ') this.indent += source.length; break; case 'explicit-key-ind': case 'map-value-ind': case 'seq-item-ind': if (this.atNewLine) this.indent += source.length; break; case 'doc-mode': case 'flow-error-end': return; default: this.atNewLine = false; } this.offset += source.length; } } /** Call at end of input to push out any remaining constructions */ *end() { while (this.stack.length > 0) yield* this.pop(); } get sourceToken() { const st = { type: this.type, offset: this.offset, indent: this.indent, source: this.source }; return st; } *step() { const top = this.peek(1); if (this.type === 'doc-end' && (!top || top.type !== 'doc-end')) { while (this.stack.length > 0) yield* this.pop(); this.stack.push({ type: 'doc-end', offset: this.offset, source: this.source }); return; } if (!top) return yield* this.stream(); switch (top.type) { case 'document': return yield* this.document(top); case 'alias': case 'scalar': case 'single-quoted-scalar': case 'double-quoted-scalar': return yield* this.scalar(top); case 'block-scalar': return yield* this.blockScalar(top); case 'block-map': return yield* this.blockMap(top); case 'block-seq': return yield* this.blockSequence(top); case 'flow-collection': return yield* this.flowCollection(top); case 'doc-end': return yield* this.documentEnd(top); } /* istanbul ignore next should not happen */ yield* this.pop(); } peek(n) { return this.stack[this.stack.length - n]; } *pop(error) { const token = error ?? this.stack.pop(); /* istanbul ignore if should not happen */ if (!token) { const message = 'Tried to pop an empty stack'; yield { type: 'error', offset: this.offset, source: '', message }; } else if (this.stack.length === 0) { yield token; } else { const top = this.peek(1); if (token.type === 'block-scalar') { // Block scalars use their parent rather than header indent token.indent = 'indent' in top ? top.indent : 0; } else if (token.type === 'flow-collection' && top.type === 'document') { // Ignore all indent for top-level flow collections token.indent = 0; } if (token.type === 'flow-collection') fixFlowSeqItems(token); switch (top.type) { case 'document': top.value = token; break; case 'block-scalar': top.props.push(token); // error break; case 'block-map': { const it = top.items[top.items.length - 1]; if (it.value) { top.items.push({ start: [], key: token, sep: [] }); this.onKeyLine = true; return; } else if (it.sep) { it.value = token; } else { Object.assign(it, { key: token, sep: [] }); this.onKeyLine = !it.explicitKey; return; } break; } case 'block-seq': { const it = top.items[top.items.length - 1]; if (it.value) top.items.push({ start: [], value: token }); else it.value = token; break; } case 'flow-collection': { const it = top.items[top.items.length - 1]; if (!it || it.value) top.items.push({ start: [], key: token, sep: [] }); else if (it.sep) it.value = token; else Object.assign(it, { key: token, sep: [] }); return; } /* istanbul ignore next should not happen */ default: yield* this.pop(); yield* this.pop(token); } if ((top.type === 'document' || top.type === 'block-map' || top.type === 'block-seq') && (token.type === 'block-map' || token.type === 'block-seq')) { const last = token.items[token.items.length - 1]; if (last && !last.sep && !last.value && last.start.length > 0 && findNonEmptyIndex(last.start) === -1 && (token.indent === 0 || last.start.every(st => st.type !== 'comment' || st.indent < token.indent))) { if (top.type === 'document') top.end = last.start; else top.items.push({ start: last.start }); token.items.splice(-1, 1); } } } } *stream() { switch (this.type) { case 'directive-line': yield { type: 'directive', offset: this.offset, source: this.source }; return; case 'byte-order-mark': case 'space': case 'comment': case 'newline': yield this.sourceToken; return; case 'doc-mode': case 'doc-start': { const doc = { type: 'document', offset: this.offset, start: [] }; if (this.type === 'doc-start') doc.start.push(this.sourceToken); this.stack.push(doc); return; } } yield { type: 'error', offset: this.offset, message: `Unexpected ${this.type} token in YAML stream`, source: this.source }; } *document(doc) { if (doc.value) return yield* this.lineEnd(doc); switch (this.type) { case 'doc-start': { if (findNonEmptyIndex(doc.start) !== -1) { yield* this.pop(); yield* this.step(); } else doc.start.push(this.sourceToken); return; } case 'anchor': case 'tag': case 'space': case 'comment': case 'newline': doc.start.push(this.sourceToken); return; } const bv = this.startBlockValue(doc); if (bv) this.stack.push(bv); else { yield { type: 'error', offset: this.offset, message: `Unexpected ${this.type} token in YAML document`, source: this.source }; } } *scalar(scalar) { if (this.type === 'map-value-ind') { const prev = getPrevProps(this.peek(2)); const start = getFirstKeyStartProps(prev); let sep; if (scalar.end) { sep = scalar.end; sep.push(this.sourceToken); delete scalar.end; } else sep = [this.sourceToken]; const map = { type: 'block-map', offset: scalar.offset, indent: scalar.indent, items: [{ start, key: scalar, sep }] }; this.onKeyLine = true; this.stack[this.stack.length - 1] = map; } else yield* this.lineEnd(scalar); } *blockScalar(scalar) { switch (this.type) { case 'space': case 'comment': case 'newline': scalar.props.push(this.sourceToken); return; case 'scalar': scalar.source = this.source; // block-scalar source includes trailing newline this.atNewLine = true; this.indent = 0; if (this.onNewLine) { let nl = this.source.indexOf('\n') + 1; while (nl !== 0) { this.onNewLine(this.offset + nl); nl = this.source.indexOf('\n', nl) + 1; } } yield* this.pop(); break; /* istanbul ignore next should not happen */ default: yield* this.pop(); yield* this.step(); } } *blockMap(map) { const it = map.items[map.items.length - 1]; // it.sep is true-ish if pair already has key or : separator switch (this.type) { case 'newline': this.onKeyLine = false; if (it.value) { const end = 'end' in it.value ? it.value.end : undefined; const last = Array.isArray(end) ? end[end.length - 1] : undefined; if (last?.type === 'comment') end?.push(this.sourceToken); else map.items.push({ start: [this.sourceToken] }); } else if (it.sep) { it.sep.push(this.sourceToken); } else { it.start.push(this.sourceToken); } return; case 'space': case 'comment': if (it.value) { map.items.push({ start: [this.sourceToken] }); } else if (it.sep) { it.sep.push(this.sourceToken); } else { if (this.atIndentedComment(it.start, map.indent)) { const prev = map.items[map.items.length - 2]; const end = prev?.value?.end; if (Array.isArray(end)) { Array.prototype.push.apply(end, it.start); end.push(this.sourceToken); map.items.pop(); return; } } it.start.push(this.sourceToken); } return; } if (this.indent >= map.indent) { const atMapIndent = !this.onKeyLine && this.indent === map.indent; const atNextItem = atMapIndent && (it.sep || it.explicitKey) && this.type !== 'seq-item-ind'; // For empty nodes, assign newline-separated not indented empty tokens to following node let start = []; if (atNextItem && it.sep && !it.value) { const nl = []; for (let i = 0; i < it.sep.length; ++i) { const st = it.sep[i]; switch (st.type) { case 'newline': nl.push(i); break; case 'space': break; case 'comment': if (st.indent > map.indent) nl.length = 0; break; default: nl.length = 0; } } if (nl.length >= 2) start = it.sep.splice(nl[1]); } switch (this.type) { case 'anchor': case 'tag': if (atNextItem || it.value) { start.push(this.sourceToken); map.items.push({ start }); this.onKeyLine = true; } else if (it.sep) { it.sep.push(this.sourceToken); } else { it.start.push(this.sourceToken); } return; case 'explicit-key-ind': if (!it.sep && !it.explicitKey) { it.start.push(this.sourceToken); it.explicitKey = true; } else if (atNextItem || it.value) { start.push(this.sourceToken); map.items.push({ start, explicitKey: true }); } else { this.stack.push({ type: 'block-map', offset: this.offset, indent: this.indent, items: [{ start: [this.sourceToken], explicitKey: true }] }); } this.onKeyLine = true; return; case 'map-value-ind': if (it.explicitKey) { if (!it.sep) { if (includesToken(it.start, 'newline')) { Object.assign(it, { key: null, sep: [this.sourceToken] }); } else { const start = getFirstKeyStartProps(it.start); this.stack.push({ type: 'block-map', offset: this.offset, indent: this.indent, items: [{ start, key: null, sep: [this.sourceToken] }] }); } } else if (it.value) { map.items.push({ start: [], key: null, sep: [this.sourceToken] }); } else if (includesToken(it.sep, 'map-value-ind')) { this.stack.push({ type: 'block-map', offset: this.offset, indent: this.indent, items: [{ start, key: null, sep: [this.sourceToken] }] }); } else if (isFlowToken(it.key) && !includesToken(it.sep, 'newline')) { const start = getFirstKeyStartProps(it.start); const key = it.key; const sep = it.sep; sep.push(this.sourceToken); // @ts-expect-error type guard is wrong here delete it.key; // @ts-expect-error type guard is wrong here delete it.sep; this.stack.push({ type: 'block-map', offset: this.offset, indent: this.indent, items: [{ start, key, sep }] }); } else if (start.length > 0) { // Not actually at next item it.sep = it.sep.concat(start, this.sourceToken); } else { it.sep.push(this.sourceToken); } } else { if (!it.sep) { Object.assign(it, { key: null, sep: [this.sourceToken] }); } else if (it.value || atNextItem) { map.items.push({ start, key: null, sep: [this.sourceToken] }); } else if (includesToken(it.sep, 'map-value-ind')) { this.stack.push({ type: 'block-map', offset: this.offset, indent: this.indent, items: [{ start: [], key: null, sep: [this.sourceToken] }] }); } else { it.sep.push(this.sourceToken); } } this.onKeyLine = true; return; case 'alias': case 'scalar': case 'single-quoted-scalar': case 'double-quoted-scalar': { const fs = this.flowScalar(this.type); if (atNextItem || it.value) { map.items.push({ start, key: fs, sep: [] }); this.onKeyLine = true; } else if (it.sep) { this.stack.push(fs); } else { Object.assign(it, { key: fs, sep: [] }); this.onKeyLine = true; } return; } default: { const bv = this.startBlockValue(map); if (bv) { if (atMapIndent && bv.type !== 'block-seq') { map.items.push({ start }); } this.stack.push(bv); return; } } } } yield* this.pop(); yield* this.step(); } *blockSequence(seq) { const it = seq.items[seq.items.length - 1]; switch (this.type) { case 'newline': if (it.value) { const end = 'end' in it.value ? it.value.end : undefined; const last = Array.isArray(end) ? end[end.length - 1] : undefined; if (last?.type === 'comment') end?.push(this.sourceToken); else seq.items.push({ start: [this.sourceToken] }); } else it.start.push(this.sourceToken); return; case 'space': case 'comment': if (it.value) seq.items.push({ start: [this.sourceToken] }); else { if (this.atIndentedComment(it.start, seq.indent)) { const prev = seq.items[seq.items.length - 2]; const end = prev?.value?.end; if (Array.isArray(end)) { Array.prototype.push.apply(end, it.start); end.push(this.sourceToken); seq.items.pop(); return; } } it.start.push(this.sourceToken); } return; case 'anchor': case 'tag': if (it.value || this.indent <= seq.indent) break; it.start.push(this.sourceToken); return; case 'seq-item-ind': if (this.indent !== seq.indent) break; if (it.value || includesToken(it.start, 'seq-item-ind')) seq.items.push({ start: [this.sourceToken] }); else it.start.push(this.sourceToken); return; } if (this.indent > seq.indent) { const bv = this.startBlockValue(seq); if (bv) { this.stack.push(bv); return; } } yield* this.pop(); yield* this.step(); } *flowCollection(fc) { const it = fc.items[fc.items.length - 1]; if (this.type === 'flow-error-end') { let top; do { yield* this.pop(); top = this.peek(1); } while (top && top.type === 'flow-collection'); } else if (fc.end.length === 0) { switch (this.type) { case 'comma': case 'explicit-key-ind': if (!it || it.sep) fc.items.push({ start: [this.sourceToken] }); else it.start.push(this.sourceToken); return; case 'map-value-ind': if (!it || it.value) fc.items.push({ start: [], key: null, sep: [this.sourceToken] }); else if (it.sep) it.sep.push(this.sourceToken); else Object.assign(it, { key: null, sep: [this.sourceToken] }); return; case 'space': case 'comment': case 'newline': case 'anchor': case 'tag': if (!it || it.value) fc.items.push({ start: [this.sourceToken] }); else if (it.sep) it.sep.push(this.sourceToken); else it.start.push(this.sourceToken); return; case 'alias': case 'scalar': case 'single-quoted-scalar': case 'double-quoted-scalar': { const fs = this.flowScalar(this.type); if (!it || it.value) fc.items.push({ start: [], key: fs, sep: [] }); else if (it.sep) this.stack.push(fs); else Object.assign(it, { key: fs, sep: [] }); return; } case 'flow-map-end': case 'flow-seq-end': fc.end.push(this.sourceToken); return; } const bv = this.startBlockValue(fc); /* istanbul ignore else should not happen */ if (bv) this.stack.push(bv); else { yield* this.pop(); yield* this.step(); } } else { const parent = this.peek(2); if (parent.type === 'block-map' && ((this.type === 'map-value-ind' && parent.indent === fc.indent) || (this.type === 'newline' && !parent.items[parent.items.length - 1].sep))) { yield* this.pop(); yield* this.step(); } else if (this.type === 'map-value-ind' && parent.type !== 'flow-collection') { const prev = getPrevProps(parent); const start = getFirstKeyStartProps(prev); fixFlowSeqItems(fc); const sep = fc.end.splice(1, fc.end.length); sep.push(this.sourceToken); const map = { type: 'block-map', offset: fc.offset, indent: fc.indent, items: [{ start, key: fc, sep }] }; this.onKeyLine = true; this.stack[this.stack.length - 1] = map; } else { yield* this.lineEnd(fc); } } } flowScalar(type) { if (this.onNewLine) { let nl = this.source.indexOf('\n') + 1; while (nl !== 0) { this.onNewLine(this.offset + nl); nl = this.source.indexOf('\n', nl) + 1; } } return { type, offset: this.offset, indent: this.indent, source: this.source }; } startBlockValue(parent) { switch (this.type) { case 'alias': case 'scalar': case 'single-quoted-scalar': case 'double-quoted-scalar': return this.flowScalar(this.type); case 'block-scalar-header': return { type: 'block-scalar', offset: this.offset, indent: this.indent, props: [this.sourceToken], source: '' }; case 'flow-map-start': case 'flow-seq-start': return { type: 'flow-collection', offset: this.offset, indent: this.indent, start: this.sourceToken, items: [], end: [] }; case 'seq-item-ind': return { type: 'block-seq', offset: this.offset, indent: this.indent, items: [{ start: [this.sourceToken] }] }; case 'explicit-key-ind': { this.onKeyLine = true; const prev = getPrevProps(parent); const start = getFirstKeyStartProps(prev); start.push(this.sourceToken); return { type: 'block-map', offset: this.offset, indent: this.indent, items: [{ start, explicitKey: true }] }; } case 'map-value-ind': { this.onKeyLine = true; const prev = getPrevProps(parent); const start = getFirstKeyStartProps(prev); return { type: 'block-map', offset: this.offset, indent: this.indent, items: [{ start, key: null, sep: [this.sourceToken] }] }; } } return null; } atIndentedComment(start, indent) { if (this.type !== 'comment') return false; if (this.indent <= indent) return false; return start.every(st => st.type === 'newline' || st.type === 'space'); } *documentEnd(docEnd) { if (this.type !== 'doc-mode') { if (docEnd.end) docEnd.end.push(this.sourceToken); else docEnd.end = [this.sourceToken]; if (this.type === 'newline') yield* this.pop(); } } *lineEnd(token) { switch (this.type) { case 'comma': case 'doc-start': case 'doc-end': case 'flow-seq-end': case 'flow-map-end': case 'map-value-ind': yield* this.pop(); yield* this.step(); break; case 'newline': this.onKeyLine = false; // fallthrough case 'space': case 'comment': default: // all other values are errors if (token.end) token.end.push(this.sourceToken); else token.end = [this.sourceToken]; if (this.type === 'newline') yield* this.pop(); } } } export { Parser };