Files
rspade_system/node_modules/@jqhtml/parser/dist/lexer.js
2025-12-03 21:28:08 +00:00

1446 lines
60 KiB
JavaScript
Executable File

// JQHTML Lexer - Simple character scanner, no regex
// Tracks positions for source map support
import { JQHTMLParseError } from './errors.js';
// Enumeration of every token kind the lexer can emit.
// Each member maps its own name to the identical string (e.g. TokenType.TEXT === "TEXT").
export var TokenType;
(function (TokenType) {
    const memberNames = [
        // Literals
        "TEXT",
        // JQHTML tags
        "EXPRESSION_START",
        "EXPRESSION_UNESCAPED",
        "CODE_START",
        "TAG_END",
        // Comments
        "COMMENT",
        // Component definition
        "DEFINE_START",
        "DEFINE_END",
        "COMPONENT_NAME",
        // Slots (v2)
        "SLOT_START",
        "SLOT_END",
        "SLOT_NAME",
        // HTML tags
        "TAG_OPEN",
        "TAG_CLOSE",
        "TAG_NAME",
        "SELF_CLOSING",
        // Attributes
        "ATTR_NAME",
        "ATTR_VALUE",
        // Delimiters
        "COLON",
        "SEMICOLON",
        "GT",
        "LT",
        "SLASH",
        "EQUALS",
        "QUOTE",
        // Special
        "EOF",
        "NEWLINE",
        "WHITESPACE",
        // JavaScript code
        "JAVASCRIPT",
    ];
    // Insertion order is kept identical to the declaration order above.
    for (const memberName of memberNames) {
        TokenType[memberName] = memberName;
    }
})(TokenType || (TokenType = {}));
export class Lexer {
// Template text being scanned; assigned by the constructor after the preprocessing passes.
input;
// Index into `input` of the character currently under the cursor.
position = 0;
// Line and column of the cursor; both are maintained by advance().
line = 1;
column = 1;
// Tokens produced so far; add_token() appends to this array.
tokens = [];
// Position snapshot stored by savePosition() and handed back (then cleared) by getSavedPosition()
savedPosition = null;
constructor(input) {
// Preprocess: Normalize all line endings to \n (handles \r\n and \r)
// This ensures the lexer only needs to handle \n throughout
let processed = input.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
// Preprocess: Replace JQHTML comments (<%-- --%) with equivalent newlines to preserve line mapping
processed = this.preprocessComments(processed);
// Preprocess: Replace HTML comments (<!-- -->) outside Define tags with equivalent newlines
processed = this.preprocessHTMLComments(processed);
// Preprocess: Insert // for empty lines in code blocks to preserve line mapping
processed = this.preprocessCodeBlocks(processed);
this.input = processed;
}
/**
* Save current position for later token creation
*/
savePosition() {
this.savedPosition = {
line: this.line,
column: this.column,
offset: this.position
};
return this.savedPosition;
}
/**
* Get saved position or current position
*/
getSavedPosition() {
if (this.savedPosition) {
const pos = this.savedPosition;
this.savedPosition = null;
return pos;
}
return {
line: this.line,
column: this.column,
offset: this.position
};
}
/**
* Replace <%-- comment --%> with equivalent number of newlines
* This ensures line mapping stays accurate while removing comment content
*/
preprocessComments(input) {
let result = input;
let searchPos = 0;
while (true) {
// Find next comment start
const startIdx = result.indexOf('<%--', searchPos);
if (startIdx === -1)
break;
// Find matching comment end
const endIdx = result.indexOf('--%>', startIdx + 4);
if (endIdx === -1) {
// Unclosed comment - leave it for parser to report error
break;
}
// Extract the comment including delimiters
const commentText = result.substring(startIdx, endIdx + 4);
// Count newlines in the comment
const newlineCount = (commentText.match(/\n/g) || []).length;
// Replace comment with spaces and same number of newlines
// We preserve the same total length to keep position tracking accurate
let replacement = '';
let charsNeeded = commentText.length;
// First, add the newlines at their original positions
let commentPos = 0;
for (let i = 0; i < commentText.length; i++) {
if (commentText[i] === '\n') {
replacement += '\n';
}
else {
replacement += ' ';
}
}
// Replace the comment with the spacing
result = result.substring(0, startIdx) + replacement + result.substring(endIdx + 4);
// Move search position past this replacement
searchPos = startIdx + replacement.length;
}
return result;
}
/**
* Replace HTML comments (<!-- -->) that appear OUTSIDE of <Define> tags
* This strips documentation comments before component definitions
* HTML comments INSIDE <Define> tags are preserved in the output
*/
preprocessHTMLComments(input) {
let result = input;
let searchPos = 0;
let insideDefine = false;
while (searchPos < result.length) {
// Check if we're entering or leaving a Define tag
if (result.substring(searchPos, searchPos + 8) === '<Define:') {
insideDefine = true;
searchPos += 8;
continue;
}
if (result.substring(searchPos, searchPos + 9) === '</Define:') {
insideDefine = false;
searchPos += 9;
continue;
}
// Only strip HTML comments if we're outside Define tags
if (!insideDefine && result.substring(searchPos, searchPos + 4) === '<!--') {
const startIdx = searchPos;
// Find matching comment end
const endIdx = result.indexOf('-->', searchPos + 4);
if (endIdx === -1) {
// Unclosed comment - leave it for lexer to report error
break;
}
// Extract the comment including delimiters
const commentText = result.substring(startIdx, endIdx + 3);
// Replace comment with spaces and same number of newlines to preserve line mapping
let replacement = '';
for (let i = 0; i < commentText.length; i++) {
if (commentText[i] === '\n') {
replacement += '\n';
}
else {
replacement += ' ';
}
}
// Replace the comment with the spacing
result = result.substring(0, startIdx) + replacement + result.substring(endIdx + 3);
// Move search position past this replacement
searchPos = startIdx + replacement.length;
}
else {
searchPos++;
}
}
return result;
}
/**
 * Rewrite every <% ... %> region of the template so that line numbers survive
 * later processing.
 *
 * - Expressions (<%= ... %> and <%!= ... %>) that span several lines are
 *   collapsed onto one line: `//` comments outside strings are overwritten
 *   with spaces, each newline becomes a space, and the same number of
 *   newlines is re-inserted immediately after the closing %>.
 * - Code blocks (<% ... %>) get the placeholder ' /* empty line *​/ ' on every
 *   whitespace-only line except the last line of the block.
 *
 * The closing %> of each region is located with findClosingTag().
 * An unclosed expression is skipped and scanning continues; an unclosed code
 * block ends preprocessing altogether.
 *
 * @param {string} input template text
 * @returns {string} rewritten template text
 */
preprocessCodeBlocks(input) {
let result = input;
let searchPos = 0;
while (true) {
// Find next <% sequence
let startIdx = result.indexOf('<%', searchPos);
if (startIdx === -1)
break;
// Check what type of block this is
const isExpression = result[startIdx + 2] === '=';
const isUnescapedExpression = result.substring(startIdx + 2, startIdx + 4) === '!=';
if (isExpression || isUnescapedExpression) {
// Handle expressions: collapse to single line
// exprStart is the first character after the opener (<%= is 3 chars, <%!= is 4)
const exprStart = isUnescapedExpression ? startIdx + 4 : startIdx + 3;
// Find matching %> considering strings
const endIdx = this.findClosingTag(result, exprStart);
if (endIdx === -1) {
// Unclosed expression - leave it for parser to report error
// and keep looking for further <% sequences after this one
searchPos = startIdx + 2;
continue;
}
// Extract the expression content
const exprContent = result.substring(exprStart, endIdx);
// Count newlines in the expression
const newlineCount = (exprContent.match(/\n/g) || []).length;
// Single-line expressions are left exactly as they are
if (newlineCount > 0) {
// Strip line comments BEFORE collapsing to avoid breaking parser
// (once the newlines are gone, a // comment would swallow the rest of the expression)
let processedExpr = exprContent;
// Replace // comments with spaces (preserve length for sourcemaps)
let cleaned = '';
let inString = false;
let stringDelim = '';
let escaped = false;
for (let i = 0; i < processedExpr.length; i++) {
const ch = processedExpr[i];
const next = processedExpr[i + 1] || '';
// Handle escape sequences
// The character following a backslash inside a string is copied verbatim
if (escaped) {
cleaned += ch;
escaped = false;
continue;
}
if (ch === '\\' && inString) {
cleaned += ch;
escaped = true;
continue;
}
// Track strings
if (!inString && (ch === '"' || ch === "'" || ch === '`')) {
inString = true;
stringDelim = ch;
cleaned += ch;
}
else if (inString && ch === stringDelim) {
inString = false;
cleaned += ch;
}
else if (!inString && ch === '/' && next === '/') {
// Found line comment - replace with spaces until newline
// (the newline itself is not consumed here; the next iteration copies it)
cleaned += ' ';
i++; // skip second /
cleaned += ' ';
while (i + 1 < processedExpr.length && processedExpr[i + 1] !== '\n') {
i++;
cleaned += ' ';
}
}
else {
cleaned += ch;
}
}
// Collapse multi-line expression to single line
// Replace all newlines with spaces to preserve token separation
// NOTE(review): newlines inside string/template literals are replaced as well
const collapsedExpr = cleaned.replace(/\n/g, ' ');
// Add trailing newlines after the expression
const trailingNewlines = '\n'.repeat(newlineCount);
// Reconstruct with collapsed expression and trailing newlines
const prefix = result.substring(0, exprStart);
const suffix = result.substring(endIdx);
// suffix starts at the closing tag, so suffix.substring(0, 2) is the "%>" itself;
// the removed newlines are re-inserted right after it
result = prefix + collapsedExpr + suffix.substring(0, 2) + trailingNewlines + suffix.substring(2);
}
// Resume the search just after this expression's "<%"
searchPos = startIdx + 2;
}
else {
// Handle code blocks: insert /* empty line */ for empty lines
const endIdx = this.findClosingTag(result, startIdx + 2);
if (endIdx === -1) {
// Unclosed code block - leave it for parser to report error
// (this also stops preprocessing of everything that follows)
break;
}
// Extract the code block content between <% and %>
const blockContent = result.substring(startIdx + 2, endIdx);
// Process the content line by line
// NOTE(review): the split is purely textual, so blank lines inside template
// literals or block comments receive the placeholder too
const lines = blockContent.split('\n');
const processedLines = [];
for (let i = 0; i < lines.length; i++) {
const line = lines[i];
const trimmed = line.trim();
// Don't add placeholders on the last line if it's empty
// (this would be right before the %>)
if (trimmed === '' && i < lines.length - 1) {
// Empty line - use /* */ instead of // to avoid breaking code
// Extra trailing space helps with alignment
processedLines.push(' /* empty line */ ');
}
else {
// Line has code or is the last line - keep as is
processedLines.push(line);
}
}
// Reconstruct the code block
const processedContent = processedLines.join('\n');
result = result.substring(0, startIdx + 2) + processedContent + result.substring(endIdx);
// Move search position past this block
searchPos = startIdx + 2 + processedContent.length + 2; // +2 for %>
}
}
return result;
}
/**
* Find the closing %> tag, properly handling strings and comments
*/
findClosingTag(input, startPos) {
let pos = startPos;
let inString = false;
let stringDelimiter = '';
let inComment = false;
let commentType = '';
while (pos < input.length - 1) {
const char = input[pos];
const nextChar = input[pos + 1];
// Handle string literals
if (!inComment) {
if (!inString && (char === '"' || char === "'" || char === '`')) {
inString = true;
stringDelimiter = char;
}
else if (inString && char === stringDelimiter) {
// Check if it's escaped
let escapeCount = 0;
let checkPos = pos - 1;
while (checkPos >= 0 && input[checkPos] === '\\') {
escapeCount++;
checkPos--;
}
if (escapeCount % 2 === 0) {
inString = false;
stringDelimiter = '';
}
}
}
// Handle comments
if (!inString && !inComment) {
if (char === '/' && nextChar === '/') {
inComment = true;
commentType = 'line';
}
else if (char === '/' && nextChar === '*') {
inComment = true;
commentType = 'block';
}
}
else if (inComment) {
if (commentType === 'line' && char === '\n') {
inComment = false;
commentType = '';
}
else if (commentType === 'block' && char === '*' && nextChar === '/') {
inComment = false;
commentType = '';
pos++; // Skip the /
}
}
// Check for closing tag only if not in string or comment
if (!inString && !inComment) {
if (char === '%' && nextChar === '>') {
return pos;
}
}
pos++;
}
return -1; // Not found
}
tokenize() {
while (this.position < this.input.length) {
this.scan_next();
}
this.add_token(TokenType.EOF, '', this.position, this.position);
return this.tokens;
}
scan_next() {
const start = this.position;
const start_line = this.line;
const start_column = this.column;
// Check for JQHTML tags first
// Comments are now preprocessed out, so we don't need to check for them
// Check for invalid <%== syntax (common mistake)
if (this.match_sequence('<%==')) {
const error = new JQHTMLParseError('Invalid expression syntax: <%== is not valid JQHTML syntax', this.line, this.column - 4, // Point to the start of <%==
this.input);
error.suggestion = '\n\nValid expression syntax:\n' +
' <%= expr %> - Escaped output (safe, default)\n' +
' <%!= expr %> - Unescaped HTML output (raw)\n\n' +
'Did you mean:\n' +
' <%= ... %> for escaped output, or\n' +
' <%!= ... %> for unescaped/raw HTML output?';
throw error;
}
if (this.match_sequence('<%!=')) {
this.add_token(TokenType.EXPRESSION_UNESCAPED, '<%!=', start, this.position);
this.scan_expression();
return;
}
if (this.match_sequence('<%=')) {
this.add_token(TokenType.EXPRESSION_START, '<%=', start, this.position);
this.scan_expression();
return;
}
if (this.match_sequence('<%')) {
this.add_token(TokenType.CODE_START, '<%', start, this.position);
this.scan_code_block();
return;
}
if (this.match_sequence('%>')) {
this.add_token(TokenType.TAG_END, '%>', start, this.position);
return;
}
// Check for Define tags
if (this.match_sequence('<Define:')) {
this.add_token(TokenType.DEFINE_START, '<Define:', start, this.position);
this.scan_component_name();
return;
}
if (this.match_sequence('</Define:')) {
this.add_token(TokenType.DEFINE_END, '</Define:', start, this.position);
this.scan_component_name();
return;
}
// Check for slot tags (v2)
if (this.match_sequence('</Slot:')) {
this.add_token(TokenType.SLOT_END, '</Slot:', start, this.position);
this.scan_slot_name();
return;
}
if (this.match_sequence('<Slot:')) {
this.add_token(TokenType.SLOT_START, '<Slot:', start, this.position);
this.scan_slot_name();
return;
}
// Check for HTML comment first
if (this.current_char() === '<' && this.peek_ahead(1) === '!' &&
this.peek_ahead(2) === '-' && this.peek_ahead(3) === '-') {
this.scan_html_comment();
return;
}
// Check for HTML tags (including components)
if (this.current_char() === '<') {
// Peek ahead to see if this is an HTML tag
if (this.peek_ahead(1) === '/') {
// Closing tag
if (this.is_tag_name_char(this.peek_ahead(2))) {
this.scan_closing_tag();
return;
}
}
else if (this.is_tag_name_char(this.peek_ahead(1))) {
// Opening tag
this.scan_opening_tag();
return;
}
}
// Single character tokens
const char = this.current_char();
// Don't tokenize < and > separately when they're part of HTML
// They should be part of TEXT tokens
/*
if (char === '<') {
this.advance();
this.add_token(TokenType.LT, '<', start, this.position);
return;
}
if (char === '>') {
this.advance();
this.add_token(TokenType.GT, '>', start, this.position);
return;
}
*/
if (char === '\n') {
this.advance();
this.add_token(TokenType.NEWLINE, '\n', start, this.position);
return;
}
// Default: scan as text until next special character
this.scan_text();
}
scan_text() {
const start = this.position;
const start_line = this.line;
const start_column = this.column;
let text = '';
while (this.position < this.input.length) {
const char = this.current_char();
// Stop at any potential tag start
if (char === '<') {
// Check for HTML comment first - don't break, these should be in text
if (this.peek_ahead(1) === '!' &&
this.peek_ahead(2) === '-' &&
this.peek_ahead(3) === '-') {
break; // HTML comment will be handled separately
}
// Peek ahead for special sequences
if (this.peek_ahead(1) === '%' ||
this.peek_sequence_at(1, 'Slot:') || // Slot start
this.peek_sequence_at(1, '/Slot:') || // Slot end
this.peek_ahead(1) === 'D' && this.peek_sequence_at(1, 'Define:') ||
this.peek_ahead(1) === '/' && this.peek_sequence_at(1, '/Define:')) {
break;
}
// Also stop for HTML tags
if (this.peek_ahead(1) === '/' && this.is_tag_name_char(this.peek_ahead(2))) {
break; // Closing tag
}
if (this.is_tag_name_char(this.peek_ahead(1))) {
break; // Opening tag
}
}
if (char === '%' && this.peek_ahead(1) === '>') {
break;
}
text += char;
this.advance();
}
if (text.length > 0) {
this.add_token(TokenType.TEXT, text, start, this.position, start_line, start_column);
}
}
scan_code_block() {
// After <%, save the original position INCLUDING whitespace
const position_with_whitespace = this.position;
// Now skip whitespace to check for keywords
this.skip_whitespace();
const saved_position = this.position;
// It's regular JavaScript code - rewind to include whitespace
this.position = position_with_whitespace;
this.scan_javascript();
}
scan_comment() {
// Scan comment from <%-- to --%>
const start = this.position - 4; // Already consumed <%--
let comment = '';
while (this.position < this.input.length) {
if (this.match_sequence('--%>')) {
// Found end of comment
// Don't trim - we need to preserve whitespace for line mapping
this.add_token(TokenType.COMMENT, comment, start, this.position);
return;
}
const char = this.current_char();
comment += char;
this.advance();
}
// Error: unterminated comment
throw new JQHTMLParseError('Unterminated comment', this.line, this.column, this.input);
}
scan_html_comment() {
// Scan HTML comment from <!-- to -->
// Everything inside should be treated as raw text, no parsing
const start = this.position;
// Consume <!--
this.advance(); // <
this.advance(); // !
this.advance(); // -
this.advance(); // -
let comment = '<!--';
// Scan until we find -->
while (this.position < this.input.length) {
if (this.current_char() === '-' &&
this.peek_ahead(1) === '-' &&
this.peek_ahead(2) === '>') {
// Found end of HTML comment
comment += '-->';
this.advance(); // -
this.advance(); // -
this.advance(); // >
// Add the entire HTML comment as a TEXT token
// This ensures it gets passed through as-is to the output
this.add_token(TokenType.TEXT, comment, start, this.position);
return;
}
const char = this.current_char();
comment += char;
// Track line numbers for error reporting
if (char === '\n') {
this.line++;
this.column = 1;
}
else {
this.column++;
}
this.advance();
}
// Error: unterminated HTML comment
throw new JQHTMLParseError('Unterminated HTML comment', this.line, this.column, this.input);
}
scan_expression() {
// After <%=, scan JavaScript until %>
// Strip line comments from interpolation blocks to avoid breaking parser
this.scan_javascript(true);
}
scan_javascript(strip_line_comments = false) {
const start = this.position;
let code = '';
let in_string = false;
let string_delimiter = '';
let escape_next = false;
while (this.position < this.input.length) {
const char = this.current_char();
// Handle escape sequences in strings
if (escape_next) {
code += char;
this.advance();
escape_next = false;
continue;
}
// Handle backslash (escape character)
if (char === '\\' && in_string) {
escape_next = true;
code += char;
this.advance();
continue;
}
// Handle string delimiters
if ((char === '"' || char === "'" || char === '`') && !in_string) {
in_string = true;
string_delimiter = char;
}
else if (char === string_delimiter && in_string) {
in_string = false;
string_delimiter = '';
}
// Strip line comments in interpolation blocks (outside strings)
if (strip_line_comments && !in_string && char === '/' && this.peek_ahead(1) === '/') {
// Replace EVERY character from // up to (but not including) newline with = for debugging
// This maintains exact position alignment for sourcemaps
// Replace first /
code += ' ';
this.advance();
// Replace second /
code += ' ';
this.advance();
// Replace all comment text with spaces until we hit newline or %>
while (this.position < this.input.length) {
const next = this.current_char();
// Found newline - preserve it and stop
if (next === '\n') {
code += next;
this.advance();
break;
}
// Handle \r\n or \r
if (next === '\r') {
code += next;
this.advance();
// Check for \n following \r
if (this.current_char() === '\n') {
code += '\n';
this.advance();
}
break;
}
// Found closing %> - stop (don't consume it)
if (next === '%' && this.peek_ahead(1) === '>') {
break;
}
// Replace this comment character with space
code += ' ';
this.advance();
}
continue;
}
// Only look for %> when not inside a string
if (!in_string && char === '%' && this.peek_ahead(1) === '>') {
break;
}
code += char;
this.advance();
}
// Don't trim when stripping comments - preserve whitespace for proper parsing
const finalCode = strip_line_comments ? code : code.trim();
if (finalCode.trim().length > 0) {
this.add_token(TokenType.JAVASCRIPT, finalCode, start, this.position);
}
}
scan_component_name() {
const start = this.position;
let name = '';
while (this.position < this.input.length) {
const char = this.current_char();
// Component names are alphanumeric with underscores
if ((char >= 'a' && char <= 'z') ||
(char >= 'A' && char <= 'Z') ||
(char >= '0' && char <= '9') ||
char === '_') {
name += char;
this.advance();
}
else {
break;
}
}
if (name.length > 0) {
this.add_token(TokenType.COMPONENT_NAME, name, start, this.position);
}
// Skip whitespace after component name
this.skip_whitespace();
// Also skip newlines after component name (for multiline Define tags)
while (this.current_char() === '\n' || this.current_char() === '\r') {
if (this.current_char() === '\n') {
this.add_token(TokenType.NEWLINE, '\n', this.position, this.position + 1);
}
this.advance();
this.skip_whitespace();
}
// If we see attributes, scan them
if (this.is_attribute_start_char(this.current_char())) {
this.scan_attributes();
}
else if (this.current_char() === '>') {
// Otherwise scan the closing >
const gt_start = this.position;
this.advance();
this.add_token(TokenType.GT, '>', gt_start, this.position);
}
}
scan_slot_name() {
const start = this.position;
let name = '';
while (this.position < this.input.length) {
const char = this.current_char();
// Slot names are alphanumeric with underscores, same as components
if ((char >= 'a' && char <= 'z') ||
(char >= 'A' && char <= 'Z') ||
(char >= '0' && char <= '9') ||
char === '_') {
name += char;
this.advance();
}
else {
break;
}
}
if (name.length > 0) {
this.add_token(TokenType.SLOT_NAME, name, start, this.position);
}
// Skip whitespace before attributes or closing
this.skip_whitespace();
// For self-closing slots, check for />
if (this.current_char() === '/' && this.peek_ahead(1) === '>') {
const slash_start = this.position;
this.advance(); // consume /
this.add_token(TokenType.SLASH, '/', slash_start, this.position);
const gt_start = this.position;
this.advance(); // consume >
this.add_token(TokenType.GT, '>', gt_start, this.position);
}
else if (this.current_char() === '>') {
// Regular closing >
const gt_start = this.position;
this.advance();
this.add_token(TokenType.GT, '>', gt_start, this.position);
}
}
match_sequence(sequence) {
if (this.position + sequence.length > this.input.length) {
return false;
}
for (let i = 0; i < sequence.length; i++) {
if (this.input[this.position + i] !== sequence[i]) {
return false;
}
}
// Consume the sequence
for (let i = 0; i < sequence.length; i++) {
this.advance();
}
return true;
}
match_keyword(keyword) {
const start = this.position;
// Match the keyword
for (let i = 0; i < keyword.length; i++) {
if (this.position + i >= this.input.length ||
this.input[this.position + i] !== keyword[i]) {
return false;
}
}
// Ensure it's not part of a larger word
const next_pos = this.position + keyword.length;
if (next_pos < this.input.length) {
const next_char = this.input[next_pos];
if ((next_char >= 'a' && next_char <= 'z') ||
(next_char >= 'A' && next_char <= 'Z') ||
(next_char >= '0' && next_char <= '9') ||
next_char === '_') {
return false;
}
}
// Consume the keyword
for (let i = 0; i < keyword.length; i++) {
this.advance();
}
return true;
}
peek_sequence(sequence) {
if (this.position + sequence.length > this.input.length) {
return false;
}
for (let i = 0; i < sequence.length; i++) {
if (this.input[this.position + i] !== sequence[i]) {
return false;
}
}
return true;
}
peek_sequence_at(offset, sequence) {
const start = this.position + offset;
if (start + sequence.length > this.input.length) {
return false;
}
for (let i = 0; i < sequence.length; i++) {
if (this.input[start + i] !== sequence[i]) {
return false;
}
}
return true;
}
skip_whitespace() {
while (this.position < this.input.length) {
const char = this.current_char();
if (char === ' ' || char === '\t' || char === '\r') {
this.advance();
}
else {
break;
}
}
}
current_char() {
return this.input[this.position] || '';
}
peek_ahead(offset) {
return this.input[this.position + offset] || '';
}
advance() {
if (this.current_char() === '\n') {
this.line++;
this.column = 1;
}
else {
this.column++;
}
this.position++;
}
add_token(type, value, start, end, line, column) {
// Calculate start position details
const startLine = line ?? this.line;
const startColumn = column ?? this.column;
// Calculate end position by scanning the value
let endLine = startLine;
let endColumn = startColumn;
let endOffset = end;
// Count lines and columns in the value to get accurate end position
for (let i = 0; i < value.length; i++) {
if (value[i] === '\n') {
endLine++;
endColumn = 1;
}
else {
endColumn++;
}
}
// For single character tokens, end column is start + 1
if (value.length === 1 && value !== '\n') {
endColumn = startColumn + 1;
}
this.tokens.push({
type,
value,
line: startLine, // Keep for backward compatibility
column: startColumn, // Keep for backward compatibility
start, // Keep for backward compatibility
end, // Keep for backward compatibility
loc: {
start: {
line: startLine,
column: startColumn,
offset: start
},
end: {
line: endLine,
column: endColumn,
offset: endOffset
}
}
});
}
is_tag_name_char(char) {
if (!char)
return false;
return (char >= 'a' && char <= 'z') ||
(char >= 'A' && char <= 'Z');
}
is_tag_name_continue_char(char) {
if (!char)
return false;
return this.is_tag_name_char(char) ||
(char >= '0' && char <= '9') ||
char === '-' || char === '_' || char === ':';
}
scan_opening_tag() {
const start = this.position;
this.advance(); // consume <
this.add_token(TokenType.TAG_OPEN, '<', start, this.position);
// Scan tag name
const name_start = this.position;
let name = '';
while (this.position < this.input.length &&
this.is_tag_name_continue_char(this.current_char())) {
name += this.current_char();
this.advance();
}
if (name.length > 0) {
this.add_token(TokenType.TAG_NAME, name, name_start, this.position);
}
// Scan attributes until > or />
this.scan_attributes();
}
scan_closing_tag() {
const start = this.position;
this.advance(); // consume <
this.advance(); // consume /
this.add_token(TokenType.TAG_CLOSE, '</', start, this.position);
// Scan tag name
const name_start = this.position;
let name = '';
while (this.position < this.input.length &&
this.is_tag_name_continue_char(this.current_char())) {
name += this.current_char();
this.advance();
}
if (name.length > 0) {
this.add_token(TokenType.TAG_NAME, name, name_start, this.position);
}
// Skip whitespace
this.skip_whitespace();
// Expect >
if (this.current_char() === '>') {
const gt_start = this.position;
this.advance();
this.add_token(TokenType.GT, '>', gt_start, this.position);
}
}
scan_attributes() {
while (this.position < this.input.length) {
this.skip_whitespace();
// Also skip newlines in attribute area
while (this.current_char() === '\n' || this.current_char() === '\r') {
if (this.current_char() === '\n') {
this.add_token(TokenType.NEWLINE, '\n', this.position, this.position + 1);
}
this.advance();
this.skip_whitespace();
}
const char = this.current_char();
// Check for />
if (char === '/' && this.peek_ahead(1) === '>') {
const slash_start = this.position;
this.advance(); // consume /
this.advance(); // consume >
this.add_token(TokenType.SELF_CLOSING, '/>', slash_start, this.position);
return;
}
// Check for >
if (char === '>') {
const gt_start = this.position;
this.advance();
this.add_token(TokenType.GT, '>', gt_start, this.position);
return;
}
// Check for <% (conditional attribute start)
if (char === '<' && this.peek_ahead(1) === '%') {
const start = this.position;
this.advance(); // <
this.advance(); // %
this.add_token(TokenType.CODE_START, '<%', start, this.position);
this.scan_code_block();
// Consume the %> that scan_code_block left behind
if (this.current_char() === '%' && this.peek_ahead(1) === '>') {
const tag_end_start = this.position;
this.advance(); // %
this.advance(); // >
this.add_token(TokenType.TAG_END, '%>', tag_end_start, this.position);
}
// Continue scanning attributes - DO NOT return
continue;
}
// Must be an attribute
if (this.is_attribute_start_char(char)) {
this.scan_attribute();
}
else {
break;
}
}
}
is_attribute_start_char(char) {
if (!char)
return false;
return this.is_tag_name_char(char) || char === '$' || char === ':' || char === '@';
}
scan_attribute() {
const start = this.position;
let name = '';
// Scan attribute name
while (this.position < this.input.length) {
const char = this.current_char();
if (char === '=' || char === ' ' || char === '\t' ||
char === '\n' || char === '\r' || char === '>' ||
(char === '/' && this.peek_ahead(1) === '>') ||
(char === '<' && this.peek_ahead(1) === '%')) {
break;
}
name += char;
this.advance();
}
if (name.length > 0) {
this.add_token(TokenType.ATTR_NAME, name, start, this.position);
}
this.skip_whitespace();
// Check for = and value
if (this.current_char() === '=') {
const eq_start = this.position;
this.advance();
this.add_token(TokenType.EQUALS, '=', eq_start, this.position);
this.skip_whitespace();
// Scan attribute value
this.scan_attribute_value();
}
}
scan_attribute_value() {
const char = this.current_char();
// Check for common mistake: attr=<%= instead of attr="<%=
if (char === '<' && this.peek_ahead(1) === '%') {
const attr_context = this.get_current_attribute_context(); // Returns "attrname="
const is_dollar_attr = attr_context.startsWith('$');
const error = new JQHTMLParseError(`Attribute value cannot be assigned directly to <%= %> interpolation block.\n` +
` Attribute: ${attr_context}<%`, this.line, this.column, this.input);
if (is_dollar_attr) {
error.suggestion = `\n\n$ attributes can use either:\n\n` +
`1. Quoted interpolation (for string values):\n` +
` ✗ Wrong: ${attr_context}<%= JSON.stringify(data) %>\n` +
` ✓ Correct: ${attr_context}"<%= JSON.stringify(data) %>"\n\n` +
`2. Unquoted literal JavaScript (preferred):\n` +
` ✓ Correct: ${attr_context}JSON.stringify(data)\n` +
` ✓ Correct: ${attr_context}this.data.options\n` +
` ✓ Correct: ${attr_context}myVariable\n\n` +
`The preferred approach is to use unquoted literal JavaScript\n` +
`without <%= %> tags for $ attributes.`;
}
else {
error.suggestion = `\n\nRegular attributes must use quoted values:\n\n` +
` ✗ Wrong: ${attr_context}<%= value %>\n` +
` ✓ Correct: ${attr_context}"<%= value %>"\n` +
` ✓ Correct: ${attr_context}"static text"\n\n` +
`Always wrap attribute values in quotes.`;
}
throw error;
}
if (char === '"' || char === "'") {
// Check if this is an @ event attribute - they MUST be unquoted
const attr_context = this.get_current_attribute_context();
if (attr_context.startsWith('@')) {
const error = new JQHTMLParseError(`Event attributes (@) must have unquoted values to pass function references.\n` +
` Attribute: ${attr_context}=`, this.line, this.column, this.input);
error.suggestion = `\n\nEvent attributes must be unquoted:\n` +
` ✗ Wrong: @click="handleClick" (passes string, not function)\n` +
` ✓ Correct: @click=handleClick (passes function reference)\n` +
` ✓ Correct: @click=this.handleClick (passes method reference)\n\n` +
`Quoted values only pass strings and cannot pass functions or callbacks.\n` +
`In the component scope, 'this' refers to the component instance.`;
throw error;
}
// Quoted value - check for interpolation
const quote = char;
const quote_start = this.position;
this.advance(); // consume opening quote
// Check if value contains <%= or <%!=
if (this.value_contains_interpolation(quote)) {
// Rewind and scan with interpolation
this.position = quote_start;
this.advance(); // skip quote again
this.scan_interpolated_attribute_value(quote);
}
else {
// Simple value without interpolation
const value_start = this.position - 1; // Include opening quote
let value = quote; // Start with the quote
while (this.position < this.input.length && this.current_char() !== quote) {
value += this.current_char();
this.advance();
}
if (this.current_char() === quote) {
value += quote; // Add closing quote
this.advance(); // consume closing quote
}
if (value.length > 2 || value === '""' || value === "''") {
this.add_token(TokenType.ATTR_VALUE, value, value_start, this.position);
}
}
}
else if (char === '(') {
// Parenthesized expression: $attr=(condition ? 'online' : 'offline')
const value_start = this.position;
let value = '';
let paren_depth = 0;
let in_string = false;
let string_delimiter = '';
let escape_next = false;
while (this.position < this.input.length) {
const ch = this.current_char();
// Handle escape sequences in strings
if (escape_next) {
value += ch;
this.advance();
escape_next = false;
continue;
}
// Handle backslash (escape character) inside strings
if (ch === '\\' && in_string) {
escape_next = true;
value += ch;
this.advance();
continue;
}
// Handle string delimiters
if ((ch === '"' || ch === "'" || ch === '`') && !in_string) {
in_string = true;
string_delimiter = ch;
}
else if (ch === string_delimiter && in_string) {
in_string = false;
string_delimiter = '';
}
// Count parentheses only outside strings
if (!in_string) {
if (ch === '(') {
paren_depth++;
}
else if (ch === ')') {
paren_depth--;
value += ch;
this.advance();
// Stop when we close the last parenthesis
if (paren_depth === 0) {
break;
}
continue;
}
// Stop at whitespace or tag end if not inside parentheses
if (paren_depth === 0 && (ch === ' ' || ch === '\t' || ch === '\n' ||
ch === '\r' || ch === '>' ||
(ch === '/' && this.peek_ahead(1) === '>'))) {
break;
}
}
value += ch;
this.advance();
}
if (value.length > 0) {
this.add_token(TokenType.ATTR_VALUE, value, value_start, this.position);
}
}
else {
// Unquoted value - JavaScript identifier or member expression
// Valid chars: alphanumeric, underscore, period, dollar sign
// Can be prefixed with ! for negation
// Examples: myVar, this.method, obj.prop.subprop, $element, !this.canEdit
//
// RULES:
// - @ event attributes: MUST be unquoted (to pass functions)
// - $ attributes: Can be quoted (string) or unquoted (any JS value)
// - Regular attributes: MUST be quoted (strings only)
// Check attribute type
const attr_context = this.get_current_attribute_context();
const is_event_attr = attr_context.startsWith('@');
const is_dollar_attr = attr_context.startsWith('$');
// Regular attributes (not @ or $) must be quoted
if (!is_event_attr && !is_dollar_attr) {
const error = new JQHTMLParseError(`Regular HTML attributes must have quoted values.\n` +
` Attribute: ${attr_context}`, this.line, this.column, this.input);
error.suggestion = `\n\nRegular attributes must be quoted:\n` +
` ✗ Wrong: ${attr_context}myValue\n` +
` ✓ Correct: ${attr_context}"myValue"\n` +
` ✓ Correct: ${attr_context}"prefix <%= this.data.value %> suffix"\n\n` +
`Only @ event attributes (unquoted) and $ attributes (either) allow unquoted values:\n` +
` ✓ Correct: @click=this.handleClick (passes function reference)\n` +
` ✓ Correct: $data=this.complexObject (passes object)\n` +
` ✓ Correct: $sid="my-id" (passes string)`;
throw error;
}
const value_start = this.position;
let value = '';
let isFirstChar = true;
while (this.position < this.input.length) {
const ch = this.current_char();
// Allow ! only as the first character (negation operator)
if (isFirstChar && ch === '!') {
value += ch;
this.advance();
isFirstChar = false;
continue;
}
isFirstChar = false;
// Check if character is valid for JavaScript identifier/member expression/function call
const isValidChar = (ch >= 'a' && ch <= 'z') ||
(ch >= 'A' && ch <= 'Z') ||
(ch >= '0' && ch <= '9') ||
ch === '_' ||
ch === '.' ||
ch === '$' ||
ch === '(' || // Allow parentheses for function calls
ch === ')' ||
ch === ',' || // Allow commas in function arguments
ch === '"' || // Allow quoted strings in function arguments
ch === "'"; // Allow quoted strings in function arguments
if (!isValidChar) {
// Stop at first non-valid character
break;
}
value += ch;
this.advance();
}
if (value.length > 0) {
// Validate the pattern before accepting it
this.validate_unquoted_value(value, attr_context);
this.add_token(TokenType.ATTR_VALUE, value, value_start, this.position);
}
}
}
validate_unquoted_value(value, attr_context) {
// Allowed patterns:
// 1. Literals: true, false, null, undefined, 123, 45.67
// 2. Identifiers: myVar, $variable, _private
// 3. Property chains: obj.prop, MyClass.method, deep.nested.property
// 4. Function calls: func(), obj.method(), func(arg1, arg2)
// 5. Chains with calls: obj.method().property.another()
//
// NOT allowed:
// - Operators: +, -, *, /, %, =, ==, ===, &&, ||, etc.
// - Objects: {key: value}
// - Arrays: [1, 2, 3]
// - Ternary: condition ? a : b
// Check for disallowed operators
if (/[+\-*/%=<>!&|^~?:]/.test(value)) {
const error = new JQHTMLParseError(`Operators are not allowed in unquoted $ attribute values.\n` +
` Found: ${attr_context}${value}`, this.line, this.column, this.input);
error.suggestion = `\n\nUnquoted $ attribute values must be simple references or function calls:\n\n` +
`✓ Allowed patterns:\n` +
` - Literals: $count=42 or $active=true\n` +
` - Variables: $data=myVariable\n` +
` - Property access: $handler=Controller.method\n` +
` - Function calls: $value=getData()\n` +
` - Complex chains: $fetch=API.users.getAll()\n\n` +
`✗ Not allowed:\n` +
` - Operators: $value=a+b (use quoted string or component logic)\n` +
` - Ternary: $class=active?'on':'off' (use quoted string)\n` +
` - Comparisons: $show=count>5 (handle in component logic)\n\n` +
`If you need complex expressions, handle them in the component's on_load() or on_ready() method.`;
throw error;
}
// Check for object literals
if (value.trim().startsWith('{')) {
const error = new JQHTMLParseError(`Object literals are not allowed in unquoted $ attribute values.\n` +
` Found: ${attr_context}${value}`, this.line, this.column, this.input);
error.suggestion = `\n\nUnquoted $ attribute values cannot contain object literals.\n\n` +
`If you need to pass an object, create it in the component:\n` +
` ✗ Wrong: $config={key:"value"}\n` +
` ✓ Correct: In component: this.data.config = {key: "value"}\n` +
` In template: $config=this.data.config`;
throw error;
}
// Check for array literals
if (value.trim().startsWith('[')) {
const error = new JQHTMLParseError(`Array literals are not allowed in unquoted $ attribute values.\n` +
` Found: ${attr_context}${value}`, this.line, this.column, this.input);
error.suggestion = `\n\nUnquoted $ attribute values cannot contain array literals.\n\n` +
`If you need to pass an array, create it in the component:\n` +
` ✗ Wrong: $items=[1,2,3]\n` +
` ✓ Correct: In component: this.data.items = [1, 2, 3]\n` +
` In template: $items=this.data.items`;
throw error;
}
// Validate that parentheses are balanced
let parenDepth = 0;
let inString = false;
let stringChar = '';
for (let i = 0; i < value.length; i++) {
const ch = value[i];
// Track string boundaries
if ((ch === '"' || ch === "'") && !inString) {
inString = true;
stringChar = ch;
}
else if (ch === stringChar && inString) {
inString = false;
stringChar = '';
}
// Only count parentheses outside strings
if (!inString) {
if (ch === '(')
parenDepth++;
if (ch === ')')
parenDepth--;
if (parenDepth < 0) {
const error = new JQHTMLParseError(`Unmatched closing parenthesis in unquoted $ attribute value.\n` +
` Found: ${attr_context}${value}`, this.line, this.column, this.input);
error.suggestion = `\n\nCheck for mismatched parentheses in the attribute value.`;
throw error;
}
}
}
if (parenDepth !== 0) {
const error = new JQHTMLParseError(`Unmatched opening parenthesis in unquoted $ attribute value.\n` +
` Found: ${attr_context}${value}`, this.line, this.column, this.input);
error.suggestion = `\n\nCheck for mismatched parentheses in the attribute value.`;
throw error;
}
// Validate the overall pattern using regex
// Pattern: identifier(.identifier)*(( args? ))*
// This allows: var, obj.prop, func(), obj.method(arg1, arg2).chain().more
const pattern = /^!?[a-zA-Z_$][a-zA-Z0-9_$]*(\.[a-zA-Z_$][a-zA-Z0-9_$]*|\([^)]*\))*$|^(true|false|null|undefined|\d+(\.\d+)?)$/;
if (!pattern.test(value)) {
const error = new JQHTMLParseError(`Invalid syntax in unquoted $ attribute value.\n` +
` Found: ${attr_context}${value}`, this.line, this.column, this.input);
error.suggestion = `\n\nUnquoted $ attribute values must follow these patterns:\n\n` +
`✓ Allowed:\n` +
` - Number literals: 42, 3.14\n` +
` - Boolean literals: true, false\n` +
` - Null/undefined: null, undefined\n` +
` - Identifiers: myVar, _private, $jQuery\n` +
` - Property chains: Controller.method, obj.deep.property\n` +
` - Function calls: getData(), API.fetch("url")\n` +
` - Mixed chains: obj.method().property.call()\n\n` +
`✗ Not allowed:\n` +
` - Spaces in names\n` +
` - Complex expressions with operators\n` +
` - Object or array literals\n\n` +
`The value should be a simple reference to data, not complex logic.`;
throw error;
}
}
get_current_attribute_context() {
// Look back in tokens to find the current attribute name for error reporting
let i = this.tokens.length - 1;
while (i >= 0) {
const token = this.tokens[i];
if (token.type === TokenType.ATTR_NAME) {
// The @ or $ is already part of the attribute name
return token.value + '=';
}
// Stop if we hit a tag boundary
if (token.type === TokenType.TAG_OPEN || token.type === TokenType.TAG_CLOSE) {
break;
}
i--;
}
return '';
}
value_contains_interpolation(quote) {
// Look ahead to see if this quoted value contains <%= or <%!=
let pos = this.position;
while (pos < this.input.length && this.input[pos] !== quote) {
if (pos + 2 < this.input.length &&
this.input[pos] === '<' &&
this.input[pos + 1] === '%') {
return true;
}
pos++;
}
return false;
}
scan_interpolated_attribute_value(quote) {
let text_start = this.position;
let text = '';
while (this.position < this.input.length && this.current_char() !== quote) {
// Check for interpolation start
if (this.current_char() === '<' && this.peek_ahead(1) === '%') {
// Save any text before the interpolation
if (text.length > 0) {
this.add_token(TokenType.ATTR_VALUE, text, text_start, this.position);
text = '';
}
// Check what kind of expression
if (this.peek_ahead(2) === '!' && this.peek_ahead(3) === '=') {
// <%!= expression %>
this.advance(); // <
this.advance(); // %
this.advance(); // !
this.advance(); // =
this.add_token(TokenType.EXPRESSION_UNESCAPED, '<%!=', this.position - 4, this.position);
}
else if (this.peek_ahead(2) === '=') {
// <%= expression %>
this.advance(); // <
this.advance(); // %
this.advance(); // =
this.add_token(TokenType.EXPRESSION_START, '<%=', this.position - 3, this.position);
}
else {
// Just add as text
text += this.current_char();
this.advance();
continue;
}
// Scan the JavaScript expression
this.scan_javascript();
// Consume %>
if (this.current_char() === '%' && this.peek_ahead(1) === '>') {
const tag_end_start = this.position;
this.advance(); // %
this.advance(); // >
this.add_token(TokenType.TAG_END, '%>', tag_end_start, this.position);
}
// Reset text tracking
text_start = this.position;
}
else {
text += this.current_char();
this.advance();
}
}
// Add any remaining text
if (text.length > 0) {
this.add_token(TokenType.ATTR_VALUE, text, text_start, this.position);
}
// Consume closing quote
if (this.current_char() === quote) {
this.advance();
}
}
}
//# sourceMappingURL=lexer.js.map