Skip to content

Commit

Permalink
feat: add splitInputAfterSyntaxError
Browse files Browse the repository at this point in the history
  • Loading branch information
liuxy0551 committed Aug 26, 2024
1 parent 4f05a97 commit d841e3c
Show file tree
Hide file tree
Showing 8 changed files with 155 additions and 34 deletions.
175 changes: 141 additions & 34 deletions src/parser/common/basicSQL.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@ export abstract class BasicSQL<
*/
protected abstract preferredRules: Set<number>;

/**
 * Keywords that can start a single statement (for example `SELECT` or `INSERT`).
 * Each entry is compared against token text when looking for statement
 * boundaries around the caret after a syntax error.
 */
protected abstract statementStartKeywords: Set<string>;

/**
* Create an antlr4 Lexer instance.
* @param input source string
Expand Down Expand Up @@ -251,6 +256,63 @@ export abstract class BasicSQL<
return res;
}

/**
 * Narrow the input down to the smallest token range around the caret that is
 * likely to be a single statement. Intended for use after a syntax error.
 *
 * Boundaries are found by scanning outwards from the caret token:
 * - a `;` strictly before the caret ends the previous statement, so the range
 *   starts at the token right after it;
 * - a `;` at or after the caret ends the current statement, so the range stops on it;
 * - a keyword from `statementStartKeywords` at or before the caret marks the current
 *   statement; the range starts at the token immediately preceding that keyword
 *   (a keyword at index 0 simply falls back to the first token);
 * - such a keyword strictly after the caret belongs to the next statement, so the
 *   range stops at the token preceding it.
 * When no boundary is found on a side, the first / last token is used.
 *
 * Keywords are matched case-insensitively. For a caret index inside `allTokens`
 * the caret token always lies within the returned range.
 *
 * @param allTokens all tokens from input (expected to be non-empty)
 * @param caretTokenIndex tokenIndex of caretPosition
 * @returns { startToken: Token; stopToken: Token }
 */
private splitInputAfterSyntaxError(
    allTokens: Token[],
    caretTokenIndex: number
): { startToken: Token; stopToken: Token } {
    const lastTokenIndex = allTokens.length - 1;
    const caretIndex = Math.max(caretTokenIndex, 0);

    // Normalize once per call so that lookups are O(1) and independent of letter case.
    const startKeywords = new Set(
        Array.from(this.statementStartKeywords, (keyword) => keyword.toUpperCase())
    );
    const isStatementStart = (token: Token | undefined): boolean => {
        const text = token?.text;
        return typeof text === 'string' && startKeywords.has(text.toUpperCase());
    };

    // If no boundary is found, start from the first token.
    let startTokenIndex = 0;
    for (let tokenIndex = caretIndex; tokenIndex >= 0; tokenIndex--) {
        const token = allTokens[tokenIndex];
        // A semicolon at the caret terminates the current statement (handled by the
        // forward scan); only an earlier semicolon ends the previous statement.
        if (tokenIndex < caretIndex && token?.text === ';') {
            // Never step past the last token.
            startTokenIndex = Math.min(tokenIndex + 1, lastTokenIndex);
            break;
        }
        // keywords which can start a single statement
        if (tokenIndex !== 0 && isStatementStart(token)) {
            startTokenIndex = tokenIndex - 1;
            break;
        }
    }

    // If no boundary is found, stop at the last token.
    let stopTokenIndex = lastTokenIndex;
    for (let tokenIndex = caretIndex; tokenIndex <= lastTokenIndex; tokenIndex++) {
        const token = allTokens[tokenIndex];
        // end with semi
        if (token?.text === ';') {
            stopTokenIndex = tokenIndex;
            break;
        }
        // A start keyword at the caret belongs to the current statement; only a later
        // one opens the next statement, which must stay outside the range.
        if (tokenIndex > caretIndex && isStatementStart(token)) {
            stopTokenIndex = tokenIndex - 1;
            break;
        }
    }

    return {
        startToken: allTokens[startTokenIndex],
        stopToken: allTokens[stopTokenIndex],
    };
}

/**
* Get suggestions of syntax and token at caretPosition
* @param input source string
Expand Down Expand Up @@ -282,53 +344,98 @@ export abstract class BasicSQL<
const statementCount = splitListener.statementsContext?.length;
const statementsContext = splitListener.statementsContext;

// If there are multiple statements.
if (statementCount > 1) {
/**
* Find a minimum valid range, reparse the fragment, and provide a new parse tree to C3.
* The boundaries of this range must be statements with no syntax errors.
* This can ensure the stable performance of the C3.
*/
let startStatement: ParserRuleContext | null = null;
let stopStatement: ParserRuleContext | null = null;
const { startToken, stopToken } = this.splitInputAfterSyntaxError(
allTokens,
caretTokenIndex
);

for (let index = 0; index < statementCount; index++) {
const ctx = statementsContext[index];
const isCurrentCtxValid = !ctx.exception;
if (!isCurrentCtxValid) continue;
let startIndex: number = 0;
let stopIndex: number = 0;

/**
* If there is no semi
* and if there is no keyword which can start a single statement
* and if there are multiple statements
*/
if (startToken.tokenIndex === 1 && stopToken.tokenIndex === allTokens.length - 1) {
if (statementCount > 1) {
/**
* Ensure that the statementContext before the left boundary
* and the last statementContext on the right boundary are qualified SQL statements.
* Find a minimum valid range, reparse the fragment, and provide a new parse tree to C3.
* The boundaries of this range must be statements with no syntax errors.
* This can ensure the stable performance of the C3.
*/
const isPrevCtxValid = index === 0 || !statementsContext[index - 1]?.exception;
const isNextCtxValid =
index === statementCount - 1 || !statementsContext[index + 1]?.exception;

if (ctx.stop && ctx.stop.tokenIndex < caretTokenIndex && isPrevCtxValid) {
startStatement = ctx;
let startStatement: ParserRuleContext | null = null;
let stopStatement: ParserRuleContext | null = null;

for (let index = 0; index < statementCount; index++) {
const ctx = statementsContext[index];
const isCurrentCtxValid = !ctx.exception;
if (!isCurrentCtxValid) continue;

/**
* Ensure that the statementContext before the left boundary
* and the last statementContext on the right boundary are qualified SQL statements.
*/
const isPrevCtxValid = index === 0 || !statementsContext[index - 1]?.exception;
const isNextCtxValid =
index === statementCount - 1 || !statementsContext[index + 1]?.exception;

if (ctx.stop && ctx.stop.tokenIndex < caretTokenIndex && isPrevCtxValid) {
startStatement = ctx;
}

if (
ctx.start &&
!stopStatement &&
ctx.start.tokenIndex > caretTokenIndex &&
isNextCtxValid
) {
stopStatement = ctx;
break;
}
}

if (
ctx.start &&
!stopStatement &&
ctx.start.tokenIndex > caretTokenIndex &&
isNextCtxValid
) {
stopStatement = ctx;
break;
}
}
// A boundary consisting of the index of the input.
startIndex = startStatement?.start?.start ?? 0;
stopIndex = stopStatement?.stop?.stop ?? input.length - 1;

/**
* Save offset of the tokenIndex in the range of input
* compared to the tokenIndex in the whole input
*/
tokenIndexOffset = startStatement?.start?.tokenIndex ?? 0;
caretTokenIndex = caretTokenIndex - tokenIndexOffset;

/**
* Reparse the input fragment,
* and c3 will collect candidates in the newly generated parseTree.
*/
const inputSlice = input.slice(startIndex, stopIndex);

const lexer = this.createLexer(inputSlice);
lexer.removeErrorListeners();
const tokenStream = new CommonTokenStream(lexer);
tokenStream.fill();

const parser = this.createParserFromTokenStream(tokenStream);
parser.interpreter.predictionMode = PredictionMode.SLL;
parser.removeErrorListeners();
parser.buildParseTrees = true;
parser.errorHandler = new ErrorStrategy();

sqlParserIns = parser;
c3Context = parser.program();
}
} else {
// A boundary consisting of the index of the input.
const startIndex = startStatement?.start?.start ?? 0;
const stopIndex = stopStatement?.stop?.stop ?? input.length - 1;
startIndex = startToken?.start ?? 0;
stopIndex = stopToken?.stop + 1 ?? input.length;

/**
* Save offset of the tokenIndex in the range of input
* compared to the tokenIndex in the whole input
*/
tokenIndexOffset = startStatement?.start?.tokenIndex ?? 0;
tokenIndexOffset = startToken?.tokenIndex ?? 0;
caretTokenIndex = caretTokenIndex - tokenIndexOffset;

/**
Expand Down
2 changes: 2 additions & 0 deletions src/parser/flink/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ export class FlinkSQL extends BasicSQL<FlinkSqlLexer, ProgramContext, FlinkSqlPa
FlinkSqlParser.RULE_columnNameCreate,
]);

/** Flink SQL keywords treated as the start of a single statement. */
protected statementStartKeywords: Set<string> = new Set(['SELECT', 'INSERT']);

protected get splitListener() {
return new FlinkSqlSplitListener();
}
Expand Down
2 changes: 2 additions & 0 deletions src/parser/hive/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ export class HiveSQL extends BasicSQL<HiveSqlLexer, ProgramContext, HiveSqlParse
HiveSqlParser.RULE_columnNameCreate,
]);

/** Hive SQL keywords treated as the start of a single statement. */
protected statementStartKeywords: Set<string> = new Set(['SELECT', 'INSERT']);

protected get splitListener() {
return new HiveSqlSplitListener();
}
Expand Down
2 changes: 2 additions & 0 deletions src/parser/impala/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ export class ImpalaSQL extends BasicSQL<ImpalaSqlLexer, ProgramContext, ImpalaSq
ImpalaSqlParser.RULE_columnNamePath,
]);

/** Impala SQL keywords treated as the start of a single statement. */
protected statementStartKeywords: Set<string> = new Set(['SELECT', 'INSERT']);

protected get splitListener() {
return new ImpalaSqlSplitListener();
}
Expand Down
2 changes: 2 additions & 0 deletions src/parser/mysql/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ export class MySQL extends BasicSQL<MySqlLexer, ProgramContext, MySqlParser> {
MySqlParser.RULE_columnNameCreate,
]);

/** MySQL keywords treated as the start of a single statement. */
protected statementStartKeywords: Set<string> = new Set(['SELECT', 'INSERT']);

protected get splitListener() {
return new MysqlSplitListener();
}
Expand Down
2 changes: 2 additions & 0 deletions src/parser/postgresql/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ export class PostgreSQL extends BasicSQL<PostgreSqlLexer, ProgramContext, Postgr
PostgreSqlParser.RULE_column_name, // column name
]);

/** PostgreSQL keywords treated as the start of a single statement. */
protected statementStartKeywords: Set<string> = new Set(['SELECT', 'INSERT']);

protected get splitListener() {
return new PostgreSqlSplitListener();
}
Expand Down
2 changes: 2 additions & 0 deletions src/parser/spark/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ export class SparkSQL extends BasicSQL<SparkSqlLexer, ProgramContext, SparkSqlPa
SparkSqlParser.RULE_columnNameCreate,
]);

/** Spark SQL keywords treated as the start of a single statement. */
protected statementStartKeywords: Set<string> = new Set(['SELECT', 'INSERT']);

protected get splitListener() {
return new SparkSqlSplitListener();
}
Expand Down
2 changes: 2 additions & 0 deletions src/parser/trino/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ export class TrinoSQL extends BasicSQL<TrinoSqlLexer, ProgramContext, TrinoSqlPa
return new TrinoSqlParser(tokenStream);
}

/** Trino SQL keywords treated as the start of a single statement. */
protected statementStartKeywords: Set<string> = new Set(['SELECT', 'INSERT']);

protected get splitListener() {
return new TrinoSqlSplitListener();
}
Expand Down

0 comments on commit d841e3c

Please sign in to comment.