Skip to content

Commit

Permalink
feat: add splitInputBySeparator
Browse files Browse the repository at this point in the history
  • Loading branch information
liuxy0551 committed Oct 15, 2024
1 parent 18e55e4 commit 9ce9722
Show file tree
Hide file tree
Showing 8 changed files with 129 additions and 68 deletions.
127 changes: 108 additions & 19 deletions src/parser/common/basicSQL.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ import type { SplitListener } from './splitListener';
import type { EntityCollector } from './entityCollector';
import { EntityContext } from './entityCollector';

/**
 * Token text treated as the boundary when the input is narrowed to a smaller
 * range around the caret (compared against `token.text` and searched for in
 * the input string).
 */
const SEPARATOR: string = ';';

/**
* Basic SQL class, every sql needs extends it.
*/
Expand Down Expand Up @@ -65,13 +67,11 @@ export abstract class BasicSQL<
* @param candidates candidate list
* @param allTokens all tokens from input
* @param caretTokenIndex tokenIndex of caretPosition
* @param tokenIndexOffset offset of the tokenIndex in the candidates compared to the tokenIndex in allTokens
*/
protected abstract processCandidates(
candidates: CandidatesCollection,
allTokens: Token[],
caretTokenIndex: number,
tokenIndexOffset: number
caretTokenIndex: number
): Suggestions<Token>;

/**
Expand Down Expand Up @@ -251,6 +251,78 @@ export abstract class BasicSQL<
return res;
}

/**
 * Narrow the input down to the separator-delimited range that contains the caret.
 *
 * The range starts at the token following the nearest separator before the caret
 * (or at the first token when there is none) and ends at the nearest separator at
 * or after the caret (or at the last token when there is none). A separator that
 * sits exactly at the caret is treated as the end of the range, so the start is
 * searched for strictly before it; this keeps the range non-inverted and avoids
 * reading past the last token.
 *
 * @param input source text of the current range; character index 0 is expected to
 * line up with the start of `allTokens[tokenIndexOffset]`
 * @param allTokens all tokens from the whole input
 * @param tokenIndexOffset index in `allTokens` of the first token that belongs to `input`
 * @param caretTokenIndex tokenIndex of caretPosition, relative to `allTokens`
 * @returns `inputSlice`: the narrowed text; `allTokens`: the tokens starting at the
 * first token of the narrowed range; `caretTokenIndex`: the caret token index
 * relative to the returned `allTokens`
 */
private splitInputBySeparator(
    input: string,
    allTokens: Token[],
    tokenIndexOffset: number,
    caretTokenIndex: number
): { inputSlice: string; allTokens: Token[]; caretTokenIndex: number } {
    const tokens = allTokens.slice(tokenIndexOffset);

    // Nothing to split on: hand the input back unchanged instead of throwing.
    if (tokens.length === 0) {
        return {
            inputSlice: input,
            allTokens: tokens,
            caretTokenIndex: caretTokenIndex - tokenIndexOffset,
        };
    }

    // Clamp only the search position; the caret index that is returned is still
    // derived from the caller's unclamped value.
    const lastIndex = tokens.length - 1;
    const caretIndex = Math.min(Math.max(caretTokenIndex - tokenIndexOffset, 0), lastIndex);
    const caretOnSeparator = tokens[caretIndex].text === SEPARATOR;

    /**
     * Set startToken: the token right after the closest separator before the caret.
     * `i + 1` never exceeds `caretIndex`, so the lookup is always in range.
     */
    let startToken: Token = tokens[0];
    for (let i = caretOnSeparator ? caretIndex - 1 : caretIndex; i >= 0; i--) {
        if (tokens[i].text === SEPARATOR) {
            startToken = tokens[i + 1];
            break;
        }
    }

    /**
     * Set stopToken: the closest separator at or after the caret.
     */
    let stopToken: Token = tokens[lastIndex];
    for (let i = caretIndex; i <= lastIndex; i++) {
        if (tokens[i].text === SEPARATOR) {
            stopToken = tokens[i];
            break;
        }
    }

    // Token offsets are relative to the whole input; rebase them onto `input`.
    const indexOffset = tokens[0].start;
    const startIndex = startToken.start - indexOffset;
    const stopIndex = stopToken.stop + 1 - indexOffset;

    /**
     * Save offset of the tokenIndex in the range of input
     * compared to the tokenIndex in the whole input.
     * NOTE(review): assumes `allTokens[i].tokenIndex === i` — confirm against getAllTokens.
     */
    const rangeTokenIndexOffset = startToken.tokenIndex;

    return {
        inputSlice: input.slice(startIndex, stopIndex),
        allTokens: allTokens.slice(rangeTokenIndexOffset),
        caretTokenIndex: caretTokenIndex - rangeTokenIndexOffset,
    };
}

/**
* Get suggestions of syntax and token at caretPosition
* @param input source string
Expand All @@ -262,12 +334,13 @@ export abstract class BasicSQL<
caretPosition: CaretPosition
): Suggestions | null {
const splitListener = this.splitListener;
let inputSlice = input;

this.parseWithCache(input);
this.parseWithCache(inputSlice);
if (!this._parseTree) return null;

let sqlParserIns = this._parser;
const allTokens = this.getAllTokens(input);
let allTokens = this.getAllTokens(inputSlice);
let caretTokenIndex = findCaretTokenIndex(caretPosition, allTokens);
let c3Context: ParserRuleContext = this._parseTree;
let tokenIndexOffset: number = 0;
Expand Down Expand Up @@ -321,22 +394,43 @@ export abstract class BasicSQL<
}

// A boundary consisting of the index of the input.
const startIndex = startStatement?.start?.start ?? 0;
const stopIndex = stopStatement?.stop?.stop ?? input.length - 1;
let startIndex = startStatement?.start?.start ?? 0;
let stopIndex = stopStatement?.stop?.stop ?? inputSlice.length - 1;

/**
* Save offset of the tokenIndex in the range of input
* compared to the tokenIndex in the whole input
*/
tokenIndexOffset = startStatement?.start?.tokenIndex ?? 0;
caretTokenIndex = caretTokenIndex - tokenIndexOffset;
inputSlice = inputSlice.slice(startIndex, stopIndex);
}

/**
* Reparse the input fragment,
* and c3 will collect candidates in the newly generated parseTree.
*/
const inputSlice = input.slice(startIndex, stopIndex);
/**
* Split the inputSlice by separator to get the smaller range of inputSlice.
*/
if (inputSlice.includes(SEPARATOR)) {
const {
inputSlice: _input,
allTokens: _allTokens,
caretTokenIndex: _caretTokenIndex,
} = this.splitInputBySeparator(
inputSlice,
allTokens,
tokenIndexOffset,
caretTokenIndex
);

allTokens = _allTokens;
caretTokenIndex = _caretTokenIndex;
inputSlice = _input;
} else {
caretTokenIndex = caretTokenIndex - tokenIndexOffset;
}

/**
* Reparse the input fragment, and c3 will collect candidates in the newly generated parseTree when input changed.
*/
if (inputSlice !== input) {
const lexer = this.createLexer(inputSlice);
lexer.removeErrorListeners();
const tokenStream = new CommonTokenStream(lexer);
Expand All @@ -356,12 +450,7 @@ export abstract class BasicSQL<
core.preferredRules = this.preferredRules;

const candidates = core.collectCandidates(caretTokenIndex, c3Context);
const originalSuggestions = this.processCandidates(
candidates,
allTokens,
caretTokenIndex,
tokenIndexOffset
);
const originalSuggestions = this.processCandidates(candidates, allTokens, caretTokenIndex);

const syntaxSuggestions: SyntaxSuggestion<WordRange>[] = originalSuggestions.syntax.map(
(syntaxCtx) => {
Expand Down
10 changes: 3 additions & 7 deletions src/parser/flink/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -50,19 +50,15 @@ export class FlinkSQL extends BasicSQL<FlinkSqlLexer, ProgramContext, FlinkSqlPa
protected processCandidates(
candidates: CandidatesCollection,
allTokens: Token[],
caretTokenIndex: number,
tokenIndexOffset: number
caretTokenIndex: number
): Suggestions<Token> {
const originalSyntaxSuggestions: SyntaxSuggestion<Token>[] = [];
const keywords: string[] = [];

for (let candidate of candidates.rules) {
const [ruleType, candidateRule] = candidate;
const startTokenIndex = candidateRule.startTokenIndex + tokenIndexOffset;
const tokenRanges = allTokens.slice(
startTokenIndex,
caretTokenIndex + tokenIndexOffset + 1
);
const startTokenIndex = candidateRule.startTokenIndex;
const tokenRanges = allTokens.slice(startTokenIndex, caretTokenIndex + 1);

let syntaxContextType: EntityContextType | StmtContextType | undefined = void 0;
switch (ruleType) {
Expand Down
10 changes: 3 additions & 7 deletions src/parser/hive/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -51,18 +51,14 @@ export class HiveSQL extends BasicSQL<HiveSqlLexer, ProgramContext, HiveSqlParse
protected processCandidates(
candidates: CandidatesCollection,
allTokens: Token[],
caretTokenIndex: number,
tokenIndexOffset: number
caretTokenIndex: number
): Suggestions<Token> {
const originalSyntaxSuggestions: SyntaxSuggestion<Token>[] = [];
const keywords: string[] = [];
for (let candidate of candidates.rules) {
const [ruleType, candidateRule] = candidate;
const startTokenIndex = candidateRule.startTokenIndex + tokenIndexOffset;
const tokenRanges = allTokens.slice(
startTokenIndex,
caretTokenIndex + tokenIndexOffset + 1
);
const startTokenIndex = candidateRule.startTokenIndex;
const tokenRanges = allTokens.slice(startTokenIndex, caretTokenIndex + 1);

let syntaxContextType: EntityContextType | StmtContextType | undefined = void 0;
switch (ruleType) {
Expand Down
10 changes: 3 additions & 7 deletions src/parser/impala/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,18 +49,14 @@ export class ImpalaSQL extends BasicSQL<ImpalaSqlLexer, ProgramContext, ImpalaSq
protected processCandidates(
candidates: CandidatesCollection,
allTokens: Token[],
caretTokenIndex: number,
tokenIndexOffset: number
caretTokenIndex: number
): Suggestions<Token> {
const originalSyntaxSuggestions: SyntaxSuggestion<Token>[] = [];
const keywords: string[] = [];
for (let candidate of candidates.rules) {
const [ruleType, candidateRule] = candidate;
const startTokenIndex = candidateRule.startTokenIndex + tokenIndexOffset;
const tokenRanges = allTokens.slice(
startTokenIndex,
caretTokenIndex + tokenIndexOffset + 1
);
const startTokenIndex = candidateRule.startTokenIndex;
const tokenRanges = allTokens.slice(startTokenIndex, caretTokenIndex + 1);

let syntaxContextType: EntityContextType | StmtContextType | undefined = void 0;
switch (ruleType) {
Expand Down
10 changes: 3 additions & 7 deletions src/parser/mysql/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,19 +49,15 @@ export class MySQL extends BasicSQL<MySqlLexer, ProgramContext, MySqlParser> {
protected processCandidates(
candidates: CandidatesCollection,
allTokens: Token[],
caretTokenIndex: number,
tokenIndexOffset: number
caretTokenIndex: number
): Suggestions<Token> {
const originalSyntaxSuggestions: SyntaxSuggestion<Token>[] = [];
const keywords: string[] = [];

for (const candidate of candidates.rules) {
const [ruleType, candidateRule] = candidate;
const startTokenIndex = candidateRule.startTokenIndex + tokenIndexOffset;
const tokenRanges = allTokens.slice(
startTokenIndex,
caretTokenIndex + tokenIndexOffset + 1
);
const startTokenIndex = candidateRule.startTokenIndex;
const tokenRanges = allTokens.slice(startTokenIndex, caretTokenIndex + 1);

let syntaxContextType: EntityContextType | StmtContextType | undefined = void 0;
switch (ruleType) {
Expand Down
10 changes: 3 additions & 7 deletions src/parser/postgresql/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,18 +54,14 @@ export class PostgreSQL extends BasicSQL<PostgreSqlLexer, ProgramContext, Postgr
protected processCandidates(
candidates: CandidatesCollection,
allTokens: Token[],
caretTokenIndex: number,
tokenIndexOffset: number
caretTokenIndex: number
): Suggestions<Token> {
const originalSyntaxSuggestions: SyntaxSuggestion<Token>[] = [];
const keywords: string[] = [];
for (let candidate of candidates.rules) {
const [ruleType, candidateRule] = candidate;
const startTokenIndex = candidateRule.startTokenIndex + tokenIndexOffset;
const tokenRanges = allTokens.slice(
startTokenIndex,
caretTokenIndex + tokenIndexOffset + 1
);
const startTokenIndex = candidateRule.startTokenIndex;
const tokenRanges = allTokens.slice(startTokenIndex, caretTokenIndex + 1);

let syntaxContextType: EntityContextType | StmtContextType | undefined = void 0;
switch (ruleType) {
Expand Down
10 changes: 3 additions & 7 deletions src/parser/spark/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,19 +49,15 @@ export class SparkSQL extends BasicSQL<SparkSqlLexer, ProgramContext, SparkSqlPa
protected processCandidates(
candidates: CandidatesCollection,
allTokens: Token[],
caretTokenIndex: number,
tokenIndexOffset: number
caretTokenIndex: number
): Suggestions<Token> {
const originalSyntaxSuggestions: SyntaxSuggestion<Token>[] = [];
const keywords: string[] = [];

for (const candidate of candidates.rules) {
const [ruleType, candidateRule] = candidate;
const startTokenIndex = candidateRule.startTokenIndex + tokenIndexOffset;
const tokenRanges = allTokens.slice(
startTokenIndex,
caretTokenIndex + tokenIndexOffset + 1
);
const startTokenIndex = candidateRule.startTokenIndex;
const tokenRanges = allTokens.slice(startTokenIndex, caretTokenIndex + 1);

let syntaxContextType: EntityContextType | StmtContextType | undefined = void 0;
switch (ruleType) {
Expand Down
10 changes: 3 additions & 7 deletions src/parser/trino/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -51,19 +51,15 @@ export class TrinoSQL extends BasicSQL<TrinoSqlLexer, ProgramContext, TrinoSqlPa
protected processCandidates(
candidates: CandidatesCollection,
allTokens: Token[],
caretTokenIndex: number,
tokenIndexOffset: number
caretTokenIndex: number
): Suggestions<Token> {
const originalSyntaxSuggestions: SyntaxSuggestion<Token>[] = [];
const keywords: string[] = [];

for (let candidate of candidates.rules) {
const [ruleType, candidateRule] = candidate;
const startTokenIndex = candidateRule.startTokenIndex + tokenIndexOffset;
const tokenRanges = allTokens.slice(
startTokenIndex,
caretTokenIndex + tokenIndexOffset + 1
);
const startTokenIndex = candidateRule.startTokenIndex;
const tokenRanges = allTokens.slice(startTokenIndex, caretTokenIndex + 1);

let syntaxContextType: EntityContextType | StmtContextType | undefined = void 0;
switch (ruleType) {
Expand Down

0 comments on commit 9ce9722

Please sign in to comment.