Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
# Changelog


## 0.1.1 - 2026.02.23
## 0.1.2 - 2026.02.25
### Fixed
- Prioritize tables that contain the columns already referenced in the query when suggesting table names [#6](https://github.com/questdb/sql-parser/pull/6)


## 0.1.1 - 2026.02.23
### Fixed
- grammar-level table/column classification, join-specific suggestions [#2](https://github.com/questdb/sql-parser/pull/2)

Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@questdb/sql-parser",
"version": "0.1.1",
"version": "0.1.2",
"description": "SQL parser for QuestDB syntax using Chevrotain",
"type": "module",
"main": "dist/index.cjs",
Expand Down
97 changes: 97 additions & 0 deletions src/autocomplete/content-assist.ts
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,11 @@ export interface ContentAssistResult {
suggestColumns: boolean
/** Whether the grammar context expects table names (tableName positions, or expression context) */
suggestTables: boolean
/**
* Lowercase column names referenced before the cursor, including the trailing
* segment of qualified references (e.g. "symbol" in "t1.symbol"). Used by the
* provider to boost tables that contain all, or some, of these columns.
*/
referencedColumns: Set<string>
}

// =============================================================================
Expand Down Expand Up @@ -791,6 +796,81 @@ function inferTableFromQualifiedRef(
return { table }
}

/**
 * Collect the column names referenced in a token list, for table ranking.
 *
 * Every identifier-like token (`Identifier`, `QuotedIdentifier`, or a token
 * type listed in `IDENTIFIER_KEYWORD_TOKENS`) is treated as a column reference
 * unless it is:
 * - immediately followed by a `Dot` token — a qualifier or a middle segment of
 *   a multi-part name (e.g. `t1` in `t1.col`, `b` in `a.b.c`);
 * - immediately followed by an `LParen` token — a function call;
 * - equal, case-insensitively, to an entry of `tableAndAliasSet`.
 *
 * The trailing segment of a qualified name (e.g. `ecn` in `c.ecn`) is kept,
 * because it names a column. Quoted identifiers have their first and last
 * character (the quotes) stripped before being lowercased.
 *
 * NOTE(review): identifiers that introduce a column alias (e.g. after AS) are
 * not filtered here and would be collected like columns — confirm whether
 * that is intended.
 *
 * @param tokens - Tokens to scan, in source order.
 * @param tableAndAliasSet - Lowercase table names and aliases already in scope
 *   (built from tablesInScope by the caller); matching identifiers are skipped.
 * @returns A Set of lowercase column names.
 */
export function extractReferencedColumns(
  tokens: IToken[],
  tableAndAliasSet: Set<string>,
): Set<string> {
  const result = new Set<string>()

  for (let i = 0; i < tokens.length; i++) {
    const token = tokens[i]
    const name = token.tokenType.name

    // Only identifier-like tokens can be column references.
    const isIdentifierLike =
      name === "Identifier" ||
      name === "QuotedIdentifier" ||
      IDENTIFIER_KEYWORD_TOKENS.has(name)
    if (!isIdentifierLike) continue

    // A single look-ahead decides both structural exclusions. Checking for a
    // following Dot already covers middle segments of multi-part names, so no
    // look-behind is required.
    const nextName = tokens[i + 1]?.tokenType.name
    if (nextName === "Dot" || nextName === "LParen") continue

    const image =
      name === "QuotedIdentifier" ? token.image.slice(1, -1) : token.image
    const lower = image.toLowerCase()

    // A known table name or alias is a table reference, not a column.
    if (tableAndAliasSet.has(lower)) continue

    result.add(lower)
  }

  return result
}

/**
* Get content assist suggestions for a SQL string at a given cursor position
*
Expand Down Expand Up @@ -825,6 +905,7 @@ export function getContentAssist(
lexErrors: [],
suggestColumns: false,
suggestTables: false,
referencedColumns: new Set(),
}
}
}
Expand Down Expand Up @@ -918,6 +999,21 @@ export function getContentAssist(
tablesInScope.push(qualifiedRef)
}

// Build a set of known table names and aliases so extractReferencedColumns
// can exclude them without a keyword whitelist.
const tableAndAliasSet = new Set<string>()
for (const t of tablesInScope) {
tableAndAliasSet.add(t.table.toLowerCase())
if (t.alias) tableAndAliasSet.add(t.alias.toLowerCase())
}

// Extract bare column references for table ranking (use tokensForAssist so
// a partial mid-word token isn't mistaken for a complete column name).
const referencedColumns = extractReferencedColumns(
tokensForAssist,
tableAndAliasSet,
)

return {
nextTokenTypes,
tablesInScope,
Expand All @@ -928,6 +1024,7 @@ export function getContentAssist(
qualifiedTableRef: qualifiedRef?.table,
suggestColumns,
suggestTables,
referencedColumns,
}
}

Expand Down
76 changes: 75 additions & 1 deletion src/autocomplete/provider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,71 @@
"View",
])

/**
 * Build a lookup from lowercase table name to the set of that table's
 * lowercase column names.
 *
 * Intended to be built once and reused, so that ranking can test column
 * membership with a Set lookup instead of scanning each table's column list.
 *
 * Columns are looked up in `schema.columns` under the lowercased table name;
 * a table with no entry under that key is left out of the index.
 *
 * @param schema - Schema whose `tables` and `columns` are indexed.
 * @returns Map of lowercase table name → Set of lowercase column names.
 */
function buildColumnIndex(schema: SchemaInfo): Map<string, Set<string>> {
  const index = new Map<string, Set<string>>()
  for (const table of schema.tables) {
    const key = table.name.toLowerCase()
    const cols = schema.columns[key]
    if (!cols) continue
    index.set(key, new Set(cols.map((c) => c.name.toLowerCase())))
  }
  return index
}

/**
 * Boost the priority of table suggestions based on how many of the referenced
 * columns each table contains:
 *
 * - ALL referenced columns present  → SuggestionPriority.High (full match)
 * - SOME referenced columns present → SuggestionPriority.Medium (partial match)
 * - No referenced columns present   → priority unchanged (no match)
 *
 * Nothing is changed when `referencedColumns` is empty, or when no table in
 * `columnIndex` contains any referenced column, so callers still see every
 * table at its existing priority. Suggestions whose kind is not
 * SuggestionKind.Table are never touched. Table labels are lowercased before
 * being looked up in the index.
 *
 * @param suggestions - The suggestion array (mutated in place)
 * @param referencedColumns - Lowercase column names found in the query
 * @param columnIndex - Pre-built map of lowercase table name → column name set
 */
function rankTableSuggestions(
  suggestions: Suggestion[],
  referencedColumns: Set<string>,
  columnIndex: Map<string, Set<string>>,
): void {
  if (referencedColumns.size === 0) return

  // Score each table: how many referenced columns does it contain?
  // Tables with a zero score are left out so they keep their priority.
  const scores = new Map<string, number>()
  for (const [tableName, colNames] of columnIndex) {
    let count = 0
    for (const ref of referencedColumns) {
      if (colNames.has(ref)) count++
    }
    if (count > 0) scores.set(tableName, count)
  }

  // Graceful fallback: no table has any of the referenced columns.
  if (scores.size === 0) return

  for (const s of suggestions) {
    if (s.kind !== SuggestionKind.Table) continue
    const score = scores.get(s.label.toLowerCase())
    if (score === undefined) continue
    s.priority =
      score === referencedColumns.size
        ? SuggestionPriority.High // full match
        : SuggestionPriority.Medium // partial match
  }
}

function getLastSignificantTokens(tokens: IToken[]): string[] {
const result: string[] = []
for (let i = tokens.length - 1; i >= 0; i--) {
Expand Down Expand Up @@ -84,6 +149,9 @@
),
}

// Pre-build column index once so per-request ranking is fast
const columnIndex = buildColumnIndex(normalizedSchema)

return {
getSuggestions(query: string, cursorOffset: number): Suggestion[] {
// Get content assist from parser
Expand All @@ -96,6 +164,7 @@
qualifiedTableRef,
suggestColumns,
suggestTables,
referencedColumns,
} = getContentAssist(query, cursorOffset)

// Merge CTE columns into the schema so getColumnsInScope() can find them
Expand Down Expand Up @@ -146,7 +215,7 @@

// If parser returned valid next tokens, use grammar-based classification
if (nextTokenTypes.length > 0) {
return buildSuggestions(
const suggestions = buildSuggestions(
nextTokenTypes,
effectiveSchema,
effectiveTablesInScope,
Expand All @@ -156,6 +225,10 @@
isMidWord,
},
)
if (suggestTables) {
rankTableSuggestions(suggestions, referencedColumns, columnIndex)
}
return suggestions
}

// Fallback: when Chevrotain returns no suggestions (malformed SQL like
Expand Down Expand Up @@ -194,6 +267,7 @@
})
}
}
rankTableSuggestions(suggestions, referencedColumns, columnIndex)
return suggestions
}

Expand Down
123 changes: 123 additions & 0 deletions tests/autocomplete.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2902,4 +2902,127 @@ describe("CTE autocomplete", () => {
expect(columns.map((s) => s.label)).toContain("symbol")
})
})

// ===========================================================================
// Column-based table ranking
// ===========================================================================
describe("column-based table ranking", () => {
  // Table suggestions offered with the cursor placed at the end of `sql`.
  const tableSuggestionsFor = (sql: string) =>
    provider
      .getSuggestions(sql, sql.length)
      .filter((s) => s.kind === SuggestionKind.Table)

  // Priority of the table suggestion labelled `label`; undefined when that
  // table is not among the suggestions.
  const priorityOf = (
    tables: ReturnType<typeof tableSuggestionsFor>,
    label: string,
  ) => tables.find((s) => s.label === label)?.priority

  it("boosts tables that contain all referenced columns", () => {
    // Both "symbol" and "price" live in trades, and in neither orders nor users
    const tables = tableSuggestionsFor("SELECT symbol, price FROM ")
    expect(priorityOf(tables, "trades")).toBe(SuggestionPriority.High)
    expect(priorityOf(tables, "orders")).toBe(SuggestionPriority.MediumLow)
    expect(priorityOf(tables, "users")).toBe(SuggestionPriority.MediumLow)
  })

  it("partially matching tables get Medium priority; no-match tables stay MediumLow", () => {
    // trades owns "symbol", orders owns "id" — each has exactly one of the two
    const tables = tableSuggestionsFor("SELECT symbol, id FROM ")
    // one of two columns → Medium (boosted, but short of a full match)
    expect(priorityOf(tables, "trades")).toBe(SuggestionPriority.Medium)
    expect(priorityOf(tables, "orders")).toBe(SuggestionPriority.Medium)
    // none of the columns → left at the default
    expect(priorityOf(tables, "users")).toBe(SuggestionPriority.MediumLow)
  })

  it("columns from two tables: both partially-matching tables get Medium", () => {
    // trades has "symbol" and "price", orders has "status", users has neither
    // → trades (2 of 3) and orders (1 of 3) are partial matches
    const tables = tableSuggestionsFor("SELECT symbol, price, status FROM ")
    expect(priorityOf(tables, "trades")).toBe(SuggestionPriority.Medium)
    expect(priorityOf(tables, "orders")).toBe(SuggestionPriority.Medium)
    expect(priorityOf(tables, "users")).toBe(SuggestionPriority.MediumLow)
  })

  it("graceful fallback: no boost when no table has any referenced column", () => {
    // No table declares "nonexistent_col"
    const tables = tableSuggestionsFor("SELECT nonexistent_col FROM ")
    tables.forEach((t) => {
      expect(t.priority).toBe(SuggestionPriority.MediumLow)
    })
  })

  it("qualified references: the alias/qualifier is excluded but the column name is used", () => {
    // In "t1.symbol" only "symbol" counts; the "t1" qualifier is dropped
    const tables = tableSuggestionsFor("SELECT t1.symbol FROM ")
    // trades declares "symbol" and is boosted; orders does not
    expect(priorityOf(tables, "trades")).toBe(SuggestionPriority.High)
    expect(priorityOf(tables, "orders")).toBe(SuggestionPriority.MediumLow)
  })

  it("qualified references from multiple aliases boost the correct tables", () => {
    // c.symbol → "symbol" (trades); o.id → "id" (orders)
    const tables = tableSuggestionsFor("SELECT c.symbol, o.id FROM ")
    // trades: has symbol, lacks id → partial
    expect(priorityOf(tables, "trades")).toBe(SuggestionPriority.Medium)
    // orders: has id, lacks symbol → partial
    expect(priorityOf(tables, "orders")).toBe(SuggestionPriority.Medium)
    expect(priorityOf(tables, "users")).toBe(SuggestionPriority.MediumLow)
  })

  it("function calls are excluded from column inference", () => {
    // "count()" is a call, so it must not affect the ranking
    const tables = tableSuggestionsFor("SELECT count() FROM ")
    tables.forEach((t) => {
      expect(t.priority).toBe(SuggestionPriority.MediumLow)
    })
  })

  it("all tables remain in the suggestion list even when some are boosted", () => {
    const tableLabels = tableSuggestionsFor("SELECT symbol, price FROM ").map(
      (s) => s.label,
    )
    for (const expected of ["trades", "orders", "users"]) {
      expect(tableLabels).toContain(expected)
    }
  })

  it("boosts a single-column match correctly", () => {
    // Only orders declares "status"
    const tables = tableSuggestionsFor("SELECT status FROM ")
    expect(priorityOf(tables, "orders")).toBe(SuggestionPriority.High)
    expect(priorityOf(tables, "trades")).toBe(SuggestionPriority.MediumLow)
  })

  it("SELECT * FROM does not boost any table (no referenced columns)", () => {
    const tables = tableSuggestionsFor("SELECT * FROM ")
    tables.forEach((t) => {
      expect(t.priority).toBe(SuggestionPriority.MediumLow)
    })
  })
})
})