diff --git a/CHANGELOG.md b/CHANGELOG.md index 8267563..eba3a01 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,12 @@ # Changelog -## 0.1.1 - 2026.02.23 +## 0.1.2 - 2026.02.25 +### Fixed +- Prioritize tables with mentioned columns in the suggestions [#6](https://github.com/questdb/sql-parser/pull/6) + +## 0.1.1 - 2026.02.23 ### Fixed - grammar-level table/column classification, join-specific suggestions [#2](https://github.com/questdb/sql-parser/pull/2) diff --git a/package.json b/package.json index 676061f..f3bcea5 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@questdb/sql-parser", - "version": "0.1.1", + "version": "0.1.2", "description": "SQL parser for QuestDB syntax using Chevrotain", "type": "module", "main": "dist/index.cjs", diff --git a/src/autocomplete/content-assist.ts b/src/autocomplete/content-assist.ts index a1c9415..cb1a569 100644 --- a/src/autocomplete/content-assist.ts +++ b/src/autocomplete/content-assist.ts @@ -80,6 +80,11 @@ export interface ContentAssistResult { suggestColumns: boolean /** Whether the grammar context expects table names (tableName positions, or expression context) */ suggestTables: boolean + /** + * Bare column names (lowercase) referenced before the cursor in expression + * context. Used by the provider to boost tables containing all these columns. + */ + referencedColumns: Set<string> } // ============================================================================= @@ -791,6 +796,81 @@ function inferTableFromQualifiedRef( return { table } } +/** + * Extract bare column names referenced in expression context from a token list. 
+ * + * Scans the tokens and collects identifier names that are likely column + * references, excluding: + * - Qualified identifiers (followed by a Dot token — table/alias qualifiers) + * - Middle segments of multi-part names (preceded AND followed by a Dot) + * - Known table names and aliases (matched against tableAndAliasSet) + * - Function calls (followed by a left-parenthesis token) + * + * @param tokens - Tokens to scan + * @param tableAndAliasSet - Lowercase table names and aliases already in scope + * (built from tablesInScope by the caller). Identifiers matching any of these + * are excluded because they are table/alias references, not column names. + * + * Returns a Set of lowercase column names for efficient lookup. + */ +export function extractReferencedColumns( + tokens: IToken[], + tableAndAliasSet: Set<string>, +): Set<string> { + const result = new Set<string>() + + for (let i = 0; i < tokens.length; i++) { + const token = tokens[i] + const name = token.tokenType.name + + // Only consider identifier-like tokens + if ( + name !== "Identifier" && + name !== "QuotedIdentifier" && + !IDENTIFIER_KEYWORD_TOKENS.has(name) + ) { + continue + } + + // Exclude: followed by Dot → this is a table/alias qualifier (e.g. "t1" in "t1.col") + if (i + 1 < tokens.length && tokens[i + 1].tokenType.name === "Dot") { + continue + } + + // Exclude: preceded by Dot AND followed by Dot → middle segment of a multi-part name. + // But a trailing segment (preceded by Dot, NOT followed by Dot) IS a column name + // (e.g. "ecn" in "c.ecn") and should be included for table ranking. + if ( + i > 0 && + tokens[i - 1].tokenType.name === "Dot" && + i + 1 < tokens.length && + tokens[i + 1].tokenType.name === "Dot" + ) { + continue + } + + // Exclude: followed by "(" → function call + if (i + 1 < tokens.length && tokens[i + 1].tokenType.name === "LParen") { + continue + } + + const image = + name === "QuotedIdentifier" ? 
token.image.slice(1, -1) : token.image + const lower = image.toLowerCase() + + // Exclude: matches a known table name or alias → this is a table reference, + // not a column name. This replaces the keyword-whitelist approach and is + // grammar-aware: tablesInScope is already built from the parsed AST. + if (tableAndAliasSet.has(lower)) { + continue + } + + result.add(lower) + } + + return result +} + /** * Get content assist suggestions for a SQL string at a given cursor position * @@ -825,6 +905,7 @@ export function getContentAssist( lexErrors: [], suggestColumns: false, suggestTables: false, + referencedColumns: new Set<string>(), } } } @@ -918,6 +999,21 @@ export function getContentAssist( tablesInScope.push(qualifiedRef) } + // Build a set of known table names and aliases so extractReferencedColumns + // can exclude them without a keyword whitelist. + const tableAndAliasSet = new Set<string>() + for (const t of tablesInScope) { + tableAndAliasSet.add(t.table.toLowerCase()) + if (t.alias) tableAndAliasSet.add(t.alias.toLowerCase()) + } + + // Extract bare column references for table ranking (use tokensForAssist so + // a partial mid-word token isn't mistaken for a complete column name). + const referencedColumns = extractReferencedColumns( + tokensForAssist, + tableAndAliasSet, + ) + return { nextTokenTypes, tablesInScope, @@ -928,6 +1024,7 @@ export function getContentAssist( qualifiedTableRef: qualifiedRef?.table, suggestColumns, suggestTables, + referencedColumns, } } diff --git a/src/autocomplete/provider.ts b/src/autocomplete/provider.ts index 7e0e822..19f451f 100644 --- a/src/autocomplete/provider.ts +++ b/src/autocomplete/provider.ts @@ -38,6 +38,71 @@ const TABLE_NAME_TOKENS = new Set([ "View", ]) +/** + * Pre-built index: lowercase table name → Set of lowercase column names. + * Built once at provider creation time so per-request ranking is O(N×M) + * rather than O(N×C). 
+ */ +function buildColumnIndex( + schema: SchemaInfo, +): Map<string, Set<string>> { + const index = new Map<string, Set<string>>() + for (const table of schema.tables) { + const key = table.name.toLowerCase() + const cols = schema.columns[key] + if (cols) { + index.set(key, new Set(cols.map((c) => c.name.toLowerCase()))) + } + } + return index +} + +/** + * Boost the priority of table suggestions based on how many of the referenced + * columns they contain: + * + * - ALL referenced columns present → SuggestionPriority.High (full match) + * - SOME referenced columns present → SuggestionPriority.Medium (partial match) + * - No referenced columns → priority unchanged (no match) + * + * Graceful fallback: if no table has any referenced column at all, nothing is + * changed so the caller still sees all tables at their default priority. + * + * @param suggestions - The suggestion array (mutated in place) + * @param referencedColumns - Lowercase column names found in expression context + * @param columnIndex - Pre-built map of table → column name set + */ +function rankTableSuggestions( + suggestions: Suggestion[], + referencedColumns: Set<string>, + columnIndex: Map<string, Set<string>>, +): void { + if (referencedColumns.size === 0) return + + // Score each table: how many referenced columns does it contain? + const scores = new Map<string, number>() + for (const [tableName, colNames] of columnIndex) { + let count = 0 + for (const ref of referencedColumns) { + if (colNames.has(ref)) count++ + } + if (count > 0) scores.set(tableName, count) + } + + // Graceful fallback: no table has any of the referenced columns + if (scores.size === 0) return + + for (const s of suggestions) { + if (s.kind !== SuggestionKind.Table) continue + const score = scores.get(s.label.toLowerCase()) + if (score === undefined) continue + s.priority = + score === referencedColumns.size + ? 
SuggestionPriority.High // full match + : SuggestionPriority.Medium // partial match + } +} + function getLastSignificantTokens(tokens: IToken[]): string[] { const result: string[] = [] for (let i = tokens.length - 1; i >= 0; i--) { @@ -84,6 +149,9 @@ export function createAutocompleteProvider( ), } + // Pre-build column index once so per-request ranking is fast + const columnIndex = buildColumnIndex(normalizedSchema) + return { getSuggestions(query: string, cursorOffset: number): Suggestion[] { // Get content assist from parser @@ -96,6 +164,7 @@ export function createAutocompleteProvider( qualifiedTableRef, suggestColumns, suggestTables, + referencedColumns, } = getContentAssist(query, cursorOffset) // Merge CTE columns into the schema so getColumnsInScope() can find them @@ -146,7 +215,7 @@ export function createAutocompleteProvider( // If parser returned valid next tokens, use grammar-based classification if (nextTokenTypes.length > 0) { - return buildSuggestions( + const suggestions = buildSuggestions( nextTokenTypes, effectiveSchema, effectiveTablesInScope, @@ -156,6 +225,10 @@ export function createAutocompleteProvider( isMidWord, }, ) + if (suggestTables) { + rankTableSuggestions(suggestions, referencedColumns, columnIndex) + } + return suggestions } // Fallback: when Chevrotain returns no suggestions (malformed SQL like @@ -194,6 +267,7 @@ export function createAutocompleteProvider( }) } } + rankTableSuggestions(suggestions, referencedColumns, columnIndex) return suggestions } diff --git a/tests/autocomplete.test.ts b/tests/autocomplete.test.ts index 58c4fe6..4b35e12 100644 --- a/tests/autocomplete.test.ts +++ b/tests/autocomplete.test.ts @@ -2902,4 +2902,127 @@ describe("CTE autocomplete", () => { expect(columns.map((s) => s.label)).toContain("symbol") }) }) + + // =========================================================================== + // Column-based table ranking + // =========================================================================== + 
describe("column-based table ranking", () => { + it("boosts tables that contain all referenced columns", () => { + // "symbol" and "price" both exist in trades but not in orders or users + const sql = "SELECT symbol, price FROM " + const suggestions = provider.getSuggestions(sql, sql.length) + const tables = suggestions.filter((s) => s.kind === SuggestionKind.Table) + const trades = tables.find((s) => s.label === "trades") + const orders = tables.find((s) => s.label === "orders") + const users = tables.find((s) => s.label === "users") + expect(trades?.priority).toBe(SuggestionPriority.High) + expect(orders?.priority).toBe(SuggestionPriority.MediumLow) + expect(users?.priority).toBe(SuggestionPriority.MediumLow) + }) + + it("partially matching tables get Medium priority; no-match tables stay MediumLow", () => { + // "symbol" is in trades; "id" is in orders — each table has one of the two + const sql = "SELECT symbol, id FROM " + const suggestions = provider.getSuggestions(sql, sql.length) + const tables = suggestions.filter((s) => s.kind === SuggestionKind.Table) + const trades = tables.find((s) => s.label === "trades") + const orders = tables.find((s) => s.label === "orders") + const users = tables.find((s) => s.label === "users") + // partial match → Medium (boosted but not full match) + expect(trades?.priority).toBe(SuggestionPriority.Medium) + expect(orders?.priority).toBe(SuggestionPriority.Medium) + // no match → default + expect(users?.priority).toBe(SuggestionPriority.MediumLow) + }) + + it("columns from two tables: both partially-matching tables get Medium", () => { + // "symbol" and "price" only in trades; "status" only in orders; "name" only in users + // → trades and orders both partially match (2 and 1 out of 3); users has none + const sql = "SELECT symbol, price, status FROM " + const suggestions = provider.getSuggestions(sql, sql.length) + const tables = suggestions.filter((s) => s.kind === SuggestionKind.Table) + const trades = tables.find((s) => 
s.label === "trades") + const orders = tables.find((s) => s.label === "orders") + const users = tables.find((s) => s.label === "users") + expect(trades?.priority).toBe(SuggestionPriority.Medium) + expect(orders?.priority).toBe(SuggestionPriority.Medium) + expect(users?.priority).toBe(SuggestionPriority.MediumLow) + }) + + it("graceful fallback: no boost when no table has any referenced column", () => { + // "nonexistent_col" doesn't exist in any table + const sql = "SELECT nonexistent_col FROM " + const suggestions = provider.getSuggestions(sql, sql.length) + const tables = suggestions.filter((s) => s.kind === SuggestionKind.Table) + for (const t of tables) { + expect(t.priority).toBe(SuggestionPriority.MediumLow) + } + }) + + it("qualified references: the alias/qualifier is excluded but the column name is used", () => { + // "t1.symbol" → "symbol" is extracted; "t1" (alias qualifier) is not + const sql = "SELECT t1.symbol FROM " + const suggestions = provider.getSuggestions(sql, sql.length) + const tables = suggestions.filter((s) => s.kind === SuggestionKind.Table) + const trades = tables.find((s) => s.label === "trades") + const orders = tables.find((s) => s.label === "orders") + // trades has "symbol" → boosted; orders does not + expect(trades?.priority).toBe(SuggestionPriority.High) + expect(orders?.priority).toBe(SuggestionPriority.MediumLow) + }) + + it("qualified references from multiple aliases boost the correct tables", () => { + // c.symbol → symbol in trades; o.id → id in orders + const sql = "SELECT c.symbol, o.id FROM " + const suggestions = provider.getSuggestions(sql, sql.length) + const tables = suggestions.filter((s) => s.kind === SuggestionKind.Table) + const trades = tables.find((s) => s.label === "trades") + const orders = tables.find((s) => s.label === "orders") + const users = tables.find((s) => s.label === "users") + expect(trades?.priority).toBe(SuggestionPriority.Medium) // partial: symbol but not id + 
expect(orders?.priority).toBe(SuggestionPriority.Medium) // partial: id but not symbol + expect(users?.priority).toBe(SuggestionPriority.MediumLow) + }) + + it("function calls are excluded from column inference", () => { + // "count()" is a function call — should not influence ranking + const sql = "SELECT count() FROM " + const suggestions = provider.getSuggestions(sql, sql.length) + const tables = suggestions.filter((s) => s.kind === SuggestionKind.Table) + for (const t of tables) { + expect(t.priority).toBe(SuggestionPriority.MediumLow) + } + }) + + it("all tables remain in the suggestion list even when some are boosted", () => { + const sql = "SELECT symbol, price FROM " + const suggestions = provider.getSuggestions(sql, sql.length) + const tableLabels = suggestions + .filter((s) => s.kind === SuggestionKind.Table) + .map((s) => s.label) + expect(tableLabels).toContain("trades") + expect(tableLabels).toContain("orders") + expect(tableLabels).toContain("users") + }) + + it("boosts a single-column match correctly", () => { + // "status" only exists in orders + const sql = "SELECT status FROM " + const suggestions = provider.getSuggestions(sql, sql.length) + const tables = suggestions.filter((s) => s.kind === SuggestionKind.Table) + const orders = tables.find((s) => s.label === "orders") + const trades = tables.find((s) => s.label === "trades") + expect(orders?.priority).toBe(SuggestionPriority.High) + expect(trades?.priority).toBe(SuggestionPriority.MediumLow) + }) + + it("SELECT * FROM does not boost any table (no referenced columns)", () => { + const sql = "SELECT * FROM " + const suggestions = provider.getSuggestions(sql, sql.length) + const tables = suggestions.filter((s) => s.kind === SuggestionKind.Table) + for (const t of tables) { + expect(t.priority).toBe(SuggestionPriority.MediumLow) + } + }) + }) })