Skip to content

Commit 5c236b9

Browse files
committed
Time bucketing using auto bins working
1 parent 7075454 commit 5c236b9

File tree

7 files changed

+91
-16
lines changed

7 files changed

+91
-16
lines changed

apps/webapp/app/components/code/tsql/tsqlCompletion.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,16 @@ function createFunctionCompletions(): Completion[] {
123123
});
124124
}
125125

126+
// Add special TSQL functions not in the ClickHouse function registry
127+
functions.push({
128+
label: "timeBucket",
129+
type: "function",
130+
detail: "auto time bucket (0 args)",
131+
apply: "timeBucket()",
132+
boost: 1.5,
133+
info: "Automatically bucket by time using the table's time column. Interval is chosen based on the query's time range.",
134+
});
135+
126136
return functions;
127137
}
128138

apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.query/ExamplesContent.tsx

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,19 @@ ORDER BY p50_duration_ms DESC
3838
LIMIT 20`,
3939
scope: "environment",
4040
},
41+
{
42+
title: "Runs over time",
43+
description:
44+
"Count of runs bucketed over time. The bucket size adjusts automatically to the time range.",
45+
query: `SELECT
46+
timeBucket(),
47+
count() AS run_count
48+
FROM runs
49+
GROUP BY timeBucket
50+
ORDER BY timeBucket
51+
LIMIT 1000`,
52+
scope: "environment",
53+
},
4154
{
4255
title: "Most expensive 100 runs (past 7d)",
4356
description: "Top 100 runs by cost over the last 7 days.",

apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.query/TRQLGuideContent.tsx

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -488,6 +488,11 @@ ORDER BY run_count DESC`,
488488
<FunctionCategory
489489
title="Date/time functions"
490490
functions={[
491+
{
492+
name: "timeBucket()",
493+
desc: "Auto-bucket by time period. Uses the table's time column with an interval based on the query's time range.",
494+
example: "SELECT timeBucket(), count() FROM runs GROUP BY timeBucket",
495+
},
491496
{ name: "now()", desc: "Current date and time", example: "now()" },
492497
{ name: "today()", desc: "Current date", example: "today()" },
493498
{ name: "yesterday()", desc: "Yesterday's date", example: "yesterday()" },

apps/webapp/app/services/queryService.server.ts

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ import {
1818
GLOBAL_CONCURRENCY_LIMIT,
1919
} from "./queryConcurrencyLimiter.server";
2020
import { getLimit } from "./platform.v3.server";
21-
import { timeFilters } from "~/components/runs/v3/SharedFilters";
21+
import { timeFilters, timeFilterFromTo } from "~/components/runs/v3/SharedFilters";
2222
import parse from "parse-duration";
2323
import { querySchemas } from "~/v3/querySchemas";
2424

@@ -205,6 +205,14 @@ export async function executeQuery<TOut extends z.ZodSchema>(
205205
queue: queues && queues.length > 0 ? { op: "in", values: queues } : undefined,
206206
} satisfies Record<string, WhereClauseCondition | undefined>;
207207

208+
// Compute the effective time range for timeBucket() interval calculation
209+
const timeRange = timeFilterFromTo({
210+
period: period ?? undefined,
211+
from: from ?? undefined,
212+
to: to ?? undefined,
213+
defaultPeriod,
214+
});
215+
208216
try {
209217
// Build field mappings for project_ref → project_id and environment_id → slug translation
210218
const projects = await prisma.project.findMany({
@@ -232,6 +240,7 @@ export async function executeQuery<TOut extends z.ZodSchema>(
232240
whereClauseFallback: {
233241
triggered_at: triggeredAtFallback,
234242
},
243+
timeRange,
235244
clickhouseSettings: {
236245
...getDefaultClickhouseSettings(),
237246
...baseOptions.clickhouseSettings, // Allow caller overrides if needed

apps/webapp/app/v3/querySchemas.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ export const runsSchema: TableSchema = {
2828
name: "runs",
2929
clickhouseName: "trigger_dev.task_runs_v2",
3030
description: "Task runs - stores all task execution records",
31+
timeConstraint: "triggered_at",
3132
tenantColumns: {
3233
organizationId: "organization_id",
3334
projectId: "project_id",

apps/webapp/app/v3/services/aiQueryService.server.ts

Lines changed: 43 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -414,13 +414,28 @@ HAVING cnt > 10
414414
\`\`\`
415415
416416
### Date/Time Functions
417+
- timeBucket() - automatically bucket by time. Uses the table's time column and picks the best interval based on the query's time range. Use in SELECT and reference as \`timeBucket\` in GROUP BY / ORDER BY.
417418
- now() - current timestamp
418419
- today() - current date
419420
- toDate(datetime) - extract date
420421
- toStartOfDay/Hour/Minute(datetime)
421422
- dateDiff('unit', start, end) - difference in units (second, minute, hour, day, week, month, year)
422423
- INTERVAL n unit - time interval (e.g., INTERVAL 7 DAY)
423424
425+
### Time Bucketing
426+
When the user wants to see data "over time", as a trend, or in any other time-series aggregation without naming a specific bucket size, prefer \`timeBucket()\` over manual \`toStartOfHour\`/\`toStartOfDay\` calls. \`timeBucket()\` automatically picks the right interval for the current time range.
427+
428+
\`\`\`sql
429+
-- Runs over time (bucket size auto-selected)
430+
SELECT timeBucket(), count() AS run_count
431+
FROM runs
432+
GROUP BY timeBucket
433+
ORDER BY timeBucket
434+
LIMIT 1000
435+
\`\`\`
436+
437+
Only use explicit \`toStartOfHour\`/\`toStartOfDay\` etc. if the user specifically requests a particular bucket size (e.g., "group by hour", "bucket by day").
438+
424439
### Common Patterns
425440
- Status filter: WHERE status = 'Failed' or WHERE status IN ('Failed', 'Crashed')
426441
- Time filtering: Use the \`setTimeFilter\` tool (NOT triggered_at in WHERE clause)
@@ -432,13 +447,14 @@ HAVING cnt > 10
432447
3. When column selection is ambiguous, use the core columns marked [CORE] in the schema
433448
4. **TIME FILTERING**: When the user wants to filter by time (e.g., "last 7 days", "past hour", "yesterday"), ALWAYS use the \`setTimeFilter\` tool instead of adding \`triggered_at\` conditions to the query. The UI has a time filter that will apply this automatically.
434449
5. Do NOT add \`triggered_at\` to WHERE clauses - use \`setTimeFilter\` tool instead. If the user doesn't specify a time period, do NOT add any time filter (the UI defaults to 7 days).
435-
6. ALWAYS use the validateTSQLQuery tool to check your query before returning it
436-
7. If validation fails, fix the issues and try again (up to 3 attempts)
437-
8. Use column names exactly as defined in the schema (case-sensitive)
438-
9. For enum columns like status, use the allowed values shown in the schema
439-
10. Always include a LIMIT clause (default to 100 if not specified)
440-
11. Use meaningful column aliases with AS for aggregations
441-
12. Format queries with proper indentation for readability
450+
6. **TIME BUCKETING**: When the user wants to see data over time or in time buckets, use \`timeBucket()\` in SELECT and reference it as \`timeBucket\` in GROUP BY / ORDER BY. Only use manual bucketing functions (toStartOfHour, toStartOfDay, etc.) when the user explicitly requests a specific bucket size.
451+
7. ALWAYS use the validateTSQLQuery tool to check your query before returning it
452+
8. If validation fails, fix the issues and try again (up to 3 attempts)
453+
9. Use column names exactly as defined in the schema (case-sensitive)
454+
10. For enum columns like status, use the allowed values shown in the schema
455+
11. Always include a LIMIT clause (default to 100 if not specified)
456+
12. Use meaningful column aliases with AS for aggregations
457+
13. Format queries with proper indentation for readability
442458
443459
## Response Format
444460
@@ -504,25 +520,38 @@ HAVING cnt > 10
504520
\`\`\`
505521
506522
### Date/Time Functions
523+
- timeBucket() - automatically bucket by time. Uses the table's time column and picks the best interval based on the query's time range. Use in SELECT and reference as \`timeBucket\` in GROUP BY / ORDER BY.
507524
- now() - current timestamp
508525
- today() - current date
509526
- toDate(datetime) - extract date
510527
- toStartOfDay/Hour/Minute(datetime)
511528
- dateDiff('unit', start, end) - difference in units (second, minute, hour, day, week, month, year)
512529
- INTERVAL n unit - time interval (e.g., INTERVAL 7 DAY)
513530
531+
### Time Bucketing
532+
When the user wants to see data "over time" or any other time-series aggregation, prefer \`timeBucket()\` over manual \`toStartOfHour\`/\`toStartOfDay\` calls. Only use the manual functions when the user specifically requests a particular bucket size (e.g., "by hour", "by day").
533+
534+
\`\`\`sql
535+
SELECT timeBucket(), count() AS run_count
536+
FROM runs
537+
GROUP BY timeBucket
538+
ORDER BY timeBucket
539+
LIMIT 1000
540+
\`\`\`
541+
514542
## Important Rules
515543
516544
1. NEVER use SELECT * - ClickHouse is a columnar database where SELECT * has very poor performance
517545
2. If the existing query uses SELECT *, replace it with specific columns (use core columns marked [CORE] as defaults)
518546
3. **TIME FILTERING**: When the user wants to change time filtering (e.g., "change to last 30 days"), use the \`setTimeFilter\` tool instead of modifying \`triggered_at\` conditions. If the existing query has \`triggered_at\` in WHERE, consider removing it and using \`setTimeFilter\` instead.
519-
4. ALWAYS use the validateTSQLQuery tool to check your modified query before returning it
520-
5. If validation fails, fix the issues and try again (up to 3 attempts)
521-
6. Use column names exactly as defined in the schema (case-sensitive)
522-
7. For enum columns like status, use the allowed values shown in the schema
523-
8. Always include a LIMIT clause (default to 100 if not specified)
524-
9. Preserve the user's existing query structure and style where possible
525-
10. Only make the changes specifically requested by the user
547+
4. **TIME BUCKETING**: When adding time-series grouping, use \`timeBucket()\` in SELECT and reference it as \`timeBucket\` in GROUP BY / ORDER BY. Only use manual bucketing functions (toStartOfHour, toStartOfDay, etc.) when the user explicitly requests a specific bucket size.
548+
5. ALWAYS use the validateTSQLQuery tool to check your modified query before returning it
549+
6. If validation fails, fix the issues and try again (up to 3 attempts)
550+
7. Use column names exactly as defined in the schema (case-sensitive)
551+
8. For enum columns like status, use the allowed values shown in the schema
552+
9. Always include a LIMIT clause (default to 100 if not specified)
553+
10. Preserve the user's existing query structure and style where possible
554+
11. Only make the changes specifically requested by the user
526555
527556
## Response Format
528557

internal-packages/clickhouse/src/client/tsql.ts

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import {
1414
type TableSchema,
1515
type QuerySettings,
1616
type FieldMappings,
17+
type TimeRange,
1718
type WhereClauseCondition
1819
} from "@internal/tsql";
1920
import type { ClickhouseReader, QueryStats } from "./types.js";
@@ -25,7 +26,7 @@ const logger = new Logger("tsql", "info");
2526

2627
export type { QueryStats };
2728

28-
export type { TableSchema, QuerySettings, FieldMappings, WhereClauseCondition };
29+
export type { TableSchema, QuerySettings, FieldMappings, TimeRange, WhereClauseCondition };
2930

3031
/**
3132
* Options for executing a TSQL query
@@ -101,6 +102,12 @@ export interface ExecuteTSQLOptions<TOut extends z.ZodSchema> {
101102
* ```
102103
*/
103104
whereClauseFallback?: Record<string, WhereClauseCondition>;
105+
/**
106+
* Time range for `timeBucket()` interval calculation.
107+
* When provided, `timeBucket()` uses this to determine the appropriate bucket size
108+
* based on the span of the time range.
109+
*/
110+
timeRange?: TimeRange;
104111
}
105112

106113
/**
@@ -183,6 +190,7 @@ export async function executeTSQL<TOut extends z.ZodSchema>(
183190
settings: compiledSettings,
184191
fieldMappings: options.fieldMappings,
185192
whereClauseFallback: options.whereClauseFallback,
193+
timeRange: options.timeRange,
186194
});
187195

188196
generatedSql = sql;

0 commit comments

Comments
 (0)