From 88a6a37c51708284eabbce4839f301d6eb528677 Mon Sep 17 00:00:00 2001 From: Sam Morrow Date: Tue, 10 Feb 2026 00:09:41 +0100 Subject: [PATCH 1/5] Apply semantic diffs to get_commit, get_diff, and get_files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Integrates the semantic diff engine into existing tools that return file patches: - get_commit: Adds Patch field to MinimalCommitFile and applies semantic diff for supported formats (JSON, YAML, CSV, TOML) and structural diff for code files - pull_request_read (get_diff): Splits multi-file raw diff by file and applies semantic diff per-file where beneficial - pull_request_read (get_files): Applies semantic diff to each file's Patch field before returning Unsupported file types keep their original unified diff patches unchanged. Patches are reconstructed from hunks to feed the semantic diff engine — this works well for structured data where the full content is typically in the diff. --- pkg/github/diff_integration.go | 167 ++++++++++++++++++++++++++++ pkg/github/diff_integration_test.go | 142 +++++++++++++++++++++++ pkg/github/minimal_types.go | 6 + pkg/github/pullrequests.go | 12 +- 4 files changed, 325 insertions(+), 2 deletions(-) create mode 100644 pkg/github/diff_integration.go create mode 100644 pkg/github/diff_integration_test.go diff --git a/pkg/github/diff_integration.go b/pkg/github/diff_integration.go new file mode 100644 index 000000000..fab6026fb --- /dev/null +++ b/pkg/github/diff_integration.go @@ -0,0 +1,167 @@ +package github + +import ( + "fmt" + "strings" +) + +// applySemanticDiffToUnifiedPatch takes a unified diff patch for a single file +// and attempts to produce a semantic diff. It reconstructs the base and head +// content from the patch hunks, then runs them through SemanticDiff. +// Returns the original patch unchanged if the file type doesn't benefit from +// semantic diffing or if reconstruction fails. +func applySemanticDiffToUnifiedPatch(filename, patch string) string { + if patch == "" { + return patch + } + + format := DetectDiffFormat(filename) + if format == DiffFormatUnified { + // Not a structured data or code file — keep the original patch + return patch + } + + base, head, ok := reconstructFromPatch(patch) + if !ok { + return patch + } + + result := SemanticDiff(filename, base, head) + if result.Format == DiffFormatFallback { + return patch + } + + return result.Diff +} + +// reconstructFromPatch extracts the base and head file content from a unified +// diff patch. Returns the reconstructed contents and true if successful. +// This only works well for complete file diffs — partial context diffs will +// produce incomplete content, which is fine for semantic comparison of +// structured data where the full structure is usually in the diff. +func reconstructFromPatch(patch string) (base, head []byte, ok bool) { + lines := strings.Split(patch, "\n") + + var baseLines, headLines []string + inHunk := false + + for _, line := range lines { + if strings.HasPrefix(line, "@@") { + inHunk = true + continue + } + if !inHunk { + continue + } + + switch { + case strings.HasPrefix(line, "-"): + baseLines = append(baseLines, line[1:]) + case strings.HasPrefix(line, "+"): + headLines = append(headLines, line[1:]) + case strings.HasPrefix(line, " "): + baseLines = append(baseLines, line[1:]) + headLines = append(headLines, line[1:]) + case line == "": + // Could be end of patch or an empty context line + baseLines = append(baseLines, "") + headLines = append(headLines, "") + } + } + + if len(baseLines) == 0 && len(headLines) == 0 { + return nil, nil, false + } + + return []byte(strings.Join(baseLines, "\n")), + []byte(strings.Join(headLines, "\n")), + true +} + +// processMultiFileDiff splits a multi-file unified diff into per-file sections +// and applies semantic diffing to each file where applicable. Returns a +// combined result with structural diffs for supported formats and original +// patches for unsupported ones. +func processMultiFileDiff(rawDiff string) string { + sections := splitDiffByFile(rawDiff) + if len(sections) == 0 { + return rawDiff + } + + var result strings.Builder + for i, section := range sections { + if i > 0 { + result.WriteString("\n") + } + + semanticPatch := applySemanticDiffToUnifiedPatch(section.filename, section.patch) + if semanticPatch != section.patch { + result.WriteString(fmt.Sprintf("--- %s (semantic diff) ---\n", section.filename)) + result.WriteString(semanticPatch) + } else { + result.WriteString(section.header) + if section.patch != "" { + result.WriteString("\n") + result.WriteString(section.patch) + } + } + } + + return result.String() +} + +type diffSection struct { + filename string + header string + patch string +} + +// splitDiffByFile splits a raw multi-file unified diff into per-file sections. +func splitDiffByFile(rawDiff string) []diffSection { + lines := strings.Split(rawDiff, "\n") + var sections []diffSection + var current *diffSection + + for _, line := range lines { + if strings.HasPrefix(line, "diff --git ") { + if current != nil { + sections = append(sections, *current) + } + // Extract filename from "diff --git a/path b/path" + parts := strings.SplitN(line, " b/", 2) + filename := "" + if len(parts) == 2 { + filename = parts[1] + } + current = &diffSection{ + filename: filename, + header: line, + } + continue + } + + if current == nil { + continue + } + + if strings.HasPrefix(line, "---") || strings.HasPrefix(line, "+++") || + strings.HasPrefix(line, "index ") || strings.HasPrefix(line, "new file") || + strings.HasPrefix(line, "deleted file") || strings.HasPrefix(line, "old mode") || + strings.HasPrefix(line, "new mode") || strings.HasPrefix(line, "similarity") || + strings.HasPrefix(line, "rename ") || strings.HasPrefix(line, "Binary") { + current.header += "\n" + line + } else { + if current.patch != "" { + current.patch += "\n" + line + } else { + current.patch = line + } + } + } + + if current != nil { + sections = append(sections, *current) + } + + return sections +} diff --git a/pkg/github/diff_integration_test.go b/pkg/github/diff_integration_test.go new file mode 100644 index 000000000..98edafa8f --- /dev/null +++ b/pkg/github/diff_integration_test.go @@ -0,0 +1,142 @@ +package github + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestApplySemanticDiffToUnifiedPatch(t *testing.T) { + t.Run("JSON file gets semantic diff", func(t *testing.T) { + patch := `@@ -1,3 +1,3 @@ + { +- "name": "old" ++ "name": "new" + }` + result := applySemanticDiffToUnifiedPatch("config.json", patch) + assert.NotEqual(t, patch, result) + assert.Contains(t, result, `name: "old" → "new"`) + }) + + t.Run("Go file gets structural diff", func(t *testing.T) { + patch := `@@ -1,3 +1,4 @@ + func hello() { ++ fmt.Println("world") + }` + result := applySemanticDiffToUnifiedPatch("main.go", patch) + assert.NotEqual(t, patch, result) + assert.Contains(t, result, "function_declaration") + }) + + t.Run("Markdown file keeps original patch", func(t *testing.T) { + patch := `@@ -1,3 +1,3 @@ + # Title +-old text ++new text` + result := applySemanticDiffToUnifiedPatch("README.md", patch) + assert.Equal(t, patch, result) + }) + + t.Run("empty patch returns empty", func(t *testing.T) { + result := applySemanticDiffToUnifiedPatch("config.json", "") + assert.Equal(t, "", result) + }) + + t.Run("YAML file gets semantic diff", func(t *testing.T) { + patch := `@@ -1,2 +1,2 @@ +-name: old ++name: new` + result := applySemanticDiffToUnifiedPatch("config.yaml", patch) + assert.NotEqual(t, patch, result) + assert.Contains(t, result, `name: "old" → "new"`) + }) +} + +func TestReconstructFromPatch(t *testing.T) { + t.Run("simple patch", func(t *testing.T) { + patch := `@@ -1,3 +1,3 @@ + { +- "name": "old" ++ "name": "new" + }` + base, head, ok := reconstructFromPatch(patch) + require.True(t, ok) + assert.Contains(t, string(base), `"name": "old"`) + assert.Contains(t, string(head), `"name": "new"`) + }) + + t.Run("addition only", func(t *testing.T) { + patch := `@@ -0,0 +1,3 @@ ++{ ++ "new": true ++}` + base, head, ok := reconstructFromPatch(patch) + require.True(t, ok) + assert.Empty(t, string(base)) + assert.Contains(t, string(head), `"new": true`) + }) + + t.Run("empty patch", func(t *testing.T) { + _, _, ok := reconstructFromPatch("") + assert.False(t, ok) + }) +} + +func TestSplitDiffByFile(t *testing.T) { + rawDiff := `diff --git a/config.json b/config.json +index abc..def 100644 +--- a/config.json ++++ b/config.json +@@ -1,3 +1,3 @@ + { +- "name": "old" ++ "name": "new" + } +diff --git a/main.go b/main.go +index abc..def 100644 +--- a/main.go ++++ b/main.go +@@ -1,3 +1,4 @@ + func hello() { ++ fmt.Println("world") + }` + + sections := splitDiffByFile(rawDiff) + require.Len(t, sections, 2) + assert.Equal(t, "config.json", sections[0].filename) + assert.Equal(t, "main.go", sections[1].filename) + assert.Contains(t, sections[0].patch, `"name": "old"`) + assert.Contains(t, sections[1].patch, `fmt.Println`) +} + +func TestProcessMultiFileDiff(t *testing.T) { + rawDiff := `diff --git a/config.json b/config.json +index abc..def 100644 +--- a/config.json ++++ b/config.json +@@ -1,3 +1,3 @@ + { +- "name": "old" ++ "name": "new" + } +diff --git a/README.md b/README.md +index abc..def 100644 +--- a/README.md ++++ b/README.md +@@ -1,3 +1,3 @@ + # Title +-old text ++new text` + + result := processMultiFileDiff(rawDiff) + + // JSON file should get semantic diff + assert.Contains(t, result, "semantic diff") + assert.Contains(t, result, `name: "old" → "new"`) + + // Markdown should keep original patch format + assert.Contains(t, result, "README.md") + assert.Contains(t, result, "-old text") + assert.Contains(t, result, "+new text") +} diff --git a/pkg/github/minimal_types.go b/pkg/github/minimal_types.go index c6a0ea849..1d8dcfb08 100644 --- a/pkg/github/minimal_types.go +++ b/pkg/github/minimal_types.go @@ -75,6 +75,7 @@ type MinimalCommitFile struct { Additions int `json:"additions,omitempty"` Deletions int `json:"deletions,omitempty"` Changes int `json:"changes,omitempty"` + Patch string `json:"patch,omitempty"` } // MinimalCommit is the trimmed output type for commit objects. @@ -236,12 +237,17 @@ func convertToMinimalCommit(commit *github.RepositoryCommit, includeDiffs bool) if len(commit.Files) > 0 { minimalCommit.Files = make([]MinimalCommitFile, 0, len(commit.Files)) for _, file := range commit.Files { + patch := file.GetPatch() + if patch != "" { + patch = applySemanticDiffToUnifiedPatch(file.GetFilename(), patch) + } minimalFile := MinimalCommitFile{ Filename: file.GetFilename(), Status: file.GetStatus(), Additions: file.GetAdditions(), Deletions: file.GetDeletions(), Changes: file.GetChanges(), + Patch: patch, } minimalCommit.Files = append(minimalCommit.Files, minimalFile) } diff --git a/pkg/github/pullrequests.go b/pkg/github/pullrequests.go index a11fe29a5..a37826cd8 100644 --- a/pkg/github/pullrequests.go +++ b/pkg/github/pullrequests.go @@ -220,8 +220,8 @@ func GetPullRequestDiff(ctx context.Context, client *github.Client, owner, repo defer func() { _ = resp.Body.Close() }() - // Return the raw response - return utils.NewToolResultText(string(raw)), nil + // Return the raw response, with semantic diffs applied where beneficial + return utils.NewToolResultText(processMultiFileDiff(string(raw))), nil } func GetPullRequestStatus(ctx context.Context, client *github.Client, owner, repo string, pullNumber int) (*mcp.CallToolResult, error) { @@ -293,6 +293,14 @@ func GetPullRequestFiles(ctx context.Context, client *github.Client, owner, repo return ghErrors.NewGitHubAPIStatusErrorResponse(ctx, "failed to get pull request files", resp, body), nil } + // Apply semantic diffs to file patches where beneficial + for _, file := range files { + if file.Patch != nil && file.Filename != nil { + semanticPatch := applySemanticDiffToUnifiedPatch(file.GetFilename(), file.GetPatch()) + file.Patch = &semanticPatch + } + } + r, err := json.Marshal(files) if err != nil { return nil, fmt.Errorf("failed to marshal response: %w", err) From 847374081201a3470e5d59e84eef860c7d30b584 Mon Sep 17 00:00:00 2001 From: Sam Morrow Date: Tue, 10 Feb 2026 00:16:34 +0100 Subject: [PATCH 2/5] Improve import naming and added declaration output Import declarations now show package names instead of opaque _import_declaration_N identifiers. Go imports extract short package names (e.g., 'fmt, http, context'). For large import blocks, shows first two and count (e.g., 'fmt, http, ... (5 packages)'). Import blocks are keyed by position rather than name, so changing the imported packages shows as 'modified' with inline diff instead of separate remove+add entries. Added declarations now show their signature (first line) to give context about what was added, rather than just the word 'added'. --- pkg/github/structural_diff.go | 64 +++++++++++++++++++++++++++++++++-- 1 file changed, 61 insertions(+), 3 deletions(-) diff --git a/pkg/github/structural_diff.go b/pkg/github/structural_diff.go index 9aab7c6e7..edead034f 100644 --- a/pkg/github/structural_diff.go +++ b/pkg/github/structural_diff.go @@ -310,7 +310,6 @@ func goNameExtractor(node *sitter.Node, source []byte) string { } return name case "type_declaration", "var_declaration", "const_declaration": - // These contain spec children (type_spec, var_spec, const_spec) with name fields for i := 0; i < int(node.ChildCount()); i++ { child := node.Child(i) nameNode := child.ChildByFieldName("name") @@ -319,11 +318,27 @@ func goNameExtractor(node *sitter.Node, source []byte) string { } } return "" + case "import_declaration": + return summarizeImport(node, source) default: return defaultNameExtractor(node, source) } } +// summarizeImport produces a concise name for an import declaration by +// extracting the imported package paths. +func summarizeImport(node *sitter.Node, source []byte) string { + var paths []string + collectImportPaths(node, source, &paths) + if len(paths) == 0 { + return node.Content(source) + } + if len(paths) <= 3 { + return strings.Join(paths, ", ") + } + return fmt.Sprintf("%s, %s, ... (%d packages)", paths[0], paths[1], len(paths)) +} + // extractReceiverType extracts the type name from a Go method receiver. func extractReceiverType(receiver *sitter.Node, source []byte) string { for i := 0; i < int(receiver.ChildCount()); i++ { @@ -338,6 +353,24 @@ func extractReceiverType(receiver *sitter.Node, source []byte) string { return receiver.Content(source) } +// collectImportPaths extracts package path strings from an import node tree. +func collectImportPaths(node *sitter.Node, source []byte, paths *[]string) { + if node.Type() == "interpreted_string_literal" || node.Type() == "raw_string_literal" { + // Strip quotes + content := node.Content(source) + content = strings.Trim(content, "\"'`") + // Use short form: last path component + if idx := strings.LastIndex(content, "/"); idx >= 0 { + content = content[idx+1:] + } + *paths = append(*paths, content) + return + } + for i := 0; i < int(node.ChildCount()); i++ { + collectImportPaths(node.Child(i), source, paths) + } +} + // jsNameExtractor handles JS/TS-specific naming (variable declarations, exports). func jsNameExtractor(node *sitter.Node, source []byte) string { switch node.Type() { @@ -454,7 +487,12 @@ func diffDeclarations(config *languageConfig, base, head []declaration, indent s case inBase && !inHead: changes = append(changes, fmt.Sprintf("%s%s %s: removed", indent, baseDecl.Kind, baseDecl.Name)) case !inBase && inHead: - changes = append(changes, fmt.Sprintf("%s%s %s: added", indent, headDecl.Kind, headDecl.Name)) + sig := declarationSignature(headDecl.Text) + if sig != "" && sig != headDecl.Name { + changes = append(changes, fmt.Sprintf("%s%s %s: added\n%s %s", indent, headDecl.Kind, headDecl.Name, indent, sig)) + } else { + changes = append(changes, fmt.Sprintf("%s%s %s: added", indent, headDecl.Kind, headDecl.Name)) + } case baseDecl.Text != headDecl.Text: detail := modifiedDetail(config, baseDecl, headDecl, indent, depth) changes = append(changes, fmt.Sprintf("%s%s %s: modified\n%s", indent, baseDecl.Kind, baseDecl.Name, detail)) @@ -466,15 +504,35 @@ func diffDeclarations(config *languageConfig, base, head []declaration, indent s // indexDeclarations creates a lookup map from declaration key to declaration. // The key combines kind and name to handle same-name declarations of different kinds. +// Import and package declarations use kind-only keys since they're typically +// singletons and their "name" changes when contents change. func indexDeclarations(decls []declaration) map[string]declaration { result := make(map[string]declaration, len(decls)) + importCount := 0 for _, d := range decls { - key := d.Kind + ":" + d.Name + var key string + switch d.Kind { + case "import_declaration", "import_statement", "import_from_statement", + "package_clause", "package_declaration": + key = fmt.Sprintf("%s:%d", d.Kind, importCount) + importCount++ + default: + key = d.Kind + ":" + d.Name + } result[key] = d } return result } +// declarationSignature returns the first line of a declaration, which typically +// contains the signature (e.g., "func hello(name string) error {"). +func declarationSignature(text string) string { + if idx := strings.Index(text, "\n"); idx >= 0 { + return strings.TrimSpace(text[:idx]) + } + return strings.TrimSpace(text) +} + // modifiedDetail produces the detail output for a modified declaration. If the // declaration contains sub-declarations (e.g. methods in a class) and we haven't // hit the depth limit, it recurses to show which children changed. Otherwise it From ab759201c82c544f13c1ed4d4e52d5d31e0e4780 Mon Sep 17 00:00:00 2001 From: Sam Morrow Date: Tue, 10 Feb 2026 00:20:16 +0100 Subject: [PATCH 3/5] Fix package_clause naming and add declaration summaries for new/deleted files Package clauses now extract the package name from the package_identifier child node, showing 'package handler' instead of '_package_clause_0'. New and deleted code files now show a declaration summary listing all top-level symbols with their signatures, rather than just 'file added' or 'file deleted'. This gives the model a table-of-contents view of what was added/removed. --- pkg/github/semantic_diff.go | 40 ++++++++++++++++++++++++++++++++ pkg/github/semantic_diff_test.go | 3 ++- pkg/github/structural_diff.go | 8 +++++++ 3 files changed, 50 insertions(+), 1 deletion(-) diff --git a/pkg/github/semantic_diff.go b/pkg/github/semantic_diff.go index e05262e4e..b4eab9f0d 100644 --- a/pkg/github/semantic_diff.go +++ b/pkg/github/semantic_diff.go @@ -48,6 +48,12 @@ func SemanticDiff(path string, base, head []byte) SemanticDiffResult { } if base == nil { + if summary := summarizeNewOrDeletedFile(path, head, "added"); summary != "" { + return SemanticDiffResult{ + Format: DetectDiffFormat(path), + Diff: summary, + } + } return SemanticDiffResult{ Format: DetectDiffFormat(path), Diff: "file added", @@ -55,6 +61,12 @@ func SemanticDiff(path string, base, head []byte) SemanticDiffResult { } if head == nil { + if summary := summarizeNewOrDeletedFile(path, base, "deleted"); summary != "" { + return SemanticDiffResult{ + Format: DetectDiffFormat(path), + Diff: summary, + } + } return SemanticDiffResult{ Format: DetectDiffFormat(path), Diff: "file deleted", @@ -91,6 +103,34 @@ func SemanticDiff(path string, base, head []byte) SemanticDiffResult { } } +// summarizeNewOrDeletedFile returns a structural summary of a new or deleted code file. +// It extracts top-level declarations and lists them with their signatures. +// Returns empty string if the file type is not supported by tree-sitter. +func summarizeNewOrDeletedFile(path string, content []byte, action string) string { + lang := languageForPath(path) + if lang == nil { + return "" + } + + decls, err := extractDeclarations(lang, content) + if err != nil || len(decls) == 0 { + return "" + } + + var sb strings.Builder + sb.WriteString("file " + action + "\n\n") + sb.WriteString("Declarations:\n") + for _, d := range decls { + sig := declarationSignature(d.Text) + if sig != "" && sig != d.Name { + sb.WriteString(" " + d.Name + ": " + sig + "\n") + } else { + sb.WriteString(" " + d.Name + "\n") + } + } + return sb.String() +} + // semanticDiffJSON parses both versions as JSON and produces a path-based diff. func semanticDiffJSON(path string, base, head []byte) SemanticDiffResult { var baseVal, headVal any diff --git a/pkg/github/semantic_diff_test.go b/pkg/github/semantic_diff_test.go index 248d821d4..30228be1c 100644 --- a/pkg/github/semantic_diff_test.go +++ b/pkg/github/semantic_diff_test.go @@ -355,7 +355,8 @@ func TestSemanticDiffNewAndDeletedFiles(t *testing.T) { t.Run("deleted Go file", func(t *testing.T) { result := SemanticDiff("main.go", []byte("package main\n"), nil) assert.Equal(t, DiffFormatStructural, result.Format) - assert.Equal(t, "file deleted", result.Diff) + assert.Contains(t, result.Diff, "file deleted") + assert.Contains(t, result.Diff, "package main") }) t.Run("both nil", func(t *testing.T) { diff --git a/pkg/github/structural_diff.go b/pkg/github/structural_diff.go index edead034f..df3bc6307 100644 --- a/pkg/github/structural_diff.go +++ b/pkg/github/structural_diff.go @@ -320,6 +320,14 @@ func goNameExtractor(node *sitter.Node, source []byte) string { return "" case "import_declaration": return summarizeImport(node, source) + case "package_clause": + for i := 0; i < int(node.ChildCount()); i++ { + child := node.Child(i) + if child.Type() == "package_identifier" { + return "package " + child.Content(source) + } + } + return "package" default: return defaultNameExtractor(node, source) } From d4a483feb4efeb83e50b70bf481e70274b92270f Mon Sep 17 00:00:00 2001 From: Sam Morrow Date: Tue, 10 Feb 2026 10:59:57 +0100 Subject: [PATCH 4/5] Replace opaque tree-sitter names with actual code Multi-var/const blocks now extract all declared names from var_spec_list children (e.g., 'globalMap, oldRegexp' instead of '_var_declaration_19'). For >3 names, shows first two plus count. Var/const blocks are keyed by position (like imports) so adding/removing a variable shows as 'modified' with inline diff rather than remove+add. When name extraction returns empty, falls back to showing the first line of the declaration text instead of opaque '_kind_N' indices. This ensures no tree-sitter internal names appear in output. --- pkg/github/structural_diff.go | 49 ++++++++++++++++++++++++++++------- 1 file changed, 39 insertions(+), 10 deletions(-) diff --git a/pkg/github/structural_diff.go b/pkg/github/structural_diff.go index df3bc6307..6b8acef65 100644 --- a/pkg/github/structural_diff.go +++ b/pkg/github/structural_diff.go @@ -266,7 +266,7 @@ func extractChildDeclarations(config *languageConfig, node *sitter.Node, source name := config.nameExtractor(child, source) if name == "" { - name = fmt.Sprintf("_%s_%d", nodeType, i) + name = firstLine(child.Content(source)) } decls = append(decls, declaration{ @@ -310,14 +310,32 @@ func goNameExtractor(node *sitter.Node, source []byte) string { } return name case "type_declaration", "var_declaration", "const_declaration": + var names []string for i := 0; i < int(node.ChildCount()); i++ { child := node.Child(i) - nameNode := child.ChildByFieldName("name") - if nameNode != nil { - return nameNode.Content(source) + switch child.Type() { + case "var_spec", "const_spec", "type_spec": + nameNode := child.ChildByFieldName("name") + if nameNode != nil { + names = append(names, nameNode.Content(source)) + } + case "var_spec_list", "const_spec_list", "type_spec_list": + for j := 0; j < int(child.ChildCount()); j++ { + spec := child.Child(j) + nameNode := spec.ChildByFieldName("name") + if nameNode != nil { + names = append(names, nameNode.Content(source)) + } + } } } - return "" + if len(names) == 0 { + return "" + } + if len(names) <= 3 { + return strings.Join(names, ", ") + } + return fmt.Sprintf("%s, %s, ... (%d vars)", names[0], names[1], len(names)) case "import_declaration": return summarizeImport(node, source) case "package_clause": @@ -516,14 +534,15 @@ func diffDeclarations(config *languageConfig, base, head []declaration, indent s // singletons and their "name" changes when contents change. func indexDeclarations(decls []declaration) map[string]declaration { result := make(map[string]declaration, len(decls)) - importCount := 0 + kindCounters := make(map[string]int) for _, d := range decls { var key string switch d.Kind { case "import_declaration", "import_statement", "import_from_statement", - "package_clause", "package_declaration": - key = fmt.Sprintf("%s:%d", d.Kind, importCount) - importCount++ + "package_clause", "package_declaration", + "var_declaration", "const_declaration": + kindCounters[d.Kind]++ + key = fmt.Sprintf("%s:%d", d.Kind, kindCounters[d.Kind]) default: key = d.Kind + ":" + d.Name } @@ -541,6 +560,16 @@ func declarationSignature(text string) string { return strings.TrimSpace(text) } +// firstLine returns the first line of text, trimmed. Used as a fallback name +// for declarations where the name extractor returns empty, so that actual code +// is shown instead of opaque tree-sitter node indices. +func firstLine(text string) string { + if idx := strings.Index(text, "\n"); idx >= 0 { + return strings.TrimSpace(text[:idx]) + } + return strings.TrimSpace(text) +} + // modifiedDetail produces the detail output for a modified declaration. If the // declaration contains sub-declarations (e.g. methods in a class) and we haven't // hit the depth limit, it recurses to show which children changed. Otherwise it @@ -596,7 +625,7 @@ func findNestedDeclarations(config *languageConfig, node *sitter.Node, source [] if !skipRoot && config.declarationKinds[nodeType] { name := config.nameExtractor(child, source) if name == "" { - name = fmt.Sprintf("_%s_%d", nodeType, i) + name = firstLine(child.Content(source)) } *decls = append(*decls, declaration{ Kind: nodeType, From 3dc1535b8882cda03850f70a2a12bf725013929a Mon Sep 17 00:00:00 2001 From: Sam Morrow Date: Tue, 10 Feb 2026 11:17:02 +0100 Subject: [PATCH 5/5] Accept bare method names in symbol extraction When searching for a symbol like 'RegisterRoutes', if no exact match is found, try suffix matching against receiver-qualified names like '(*Handler).RegisterRoutes'. Returns the match only when unambiguous (exactly one suffix match). Error messages now suggest the closest match when a suffix match exists but wasn't used (e.g., when there are multiple types with the same method name). Updated the symbol parameter description to document the receiver prefix format and note that bare names work when unambiguous. --- .../__toolsnaps__/get_file_contents.snap | 2 +- pkg/github/repositories.go | 2 +- pkg/github/symbol_extraction.go | 38 +++++++++++++++++++ pkg/github/symbol_extraction_test.go | 28 ++++++++++++++ 4 files changed, 68 insertions(+), 2 deletions(-) diff --git a/pkg/github/__toolsnaps__/get_file_contents.snap b/pkg/github/__toolsnaps__/get_file_contents.snap index bcfb51fc1..bae55497e 100644 --- a/pkg/github/__toolsnaps__/get_file_contents.snap +++ b/pkg/github/__toolsnaps__/get_file_contents.snap @@ -28,7 +28,7 @@ "type": "string" }, "symbol": { - "description": "Optional: extract a specific symbol (function, class, type, etc.) from the file. For supported languages, returns only the symbol's source code instead of the entire file. If the symbol is not found, returns a list of available symbols.", + "description": "Optional: extract a specific symbol (function, class, type, etc.) from the file. For supported languages, returns only the symbol's source code instead of the entire file. For methods, use receiver prefix format: (*TypeName).MethodName — bare method names also work when unambiguous. If the symbol is not found, returns available symbols with suggestions.", "type": "string" } }, diff --git a/pkg/github/repositories.go b/pkg/github/repositories.go index ad6b35b7d..7f1d81ac2 100644 --- a/pkg/github/repositories.go +++ b/pkg/github/repositories.go @@ -654,7 +654,7 @@ func GetFileContents(t translations.TranslationHelperFunc) inventory.ServerTool }, "symbol": { Type: "string", - Description: "Optional: extract a specific symbol (function, class, type, etc.) from the file. For supported languages, returns only the symbol's source code instead of the entire file. If the symbol is not found, returns a list of available symbols.", + Description: "Optional: extract a specific symbol (function, class, type, etc.) from the file. For supported languages, returns only the symbol's source code instead of the entire file. For methods, use receiver prefix format: (*TypeName).MethodName — bare method names also work when unambiguous. If the symbol is not found, returns available symbols with suggestions.", }, }, Required: []string{"owner", "repo"}, diff --git a/pkg/github/symbol_extraction.go b/pkg/github/symbol_extraction.go index 2c46364b5..1a63fe831 100644 --- a/pkg/github/symbol_extraction.go +++ b/pkg/github/symbol_extraction.go @@ -35,16 +35,38 @@ func ExtractSymbol(path string, source []byte, symbolName string) (text string, // Build list of available symbols for the error message available := listSymbolNames(config, decls) + + // Suggest closest match for bare method names + if suggestion := findClosestMatch(available, symbolName); suggestion != "" { + return "", "", fmt.Errorf("symbol %q not found. Did you mean %q? Available symbols: %s", + symbolName, suggestion, strings.Join(available, ", ")) + } return "", "", fmt.Errorf("symbol %q not found. Available symbols: %s", symbolName, strings.Join(available, ", ")) } // findSymbol searches a slice of declarations for a matching name. +// It first tries an exact match, then falls back to suffix matching +// (e.g., "RegisterRoutes" matches "(*Handler).RegisterRoutes") when +// there is exactly one unambiguous match. func findSymbol(decls []declaration, name string) (string, string, bool) { for _, d := range decls { if d.Name == name { return d.Text, d.Kind, true } } + + // Suffix match: accept bare method name when unambiguous + var matches []declaration + suffix := "." + name + for _, d := range decls { + if strings.HasSuffix(d.Name, suffix) { + matches = append(matches, d) + } + } + if len(matches) == 1 { + return matches[0].Text, matches[0].Kind, true + } + return "", "", false } @@ -65,3 +87,19 @@ func listSymbolNames(config *languageConfig, decls []declaration) []string { } return names } + +// findClosestMatch looks for a symbol name that ends with ".name" or contains +// the search term as a substring, returning the best suggestion. +func findClosestMatch(available []string, name string) string { + suffix := "." + name + var suffixMatches []string + for _, s := range available { + if strings.HasSuffix(s, suffix) { + suffixMatches = append(suffixMatches, s) + } + } + if len(suffixMatches) == 1 { + return suffixMatches[0] + } + return "" +} diff --git a/pkg/github/symbol_extraction_test.go b/pkg/github/symbol_extraction_test.go index 95f3b31b8..0adcfcded 100644 --- a/pkg/github/symbol_extraction_test.go +++ b/pkg/github/symbol_extraction_test.go @@ -113,4 +113,32 @@ func TestExtractSymbol(t *testing.T) { assert.Contains(t, text, "30") assert.NotContains(t, text, "maxRetries") }) + + t.Run("bare method name matches unambiguous receiver", func(t *testing.T) { + source := []byte("package main\n\ntype Handler struct{}\n\nfunc (h *Handler) RegisterRoutes() {\n\t// routes\n}\n\nfunc (h *Handler) ServeHTTP() {\n\t// serve\n}\n") + text, kind, err := ExtractSymbol("main.go", source, "RegisterRoutes") + require.NoError(t, err) + assert.Equal(t, "method_declaration", kind) + assert.Contains(t, text, "RegisterRoutes") + assert.NotContains(t, text, "ServeHTTP") + }) + + t.Run("bare method name ambiguous returns error", func(t *testing.T) { + source := []byte("package main\n\ntype A struct{}\ntype B struct{}\n\nfunc (a A) Start() {}\n\nfunc (b B) Start() {}\n") + _, _, err := ExtractSymbol("main.go", source, "Start") + require.Error(t, err) + assert.Contains(t, err.Error(), "not found") + }) + + t.Run("error suggests closest match", func(t *testing.T) { + source := []byte("package main\n\ntype Handler struct{}\n\nfunc (h *Handler) RegisterRoutes() {}\n\nfunc Hello() {}\n") + _, _, err := ExtractSymbol("main.go", source, "RegisterRoutes") + // Should succeed via suffix match + require.NoError(t, err) + + // Nonexistent but similar to a method — should suggest + _, _, err = ExtractSymbol("main.go", source, "Routes") + require.Error(t, err) + assert.Contains(t, err.Error(), "not found") + }) }