From ef7f434aa8c334ae913bbc07b22b3d21ab0ec934 Mon Sep 17 00:00:00 2001 From: "wangzekun.zekin" Date: Mon, 26 Jan 2026 11:04:31 +0800 Subject: [PATCH 1/4] feat: add Extra field to store additional AST node metadata Collect direct calls and anonymous functions in Go parser and store them in the Extra field of Function, Dependency, Type, and Var. --- lang/golang/parser/file.go | 69 ++++++++++++++++++++++++++++++++++++-- lang/uniast/ast.go | 9 +++++ 2 files changed, 75 insertions(+), 3 deletions(-) diff --git a/lang/golang/parser/file.go b/lang/golang/parser/file.go index a2e8bac6..8f2b8b4a 100644 --- a/lang/golang/parser/file.go +++ b/lang/golang/parser/file.go @@ -121,7 +121,9 @@ func (p *GoParser) parseVar(ctx *fileContext, vspec *ast.ValueSpec, isConst bool // collect func value dependencies, in case of var a = func() {...} if val != nil && !isConst { - collects := collectInfos{} + collects := collectInfos{ + directCalls: map[FileLine]bool{}, + } ast.Inspect(*val, func(n ast.Node) bool { return p.parseASTNode(ctx, n, &collects) }) @@ -137,6 +139,20 @@ func (p *GoParser) parseVar(ctx *fileContext, vspec *ast.ValueSpec, isConst bool for _, dep := range collects.tys { v.Dependencies = InsertDependency(v.Dependencies, dep) } + if len(collects.directCalls) > 0 { + for i, dep := range v.Dependencies { + if collects.directCalls[dep.FileLine] { + if v.Dependencies[i].Extra == nil { + v.Dependencies[i].Extra = map[string]any{} + } + v.Dependencies[i].Extra["FunctionIsCall"] = true + } + } + } + if len(collects.anonymousFunctions) > 0 { + v.Extra = map[string]any{} + v.Extra["AnonymousFunctions"] = collects.anonymousFunctions + } } if vspec.Type != nil { @@ -392,12 +408,19 @@ func (p *GoParser) parseSelector(ctx *fileContext, expr *ast.SelectorExpr, infos type collectInfos struct { functionCalls, methodCalls []Dependency tys, globalVars []Dependency + + directCalls map[FileLine]bool + anonymousFunctions []FileLine // record anonymous function } func (p *GoParser) parseASTNode(ctx *fileContext, node ast.Node, collect *collectInfos) bool { switch expr := node.(type) { case *ast.SelectorExpr: return p.parseSelector(ctx, expr, collect) + case *ast.CallExpr: + p.parseCall(ctx, expr, collect) + case *ast.FuncLit: + collect.anonymousFunctions = append(collect.anonymousFunctions, ctx.FileLine(expr)) case *ast.Ident: callName := expr.Name // println("[parseFunc] ast.Ident:", callName) @@ -462,6 +485,22 @@ func (p *GoParser) parseASTNode(ctx *fileContext, node ast.Node, collect *collec return true } +// parseCall collect direct call info +func (p *GoParser) parseCall(ctx *fileContext, expr *ast.CallExpr, collect *collectInfos) { + var ident *ast.Ident + + switch idt := expr.Fun.(type) { + case *ast.Ident: + ident = idt + case *ast.SelectorExpr: + ident = idt.Sel + } + + if ident != nil { + collect.directCalls[ctx.FileLine(ident)] = true + } +} + // parseFunc parses all function declaration in one file func (p *GoParser) parseFunc(ctx *fileContext, funcDecl *ast.FuncDecl) (*Function, bool) { // method receiver @@ -511,7 +550,9 @@ func (p *GoParser) parseFunc(ctx *fileContext, funcDecl *ast.FuncDecl) (*Functio // collect content content := string(ctx.GetRawContent(funcDecl)) - collects := collectInfos{} + collects := collectInfos{ + directCalls: map[FileLine]bool{}, + } if funcDecl.Body == nil { goto set_func } @@ -521,7 +562,6 @@ func (p *GoParser) parseFunc(ctx *fileContext, funcDecl *ast.FuncDecl) (*Functio }) set_func: - if fname == "init" && p.repo.GetFunction(NewIdentity(ctx.module.Name, ctx.pkgPath, fname)) != nil { // according to https://go.dev/ref/spec#Program_initialization_and_execution, // duplicated init() is allowed and never be referenced, thus add a subfix @@ -544,6 +584,29 @@ set_func: f.Types = InsertDependency(f.Types, t) } f.Signature = string(sig) + + if len(collects.directCalls) > 0 { + for i, dep := range f.FunctionCalls { + if collects.directCalls[dep.FileLine] { + if f.FunctionCalls[i].Extra == nil { + f.FunctionCalls[i].Extra = map[string]any{} + } + f.FunctionCalls[i].Extra["FunctionIsCall"] = true + } + } + for i, dep := range f.MethodCalls { + if collects.directCalls[dep.FileLine] { + if f.MethodCalls[i].Extra == nil { + f.MethodCalls[i].Extra = map[string]any{} + } + f.MethodCalls[i].Extra["FunctionIsCall"] = true + } + } + } + if len(collects.anonymousFunctions) > 0 { + f.Extra = map[string]any{} + f.Extra["AnonymousFunctions"] = collects.anonymousFunctions + } return f, false } diff --git a/lang/uniast/ast.go b/lang/uniast/ast.go index 873364bb..8f0f109a 100644 --- a/lang/uniast/ast.go +++ b/lang/uniast/ast.go @@ -510,11 +510,14 @@ type Function struct { // func llm compress result CompressData *string `json:"compress_data,omitempty"` + + Extra map[string]any `json:",omitempty"` } type Dependency struct { Identity FileLine `json:",omitempty"` + Extra map[string]any `json:",omitempty"` } func (d Dependency) Id() Identity { @@ -607,6 +610,9 @@ type Type struct { // FieldFunctions map[string]string CompressData *string `json:"compress_data,omitempty"` // struct llm compress result + + // extra data + Extra map[string]any `json:",omitempty"` } type Var struct { @@ -623,4 +629,7 @@ type Var struct { Groups []Identity `json:",omitempty"` CompressData *string `json:"compress_data,omitempty"` + + // extra data + Extra map[string]any `json:",omitempty"` } From 82177abbff7b3ea5a57e9ca3b6a39b33482bcbe3 Mon Sep 17 00:00:00 2001 From: "wangzekun.zekin" Date: Mon, 26 Jan 2026 15:28:55 +0800 Subject: [PATCH 2/4] feat: add custom extra info --- go.mod | 2 +- lang/golang/parser/file.go | 21 ++------- lang/uniast/ast.go | 97 ++++++++++++++++++++++++++++++++++++-- 3 files changed, 99 insertions(+), 21 deletions(-) diff --git a/go.mod b/go.mod index 60a83f02..3687cda2 100644 --- a/go.mod +++ b/go.mod @@ -4,6 +4,7 @@ go 1.23.4 require ( github.com/Knetic/govaluate v3.0.1-0.20171022003610-9aa49832a739+incompatible + github.com/bytedance/sonic v1.14.1 github.com/cloudwego/eino v0.3.52 github.com/cloudwego/eino-ext/components/model/ark v0.1.16 github.com/cloudwego/eino-ext/components/model/claude v0.1.1 @@ -43,7 +44,6 @@ require ( github.com/bahlo/generic-list-go v0.2.0 // indirect github.com/buger/jsonparser v1.1.1 // indirect github.com/bytedance/gopkg v0.1.3 // indirect - github.com/bytedance/sonic v1.14.1 // indirect github.com/bytedance/sonic/loader v0.3.0 // indirect github.com/cenkalti/backoff/v4 v4.3.0 // indirect github.com/cloudwego/base64x v0.1.6 // indirect diff --git a/lang/golang/parser/file.go b/lang/golang/parser/file.go index 8f2b8b4a..12b748c7 100644 --- a/lang/golang/parser/file.go +++ b/lang/golang/parser/file.go @@ -142,16 +142,12 @@ func (p *GoParser) parseVar(ctx *fileContext, vspec *ast.ValueSpec, isConst bool if len(collects.directCalls) > 0 { for i, dep := range v.Dependencies { if collects.directCalls[dep.FileLine] { - if v.Dependencies[i].Extra == nil { - v.Dependencies[i].Extra = map[string]any{} - } - v.Dependencies[i].Extra["FunctionIsCall"] = true + v.Dependencies[i].SetExtra("FunctionIsCall", true) } } } if len(collects.anonymousFunctions) > 0 { - v.Extra = map[string]any{} - v.Extra["AnonymousFunctions"] = collects.anonymousFunctions + v.SetExtra("AnonymousFunctions", collects.anonymousFunctions) } } @@ -588,24 +584,17 @@ set_func: if len(collects.directCalls) > 0 { for i, dep := range f.FunctionCalls { if collects.directCalls[dep.FileLine] { - if f.FunctionCalls[i].Extra == nil { - f.FunctionCalls[i].Extra = map[string]any{} - } - f.FunctionCalls[i].Extra["FunctionIsCall"] = true + f.FunctionCalls[i].SetExtra("FunctionIsCall", true) } } for i, dep := range f.MethodCalls { if collects.directCalls[dep.FileLine] { - if f.MethodCalls[i].Extra == nil { - f.MethodCalls[i].Extra = map[string]any{} - } - f.MethodCalls[i].Extra["FunctionIsCall"] = true + f.MethodCalls[i].SetExtra("FunctionIsCall", true) } } } if len(collects.anonymousFunctions) > 0 { - f.Extra = map[string]any{} - f.Extra["AnonymousFunctions"] = collects.anonymousFunctions + f.SetExtra("AnonymousFunctions", collects.anonymousFunctions) } return f, false } diff --git a/lang/uniast/ast.go b/lang/uniast/ast.go index 8f0f109a..ce30036c 100644 --- a/lang/uniast/ast.go +++ b/lang/uniast/ast.go @@ -23,6 +23,7 @@ import ( "strconv" "strings" + "github.com/bytedance/sonic" "golang.org/x/tools/go/packages" ) @@ -511,13 +512,13 @@ type Function struct { // func llm compress result CompressData *string `json:"compress_data,omitempty"` - Extra map[string]any `json:",omitempty"` + Extra *ExtraInfo `json:",omitempty"` } type Dependency struct { Identity FileLine `json:",omitempty"` - Extra map[string]any `json:",omitempty"` + Extra *ExtraInfo `json:",omitempty"` } func (d Dependency) Id() Identity { @@ -612,7 +613,7 @@ type Type struct { CompressData *string `json:"compress_data,omitempty"` // struct llm compress result // extra data - Extra map[string]any `json:",omitempty"` + Extra *ExtraInfo `json:",omitempty"` } type Var struct { @@ -631,5 +632,93 @@ type Var struct { CompressData *string `json:"compress_data,omitempty"` // extra data - Extra map[string]any `json:",omitempty"` + Extra *ExtraInfo `json:",omitempty"` +} + +type ExtraInfo struct { + data map[string]any +} + +func (e *ExtraInfo) MarshalJSON() ([]byte, error) { + return sonic.Marshal(e.data) +} + +func (e *ExtraInfo) UnmarshalJSON(data []byte) error { + return sonic.Unmarshal(data, &e.data) +} + +func (t *Type) GetExtra(key string) any { + if t.Extra == nil { + return nil + } + if v, ok := t.Extra.data[key]; ok { + return v + } + return nil +} + +func (e *Type) SetExtra(key string, value any) { + if e.Extra == nil { + e.Extra = &ExtraInfo{ + data: make(map[string]any), + } + } + e.Extra.data[key] = value +} + +func (v *Var) GetExtra(key string) any { + if v.Extra == nil { + return nil + } + if v, ok := v.Extra.data[key]; ok { + return v + } + return nil +} + +func (v *Var) SetExtra(key string, value any) { + if v.Extra == nil { + v.Extra = &ExtraInfo{ + data: make(map[string]any), + } + } + v.Extra.data[key] = value +} + +func (f *Function) GetExtra(key string) any { + if f.Extra == nil { + return nil + } + if v, ok := f.Extra.data[key]; ok { + return v + } + return nil +} + +func (f *Function) SetExtra(key string, value any) { + if f.Extra == nil { + f.Extra = &ExtraInfo{ + data: make(map[string]any), + } + } + f.Extra.data[key] = value +} + +func (d *Dependency) GetExtra(key string) any { + if d.Extra == nil { + return nil + } + if v, ok := d.Extra.data[key]; ok { + return v + } + return nil +} + +func (d *Dependency) SetExtra(key string, value any) { + if d.Extra == nil { + d.Extra = &ExtraInfo{ + data: make(map[string]any), + } + } + d.Extra.data[key] = value } From 5d7a2fcabce26fd1a44e7582edbc7de64ecab090 Mon Sep 17 00:00:00 2001 From: "wangzekun.zekin" Date: Mon, 26 Jan 2026 15:42:48 +0800 Subject: [PATCH 3/4] docs: update uniast docs and version --- docs/uniast-en.md | 51 ++++++++++++++++++++++++++++++++++++++-- docs/uniast-zh.md | 53 +++++++++++++++++++++++++++++++++++++++--- lang/uniast/version.go | 2 +- 3 files changed, 100 insertions(+), 6 deletions(-) diff --git a/docs/uniast-en.md b/docs/uniast-en.md index 81fb345e..ef34e742 100644 --- a/docs/uniast-en.md +++ b/docs/uniast-en.md @@ -1,4 +1,4 @@ -# Universal Abstract-Syntax-Tree Specification (v0.1.3) +# Universal Abstract-Syntax-Tree Specification (v0.2.0) Universal Abstract-Syntax-Tree is a LLM-friendly, language-agnostic code context data structure established by ABCoder. It represents a unified abstract syntax tree of a repository's code, collecting definitions of language entities (functions, types, constants/variables) and their interdependencies for subsequent AI understanding and coding-workflow development. @@ -370,6 +370,23 @@ Function type AST Node entity, corresponding to [NodeType] as FUNC, including fu - Vars: Global variables referenced within the current function, including variables and constants +- Extra: Additional information for storing language-specific details or extra metadata + + + - AnonymousFunctions: Anonymous functions defined in the function, each element is the FileLine of the corresponding function + + + - File: The filename where it is located + + + - Line: **Line number of the starting position in the file (starting from 1)** + + + - StartOffset: **Byte offset of the code starting position relative to the file header** + + + - EndOffset: **Byte offset of the code ending position relative to the file header** + ###### Dependency @@ -384,7 +401,10 @@ Represents a dependency relationship, containing the dependent node Id, dependen "File": "manager.go", "Line": 140, "StartOffset": 3547, - "EndOffset": 3564 + "EndOffset": 3564, + "Extra": { + "FunctionIsCall": true + } } ``` @@ -409,6 +429,12 @@ Represents a dependency relationship, containing the dependent node Id, dependen - EndOffset: Offset of the ending position of the dependency point (not the dependent node) token relative to the code file +- Extra: Additional information for storing language-specific details or extra metadata + + + - FunctionIsCall: If the Dependency is a function call, whether it actually executes the function call or just references the function + + ##### Type Type definition, [NodeType] is TYPE, including type definitions in specific languages such as structs, enums, interfaces, type aliases, etc. @@ -490,6 +516,9 @@ Type definition, [NodeType] is TYPE, including type definitions in specific lang - Implements: Which interfaces this type implements Identity +- Extra: Additional information for storing language-specific details or extra metadata + + ##### Var Global variables, including variables and constants, **but must be global** @@ -553,6 +582,24 @@ var x = getx(y db.Data) int { - Groups: Group definitions, such as `const( A=1, B=2, C=3)` in Go, Groups would be `[C=3, B=2]` (assuming A is the variable itself) +- Extra: Additional information for storing language-specific details or extra metadata + + + - AnonymousFunctions: Anonymous functions defined in the initialization function of the current variable. Each element is the FileLine of the corresponding function + + + - File: The filename where it is located + + + - Line: **Line number of the starting position in the file (starting from 1)** + + + - StartOffset: **Byte offset of the code starting position relative to the file header** + + + - EndOffset: **Byte offset of the code ending position relative to the file header** + + ### Graph The dependency topology graph of all AST Nodes in the repository. Formatted as Identity => Node mapping, where each Node contains dependency relationships with other nodes. diff --git a/docs/uniast-zh.md b/docs/uniast-zh.md index 0cca4928..8fb05aea 100644 --- a/docs/uniast-zh.md +++ b/docs/uniast-zh.md @@ -1,4 +1,4 @@ -# Universal Abstract-Syntax-Tree Specification (v0.1.3) +# Universal Abstract-Syntax-Tree Specification (v0.2.0) Universal Abstract-Syntax-Tree 是 ABCoder 建立的一种 LLM 亲和、语言无关的代码上下文数据结构,表示某个仓库代码的统一抽象语法树。收集了语言实体(函数、类型、常(变)量)的定义及其相互依赖关系,用于后续的 AI 理解、coding-workflow 开发。 @@ -371,12 +371,29 @@ Universal Abstract-Syntax-Tree 是 ABCoder 建立的一种 LLM 亲和、语言 - Vars: 当前函数内引用的全局量,包括变量和常量 +- Extra: 额外信息,用于存储一些语言特定的信息,或者是一些额外的元数据 + + + - AnonymousFunctions: 函数中所定义的匿名函数,每个元素为对应函数的 FileLine + + + - File: 所在的文件名 + + + - Line: **起始位置文件的行号(从1开始)** + + + - StartOffset: 代码起始位置**相对文件头的字节偏移量** + + + - EndOffset: 代码结束位置**相对文件头的字节偏移量** + ###### Dependency 表示一个依赖关系,包含依赖节点 Id、依赖产生位置等信息,方便 LLM 准确识别 -``` +```json { "ModPath": "github.com/cloudwego/localsession", "PkgPath": "github.com/cloudwego/localsession", @@ -384,7 +401,10 @@ Universal Abstract-Syntax-Tree 是 ABCoder 建立的一种 LLM 亲和、语言 "File": "manager.go", "Line": 140, "StartOffset": 3547, - "EndOffset": 3564 + "EndOffset": 3564, + "Extra": { + "FunctionIsCall": true + } } ``` @@ -409,6 +429,12 @@ Universal Abstract-Syntax-Tree 是 ABCoder 建立的一种 LLM 亲和、语言 - EndOffset: 依赖点(不是被依赖节点)token 结束位置相对代码文件的偏移 +- Extra: 额外信息,用于存储一些语言特定的信息,或者是一些额外的元数据 + + + - FunctionIsCall: 如果 Dependency 是一个函数调用,是否真正执行了函数调用,而不是只是引用了函数 + + ##### Type 类型定义,【NodeType】为 TYPE,包括具体语言中的类型定义,如 结构体、枚举、接口、类型别名等 @@ -490,6 +516,9 @@ Universal Abstract-Syntax-Tree 是 ABCoder 建立的一种 LLM 亲和、语言 - Implements: 该类型实现了哪些接口 **Identity** +- Extra: 额外信息,用于存储一些语言特定的信息,或者是一些额外的元数据 + + ##### Var 全局量,包括变量和常量,**但是必须是全局** @@ -553,6 +582,24 @@ var x = getx(y db.Data) int { - Groups: 同组定义, 如 Go 中的 `const( A=1, B=2, C=3)`,Groups 为 `[C=3, B=2]`(假设 A 为变量自身) +- Extra: 额外信息,用于存储一些语言特定的信息,或者是一些额外的元数据 + + + - AnonymousFunctions: 在当前变量的初始化函数中,所定义的匿名函数。每个元素为对应函数的 FileLine + + + - File: 所在的文件名 + + + - Line: **起始位置文件的行号(从1开始)** + + + - StartOffset: 代码起始位置**相对文件头的字节偏移量** + + + - EndOffset: 代码结束位置**相对文件头的字节偏移量** + + ### Graph 整个仓库的 AST Node 依赖拓扑图。形式为 Identity => Node 的映射,其中每个 Node 包含对其它节点的依赖关系。基于该拓扑图,可以实现**任意节点上下文的递归获取**。 diff --git a/lang/uniast/version.go b/lang/uniast/version.go index 99a9f558..87a8373c 100644 --- a/lang/uniast/version.go +++ b/lang/uniast/version.go @@ -16,4 +16,4 @@ package uniast -const Version = "v0.1.4" +const Version = "v0.2.0" From 8344691f1e9ab0086a639a2c506dcecc451d19e1 Mon Sep 17 00:00:00 2001 From: "wangzekun.zekin" Date: Tue, 27 Jan 2026 20:07:35 +0800 Subject: [PATCH 4/4] use const --- .github/workflows/regression.yml | 1 + lang/golang/parser/file.go | 15 ++++++++++----- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/.github/workflows/regression.yml b/.github/workflows/regression.yml index 9d806318..36edd03e 100644 --- a/.github/workflows/regression.yml +++ b/.github/workflows/regression.yml @@ -19,6 +19,7 @@ jobs: ['id'] ['Path'] ['ToolVersion'] + ['ASTVersion'] ['Modules']['a.b/c']['Dependencies']['a.b/c'] ['Modules']['a.b/c/cmdx']['Dependencies']['a.b/c/cmdx'] steps: diff --git a/lang/golang/parser/file.go b/lang/golang/parser/file.go index 12b748c7..95d503f4 100644 --- a/lang/golang/parser/file.go +++ b/lang/golang/parser/file.go @@ -26,6 +26,11 @@ import ( . "github.com/cloudwego/abcoder/lang/uniast" ) +const ( + ExtraKey_FunctionIsCall = "FunctionIsCall" + ExtraKey_AnonymousFunctions = "AnonymousFunctions" +) + func (p *GoParser) parseFile(ctx *fileContext, f *ast.File) error { cont := true ast.Inspect(f, func(node ast.Node) bool { @@ -142,12 +147,12 @@ func (p *GoParser) parseVar(ctx *fileContext, vspec *ast.ValueSpec, isConst bool if len(collects.directCalls) > 0 { for i, dep := range v.Dependencies { if collects.directCalls[dep.FileLine] { - v.Dependencies[i].SetExtra("FunctionIsCall", true) + v.Dependencies[i].SetExtra(ExtraKey_FunctionIsCall, true) } } } if len(collects.anonymousFunctions) > 0 { - v.SetExtra("AnonymousFunctions", collects.anonymousFunctions) + v.SetExtra(ExtraKey_AnonymousFunctions, collects.anonymousFunctions) } } @@ -584,17 +589,17 @@ set_func: if len(collects.directCalls) > 0 { for i, dep := range f.FunctionCalls { if collects.directCalls[dep.FileLine] { - f.FunctionCalls[i].SetExtra("FunctionIsCall", true) + f.FunctionCalls[i].SetExtra(ExtraKey_FunctionIsCall, true) } } for i, dep := range f.MethodCalls { if collects.directCalls[dep.FileLine] { - f.MethodCalls[i].SetExtra("FunctionIsCall", true) + f.MethodCalls[i].SetExtra(ExtraKey_FunctionIsCall, true) } } } if len(collects.anonymousFunctions) > 0 { - f.SetExtra("AnonymousFunctions", collects.anonymousFunctions) + f.SetExtra(ExtraKey_AnonymousFunctions, collects.anonymousFunctions) } return f, false }