From b8ca61e1560ab55553f41f81ee22c443e737b4cc Mon Sep 17 00:00:00 2001 From: Giulia Stocco <98900+gfs@users.noreply.github.com> Date: Sun, 15 Feb 2026 17:38:48 -0800 Subject: [PATCH 1/6] Add parameter type signatures for IL methods/calls Extract and propagate parenthesized parameter type signatures to enable overload-precise identification and matching of methods and unresolved call targets. - Extractor: ILExtractor now emits il_method_param_signature and il_call_target_param_signature tuples. - DB schema: Added il_method_param_signature and il_call_target_param_signature to semmlecode.binary.dbscheme. - QL API/AST: Exposed/getters for param signatures across CilInstructions, IR, InstructionSig, TranslatedElement/Function/Instruction and transform layers so signatures flow through translation. - Translated implementations: TranslatedCilMethod and relevant translated call/new-object logic return the extracted signatures; non-CIL backends return wildcards where appropriate. - VulnerableCalls: Expanded the vulnerableCallModel and related predicates to include paramSignature and updated matching logic to accept exact signatures or wildcard '*'. - Models: Updated example YAML models to include a '*' paramSignature for existing entries. This change improves precision when matching overloaded methods for analyses such as vulnerable-call detection. 
--- .../ILExtractor.cs | 9 ++++ .../extractor/cil/semmlecode.binary.dbscheme | 22 ++++++++++ .../binary/ast/internal/CilInstructions.qll | 6 +++ .../ql/lib/semmle/code/binary/ast/ir/IR.qll | 6 +++ .../ast/ir/internal/Instruction0/Function.qll | 3 ++ .../ir/internal/Instruction0/Instruction.qll | 3 ++ .../Instruction0/TranslatedElement.qll | 6 +++ .../Instruction0/TranslatedFunction.qll | 11 +++++ .../Instruction0/TranslatedInstruction.qll | 12 ++++++ .../binary/ast/ir/internal/InstructionSig.qll | 3 ++ .../TransformInstruction.qll | 8 ++++ .../src/VulnerableCalls/VulnerableCalls.qll | 43 ++++++++++++------- .../VulnerableCallsSummarize.ql | 12 ++++-- .../models/java-test-model.yml | 2 +- .../src/VulnerableCalls/models/test-model.yml | 2 +- 15 files changed, 127 insertions(+), 21 deletions(-) diff --git a/binary/extractor/cil/Semmle.Extraction.CSharp.IL/ILExtractor.cs b/binary/extractor/cil/Semmle.Extraction.CSharp.IL/ILExtractor.cs index 352fe644f9ac..a3ce67d4d0f3 100644 --- a/binary/extractor/cil/Semmle.Extraction.CSharp.IL/ILExtractor.cs +++ b/binary/extractor/cil/Semmle.Extraction.CSharp.IL/ILExtractor.cs @@ -111,6 +111,11 @@ private void ExtractMethod(MethodDefinition method, int typeId) { // Write access flags trap.WriteTuple("cil_method_access_flags", methodId, (int)method.Attributes); + // Write parameter type signature for overload-precise identification + var methodParamTypes = string.Join(",", + method.Parameters.Select(p => p.ParameterType.FullName.Replace('/', '.'))); + trap.WriteTuple("il_method_param_signature", methodId, $"({methodParamTypes})"); + if (method.HasBody) { ExtractMethodBody(method, methodId); } @@ -182,6 +187,10 @@ private void ExtractMethodBody(MethodDefinition method, int methodId) { var targetMethodName = $"{declaringTypeName}.{methodRef.Name}"; trap.WriteTuple("il_call_target_unresolved", instrId, targetMethodName); trap.WriteTuple("il_number_of_arguments", instrId, methodRef.Parameters.Count); + // Emit parameter type signature for 
overload-precise matching + var paramTypes = string.Join(",", + methodRef.Parameters.Select(p => p.ParameterType.FullName.Replace('/', '.'))); + trap.WriteTuple("il_call_target_param_signature", instrId, $"({paramTypes})"); if(methodRef.MethodReturnType.ReturnType.MetadataType is not Mono.Cecil.MetadataType.Void) { trap.WriteTuple("il_call_has_return_value", instrId); } diff --git a/binary/extractor/cil/semmlecode.binary.dbscheme b/binary/extractor/cil/semmlecode.binary.dbscheme index 1332f79f579b..98f4ed668212 100644 --- a/binary/extractor/cil/semmlecode.binary.dbscheme +++ b/binary/extractor/cil/semmlecode.binary.dbscheme @@ -2467,6 +2467,28 @@ il_call_target_unresolved( string target_method_name: string ref ); +/** + * Parameter type signature for method definitions. + * The param_signature is a parenthesized, comma-separated list of fully-qualified + * parameter type names, e.g. "(System.String,System.Int32)" or "()" for no parameters. + * This enables overload-precise identification of methods during export. + */ +il_method_param_signature( + int method: @method ref, + string param_signature: string ref +); + +/** + * Parameter type signature for unresolved method call targets. + * The param_signature is a parenthesized, comma-separated list of fully-qualified + * parameter type names, e.g. "(System.String,System.Int32)" or "()" for no parameters. + * This enables overload-precise matching of call targets. 
+ */ +il_call_target_param_signature( + int instruction: @il_instruction ref, + string param_signature: string ref +); + il_field_operand( int instruction: @il_instruction ref, string declaring_type_name: string ref, diff --git a/binary/ql/lib/semmle/code/binary/ast/internal/CilInstructions.qll b/binary/ql/lib/semmle/code/binary/ast/internal/CilInstructions.qll index 786077adabf2..50805dddeeb6 100644 --- a/binary/ql/lib/semmle/code/binary/ast/internal/CilInstructions.qll +++ b/binary/ql/lib/semmle/code/binary/ast/internal/CilInstructions.qll @@ -141,6 +141,9 @@ class CilMethod extends @method { result.getIndex() = i } + /** Gets the parenthesized parameter type signature, e.g. `(System.String,System.Int32)`. */ + string getParamSignature() { il_method_param_signature(this, result) } + CilType getDeclaringType() { methods(this, _, _, result) } Location getLocation() { none() } // TODO: Extract @@ -430,6 +433,9 @@ abstract class CilCallOrNewObject extends CilInstruction { final int getNumberOfArguments() { il_number_of_arguments(this, result) } final string getExternalName() { il_call_target_unresolved(this, result) } + + /** Gets the parenthesized parameter type signature, e.g. `(System.String,System.Int32)`. */ + final string getParamSignature() { il_call_target_param_signature(this, result) } } abstract class CilCall extends CilCallOrNewObject { diff --git a/binary/ql/lib/semmle/code/binary/ast/ir/IR.qll b/binary/ql/lib/semmle/code/binary/ast/ir/IR.qll index 58760eebf4b1..1c1f47418ea2 100644 --- a/binary/ql/lib/semmle/code/binary/ast/ir/IR.qll +++ b/binary/ql/lib/semmle/code/binary/ast/ir/IR.qll @@ -25,6 +25,9 @@ private module FinalInstruction { predicate isPublic() { super.isPublic() } + /** Gets the parenthesized parameter type signature, e.g. `(System.String,System.Int32)`. */ + string getParamSignature() { result = super.getParamSignature() } + /** * Gets the fully qualified name of this method in the format: * "Namespace.ClassName.MethodName". 
@@ -302,6 +305,9 @@ private module FinalInstruction { class ExternalRefInstruction extends Instruction instanceof Instruction::ExternalRefInstruction { string getExternalName() { result = super.getExternalName() } + /** Gets the parenthesized parameter type signature, e.g. `(System.String,System.Int32)`. */ + string getExternalParamSignature() { result = super.getExternalParamSignature() } + cached predicate hasFullyQualifiedName(string namespace, string className, string methodName) { exists(string s, string r | diff --git a/binary/ql/lib/semmle/code/binary/ast/ir/internal/Instruction0/Function.qll b/binary/ql/lib/semmle/code/binary/ast/ir/internal/Instruction0/Function.qll index 89ec7491e2e2..cd63bc61176f 100644 --- a/binary/ql/lib/semmle/code/binary/ast/ir/internal/Instruction0/Function.qll +++ b/binary/ql/lib/semmle/code/binary/ast/ir/internal/Instruction0/Function.qll @@ -25,5 +25,8 @@ class Function extends TFunction { predicate isPublic() { f.isPublic() } + /** Gets the parenthesized parameter type signature, e.g. `(System.String,System.Int32)`. */ + string getParamSignature() { result = f.getParamSignature() } + Type getDeclaringType() { result.getAFunction() = this } } diff --git a/binary/ql/lib/semmle/code/binary/ast/ir/internal/Instruction0/Instruction.qll b/binary/ql/lib/semmle/code/binary/ast/ir/internal/Instruction0/Instruction.qll index f8f0370c2419..2c03b1107f1f 100644 --- a/binary/ql/lib/semmle/code/binary/ast/ir/internal/Instruction0/Instruction.qll +++ b/binary/ql/lib/semmle/code/binary/ast/ir/internal/Instruction0/Instruction.qll @@ -176,6 +176,9 @@ class ExternalRefInstruction extends Instruction { string getExternalName() { result = te.getExternalName(tag) } + /** Gets the parenthesized parameter type signature, e.g. `(System.String,System.Int32)`. 
*/ + string getExternalParamSignature() { result = te.getExternalParamSignature(tag) } + final override string getImmediateValue() { result = this.getExternalName() } } diff --git a/binary/ql/lib/semmle/code/binary/ast/ir/internal/Instruction0/TranslatedElement.qll b/binary/ql/lib/semmle/code/binary/ast/ir/internal/Instruction0/TranslatedElement.qll index 313939405aa4..a4a904df770c 100644 --- a/binary/ql/lib/semmle/code/binary/ast/ir/internal/Instruction0/TranslatedElement.qll +++ b/binary/ql/lib/semmle/code/binary/ast/ir/internal/Instruction0/TranslatedElement.qll @@ -263,6 +263,12 @@ abstract class TranslatedElement extends TTranslatedElement { */ string getExternalName(InstructionTag tag) { none() } + /** + * Gets the parameter type signature for an external call with the given tag, e.g. + * `(System.String,System.Int32)`. This `tag` must refer to an `ExternalRef` instruction. + */ + string getExternalParamSignature(InstructionTag tag) { none() } + /** * Gets the name of the field referenced by an instruction with the given tag. This `tag` must refer to * a `FieldAddress` instruction (that is, an instruction for which diff --git a/binary/ql/lib/semmle/code/binary/ast/ir/internal/Instruction0/TranslatedFunction.qll b/binary/ql/lib/semmle/code/binary/ast/ir/internal/Instruction0/TranslatedFunction.qll index a7faa107ef31..1c073a8b42a1 100644 --- a/binary/ql/lib/semmle/code/binary/ast/ir/internal/Instruction0/TranslatedFunction.qll +++ b/binary/ql/lib/semmle/code/binary/ast/ir/internal/Instruction0/TranslatedFunction.qll @@ -47,6 +47,9 @@ abstract class TranslatedFunction extends TranslatedElement { abstract string getName(); + /** Gets the parenthesized parameter type signature, e.g. `(System.String,System.Int32)`. 
*/ + abstract string getParamSignature(); + final override string toString() { result = "Translation of " + this.getName() } abstract predicate isProgramEntryPoint(); @@ -116,6 +119,9 @@ class TranslatedX86Function extends TranslatedFunction, TTranslatedX86Function { final override predicate isPublic() { entry instanceof Raw::X86ExportedEntryInstruction } + // x86 does not have parameter type signatures + final override string getParamSignature() { result = "*" } + final override predicate hasOrdering(LocalVariableTag tag, int ordering) { exists(Raw::X86Register r | tag = X86RegisterTag(r) | // TODO: This hardcodes X64 calling convention for Windows @@ -217,6 +223,8 @@ class TranslatedCilMethod extends TranslatedFunction, TTranslatedCilMethod { override string getName() { result = method.getName() } + override string getParamSignature() { result = method.getParamSignature() } + override predicate isProgramEntryPoint() { none() } override predicate isPublic() { method.isPublic() } @@ -321,6 +329,9 @@ class TranslatedJvmMethod extends TranslatedFunction, TTranslatedJvmMethod { override string getName() { result = method.getName() } + // JVM does not currently extract parameter type signatures + override string getParamSignature() { result = "*" } + override predicate isProgramEntryPoint() { none() } override predicate isPublic() { method.isPublic() } diff --git a/binary/ql/lib/semmle/code/binary/ast/ir/internal/Instruction0/TranslatedInstruction.qll b/binary/ql/lib/semmle/code/binary/ast/ir/internal/Instruction0/TranslatedInstruction.qll index 29ed38b39849..951f2773e8b5 100644 --- a/binary/ql/lib/semmle/code/binary/ast/ir/internal/Instruction0/TranslatedInstruction.qll +++ b/binary/ql/lib/semmle/code/binary/ast/ir/internal/Instruction0/TranslatedInstruction.qll @@ -2169,6 +2169,12 @@ class TranslatedCilCall extends TranslatedCilInstruction, TTranslatedCilCall { result = instr.getExternalName() } + override string getExternalParamSignature(InstructionTag tag) { + not 
exists(instr.getTarget()) and + tag = CilCallTargetTag() and + result = instr.getParamSignature() + } + override Instruction getChildSuccessor(TranslatedElement child, SuccessorType succType) { none() } override Instruction getSuccessor(InstructionTag tag, SuccessorType succType) { @@ -2432,6 +2438,12 @@ class TranslatedNewObject extends TranslatedCilInstruction, TTranslatedNewObject result = instr.getExternalName() } + override string getExternalParamSignature(InstructionTag tag) { + not exists(instr.getConstructor()) and + tag = CilCallTargetTag() and + result = instr.getParamSignature() + } + override predicate hasTempVariable(TempVariableTag tag) { tag = CilNewObjInitVarTag() or diff --git a/binary/ql/lib/semmle/code/binary/ast/ir/internal/InstructionSig.qll b/binary/ql/lib/semmle/code/binary/ast/ir/internal/InstructionSig.qll index 4cb91bf02750..b1b71b11641e 100644 --- a/binary/ql/lib/semmle/code/binary/ast/ir/internal/InstructionSig.qll +++ b/binary/ql/lib/semmle/code/binary/ast/ir/internal/InstructionSig.qll @@ -202,6 +202,9 @@ signature module InstructionSig { class ExternalRefInstruction extends Instruction { string getExternalName(); + + /** Gets the parenthesized parameter type signature, e.g. `(System.String,System.Int32)`. */ + string getExternalParamSignature(); } class SubInstruction extends BinaryInstruction; diff --git a/binary/ql/lib/semmle/code/binary/ast/ir/internal/TransformInstruction/TransformInstruction.qll b/binary/ql/lib/semmle/code/binary/ast/ir/internal/TransformInstruction/TransformInstruction.qll index cb19342fdc19..806c6810ec90 100644 --- a/binary/ql/lib/semmle/code/binary/ast/ir/internal/TransformInstruction/TransformInstruction.qll +++ b/binary/ql/lib/semmle/code/binary/ast/ir/internal/TransformInstruction/TransformInstruction.qll @@ -730,6 +730,14 @@ module Transform { result = extRef.getExternalName() ) } + + /** Gets the parenthesized parameter type signature, e.g. `(System.String,System.Int32)`. 
*/ + string getExternalParamSignature() { + exists(Input::ExternalRefInstruction extRef | + this = TOldInstruction(extRef) and + result = extRef.getExternalParamSignature() + ) + } } class FieldAddressInstruction extends Instruction { diff --git a/binary/ql/src/VulnerableCalls/VulnerableCalls.qll b/binary/ql/src/VulnerableCalls/VulnerableCalls.qll index 8a4041bcdc6d..57c6c6a69451 100644 --- a/binary/ql/src/VulnerableCalls/VulnerableCalls.qll +++ b/binary/ql/src/VulnerableCalls/VulnerableCalls.qll @@ -7,13 +7,17 @@ private import binary private import semmle.code.binary.ast.ir.IR /** - * Holds if any call identified by `(namespace, className, methodName)` should be flagged - * as potentially vulnerable, for reasons explained by the advisory with the given `id`. + * Holds if any call identified by `(namespace, className, methodName, paramSignature)` should be + * flagged as potentially vulnerable, for reasons explained by the advisory with the given `id`. + * + * `paramSignature` is a comma-separated list of fully-qualified parameter types enclosed in + * parentheses, e.g. `(System.String,System.Int32)`. An empty signature `()` matches methods + * with no parameters. A wildcard `*` matches any overload. * * This is an extensible predicate - values are provided via YAML data extensions. 
*/ extensible predicate vulnerableCallModel( - string namespace, string className, string methodName, string id + string namespace, string className, string methodName, string paramSignature, string id ); /** @@ -23,12 +27,17 @@ class VulnerableMethodCall extends CallInstruction { string vulnerabilityId; VulnerableMethodCall() { - exists(string namespace, string className, string methodName | - vulnerableCallModel(namespace, className, methodName, vulnerabilityId) and - this.getTargetOperand() - .getAnyDef() - .(ExternalRefInstruction) - .hasFullyQualifiedName(namespace, className, methodName) + exists(string namespace, string className, string methodName, string paramSignature | + vulnerableCallModel(namespace, className, methodName, paramSignature, vulnerabilityId) and + exists(ExternalRefInstruction extRef | + extRef = this.getTargetOperand().getAnyDef() and + extRef.hasFullyQualifiedName(namespace, className, methodName) and + ( + paramSignature = "*" + or + extRef.getExternalParamSignature() = paramSignature + ) + ) ) } @@ -125,26 +134,30 @@ Function getAPublicVulnerableMethod(string id) { */ module ExportedVulnerableCalls { /** - * Holds if `(namespace, className, methodName)` identifies a method that + * Holds if `(namespace, className, methodName, paramSignature)` identifies a method that * leads to a vulnerable call identified by `id`. 
*/ - predicate pathToVulnerableMethod(string namespace, string className, string methodName, string id) { + predicate pathToVulnerableMethod( + string namespace, string className, string methodName, string paramSignature, string id + ) { exists(Function m | m = getAVulnerableMethod(id) and - m.hasFullyQualifiedName(namespace, className, methodName) + m.hasFullyQualifiedName(namespace, className, methodName) and + paramSignature = m.getParamSignature() ) } /** - * Holds if `(namespace, className, methodName)` identifies a public method + * Holds if `(namespace, className, methodName, paramSignature)` identifies a public method * that leads to a vulnerable call identified by `id`. */ predicate publicPathToVulnerableMethod( - string namespace, string className, string methodName, string id + string namespace, string className, string methodName, string paramSignature, string id ) { exists(Function m | m = getAPublicVulnerableMethod(id) and - m.hasFullyQualifiedName(namespace, className, methodName) + m.hasFullyQualifiedName(namespace, className, methodName) and + paramSignature = m.getParamSignature() ) } } diff --git a/binary/ql/src/VulnerableCalls/VulnerableCallsSummarize.ql b/binary/ql/src/VulnerableCalls/VulnerableCallsSummarize.ql index 5def9a503c9c..e689c9f61dbf 100644 --- a/binary/ql/src/VulnerableCalls/VulnerableCallsSummarize.ql +++ b/binary/ql/src/VulnerableCalls/VulnerableCallsSummarize.ql @@ -13,17 +13,21 @@ import semmle.code.binary.ast.ir.IR * Exports all methods that can reach vulnerable calls. * Output format matches the vulnerableCallModel extensible predicate for iterative analysis. 
*/ -query predicate vulnerableCallModel(string namespace, string className, string methodName, string id) { - ExportedVulnerableCalls::pathToVulnerableMethod(namespace, className, methodName, id) +query predicate vulnerableCallModel( + string namespace, string className, string methodName, string paramSignature, string id +) { + ExportedVulnerableCalls::pathToVulnerableMethod(namespace, className, methodName, paramSignature, + id) } /** * Exports only public methods that reach vulnerable calls (for API surface analysis). */ query predicate publicVulnerableCallModel( - string namespace, string className, string methodName, string id + string namespace, string className, string methodName, string paramSignature, string id ) { - ExportedVulnerableCalls::publicPathToVulnerableMethod(namespace, className, methodName, id) + ExportedVulnerableCalls::publicPathToVulnerableMethod(namespace, className, methodName, + paramSignature, id) } /** diff --git a/binary/ql/src/VulnerableCalls/models/java-test-model.yml b/binary/ql/src/VulnerableCalls/models/java-test-model.yml index 0e57e8f8f9ea..a677ab018f49 100644 --- a/binary/ql/src/VulnerableCalls/models/java-test-model.yml +++ b/binary/ql/src/VulnerableCalls/models/java-test-model.yml @@ -3,4 +3,4 @@ extensions: pack: binary/vulnerable-calls extensible: vulnerableCallModel data: - - ["java.io", "PrintStream", "println", "TEST-JAVA-001"] + - ["java.io", "PrintStream", "println", "*", "TEST-JAVA-001"] diff --git a/binary/ql/src/VulnerableCalls/models/test-model.yml b/binary/ql/src/VulnerableCalls/models/test-model.yml index d43627c1ed3d..076b5e90e37d 100644 --- a/binary/ql/src/VulnerableCalls/models/test-model.yml +++ b/binary/ql/src/VulnerableCalls/models/test-model.yml @@ -3,4 +3,4 @@ extensions: pack: binary/vulnerable-calls extensible: vulnerableCallModel data: - - ["System", "Console", "WriteLine", "TEST-VULN-001"] + - ["System", "Console", "WriteLine", "*", "TEST-VULN-001"] From b63270e42eae7fc80344bb6c2f43d6b407372471 Mon 
Sep 17 00:00:00 2001 From: Giulia Stocco <98900+gfs@users.noreply.github.com> Date: Fri, 27 Feb 2026 08:14:40 -0800 Subject: [PATCH 2/6] Add method param signatures and JVM stack metadata Expose a getParamSignature API on InstructionSig (and the TransformInstruction implementation) to return parenthesized parameter-type signatures (e.g. "(System.String,System.Int32)"). Extend the extraction DB schema with il_method_param_signature and il_call_target_param_signature to enable overload-precise method identification, and add jvm_stack_height and jvm_stack_slot tables to record JVM stack heights and map stack slots to producer instructions to simplify stack-based dataflow analysis. --- .../binary/ast/ir/internal/InstructionSig.qll | 3 ++ .../TransformInstruction.qll | 3 ++ binary/ql/lib/semmlecode.binary.dbscheme | 46 +++++++++++++++++++ .../src/VulnerableCalls/codeql-pack.lock.yml | 4 ++ 4 files changed, 56 insertions(+) create mode 100644 binary/ql/src/VulnerableCalls/codeql-pack.lock.yml diff --git a/binary/ql/lib/semmle/code/binary/ast/ir/internal/InstructionSig.qll b/binary/ql/lib/semmle/code/binary/ast/ir/internal/InstructionSig.qll index b1b71b11641e..c0612aab69f3 100644 --- a/binary/ql/lib/semmle/code/binary/ast/ir/internal/InstructionSig.qll +++ b/binary/ql/lib/semmle/code/binary/ast/ir/internal/InstructionSig.qll @@ -34,6 +34,9 @@ signature module InstructionSig { Type getDeclaringType(); predicate isPublic(); + + /** Gets the parenthesized parameter type signature, e.g. `(System.String,System.Int32)`. 
*/ + string getParamSignature(); } class Operand { diff --git a/binary/ql/lib/semmle/code/binary/ast/ir/internal/TransformInstruction/TransformInstruction.qll b/binary/ql/lib/semmle/code/binary/ast/ir/internal/TransformInstruction/TransformInstruction.qll index 806c6810ec90..9adfef639586 100644 --- a/binary/ql/lib/semmle/code/binary/ast/ir/internal/TransformInstruction/TransformInstruction.qll +++ b/binary/ql/lib/semmle/code/binary/ast/ir/internal/TransformInstruction/TransformInstruction.qll @@ -184,6 +184,9 @@ module Transform { Type getDeclaringType() { result = super.getDeclaringType() } predicate isPublic() { super.isPublic() } + + /** Gets the parenthesized parameter type signature, e.g. `(System.String,System.Int32)`. */ + string getParamSignature() { result = super.getParamSignature() } } class Type instanceof Input::Type { diff --git a/binary/ql/lib/semmlecode.binary.dbscheme b/binary/ql/lib/semmlecode.binary.dbscheme index e6cff58b0101..98f4ed668212 100644 --- a/binary/ql/lib/semmlecode.binary.dbscheme +++ b/binary/ql/lib/semmlecode.binary.dbscheme @@ -2467,6 +2467,28 @@ il_call_target_unresolved( string target_method_name: string ref ); +/** + * Parameter type signature for method definitions. + * The param_signature is a parenthesized, comma-separated list of fully-qualified + * parameter type names, e.g. "(System.String,System.Int32)" or "()" for no parameters. + * This enables overload-precise identification of methods during export. + */ +il_method_param_signature( + int method: @method ref, + string param_signature: string ref +); + +/** + * Parameter type signature for unresolved method call targets. + * The param_signature is a parenthesized, comma-separated list of fully-qualified + * parameter type names, e.g. "(System.String,System.Int32)" or "()" for no parameters. + * This enables overload-precise matching of call targets. 
+ */ +il_call_target_param_signature( + int instruction: @il_instruction ref, + string param_signature: string ref +); + il_field_operand( int instruction: @il_instruction ref, string declaring_type_name: string ref, @@ -2966,3 +2988,27 @@ jvm_method_access_flags( unique int method: @method ref, int flags: int ref ); + +/** + * Stack height at entry to a JVM instruction. + * This is computed by abstract interpretation during extraction. + */ +jvm_stack_height( + unique int instr: @jvm_instruction ref, + int height: int ref +); + +/** + * Maps a stack slot at a specific instruction to the instruction that produced the value. + * slot 0 is the top of the stack, slot 1 is below that, etc. + * producer_id is the instruction ID that pushed this value onto the stack. + * + * This allows QL to determine data flow through the operand stack without + * expensive recursive CFG traversal. + */ +#keyset[instr, slot] +jvm_stack_slot( + int instr: @jvm_instruction ref, + int slot: int ref, + int producer_id: @jvm_instruction ref +); diff --git a/binary/ql/src/VulnerableCalls/codeql-pack.lock.yml b/binary/ql/src/VulnerableCalls/codeql-pack.lock.yml new file mode 100644 index 000000000000..53004274575d --- /dev/null +++ b/binary/ql/src/VulnerableCalls/codeql-pack.lock.yml @@ -0,0 +1,4 @@ +--- +lockVersion: 1.0.0 +dependencies: {} +compiled: false From fb379462c423bafd76be521ad5581bd0eb9fb181 Mon Sep 17 00:00:00 2001 From: Giulia Stocco <98900+gfs@users.noreply.github.com> Date: Wed, 4 Mar 2026 18:10:48 -0800 Subject: [PATCH 3/6] Include same-assembly method definitions in vulnerable method closure For root cause mode analysis, where the vulnerable methods being traced are defined in the same binary being analyzed (not referenced cross-assembly), getAVulnerableMethod needs a base case that matches method definitions by their fully-qualified name and parameter signature. Previously, only cross-assembly calls via ExternalRefInstruction were matched as the base case. 
Intra-assembly calls are handled by the existing transitive getStaticTarget() clause, but the closure never started because the base case only found external ref call sites. The new clause matches methods defined in the current binary against the model, respecting the paramSignature field (including wildcard '*'). For standard cross-assembly analysis this is a no-op since the model methods won't be defined in the binary being analyzed. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- binary/ql/src/VulnerableCalls/VulnerableCalls.qll | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/binary/ql/src/VulnerableCalls/VulnerableCalls.qll b/binary/ql/src/VulnerableCalls/VulnerableCalls.qll index 57c6c6a69451..475016fe9a7b 100644 --- a/binary/ql/src/VulnerableCalls/VulnerableCalls.qll +++ b/binary/ql/src/VulnerableCalls/VulnerableCalls.qll @@ -95,9 +95,20 @@ Function getStateMachineImplementation(Function stub) { isStateMachineImplementa * state machine implementations. */ Function getAVulnerableMethod(string id) { - // Direct call to vulnerable method + // Direct call to vulnerable method (cross-assembly via ExternalRef) result = getADirectlyVulnerableMethod(id) or + // Method defined in this binary that matches the model. + // This handles root cause mode where the vulnerable method is in the same + // package being analyzed, not referenced cross-assembly via ExternalRef. + // The result set includes the root cause methods themselves plus all their + // transitive callers, filtered downstream to public methods for export. 
+ exists(string namespace, string className, string methodName, string paramSignature | + vulnerableCallModel(namespace, className, methodName, paramSignature, id) and + result.hasFullyQualifiedName(namespace, className, methodName) and + (paramSignature = "*" or result.getParamSignature() = paramSignature) + ) + or // Transitive: method calls another method that is vulnerable (via ExternalRef for external calls) exists(CallInstruction call, Function callee | call.getEnclosingFunction() = result and From 81a6b80010e5901aa3702934e24a8a4a01f362cb Mon Sep 17 00:00:00 2001 From: Giulia Stocco <98900+gfs@users.noreply.github.com> Date: Wed, 4 Mar 2026 19:12:53 -0800 Subject: [PATCH 4/6] Sync JVM extractor dbscheme with ql lib The ql lib dbscheme was updated with il_method_param_signature, il_call_target_param_signature, jvm_stack_height, and jvm_stack_slot tables but the JVM extractor's copy was not updated. This causes a schema mismatch when building a JVM database and then running the binary-ql queries against it. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../extractor/jvm/semmlecode.binary.dbscheme | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/binary/extractor/jvm/semmlecode.binary.dbscheme b/binary/extractor/jvm/semmlecode.binary.dbscheme index e6cff58b0101..98f4ed668212 100644 --- a/binary/extractor/jvm/semmlecode.binary.dbscheme +++ b/binary/extractor/jvm/semmlecode.binary.dbscheme @@ -2467,6 +2467,28 @@ il_call_target_unresolved( string target_method_name: string ref ); +/** + * Parameter type signature for method definitions. + * The param_signature is a parenthesized, comma-separated list of fully-qualified + * parameter type names, e.g. "(System.String,System.Int32)" or "()" for no parameters. + * This enables overload-precise identification of methods during export. 
+ */ +il_method_param_signature( + int method: @method ref, + string param_signature: string ref +); + +/** + * Parameter type signature for unresolved method call targets. + * The param_signature is a parenthesized, comma-separated list of fully-qualified + * parameter type names, e.g. "(System.String,System.Int32)" or "()" for no parameters. + * This enables overload-precise matching of call targets. + */ +il_call_target_param_signature( + int instruction: @il_instruction ref, + string param_signature: string ref +); + il_field_operand( int instruction: @il_instruction ref, string declaring_type_name: string ref, @@ -2966,3 +2988,27 @@ jvm_method_access_flags( unique int method: @method ref, int flags: int ref ); + +/** + * Stack height at entry to a JVM instruction. + * This is computed by abstract interpretation during extraction. + */ +jvm_stack_height( + unique int instr: @jvm_instruction ref, + int height: int ref +); + +/** + * Maps a stack slot at a specific instruction to the instruction that produced the value. + * slot 0 is the top of the stack, slot 1 is below that, etc. + * producer_id is the instruction ID that pushed this value onto the stack. + * + * This allows QL to determine data flow through the operand stack without + * expensive recursive CFG traversal. + */ +#keyset[instr, slot] +jvm_stack_slot( + int instr: @jvm_instruction ref, + int slot: int ref, + int producer_id: @jvm_instruction ref +); From 3e61cc72af6b2772dad103bf985f82bd0845ee2a Mon Sep 17 00:00:00 2001 From: Giulia Stocco <98900+gfs@users.noreply.github.com> Date: Wed, 4 Mar 2026 19:25:53 -0800 Subject: [PATCH 5/6] Add parameter type signature extraction for JVM bytecode The CIL extractor already emits il_method_param_signature and il_call_target_param_signature for overload-precise method matching. This commit adds the same capability to the JVM bytecode extractor. JVM extractor changes: - ParseParamSignature: converts JVM descriptors (e.g. 
'(Ljava/lang/Object;JJ)V') to human-readable signatures (e.g. '(java.lang.Object,long,long)') - ExtractMethod: emits il_method_param_signature for method definitions - ExtractMethodRef: emits il_call_target_param_signature for call sites QL library changes: - JvmMethod: add getParamSignature() backed by il_method_param_signature - JvmInvoke: add getParamSignature() backed by il_call_target_param_signature - TranslatedJvmInvoke: wire getExternalParamSignature to instr.getParamSignature() - TranslatedJvmMethod: use method.getParamSignature() instead of wildcard '*' VulnerableCalls.qll: - VulnerableMethodCall: handle case where extRef lacks param signature (backwards compat for databases built before this change) - Root cause base case: handle functions with wildcard param signature Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../JvmExtractor.cs | 70 +++++++++++++++++++ .../binary/ast/internal/JvmInstructions.qll | 5 ++ .../Instruction0/TranslatedFunction.qll | 3 +- .../Instruction0/TranslatedInstruction.qll | 5 ++ .../src/VulnerableCalls/VulnerableCalls.qll | 8 ++- 5 files changed, 88 insertions(+), 3 deletions(-) diff --git a/binary/extractor/jvm/Semmle.Extraction.Java.ByteCode/JvmExtractor.cs b/binary/extractor/jvm/Semmle.Extraction.Java.ByteCode/JvmExtractor.cs index 645a9a174146..b814d628c429 100644 --- a/binary/extractor/jvm/Semmle.Extraction.Java.ByteCode/JvmExtractor.cs +++ b/binary/extractor/jvm/Semmle.Extraction.Java.ByteCode/JvmExtractor.cs @@ -142,6 +142,10 @@ private void ExtractMethod(Method method, int typeId, ClassFile classFile, strin // Extract access flags as raw bitmask trap.WriteTuple("jvm_method_access_flags", methodId, (int)method.AccessFlags); + // Write parameter type signature for overload-precise identification + var descriptorUtf8ForSig = classFile.Constants.Get(method.Descriptor); + trap.WriteTuple("il_method_param_signature", methodId, ParseParamSignature(descriptorUtf8ForSig.Value)); + // Check if this is a static 
method (for parameter indexing) bool isStatic = (method.AccessFlags & AccessFlag.Static) != 0; @@ -647,6 +651,12 @@ private void ExtractMethodRef(Instruction instr, int instrId, ClassFile classFil int paramCount = CountParameters(descriptor); trap.WriteTuple("jvm_number_of_arguments", instrId, paramCount); + // Write parameter type signature for overload-precise matching + if (!string.IsNullOrEmpty(descriptor)) + { + trap.WriteTuple("il_call_target_param_signature", instrId, ParseParamSignature(descriptor)); + } + if (!IsVoidReturn(descriptor)) { trap.WriteTuple("jvm_call_has_return_value", instrId); @@ -782,6 +792,66 @@ private static int CountParameters(string descriptor) return count; }
+    /// <summary>
+    /// Converts a JVM method descriptor to a parenthesized, comma-separated list of
+    /// fully-qualified parameter types, e.g. "(Ljava/lang/Object;[[JJ)V" becomes
+    /// "(java.lang.Object,long[][],long)"; returns "(*)" if the descriptor is null or malformed.
+    /// </summary>
+    private static string ParseParamSignature(string descriptor)
+    {
+        if (string.IsNullOrEmpty(descriptor) || !descriptor.StartsWith("("))
+            return "(*)";
+
+        int closeParenIdx = descriptor.IndexOf(')');
+        if (closeParenIdx < 0)
+            return "(*)";
+
+        var paramPart = descriptor.Substring(1, closeParenIdx - 1);
+        var types = new System.Collections.Generic.List<string>();
+        int i = 0;
+        while (i < paramPart.Length)
+        {
+            int arrayDims = 0;
+            while (i < paramPart.Length && paramPart[i] == '[')
+            {
+                arrayDims++;
+                i++;
+            }
+
+            if (i >= paramPart.Length)
+                break;
+
+            string baseType;
+            char c = paramPart[i];
+            switch (c)
+            {
+                case 'B': baseType = "byte"; i++; break;
+                case 'C': baseType = "char"; i++; break;
+                case 'D': baseType = "double"; i++; break;
+                case 'F': baseType = "float"; i++; break;
+                case 'I': baseType = "int"; i++; break;
+                case 'J': baseType = "long"; i++; break;
+                case 'S': baseType = "short"; i++; break;
+                case 'Z': baseType = "boolean"; i++; break;
+                case 'L':
+                    int semiIdx = paramPart.IndexOf(';', i);
+                    if (semiIdx < 0) semiIdx = paramPart.Length;
+                    // Extract class name, convert / to ., strip leading L
+                    baseType = paramPart.Substring(i + 1, semiIdx - i - 1).Replace('/', '.');
+                    i = semiIdx + 1;
+                    break;
+                default:
+                    baseType = "?";
+                    i++;
+                    break;
+            }
+
+            types.Add(baseType + string.Concat(System.Linq.Enumerable.Repeat("[]", arrayDims))); // one "[]" per dimension: long[][], not long[[]]
+        }
+
+        return "(" + string.Join(",", types) + ")";
+    }
+
private static bool IsVoidReturn(string descriptor) { return descriptor.EndsWith(")V"); diff --git a/binary/ql/lib/semmle/code/binary/ast/internal/JvmInstructions.qll b/binary/ql/lib/semmle/code/binary/ast/internal/JvmInstructions.qll index 51ac1f659505..65be49b50da3 100644 --- a/binary/ql/lib/semmle/code/binary/ast/internal/JvmInstructions.qll +++ b/binary/ql/lib/semmle/code/binary/ast/internal/JvmInstructions.qll @@ -64,6 +64,9 @@ class JvmMethod extends @method { private string getSignature() { methods(this, _, result, _) } + /** Gets the parenthesized parameter type signature, e.g. `(java.lang.Object,long,long)`. */ + string getParamSignature() { il_method_param_signature(this, result) } + predicate isVoid() { this.getSignature().matches("%)V") } JvmInstruction getAnInstruction() { jvm_instruction_method(result, this) } @@ -1209,6 +1212,8 @@ class JvmPutfield extends @jvm_putfield, JvmFieldStore { } abstract class JvmInvoke extends JvmInstruction { string getCallTarget() { jvm_call_target_unresolved(this, result) } + string getParamSignature() { il_call_target_param_signature(this, result) } + int getNumberOfArguments() { jvm_number_of_arguments(this, result) } predicate hasReturnValue() { jvm_call_has_return_value(this) } diff --git a/binary/ql/lib/semmle/code/binary/ast/ir/internal/Instruction0/TranslatedFunction.qll b/binary/ql/lib/semmle/code/binary/ast/ir/internal/Instruction0/TranslatedFunction.qll index 1c073a8b42a1..6df91a4cccdd 100644 --- a/binary/ql/lib/semmle/code/binary/ast/ir/internal/Instruction0/TranslatedFunction.qll +++ b/binary/ql/lib/semmle/code/binary/ast/ir/internal/Instruction0/TranslatedFunction.qll @@ -329,8 +329,7 @@ class TranslatedJvmMethod extends TranslatedFunction,
TTranslatedJvmMethod { override string getName() { result = method.getName() } - // JVM does not currently extract parameter type signatures - override string getParamSignature() { result = "*" } + override string getParamSignature() { result = method.getParamSignature() } override predicate isProgramEntryPoint() { none() } diff --git a/binary/ql/lib/semmle/code/binary/ast/ir/internal/Instruction0/TranslatedInstruction.qll b/binary/ql/lib/semmle/code/binary/ast/ir/internal/Instruction0/TranslatedInstruction.qll index 951f2773e8b5..dc9481e6d570 100644 --- a/binary/ql/lib/semmle/code/binary/ast/ir/internal/Instruction0/TranslatedInstruction.qll +++ b/binary/ql/lib/semmle/code/binary/ast/ir/internal/Instruction0/TranslatedInstruction.qll @@ -2769,6 +2769,11 @@ class TranslatedJvmInvoke extends TranslatedJvmInstruction, TTranslatedJvmInvoke result = instr.getCallTarget() } + final override string getExternalParamSignature(InstructionTag tag) { + tag = JvmCallTargetTag() and + result = instr.getParamSignature() + } + override Instruction getChildSuccessor(TranslatedElement child, SuccessorType succType) { none() } override Instruction getSuccessor(InstructionTag tag, SuccessorType succType) { diff --git a/binary/ql/src/VulnerableCalls/VulnerableCalls.qll b/binary/ql/src/VulnerableCalls/VulnerableCalls.qll index 475016fe9a7b..e44e736fbfa5 100644 --- a/binary/ql/src/VulnerableCalls/VulnerableCalls.qll +++ b/binary/ql/src/VulnerableCalls/VulnerableCalls.qll @@ -36,6 +36,8 @@ class VulnerableMethodCall extends CallInstruction { paramSignature = "*" or extRef.getExternalParamSignature() = paramSignature + or + not exists(extRef.getExternalParamSignature()) // no signature extracted (e.g. database built before param signatures were emitted) ) ) ) @@ -106,7 +108,11 @@ Function getAVulnerableMethod(string id) { exists(string namespace, string className, string methodName, string paramSignature | vulnerableCallModel(namespace, className, methodName, paramSignature, id) and result.hasFullyQualifiedName(namespace, className,
methodName) and - (paramSignature = "*" or result.getParamSignature() = paramSignature) + ( + paramSignature = "*" or + result.getParamSignature() = paramSignature or + result.getParamSignature() = "*" // function reports a wildcard signature (backend without extracted param signatures) + ) ) or // Transitive: method calls another method that is vulnerable (via ExternalRef for external calls) From 2cbc39b3ec922c7723b3434902b69dd521a34e15 Mon Sep 17 00:00:00 2001 From: Giulia Stocco <98900+gfs@users.noreply.github.com> Date: Wed, 4 Mar 2026 19:31:22 -0800 Subject: [PATCH 6/6] Fix JVM param signature to use JVM-specific dbscheme table il_call_target_param_signature references @il_instruction which is incompatible with JVM's @jvm_instruction type. Add jvm_call_target_param_signature table for JVM call target signatures and update the extractor and QL to use it. Also sync all extractor dbschemes (JVM and CIL) with the canonical ql/lib copy. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- binary/extractor/cil/semmlecode.binary.dbscheme | 10 ++++++++++ .../Semmle.Extraction.Java.ByteCode/JvmExtractor.cs | 2 +- binary/extractor/jvm/semmlecode.binary.dbscheme | 10 ++++++++++ .../code/binary/ast/internal/JvmInstructions.qll | 2 +- binary/ql/lib/semmlecode.binary.dbscheme | 10 ++++++++++ 5 files changed, 32 insertions(+), 2 deletions(-) diff --git a/binary/extractor/cil/semmlecode.binary.dbscheme b/binary/extractor/cil/semmlecode.binary.dbscheme index 98f4ed668212..032083f1cd27 100644 --- a/binary/extractor/cil/semmlecode.binary.dbscheme +++ b/binary/extractor/cil/semmlecode.binary.dbscheme @@ -3012,3 +3012,13 @@ jvm_stack_slot( int slot: int ref, int producer_id: @jvm_instruction ref ); + +/** + * Parameter type signature for JVM method call targets. + * The param_signature is a parenthesized, comma-separated list of fully-qualified + * parameter type names, e.g. "(java.lang.Object,long,long)" or "()" for no parameters.
+ */ +jvm_call_target_param_signature( + int instruction: @jvm_instruction ref, + string param_signature: string ref +); diff --git a/binary/extractor/jvm/Semmle.Extraction.Java.ByteCode/JvmExtractor.cs b/binary/extractor/jvm/Semmle.Extraction.Java.ByteCode/JvmExtractor.cs index b814d628c429..ce851609515a 100644 --- a/binary/extractor/jvm/Semmle.Extraction.Java.ByteCode/JvmExtractor.cs +++ b/binary/extractor/jvm/Semmle.Extraction.Java.ByteCode/JvmExtractor.cs @@ -654,7 +654,7 @@ private void ExtractMethodRef(Instruction instr, int instrId, ClassFile classFil // Write parameter type signature for overload-precise matching if (!string.IsNullOrEmpty(descriptor)) { - trap.WriteTuple("il_call_target_param_signature", instrId, ParseParamSignature(descriptor)); + trap.WriteTuple("jvm_call_target_param_signature", instrId, ParseParamSignature(descriptor)); } if (!IsVoidReturn(descriptor)) diff --git a/binary/extractor/jvm/semmlecode.binary.dbscheme b/binary/extractor/jvm/semmlecode.binary.dbscheme index 98f4ed668212..032083f1cd27 100644 --- a/binary/extractor/jvm/semmlecode.binary.dbscheme +++ b/binary/extractor/jvm/semmlecode.binary.dbscheme @@ -3012,3 +3012,13 @@ jvm_stack_slot( int slot: int ref, int producer_id: @jvm_instruction ref ); + +/** + * Parameter type signature for JVM method call targets. + * The param_signature is a parenthesized, comma-separated list of fully-qualified + * parameter type names, e.g. "(java.lang.Object,long,long)" or "()" for no parameters.
+ */ +jvm_call_target_param_signature( + int instruction: @jvm_instruction ref, + string param_signature: string ref +); diff --git a/binary/ql/lib/semmle/code/binary/ast/internal/JvmInstructions.qll b/binary/ql/lib/semmle/code/binary/ast/internal/JvmInstructions.qll index 65be49b50da3..14eac8b98759 100644 --- a/binary/ql/lib/semmle/code/binary/ast/internal/JvmInstructions.qll +++ b/binary/ql/lib/semmle/code/binary/ast/internal/JvmInstructions.qll @@ -1212,7 +1212,7 @@ class JvmPutfield extends @jvm_putfield, JvmFieldStore { } abstract class JvmInvoke extends JvmInstruction { string getCallTarget() { jvm_call_target_unresolved(this, result) } - string getParamSignature() { il_call_target_param_signature(this, result) } + string getParamSignature() { jvm_call_target_param_signature(this, result) } int getNumberOfArguments() { jvm_number_of_arguments(this, result) } diff --git a/binary/ql/lib/semmlecode.binary.dbscheme b/binary/ql/lib/semmlecode.binary.dbscheme index 98f4ed668212..032083f1cd27 100644 --- a/binary/ql/lib/semmlecode.binary.dbscheme +++ b/binary/ql/lib/semmlecode.binary.dbscheme @@ -3012,3 +3012,13 @@ jvm_stack_slot( int slot: int ref, int producer_id: @jvm_instruction ref ); + +/** + * Parameter type signature for JVM method call targets. + * The param_signature is a parenthesized, comma-separated list of fully-qualified + * parameter type names, e.g. "(java.lang.Object,long,long)" or "()" for no parameters. + */ +jvm_call_target_param_signature( + int instruction: @jvm_instruction ref, + string param_signature: string ref +);