From 4543c66d26ba2cd7df87869abfe32f45bd3ecbe4 Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 9 Jan 2026 13:49:17 +0000 Subject: [PATCH 01/16] Python: Prepare `LocalSourceNode` for locality Removes the dependence on the (global) `ModuleVariableNode.getARead()`, by adding a local version (that doesn't include `import *` reads) instead. --- .../python/dataflow/new/internal/DataFlowPublic.qll | 10 +++++++--- .../python/dataflow/new/internal/LocalSources.qll | 4 ++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll index 4d112bcdcddd..10ac89b023d8 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll @@ -440,13 +440,17 @@ class ModuleVariableNode extends Node, TModuleVariableNode { /** Gets a node that reads this variable. */ Node getARead() { - result.asCfgNode() = var.getALoad().getAFlowNode() and - // Ignore reads that happen when the module is imported. These are only executed once. - not result.getScope() = mod + result = this.getALocalRead() or this = import_star_read(result) } + /** Gets a node that reads this variable, excluding reads that happen through `from ... import *`. */ + Node getALocalRead() { + result.asCfgNode() = var.getALoad().getAFlowNode() and + not result.getScope() = mod + } + /** Gets an `EssaNode` that corresponds to an assignment of this global variable. */ Node getAWrite() { any(EssaNodeDefinition def).definedBy(var, result.asCfgNode().(DefinitionNode)) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/LocalSources.qll b/python/ql/lib/semmle/python/dataflow/new/internal/LocalSources.qll index c43a111c9c8b..7752846ae1ff 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/LocalSources.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/LocalSources.qll @@ -67,7 +67,7 @@ class LocalSourceNode extends Node { or // We explicitly include any read of a global variable, as some of these may have local flow going // into them. - this = any(ModuleVariableNode mvn).getARead() + this = any(ModuleVariableNode v).getALocalRead() or // We include all scope entry definitions, as these act as the local source within the scope they // enter. @@ -248,7 +248,7 @@ private module Cached { pragma[nomagic] private predicate localSourceFlowStep(Node nodeFrom, Node nodeTo) { simpleLocalFlowStep(nodeFrom, nodeTo, _) and - not nodeTo = any(ModuleVariableNode v).getARead() + not nodeTo = any(ModuleVariableNode v).getALocalRead() } /** From 30ce4069c7f57a158f9cd4fa0a1e3f35906538b2 Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 9 Jan 2026 17:01:11 +0000 Subject: [PATCH 02/16] Python: Remove global restriction on `ModuleVariableNode` This may result in more nodes, but it should still be bounded by the number of global variables in the source code. --- .../python/dataflow/new/internal/DataFlowPublic.qll | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll index 10ac89b023d8..0f508898c5aa 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll @@ -76,15 +76,7 @@ newtype TNode = node.getNode() = any(Comp c).getIterable() } or /** A node representing a global (module-level) variable in a specific module. */ - TModuleVariableNode(Module m, GlobalVariable v) { - v.getScope() = m and - ( - v.escapes() - or - isAccessedThroughImportStar(m) and - ImportStar::globalNameDefinedInModule(v.getId(), m) - ) - } or + TModuleVariableNode(Module m, GlobalVariable v) { v.getScope() = m } or /** * A synthetic node representing that an iterable sequence flows to consumer. */ @@ -470,8 +462,6 @@ class ModuleVariableNode extends Node, TModuleVariableNode { override Location getLocation() { result = mod.getLocation() } } -private predicate isAccessedThroughImportStar(Module m) { m = ImportStar::getStarImported(_) } - private ModuleVariableNode import_star_read(Node n) { resolved_import_star_module(result.getModule(), result.getVariable().getId(), n) } From ac5a74448fd8a6191d1c01060787eb70b7ce6e3f Mon Sep 17 00:00:00 2001 From: Taus Date: Mon, 12 Jan 2026 15:04:14 +0000 Subject: [PATCH 03/16] Python: Fix tests With `ModuleVariableNode`s now appearing for _all_ global variables (not just the ones that actually seem to be used), some of the tests changed a bit. Mostly this was in the form of new flow (because of new nodes that popped into existence). For some inline expectation tests, I opted to instead exclude these results, as there was no suitable location to annotate. For the normal tests, I just accepted the output (after having vetted it carefully, of course). --- .../utils/test/dataflow/MaximalFlowTest.qll | 4 ++- .../experimental/attrs/AttrWrites.expected | 1 + .../dataflow/basic/global.expected | 15 +++++++++++ .../dataflow/basic/globalStep.expected | 4 +++ .../dataflow/basic/local.expected | 5 ++++ .../dataflow/basic/maximalFlows.expected | 5 ++++ .../dataflow/basic/sinks.expected | 5 ++++ .../dataflow/basic/sources.expected | 5 ++++ .../dataflow/global-flow/test.py | 26 +++++++++---------- .../dataflow/typetracking/moduleattr.expected | 1 + .../PoorMansFunctionResolutionTest.ql | 4 ++- 11 files changed, 60 insertions(+), 15 deletions(-) diff --git a/python/ql/lib/utils/test/dataflow/MaximalFlowTest.qll b/python/ql/lib/utils/test/dataflow/MaximalFlowTest.qll index 7587584a269b..5e9831906a4a 100644 --- a/python/ql/lib/utils/test/dataflow/MaximalFlowTest.qll +++ b/python/ql/lib/utils/test/dataflow/MaximalFlowTest.qll @@ -8,7 +8,9 @@ module MaximalFlowTest implements FlowTestSig { predicate relevantFlow(DataFlow::Node source, DataFlow::Node sink) { source != sink and - MaximalFlows::flow(source, sink) + MaximalFlows::flow(source, sink) and + // exclude ModuleVariableNodes (which have location 0:0:0:0) + not sink instanceof DataFlow::ModuleVariableNode } } diff --git a/python/ql/test/experimental/attrs/AttrWrites.expected b/python/ql/test/experimental/attrs/AttrWrites.expected index f8a55824043f..d1fc30b34515 100644 --- a/python/ql/test/experimental/attrs/AttrWrites.expected +++ b/python/ql/test/experimental/attrs/AttrWrites.expected @@ -1,4 +1,5 @@ | test.py:5:9:5:16 | ControlFlowNode for __init__ | test.py:4:1:4:20 | ControlFlowNode for ClassExpr | __init__ | test.py:5:5:5:28 | ControlFlowNode for FunctionExpr | | test.py:6:9:6:16 | ControlFlowNode for Attribute | test.py:6:9:6:12 | ControlFlowNode for self | foo | test.py:6:20:6:22 | ControlFlowNode for foo | +| test.py:9:1:9:9 | ControlFlowNode for Attribute | test.py:0:0:0:0 | ModuleVariableNode in Module test for myobj | foo | test.py:9:13:9:17 | ControlFlowNode for StringLiteral | | test.py:9:1:9:9 | ControlFlowNode for Attribute | test.py:9:1:9:5 | ControlFlowNode for myobj | foo | test.py:9:13:9:17 | ControlFlowNode for StringLiteral | | test.py:12:1:12:25 | ControlFlowNode for setattr() | test.py:12:9:12:13 | ControlFlowNode for myobj | foo | test.py:12:23:12:24 | ControlFlowNode for IntegerLiteral | diff --git a/python/ql/test/library-tests/dataflow/basic/global.expected b/python/ql/test/library-tests/dataflow/basic/global.expected index 7d2c0cab9b92..9e0ef2e6751b 100644 --- a/python/ql/test/library-tests/dataflow/basic/global.expected +++ b/python/ql/test/library-tests/dataflow/basic/global.expected @@ -1,6 +1,9 @@ +| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:0:0:0:0 | ModuleVariableNode in Module test for obfuscated_id | | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | ControlFlowNode for obfuscated_id | | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id | +| test.py:1:5:1:17 | ControlFlowNode for obfuscated_id | test.py:0:0:0:0 | ModuleVariableNode in Module test for obfuscated_id | | test.py:1:5:1:17 | ControlFlowNode for obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id | +| test.py:1:19:1:19 | ControlFlowNode for x | test.py:0:0:0:0 | ModuleVariableNode in Module test for b | | test.py:1:19:1:19 | ControlFlowNode for x | test.py:2:3:2:3 | ControlFlowNode for y | | test.py:1:19:1:19 | ControlFlowNode for x | test.py:2:7:2:7 | ControlFlowNode for x | | test.py:1:19:1:19 | ControlFlowNode for x | test.py:3:3:3:3 | ControlFlowNode for z | @@ -8,26 +11,33 @@ | test.py:1:19:1:19 | ControlFlowNode for x | test.py:4:10:4:10 | ControlFlowNode for z | | test.py:1:19:1:19 | ControlFlowNode for x | test.py:7:1:7:1 | ControlFlowNode for b | | test.py:1:19:1:19 | ControlFlowNode for x | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | +| test.py:2:3:2:3 | ControlFlowNode for y | test.py:0:0:0:0 | ModuleVariableNode in Module test for b | | test.py:2:3:2:3 | ControlFlowNode for y | test.py:3:3:3:3 | ControlFlowNode for z | | test.py:2:3:2:3 | ControlFlowNode for y | test.py:3:7:3:7 | ControlFlowNode for y | | test.py:2:3:2:3 | ControlFlowNode for y | test.py:4:10:4:10 | ControlFlowNode for z | | test.py:2:3:2:3 | ControlFlowNode for y | test.py:7:1:7:1 | ControlFlowNode for b | | test.py:2:3:2:3 | ControlFlowNode for y | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | +| test.py:2:7:2:7 | ControlFlowNode for x | test.py:0:0:0:0 | ModuleVariableNode in Module test for b | | test.py:2:7:2:7 | ControlFlowNode for x | test.py:2:3:2:3 | ControlFlowNode for y | | test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:3:3:3 | ControlFlowNode for z | | test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:7:3:7 | ControlFlowNode for y | | test.py:2:7:2:7 | ControlFlowNode for x | test.py:4:10:4:10 | ControlFlowNode for z | | test.py:2:7:2:7 | ControlFlowNode for x | test.py:7:1:7:1 | ControlFlowNode for b | | test.py:2:7:2:7 | ControlFlowNode for x | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | +| test.py:3:3:3:3 | ControlFlowNode for z | test.py:0:0:0:0 | ModuleVariableNode in Module test for b | | test.py:3:3:3:3 | ControlFlowNode for z | test.py:4:10:4:10 | ControlFlowNode for z | | test.py:3:3:3:3 | ControlFlowNode for z | test.py:7:1:7:1 | ControlFlowNode for b | | test.py:3:3:3:3 | ControlFlowNode for z | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | +| test.py:3:7:3:7 | ControlFlowNode for y | test.py:0:0:0:0 | ModuleVariableNode in Module test for b | | test.py:3:7:3:7 | ControlFlowNode for y | test.py:3:3:3:3 | ControlFlowNode for z | | test.py:3:7:3:7 | ControlFlowNode for y | test.py:4:10:4:10 | ControlFlowNode for z | | test.py:3:7:3:7 | ControlFlowNode for y | test.py:7:1:7:1 | ControlFlowNode for b | | test.py:3:7:3:7 | ControlFlowNode for y | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | +| test.py:4:10:4:10 | ControlFlowNode for z | test.py:0:0:0:0 | ModuleVariableNode in Module test for b | | test.py:4:10:4:10 | ControlFlowNode for z | test.py:7:1:7:1 | ControlFlowNode for b | | test.py:4:10:4:10 | ControlFlowNode for z | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | +| test.py:6:1:6:1 | ControlFlowNode for a | test.py:0:0:0:0 | ModuleVariableNode in Module test for a | +| test.py:6:1:6:1 | ControlFlowNode for a | test.py:0:0:0:0 | ModuleVariableNode in Module test for b | | test.py:6:1:6:1 | ControlFlowNode for a | test.py:1:19:1:19 | ControlFlowNode for x | | test.py:6:1:6:1 | ControlFlowNode for a | test.py:2:3:2:3 | ControlFlowNode for y | | test.py:6:1:6:1 | ControlFlowNode for a | test.py:2:7:2:7 | ControlFlowNode for x | @@ -37,6 +47,8 @@ | test.py:6:1:6:1 | ControlFlowNode for a | test.py:7:1:7:1 | ControlFlowNode for b | | test.py:6:1:6:1 | ControlFlowNode for a | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | | test.py:6:1:6:1 | ControlFlowNode for a | test.py:7:19:7:19 | ControlFlowNode for a | +| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:0:0:0:0 | ModuleVariableNode in Module test for a | +| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:0:0:0:0 | ModuleVariableNode in Module test for b | | test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:1:19:1:19 | ControlFlowNode for x | | test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:2:3:2:3 | ControlFlowNode for y | | test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:2:7:2:7 | ControlFlowNode for x | @@ -47,7 +59,10 @@ | test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:7:1:7:1 | ControlFlowNode for b | | test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | | test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:7:19:7:19 | ControlFlowNode for a | +| test.py:7:1:7:1 | ControlFlowNode for b | test.py:0:0:0:0 | ModuleVariableNode in Module test for b | +| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:0:0:0:0 | ModuleVariableNode in Module test for b | | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:7:1:7:1 | ControlFlowNode for b | +| test.py:7:19:7:19 | ControlFlowNode for a | test.py:0:0:0:0 | ModuleVariableNode in Module test for b | | test.py:7:19:7:19 | ControlFlowNode for a | test.py:1:19:1:19 | ControlFlowNode for x | | test.py:7:19:7:19 | ControlFlowNode for a | test.py:2:3:2:3 | ControlFlowNode for y | | test.py:7:19:7:19 | ControlFlowNode for a | test.py:2:7:2:7 | ControlFlowNode for x | diff --git a/python/ql/test/library-tests/dataflow/basic/globalStep.expected b/python/ql/test/library-tests/dataflow/basic/globalStep.expected index 00ee53dba003..26d8902e7bbe 100644 --- a/python/ql/test/library-tests/dataflow/basic/globalStep.expected +++ b/python/ql/test/library-tests/dataflow/basic/globalStep.expected @@ -1,5 +1,6 @@ | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | ControlFlowNode for obfuscated_id | | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | ControlFlowNode for obfuscated_id | +| test.py:1:5:1:17 | ControlFlowNode for obfuscated_id | test.py:0:0:0:0 | ModuleVariableNode in Module test for obfuscated_id | | test.py:1:5:1:17 | ControlFlowNode for obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id | | test.py:1:19:1:19 | ControlFlowNode for x | test.py:2:3:2:3 | ControlFlowNode for y | | test.py:1:19:1:19 | ControlFlowNode for x | test.py:2:3:2:3 | ControlFlowNode for y | @@ -31,10 +32,13 @@ | test.py:3:7:3:7 | ControlFlowNode for y | test.py:3:3:3:3 | ControlFlowNode for z | | test.py:4:10:4:10 | ControlFlowNode for z | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | | test.py:4:10:4:10 | ControlFlowNode for z | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | +| test.py:6:1:6:1 | ControlFlowNode for a | test.py:0:0:0:0 | ModuleVariableNode in Module test for a | | test.py:6:1:6:1 | ControlFlowNode for a | test.py:7:19:7:19 | ControlFlowNode for a | | test.py:6:1:6:1 | ControlFlowNode for a | test.py:7:19:7:19 | ControlFlowNode for a | | test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:6:1:6:1 | ControlFlowNode for a | | test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:6:1:6:1 | ControlFlowNode for a | +| test.py:7:1:7:1 | ControlFlowNode for b | test.py:0:0:0:0 | ModuleVariableNode in Module test for b | +| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:7:1:7:1 | ControlFlowNode for b | | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:7:1:7:1 | ControlFlowNode for b | | test.py:7:19:7:19 | ControlFlowNode for a | test.py:1:19:1:19 | ControlFlowNode for x | | test.py:7:19:7:19 | ControlFlowNode for a | test.py:1:19:1:19 | ControlFlowNode for x | diff --git a/python/ql/test/library-tests/dataflow/basic/local.expected b/python/ql/test/library-tests/dataflow/basic/local.expected index 142c84015aeb..eb47f1308d42 100644 --- a/python/ql/test/library-tests/dataflow/basic/local.expected +++ b/python/ql/test/library-tests/dataflow/basic/local.expected @@ -1,3 +1,8 @@ +| test.py:0:0:0:0 | ModuleVariableNode in Module test for __name__ | test.py:0:0:0:0 | ModuleVariableNode in Module test for __name__ | +| test.py:0:0:0:0 | ModuleVariableNode in Module test for __package__ | test.py:0:0:0:0 | ModuleVariableNode in Module test for __package__ | +| test.py:0:0:0:0 | ModuleVariableNode in Module test for a | test.py:0:0:0:0 | ModuleVariableNode in Module test for a | +| test.py:0:0:0:0 | ModuleVariableNode in Module test for b | test.py:0:0:0:0 | ModuleVariableNode in Module test for b | +| test.py:0:0:0:0 | ModuleVariableNode in Module test for obfuscated_id | test.py:0:0:0:0 | ModuleVariableNode in Module test for obfuscated_id | | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | ControlFlowNode for obfuscated_id | | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id | diff --git a/python/ql/test/library-tests/dataflow/basic/maximalFlows.expected b/python/ql/test/library-tests/dataflow/basic/maximalFlows.expected index a9fa5d8da920..421918620455 100644 --- a/python/ql/test/library-tests/dataflow/basic/maximalFlows.expected +++ b/python/ql/test/library-tests/dataflow/basic/maximalFlows.expected @@ -1,7 +1,12 @@ +| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:0:0:0:0 | ModuleVariableNode in Module test for obfuscated_id | | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id | +| test.py:1:19:1:19 | ControlFlowNode for x | test.py:0:0:0:0 | ModuleVariableNode in Module test for b | | test.py:1:19:1:19 | ControlFlowNode for x | test.py:4:10:4:10 | ControlFlowNode for z | | test.py:1:19:1:19 | ControlFlowNode for x | test.py:7:1:7:1 | ControlFlowNode for b | +| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:0:0:0:0 | ModuleVariableNode in Module test for a | +| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:0:0:0:0 | ModuleVariableNode in Module test for b | | test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:4:10:4:10 | ControlFlowNode for z | | test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:7:1:7:1 | ControlFlowNode for b | | test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:7:19:7:19 | ControlFlowNode for a | +| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:0:0:0:0 | ModuleVariableNode in Module test for b | | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:7:1:7:1 | ControlFlowNode for b | diff --git a/python/ql/test/library-tests/dataflow/basic/sinks.expected b/python/ql/test/library-tests/dataflow/basic/sinks.expected index bf5800e0b73f..34525d5043e1 100644 --- a/python/ql/test/library-tests/dataflow/basic/sinks.expected +++ b/python/ql/test/library-tests/dataflow/basic/sinks.expected @@ -1,3 +1,8 @@ +| test.py:0:0:0:0 | ModuleVariableNode in Module test for __name__ | +| test.py:0:0:0:0 | ModuleVariableNode in Module test for __package__ | +| test.py:0:0:0:0 | ModuleVariableNode in Module test for a | +| test.py:0:0:0:0 | ModuleVariableNode in Module test for b | +| test.py:0:0:0:0 | ModuleVariableNode in Module test for obfuscated_id | | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | | test.py:1:1:1:21 | SynthDictSplatParameterNode | | test.py:1:5:1:17 | ControlFlowNode for obfuscated_id | diff --git a/python/ql/test/library-tests/dataflow/basic/sources.expected b/python/ql/test/library-tests/dataflow/basic/sources.expected index bf5800e0b73f..34525d5043e1 100644 --- a/python/ql/test/library-tests/dataflow/basic/sources.expected +++ b/python/ql/test/library-tests/dataflow/basic/sources.expected @@ -1,3 +1,8 @@ +| test.py:0:0:0:0 | ModuleVariableNode in Module test for __name__ | +| test.py:0:0:0:0 | ModuleVariableNode in Module test for __package__ | +| test.py:0:0:0:0 | ModuleVariableNode in Module test for a | +| test.py:0:0:0:0 | ModuleVariableNode in Module test for b | +| test.py:0:0:0:0 | ModuleVariableNode in Module test for obfuscated_id | | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | | test.py:1:1:1:21 | SynthDictSplatParameterNode | | test.py:1:5:1:17 | ControlFlowNode for obfuscated_id | diff --git a/python/ql/test/library-tests/dataflow/global-flow/test.py b/python/ql/test/library-tests/dataflow/global-flow/test.py index ab2da1c2959d..0b62e47c71fb 100644 --- a/python/ql/test/library-tests/dataflow/global-flow/test.py +++ b/python/ql/test/library-tests/dataflow/global-flow/test.py @@ -8,9 +8,9 @@ g1, g2 = [6], [7] # $writes=g1 writes=g2 -# Assignment that's only referenced in this scope. This one will not give rise to a `ModuleVariableNode`. +# Assignment that's only referenced in this scope. -unreferenced_g = [8] +unreferenced_g = [8] # $writes=unreferenced_g print(unreferenced_g) # Testing modifications of globals @@ -34,7 +34,7 @@ # A global with multiple potential definitions -import unknown_module +import unknown_module # $writes=unknown_module if unknown_module.attr: g_mult = [200] # $writes=g_mult else: @@ -46,7 +46,7 @@ if unknown_module.attr: g_redef = [500] # $writes=g_redef -def global_access(): +def global_access(): # $writes=global_access l = 5 print(g) # $reads=g print(g1) # $reads=g1 @@ -59,12 +59,12 @@ def global_access(): def print_g_mod(): # $writes=print_g_mod print(g_mod) # $reads=g_mod -def global_mod(): +def global_mod(): # $writes=global_mod global g_mod g_mod += [150] # $reads,writes=g_mod print_g_mod() # $reads=print_g_mod -def global_inside_local_function(): +def global_inside_local_function(): # $writes=global_inside_local_function def local_function(): print(g) # $reads=g local_function() @@ -76,21 +76,21 @@ def local_function(): import foo_module # $writes=foo_module -def use_foo(): +def use_foo(): # $writes=use_foo print(foo_module.attr) # $reads=foo_module # Partial imports from bar import baz_attr, quux_attr # $writes=baz_attr writes=quux_attr -def use_partial_import(): +def use_partial_import(): # $writes=use_partial_import print(baz_attr, quux_attr) # $reads=baz_attr reads=quux_attr # Aliased imports from spam_module import ham_attr as eggs_attr # $writes=eggs_attr -def use_aliased_import(): +def use_aliased_import(): # $writes=use_aliased_import print(eggs_attr) # $reads=eggs_attr # Import star (unlikely to work unless we happen to extract/model the referenced module) @@ -99,23 +99,23 @@ def use_aliased_import(): from unknown import * -def secretly_use_unknown(): +def secretly_use_unknown(): # $writes=secretly_use_unknown print(unknown_attr) # $reads=unknown_attr # Known modules from known import * -def secretly_use_known(): +def secretly_use_known(): # $writes=secretly_use_known print(known_attr) # $reads=known_attr # Local import in function -def imports_locally(): +def imports_locally(): # $writes=imports_locally import mod1 # Global import hidden in function -def imports_stuff(): +def imports_stuff(): # $writes=imports_stuff global mod2 import mod2 # $writes=mod2 diff --git a/python/ql/test/library-tests/dataflow/typetracking/moduleattr.expected b/python/ql/test/library-tests/dataflow/typetracking/moduleattr.expected index ff9673aaaea8..06b560623929 100644 --- a/python/ql/test/library-tests/dataflow/typetracking/moduleattr.expected +++ b/python/ql/test/library-tests/dataflow/typetracking/moduleattr.expected @@ -2,6 +2,7 @@ module_tracker | import_as_attr.py:1:6:1:11 | ControlFlowNode for ImportExpr | module_attr_tracker | import_as_attr.py:0:0:0:0 | ModuleVariableNode in Module import_as_attr for attr_ref | +| import_as_attr.py:0:0:0:0 | ModuleVariableNode in Module import_as_attr for x | | import_as_attr.py:1:20:1:35 | ControlFlowNode for ImportMember | | import_as_attr.py:1:28:1:35 | ControlFlowNode for attr_ref | | import_as_attr.py:3:1:3:1 | ControlFlowNode for x | diff --git a/python/ql/test/library-tests/frameworks/internal-ql-helpers/PoorMansFunctionResolutionTest.ql b/python/ql/test/library-tests/frameworks/internal-ql-helpers/PoorMansFunctionResolutionTest.ql index b0325b027c3a..b9575e43493e 100644 --- a/python/ql/test/library-tests/frameworks/internal-ql-helpers/PoorMansFunctionResolutionTest.ql +++ b/python/ql/test/library-tests/frameworks/internal-ql-helpers/PoorMansFunctionResolutionTest.ql @@ -17,7 +17,9 @@ module InlinePoorMansFunctionResolutionTest implements TestSig { ) and // exclude decorator calls (which with our extractor rewrites does reference the // function) - not ref.asExpr() = func.getDefinition().(FunctionExpr).getADecoratorCall() + not ref.asExpr() = func.getDefinition().(FunctionExpr).getADecoratorCall() and + // exclude ModuleVariableNodes (which have location 0:0:0:0) + not ref instanceof DataFlow::ModuleVariableNode | value = func.getName() and tag = "resolved" and From 7fccc23dbef3d37b398534c5727d37bc2c5ebd28 Mon Sep 17 00:00:00 2001 From: Taus Date: Mon, 26 Jan 2026 13:28:35 +0000 Subject: [PATCH 04/16] Python: Make `ExtractedArgumentNode` local Explicitly adds a bunch of nodes that were previously (using a global analysis) identified as `ExtractedArgumentNode`s. These are then subsequently filtered out in `argumentOf` (which is global) by putting the call to `getCallArg` there instead of in the charpred. --- .../dataflow/new/internal/DataFlowPublic.qll | 46 ++++++++++++++----- 1 file changed, 35 insertions(+), 11 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll index 0f508898c5aa..d14cac5a4cd8 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll @@ -339,27 +339,51 @@ abstract class ArgumentNode extends Node { final ExtractedDataFlowCall getCall() { this.argumentOf(result, _) } } +/** Gets an overapproximation of the argument nodes that are included in `getCallArg`. */ +Node getCallArgApproximation() { + // pre-update nodes for calls + result = any(CallCfgNode c).(PostUpdateNode).getPreUpdateNode() + or + // self parameters in methods + exists(Class c | result.asExpr() = c.getAMethod().getArg(0)) + or + // the object part of an attribute expression (which might be a bound method) + result.asCfgNode() = any(AttrNode a).getObject() + or + // the function part of any call + result.asCfgNode() = any(CallNode c).getFunction() +} + +/** Gets the extracted argument nodes that do not rely on `getCallArg`. */ +private Node otherArgs() { + // for potential summaries we allow all normal call arguments + normalCallArg(_, result, _) + or + // and self arguments + result.asCfgNode() = any(CallNode c).getFunction().(AttrNode).getObject() + or + // for comprehensions, we allow the synthetic `iterable` argument + result.asExpr() = any(Comp c).getIterable() +} + /** * A data flow node that represents a call argument found in the source code. */ class ExtractedArgumentNode extends ArgumentNode { ExtractedArgumentNode() { - // for resolved calls, we need to allow all argument nodes - getCallArg(_, _, _, this, _) - or - // for potential summaries we allow all normal call arguments - normalCallArg(_, this, _) + this = getCallArgApproximation() or - // and self arguments - this.asCfgNode() = any(CallNode c).getFunction().(AttrNode).getObject() - or - // for comprehensions, we allow the synthetic `iterable` argument - this.asExpr() = any(Comp c).getIterable() + this = otherArgs() } final override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) { this = call.getArgument(pos) and - call instanceof ExtractedDataFlowCall + call instanceof ExtractedDataFlowCall and + ( + this = otherArgs() + or + this = getCallArgApproximation() and getCallArg(_, _, _, this, _) + ) } } From 6113d4be9e6a3711e4afdcdc22e27cb0afb56da6 Mon Sep 17 00:00:00 2001 From: Taus Date: Mon, 26 Jan 2026 15:38:25 +0000 Subject: [PATCH 05/16] Python: Fix test issues Fixes the test failures that arose from making `ExtractedArgumentNode` local. For the consistency checks, we now explicitly exclude the `ExtractedArgumentNode`s (now much more plentiful due to the overapproximation) that don't have a corresponding `getCallArg` tuple. For various queries/tests using `instanceof ArgumentNode`, we instead us `isArgumentNode`, which explicitly filters out the ones for which `isArgumentOf` doesn't hold (which, again, is the case for most of the nodes in the overapproximation). --- python/ql/consistency-queries/DataFlowConsistency.ql | 10 ++++++++++ python/ql/lib/utils/test/dataflow/MaximalFlowTest.qll | 2 +- python/ql/lib/utils/test/dataflow/callGraphConfig.qll | 2 +- .../InitCallsSubclass/InitCallsSubclassMethod.ql | 3 ++- 4 files changed, 14 insertions(+), 3 deletions(-) diff --git a/python/ql/consistency-queries/DataFlowConsistency.ql b/python/ql/consistency-queries/DataFlowConsistency.ql index 759db3d19a9c..62bbb3062c29 100644 --- a/python/ql/consistency-queries/DataFlowConsistency.ql +++ b/python/ql/consistency-queries/DataFlowConsistency.ql @@ -26,6 +26,8 @@ private module Input implements InputSig { or // TODO: Implement post-updates for **kwargs, see tests added in https://github.com/github/codeql/pull/14936 exists(ArgumentPosition apos | n.argumentOf(_, apos) and apos.isDictSplat()) + or + missingArgumentCallExclude(n) } predicate reverseReadExclude(Node n) { @@ -134,6 +136,14 @@ private module Input implements InputSig { other.getNode().getScope() = f ) } + + predicate missingArgumentCallExclude(ArgumentNode arg) { + // We overapproximate the argument nodes in order to not rely on the global `getCallArg` + // predicate. + // Because of this, we must exclude the cases where we have an approximation but no actual + // argument node. + arg = getCallArgApproximation() and not getCallArg(_, _, _, arg, _) + } } import MakeConsistency diff --git a/python/ql/lib/utils/test/dataflow/MaximalFlowTest.qll b/python/ql/lib/utils/test/dataflow/MaximalFlowTest.qll index 5e9831906a4a..cbd3b4c6aa51 100644 --- a/python/ql/lib/utils/test/dataflow/MaximalFlowTest.qll +++ b/python/ql/lib/utils/test/dataflow/MaximalFlowTest.qll @@ -35,7 +35,7 @@ module MaximalFlowsConfig implements DataFlow::ConfigSig { predicate isSink(DataFlow::Node node) { exists(node.getLocation().getFile().getRelativePath()) and not any(CallNode c).getArg(_) = node.asCfgNode() and - not node instanceof DataFlow::ArgumentNode and + not isArgumentNode(node, _, _) and not node.asCfgNode().(NameNode).getId().matches("SINK%") and not DataFlow::localFlowStep(node, _) } diff --git a/python/ql/lib/utils/test/dataflow/callGraphConfig.qll b/python/ql/lib/utils/test/dataflow/callGraphConfig.qll index 8528396a12f2..85ecb9b701db 100644 --- a/python/ql/lib/utils/test/dataflow/callGraphConfig.qll +++ b/python/ql/lib/utils/test/dataflow/callGraphConfig.qll @@ -9,7 +9,7 @@ module CallGraphConfig implements DataFlow::ConfigSig { predicate isSource(DataFlow::Node node) { node instanceof DataFlowPrivate::ReturnNode or - node instanceof DataFlow::ArgumentNode + DataFlowPrivate::isArgumentNode(node, _, _) } predicate isSink(DataFlow::Node node) { diff --git a/python/ql/src/Classes/InitCallsSubclass/InitCallsSubclassMethod.ql b/python/ql/src/Classes/InitCallsSubclass/InitCallsSubclassMethod.ql index 32eb5ffe79e5..4c1b3247d96f 100644 --- a/python/ql/src/Classes/InitCallsSubclass/InitCallsSubclassMethod.ql +++ b/python/ql/src/Classes/InitCallsSubclass/InitCallsSubclassMethod.ql @@ -15,6 +15,7 @@ import python import semmle.python.dataflow.new.DataFlow import semmle.python.dataflow.new.internal.DataFlowDispatch +import semmle.python.dataflow.new.internal.DataFlowPrivate predicate initSelfCallOverridden( Function init, DataFlow::Node self, DataFlow::MethodCallNode call, Function target, @@ -39,7 +40,7 @@ predicate readsFromSelf(Function method) { self.getParameter() = method.getArg(0) and DataFlow::localFlow(self, sink) | - sink instanceof DataFlow::ArgumentNode + isArgumentNode(sink, _, _) or sink = any(DataFlow::AttrRead a).getObject() ) From 3f718123a687bf43d6bdc1c108a23e30f5503548 Mon Sep 17 00:00:00 2001 From: Taus Date: Thu, 29 Jan 2026 16:09:43 +0000 Subject: [PATCH 06/16] Python: Make capturing closure arguments synthetic and non-global Uses the same trick as for `ExtractedArgumentNode`, wherein we postpone the global restriction on the charpred to instead be in the `argumentOf` predicate (which is global anyway). In addition to this, we also converted `CapturedVariablesArgumentNode` into a proper synthetic node, and added an explicit post-update node for it. These nodes just act as wrappers for the function part of call nodes. Thus, to make them work with the variable capture machinery, we simply map them to the closure node for the corresponding control-flow or post-update node. --- .../new/internal/DataFlowDispatch.qll | 74 ++++++++++++++----- .../dataflow/new/internal/DataFlowPrivate.qll | 8 ++ .../dataflow/new/internal/DataFlowPublic.qll | 14 ++++ .../dataflow/new/internal/VariableCapture.qll | 6 ++ 4 files changed, 82 insertions(+), 20 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll index c717cd7bc97b..b04b83be83ec 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll @@ -1714,36 +1714,66 @@ private class SummaryPostUpdateNode extends FlowSummaryNode, PostUpdateNodeImpl * This is also known as the environment part of a closure. * * This is used for tracking flow through captured variables. - * - * TODO: - * We might want a synthetic node here, but currently that incurs problems - * with non-monotonic recursion, because of the use of `resolveCall` in the - * char pred. This may be solvable by using - * `CallGraphConstruction::Make` in stead of - * `CallGraphConstruction::Simple::Make` appropriately. */ -class CapturedVariablesArgumentNode extends CfgNode, ArgumentNode { - CallNode callNode; +class SynthCapturedVariablesArgumentNode extends Node, TSynthCapturedVariablesArgumentNode { + ControlFlowNode callable; - CapturedVariablesArgumentNode() { - node = callNode.getFunction() and - exists(Function target | resolveCall(callNode, target, _) | - target = any(VariableCapture::CapturedVariable v).getACapturingScope() - ) - } + SynthCapturedVariablesArgumentNode() { this = TSynthCapturedVariablesArgumentNode(callable) } + + /** Gets the `CallNode` corresponding to this captured variables argument node. */ + CallNode getCallNode() { result.getFunction() = callable } + + /** Gets the `CfgNode` that corresponds to this synthetic node. */ + CfgNode getUnderlyingNode() { result.asCfgNode() = callable } + + override Scope getScope() { result = callable.getScope() } + + override Location getLocation() { result = callable.getLocation() } override string toString() { result = "Capturing closure argument" } +} +/** A captured variables argument node viewed as an argument node. Needed because `argumentOf` is a global predicate. */ +class CapturedVariablesArgumentNodeAsArgumentNode extends ArgumentNode, + SynthCapturedVariablesArgumentNode +{ override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) { - callNode = call.getNode() and - pos.isLambdaSelf() + exists(CallNode callNode | callNode = this.getCallNode() | + callNode = call.getNode() and + exists(Function target | resolveCall(callNode, target, _) | + target = any(VariableCapture::CapturedVariable v).getACapturingScope() + ) and + pos.isLambdaSelf() + ) } } -/** A synthetic node representing the values of variables captured by a comprehension. */ -class SynthCompCapturedVariablesArgumentNode extends Node, TSynthCompCapturedVariablesArgumentNode, - ArgumentNode +/** A synthetic node representing the values of captured variables after the output has been computed. */ +class SynthCapturedVariablesArgumentPostUpdateNode extends PostUpdateNodeImpl, + TSynthCapturedVariablesArgumentPostUpdateNode { + ControlFlowNode callable; + + SynthCapturedVariablesArgumentPostUpdateNode() { + this = TSynthCapturedVariablesArgumentPostUpdateNode(callable) + } + + /** Gets the `PostUpdateNode` (for a `CfgNode`) that corresponds to this synthetic node. */ + PostUpdateNode getUnderlyingNode() { result.getPreUpdateNode().asCfgNode() = callable } + + override string toString() { result = "[post] Capturing closure argument" } + + override Scope getScope() { result = callable.getScope() } + + override Location getLocation() { result = callable.getLocation() } + + override SynthCapturedVariablesArgumentNode getPreUpdateNode() { + result = TSynthCapturedVariablesArgumentNode(callable) + } +} + +/** A synthetic node representing the values of variables captured by a comprehension. */ +class SynthCompCapturedVariablesArgumentNode extends Node, TSynthCompCapturedVariablesArgumentNode { Comp comp; SynthCompCapturedVariablesArgumentNode() { this = TSynthCompCapturedVariablesArgumentNode(comp) } @@ -1755,7 +1785,11 @@ class SynthCompCapturedVariablesArgumentNode extends Node, TSynthCompCapturedVar override Location getLocation() { result = comp.getLocation() } Comp getComprehension() { result = comp } +} +class SynthCompCapturedVariablesArgumentNodeAsArgumentNode extends SynthCompCapturedVariablesArgumentNode, + ArgumentNode +{ override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) { call.(ComprehensionCall).getComprehension() = comp and pos.isLambdaSelf() diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll index 2322539995b6..9866bd009642 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll @@ -1128,6 +1128,14 @@ predicate nodeIsHidden(Node n) { n instanceof SynthCaptureNode or n instanceof SynthCapturedVariablesParameterNode + or + n instanceof SynthCapturedVariablesArgumentNode + or + n instanceof SynthCapturedVariablesArgumentPostUpdateNode + or + n instanceof SynthCompCapturedVariablesArgumentNode + or + n instanceof SynthCompCapturedVariablesArgumentPostUpdateNode } class LambdaCallKind = Unit; diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll index d14cac5a4cd8..532f7b23e4cf 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll @@ -121,6 +121,20 @@ newtype TNode = f = any(VariableCapture::CapturedVariable v).getACapturingScope() and exists(TFunction(f)) } or + /** + * A synthetic node representing the values of the variables captured + * by the callable being called. + */ + TSynthCapturedVariablesArgumentNode(ControlFlowNode callable) { + callable = any(CallNode c).getFunction() + } or + /** + * A synthetic node representing the values of the variables captured + * by the callable being called, after the output has been computed. + */ + TSynthCapturedVariablesArgumentPostUpdateNode(ControlFlowNode callable) { + callable = any(CallNode c).getFunction() + } or /** A synthetic node representing the values of variables captured by a comprehension. */ TSynthCompCapturedVariablesArgumentNode(Comp comp) { comp.getFunction() = any(VariableCapture::CapturedVariable v).getACapturingScope() diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/VariableCapture.qll b/python/ql/lib/semmle/python/dataflow/new/internal/VariableCapture.qll index a7b3b9ceaebf..5ed365a8e56f 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/VariableCapture.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/VariableCapture.qll @@ -114,6 +114,12 @@ private Flow::ClosureNode asClosureNode(Node n) { result.(Flow::ExprNode).getExpr().getNode() = comp ) or + // For captured variable argument nodes (and their post-update variants), we use the closure node + // for the underlying node. + result = asClosureNode(n.(SynthCapturedVariablesArgumentNode).getUnderlyingNode()) + or + result = asClosureNode(n.(SynthCapturedVariablesArgumentPostUpdateNode).getUnderlyingNode()) + or // TODO: Should the `Comp`s above be excluded here? result.(Flow::ExprNode).getExpr() = n.(CfgNode).getNode() or From fb6175d10b24116a98fbda185b922a41ae426085 Mon Sep 17 00:00:00 2001 From: Taus Date: Thu, 29 Jan 2026 17:06:21 +0000 Subject: [PATCH 07/16] Python: Fix consistency test failures As we now have many more capturing closure arguments, we must once again exclude the ones that don't actually have `argumentOf` defined. --- python/ql/consistency-queries/DataFlowConsistency.ql | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/python/ql/consistency-queries/DataFlowConsistency.ql b/python/ql/consistency-queries/DataFlowConsistency.ql index 62bbb3062c29..829aa6debef2 100644 --- a/python/ql/consistency-queries/DataFlowConsistency.ql +++ b/python/ql/consistency-queries/DataFlowConsistency.ql @@ -143,6 +143,10 @@ private module Input implements InputSig { // Because of this, we must exclude the cases where we have an approximation but no actual // argument node. arg = getCallArgApproximation() and not getCallArg(_, _, _, arg, _) + or + // Likewise, capturing closure arguments do not have corresponding argument nodes in some cases. + arg instanceof SynthCapturedVariablesArgumentNode and + not arg.argumentOf(_, _) } } From 958c798c3fa09ba96bcf7d6dc6cc4deb6f7a0505 Mon Sep 17 00:00:00 2001 From: Taus Date: Thu, 29 Jan 2026 19:37:55 +0000 Subject: [PATCH 08/16] Python: Accept dataflow test changes New nodes means new results. Luckily we rarely have a test that selects _all_ dataflow nodes. --- python/ql/test/library-tests/dataflow/basic/local.expected | 2 ++ python/ql/test/library-tests/dataflow/basic/sinks.expected | 2 ++ python/ql/test/library-tests/dataflow/basic/sources.expected | 2 ++ 3 files changed, 6 insertions(+) diff --git a/python/ql/test/library-tests/dataflow/basic/local.expected b/python/ql/test/library-tests/dataflow/basic/local.expected index eb47f1308d42..96d402325129 100644 --- a/python/ql/test/library-tests/dataflow/basic/local.expected +++ b/python/ql/test/library-tests/dataflow/basic/local.expected @@ -36,7 +36,9 @@ | test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | | test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:7:19:7:19 | ControlFlowNode for a | | test.py:7:1:7:1 | ControlFlowNode for b | test.py:7:1:7:1 | ControlFlowNode for b | +| test.py:7:5:7:17 | Capturing closure argument | test.py:7:5:7:17 | Capturing closure argument | | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id | +| test.py:7:5:7:17 | [post] Capturing closure argument | test.py:7:5:7:17 | [post] Capturing closure argument | | test.py:7:5:7:17 | [post] ControlFlowNode for obfuscated_id | test.py:7:5:7:17 | [post] ControlFlowNode for obfuscated_id | | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:7:1:7:1 | ControlFlowNode for b | | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | diff --git a/python/ql/test/library-tests/dataflow/basic/sinks.expected b/python/ql/test/library-tests/dataflow/basic/sinks.expected index 34525d5043e1..80055f9a2f2b 100644 --- a/python/ql/test/library-tests/dataflow/basic/sinks.expected +++ b/python/ql/test/library-tests/dataflow/basic/sinks.expected @@ -15,7 +15,9 @@ | test.py:6:1:6:1 | ControlFlowNode for a | | test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | | test.py:7:1:7:1 | ControlFlowNode for b | +| test.py:7:5:7:17 | Capturing closure argument | | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id | +| test.py:7:5:7:17 | [post] Capturing closure argument | | test.py:7:5:7:17 | [post] ControlFlowNode for obfuscated_id | | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | | test.py:7:5:7:20 | [pre] ControlFlowNode for obfuscated_id() | diff --git a/python/ql/test/library-tests/dataflow/basic/sources.expected b/python/ql/test/library-tests/dataflow/basic/sources.expected index 34525d5043e1..80055f9a2f2b 100644 --- a/python/ql/test/library-tests/dataflow/basic/sources.expected +++ b/python/ql/test/library-tests/dataflow/basic/sources.expected @@ -15,7 +15,9 @@ | test.py:6:1:6:1 | ControlFlowNode for a | | test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | | test.py:7:1:7:1 | ControlFlowNode for b | +| test.py:7:5:7:17 | Capturing closure argument | | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id | +| test.py:7:5:7:17 | [post] Capturing closure argument | | test.py:7:5:7:17 | [post] ControlFlowNode for obfuscated_id | | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | | test.py:7:5:7:20 | [pre] ControlFlowNode for obfuscated_id() | From 3d65142980bbd24db5476551423a55a970cd1208 Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 5 Dec 2025 13:23:11 +0000 Subject: [PATCH 09/16] Python: Add overlay annotations to AST classes ... and everything else that it depends on. --- python/ql/lib/semmle/python/AstExtended.qll | 3 +++ python/ql/lib/semmle/python/AstGenerated.qll | 2 ++ python/ql/lib/semmle/python/Class.qll | 2 ++ python/ql/lib/semmle/python/Comment.qll | 2 ++ python/ql/lib/semmle/python/Comprehensions.qll | 3 +++ python/ql/lib/semmle/python/Constants.qll | 2 ++ python/ql/lib/semmle/python/Exprs.qll | 3 +++ python/ql/lib/semmle/python/Files.qll | 3 +++ python/ql/lib/semmle/python/Flow.qll | 3 +++ python/ql/lib/semmle/python/Function.qll | 3 +++ python/ql/lib/semmle/python/GuardedControlFlow.qll | 3 +++ python/ql/lib/semmle/python/Import.qll | 3 +++ python/ql/lib/semmle/python/Keywords.qll | 3 +++ python/ql/lib/semmle/python/Module.qll | 3 +++ python/ql/lib/semmle/python/Operations.qll | 3 +++ python/ql/lib/semmle/python/Patterns.qll | 2 ++ python/ql/lib/semmle/python/SSA.qll | 2 ++ python/ql/lib/semmle/python/Scope.qll | 5 +++++ python/ql/lib/semmle/python/Stmts.qll | 3 +++ python/ql/lib/semmle/python/Variables.qll | 3 +++ python/ql/lib/semmle/python/essa/Definitions.qll | 3 +++ python/ql/lib/semmle/python/essa/Essa.qll | 2 ++ python/ql/lib/semmle/python/essa/SsaCompute.qll | 2 ++ python/ql/lib/semmle/python/essa/SsaDefinitions.qll | 2 ++ python/ql/lib/semmle/python/internal/CachedStages.qll | 1 + python/ql/lib/semmle/python/types/Builtins.qll | 3 +++ python/ql/lib/semmle/python/types/ImportTime.qll | 3 +++ 27 files changed, 72 insertions(+) diff --git a/python/ql/lib/semmle/python/AstExtended.qll b/python/ql/lib/semmle/python/AstExtended.qll index 73292b85c3a8..13da4e899a71 100644 --- a/python/ql/lib/semmle/python/AstExtended.qll +++ b/python/ql/lib/semmle/python/AstExtended.qll @@ -1,3 +1,6 @@ +overlay[local] +module; + import python private import semmle.python.internal.CachedStages diff --git a/python/ql/lib/semmle/python/AstGenerated.qll b/python/ql/lib/semmle/python/AstGenerated.qll index 12f868323878..8805a43bec38 100644 --- a/python/ql/lib/semmle/python/AstGenerated.qll +++ b/python/ql/lib/semmle/python/AstGenerated.qll @@ -3,6 +3,8 @@ * WARNING: Any modifications to this file will be lost. * Relations can be changed by modifying master.py. */ +overlay[local] +module; import python diff --git a/python/ql/lib/semmle/python/Class.qll b/python/ql/lib/semmle/python/Class.qll index 19b81e86a125..cee0e730cb4f 100644 --- a/python/ql/lib/semmle/python/Class.qll +++ b/python/ql/lib/semmle/python/Class.qll @@ -1,6 +1,8 @@ /** * Provides classes representing Python classes. */ +overlay[local] +module; import python diff --git a/python/ql/lib/semmle/python/Comment.qll b/python/ql/lib/semmle/python/Comment.qll index 839d700b8cd1..c87ccc1521d2 100644 --- a/python/ql/lib/semmle/python/Comment.qll +++ b/python/ql/lib/semmle/python/Comment.qll @@ -1,6 +1,8 @@ /** * Provides classes representing comments in Python. */ +overlay[local] +module; import python diff --git a/python/ql/lib/semmle/python/Comprehensions.qll b/python/ql/lib/semmle/python/Comprehensions.qll index 37f07614282f..12e71e2d5d11 100644 --- a/python/ql/lib/semmle/python/Comprehensions.qll +++ b/python/ql/lib/semmle/python/Comprehensions.qll @@ -1,3 +1,6 @@ +overlay[local] +module; + import python /** The base class for list, set and dictionary comprehensions, and generator expressions. */ diff --git a/python/ql/lib/semmle/python/Constants.qll b/python/ql/lib/semmle/python/Constants.qll index 03254a4bfd04..f86019c0256f 100644 --- a/python/ql/lib/semmle/python/Constants.qll +++ b/python/ql/lib/semmle/python/Constants.qll @@ -1,4 +1,6 @@ /** Standard builtin types and modules */ +overlay[local] +module; import python diff --git a/python/ql/lib/semmle/python/Exprs.qll b/python/ql/lib/semmle/python/Exprs.qll index c374863d684e..6ab9f8d8340d 100644 --- a/python/ql/lib/semmle/python/Exprs.qll +++ b/python/ql/lib/semmle/python/Exprs.qll @@ -1,3 +1,6 @@ +overlay[local] +module; + private import python private import semmle.python.internal.CachedStages diff --git a/python/ql/lib/semmle/python/Files.qll b/python/ql/lib/semmle/python/Files.qll index 2da0dd61f885..bb3c504654e7 100644 --- a/python/ql/lib/semmle/python/Files.qll +++ b/python/ql/lib/semmle/python/Files.qll @@ -1,4 +1,6 @@ /** Provides classes for working with files and folders. */ +overlay[local] +module; import python private import codeql.util.FileSystem @@ -178,6 +180,7 @@ class Container extends Impl::Container { override Container getParentContainer() { result = super.getParentContainer() } + overlay[global] Container getChildContainer(string baseName) { result = this.getAChildContainer() and result.getBaseName() = baseName diff --git a/python/ql/lib/semmle/python/Flow.qll b/python/ql/lib/semmle/python/Flow.qll index 898cd566ab96..b29f9fd13839 100644 --- a/python/ql/lib/semmle/python/Flow.qll +++ b/python/ql/lib/semmle/python/Flow.qll @@ -1,3 +1,6 @@ +overlay[local] +module; + import python private import semmle.python.internal.CachedStages private import codeql.controlflow.BasicBlock as BB diff --git a/python/ql/lib/semmle/python/Function.qll b/python/ql/lib/semmle/python/Function.qll index e15d28d3a12b..c133275b8b78 100644 --- a/python/ql/lib/semmle/python/Function.qll +++ b/python/ql/lib/semmle/python/Function.qll @@ -1,3 +1,6 @@ +overlay[local] +module; + import python /** diff --git a/python/ql/lib/semmle/python/GuardedControlFlow.qll b/python/ql/lib/semmle/python/GuardedControlFlow.qll index 73ea183850af..3169e4d0c1ad 100644 --- a/python/ql/lib/semmle/python/GuardedControlFlow.qll +++ b/python/ql/lib/semmle/python/GuardedControlFlow.qll @@ -1,3 +1,6 @@ +overlay[local] +module; + import python /** A basic block which terminates in a condition, splitting the subsequent control flow */ diff --git a/python/ql/lib/semmle/python/Import.qll b/python/ql/lib/semmle/python/Import.qll index c75ef9f0c918..e8a7facccad3 100644 --- a/python/ql/lib/semmle/python/Import.qll +++ b/python/ql/lib/semmle/python/Import.qll @@ -1,3 +1,6 @@ +overlay[local] +module; + import python private import semmle.python.types.Builtins private import semmle.python.internal.CachedStages diff --git a/python/ql/lib/semmle/python/Keywords.qll b/python/ql/lib/semmle/python/Keywords.qll index b7ecca528bb9..da7b582ef16b 100644 --- a/python/ql/lib/semmle/python/Keywords.qll +++ b/python/ql/lib/semmle/python/Keywords.qll @@ -1,3 +1,6 @@ +overlay[local] +module; + import python class KeyValuePair extends KeyValuePair_, DictDisplayItem { diff --git a/python/ql/lib/semmle/python/Module.qll b/python/ql/lib/semmle/python/Module.qll index f22f0d6fe39f..a30aab452c38 100644 --- a/python/ql/lib/semmle/python/Module.qll +++ b/python/ql/lib/semmle/python/Module.qll @@ -1,3 +1,6 @@ +overlay[local] +module; + import python private import semmle.python.internal.CachedStages diff --git a/python/ql/lib/semmle/python/Operations.qll b/python/ql/lib/semmle/python/Operations.qll index e8f5e4799a54..c6318af63e25 100644 --- a/python/ql/lib/semmle/python/Operations.qll +++ b/python/ql/lib/semmle/python/Operations.qll @@ -1,3 +1,6 @@ +overlay[local] +module; + import python /** The base class for operators */ diff --git a/python/ql/lib/semmle/python/Patterns.qll b/python/ql/lib/semmle/python/Patterns.qll index 9b4760611d00..fb99a123584e 100644 --- a/python/ql/lib/semmle/python/Patterns.qll +++ b/python/ql/lib/semmle/python/Patterns.qll @@ -1,6 +1,8 @@ /** * Wrapping generated AST classes: `Pattern_` and subclasses. */ +overlay[local] +module; import python diff --git a/python/ql/lib/semmle/python/SSA.qll b/python/ql/lib/semmle/python/SSA.qll index b71bd95de795..777792877340 100644 --- a/python/ql/lib/semmle/python/SSA.qll +++ b/python/ql/lib/semmle/python/SSA.qll @@ -1,4 +1,6 @@ /** SSA library */ +overlay[local] +module; import python diff --git a/python/ql/lib/semmle/python/Scope.qll b/python/ql/lib/semmle/python/Scope.qll index 4131455299cb..66a7170aec77 100644 --- a/python/ql/lib/semmle/python/Scope.qll +++ b/python/ql/lib/semmle/python/Scope.qll @@ -1,3 +1,6 @@ +overlay[local] +module; + import python private import semmle.python.dataflow.new.internal.ImportResolution @@ -6,6 +9,7 @@ private import semmle.python.dataflow.new.internal.ImportResolution * * This aims to be the same as m.getAnExport(), but without using the points-to machinery. */ +overlay[global] private string getAModuleExport(Module m) { py_exports(m, result) or @@ -76,6 +80,7 @@ class Scope extends Scope_ { predicate isTopLevel() { this.getEnclosingModule() = this.getEnclosingScope() } /** Holds if this scope is deemed to be public */ + overlay[global] predicate isPublic() { /* Not inside a function */ not this.getEnclosingScope() instanceof Function and diff --git a/python/ql/lib/semmle/python/Stmts.qll b/python/ql/lib/semmle/python/Stmts.qll index ea309227af67..c0dfac10ee84 100644 --- a/python/ql/lib/semmle/python/Stmts.qll +++ b/python/ql/lib/semmle/python/Stmts.qll @@ -1,3 +1,6 @@ +overlay[local] +module; + import python /** A statement */ diff --git a/python/ql/lib/semmle/python/Variables.qll b/python/ql/lib/semmle/python/Variables.qll index 1249fd020caa..d2baf04a5bf1 100644 --- a/python/ql/lib/semmle/python/Variables.qll +++ b/python/ql/lib/semmle/python/Variables.qll @@ -1,3 +1,6 @@ +overlay[local] +module; + import python /** A variable, either a global or local variable (including parameters) */ diff --git a/python/ql/lib/semmle/python/essa/Definitions.qll b/python/ql/lib/semmle/python/essa/Definitions.qll index aca6991b9f69..6e7b8d5b376a 100644 --- a/python/ql/lib/semmle/python/essa/Definitions.qll +++ b/python/ql/lib/semmle/python/essa/Definitions.qll @@ -1,3 +1,6 @@ +overlay[local] +module; + import python /* * Classification of variables. These should be non-overlapping and complete. diff --git a/python/ql/lib/semmle/python/essa/Essa.qll b/python/ql/lib/semmle/python/essa/Essa.qll index 384bfd2f91fe..ebc22beedf33 100644 --- a/python/ql/lib/semmle/python/essa/Essa.qll +++ b/python/ql/lib/semmle/python/essa/Essa.qll @@ -1,6 +1,8 @@ /** * Library for SSA representation (Static Single Assignment form). */ +overlay[local] +module; import python private import SsaCompute diff --git a/python/ql/lib/semmle/python/essa/SsaCompute.qll b/python/ql/lib/semmle/python/essa/SsaCompute.qll index d2512eecdede..fb030b6250ee 100644 --- a/python/ql/lib/semmle/python/essa/SsaCompute.qll +++ b/python/ql/lib/semmle/python/essa/SsaCompute.qll @@ -88,6 +88,8 @@ * ``` * and thus it falls out that `g3` must be `1`. */ +overlay[local] +module; import python private import semmle.python.internal.CachedStages diff --git a/python/ql/lib/semmle/python/essa/SsaDefinitions.qll b/python/ql/lib/semmle/python/essa/SsaDefinitions.qll index 6c87af102fa9..827bee34474e 100644 --- a/python/ql/lib/semmle/python/essa/SsaDefinitions.qll +++ b/python/ql/lib/semmle/python/essa/SsaDefinitions.qll @@ -2,6 +2,8 @@ * Provides classes and predicates for determining the uses and definitions of * variables for ESSA form. */ +overlay[local] +module; import python private import semmle.python.internal.CachedStages diff --git a/python/ql/lib/semmle/python/internal/CachedStages.qll b/python/ql/lib/semmle/python/internal/CachedStages.qll index 687cabeceaed..df96e8498556 100644 --- a/python/ql/lib/semmle/python/internal/CachedStages.qll +++ b/python/ql/lib/semmle/python/internal/CachedStages.qll @@ -35,6 +35,7 @@ module Stages { * Computes predicates based on the AST. * These include SSA and basic-blocks. */ + overlay[local] cached module AST { /** diff --git a/python/ql/lib/semmle/python/types/Builtins.qll b/python/ql/lib/semmle/python/types/Builtins.qll index 796397f72cd6..371cf758d5c5 100644 --- a/python/ql/lib/semmle/python/types/Builtins.qll +++ b/python/ql/lib/semmle/python/types/Builtins.qll @@ -1,3 +1,6 @@ +overlay[local?] +module; + import python private import LegacyPointsTo diff --git a/python/ql/lib/semmle/python/types/ImportTime.qll b/python/ql/lib/semmle/python/types/ImportTime.qll index 1604013d7ff4..27f70b09aa40 100644 --- a/python/ql/lib/semmle/python/types/ImportTime.qll +++ b/python/ql/lib/semmle/python/types/ImportTime.qll @@ -1,3 +1,6 @@ +overlay[local] +module; + import python /** From 12405fc8e3bbce7766274f14cf780e80280ec221 Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 5 Dec 2025 13:48:28 +0000 Subject: [PATCH 10/16] Python: Fix broken queries --- python/ql/lib/analysis/DefinitionTracking.qll | 7 +++---- .../LoopVariableCapture/LoopVariableCaptureQuery.qll | 8 +++++--- python/ql/src/analysis/ImportFailure.ql | 6 ++++-- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/python/ql/lib/analysis/DefinitionTracking.qll b/python/ql/lib/analysis/DefinitionTracking.qll index 0d58bd69b7b6..21155970375b 100644 --- a/python/ql/lib/analysis/DefinitionTracking.qll +++ b/python/ql/lib/analysis/DefinitionTracking.qll @@ -471,11 +471,10 @@ Definition getUniqueDefinition(Expr use) { not result = TLocalDefinition(use) } -/** A helper class to get suitable locations for attributes */ -class NiceLocationExpr extends Expr { - /** Gets a textual representation of this element. */ - override string toString() { result = this.(Expr).toString() } +final class FinalExpr = Expr; +/** A helper class to get suitable locations for attributes */ +class NiceLocationExpr extends FinalExpr { /** * Holds if this element is at the specified location. * The location spans column `bc` of line `bl` to diff --git a/python/ql/src/Variables/LoopVariableCapture/LoopVariableCaptureQuery.qll b/python/ql/src/Variables/LoopVariableCapture/LoopVariableCaptureQuery.qll index 7f25701cac8e..987740236f24 100644 --- a/python/ql/src/Variables/LoopVariableCapture/LoopVariableCaptureQuery.qll +++ b/python/ql/src/Variables/LoopVariableCapture/LoopVariableCaptureQuery.qll @@ -3,8 +3,10 @@ import python import semmle.python.dataflow.new.DataFlow +final class FinalAstNode = AstNode; + /** A looping construct. */ -abstract class Loop extends AstNode { +abstract class Loop extends FinalAstNode { /** * Gets a loop variable of this loop. * For example, `x` and `y` in `for x,y in pairs: print(x+y)` @@ -13,9 +15,9 @@ abstract class Loop extends AstNode { } /** A `for` loop. */ -private class ForLoop extends Loop, For { +private class ForLoop extends Loop instanceof For { override Variable getALoopVariable() { - this.getTarget() = result.getAnAccess().getParentNode*() and + this.(For).getTarget() = result.getAnAccess().getParentNode*() and result.getScope() = this.getScope() } } diff --git a/python/ql/src/analysis/ImportFailure.ql b/python/ql/src/analysis/ImportFailure.ql index c9289a8b474a..71967e6e04f7 100644 --- a/python/ql/src/analysis/ImportFailure.ql +++ b/python/ql/src/analysis/ImportFailure.ql @@ -59,7 +59,9 @@ predicate ok_to_fail(ImportExpr ie) { os_specific_import(ie) != get_os() } -class VersionTest extends ControlFlowNode { +final class FinalControlFlowNode = ControlFlowNode; + +class VersionTest extends FinalControlFlowNode { VersionTest() { exists(string name | name.matches("%version%") and @@ -70,7 +72,7 @@ class VersionTest extends ControlFlowNode { ) } - override string toString() { result = "VersionTest" } + string toString() { result = "VersionTest" } } /** A guard on the version of the Python interpreter */ From 1eefafb5719fdee3d807b5f474131e774ce9fd30 Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 30 Jan 2026 13:30:57 +0000 Subject: [PATCH 11/16] Python: Simple dataflow annotations None of these required any changes to the dataflow libraries, so it seemed easiest to put them in their own commit. --- python/ql/lib/semmle/python/ApiGraphs.qll | 2 ++ python/ql/lib/semmle/python/dataflow/new/FlowSummary.qll | 1 + .../lib/semmle/python/dataflow/new/internal/Attributes.qll | 2 ++ .../ql/lib/semmle/python/dataflow/new/internal/Builtins.qll | 2 ++ .../semmle/python/dataflow/new/internal/FlowSummaryImpl.qll | 3 +++ .../lib/semmle/python/dataflow/new/internal/ImportStar.qll | 6 ++++++ .../python/dataflow/new/internal/IterableUnpacking.qll | 2 ++ .../semmle/python/dataflow/new/internal/MatchUnpacking.qll | 2 ++ .../semmle/python/dataflow/new/internal/VariableCapture.qll | 2 ++ python/ql/lib/semmle/python/frameworks/Flask.qll | 2 ++ python/ql/lib/semmle/python/frameworks/Stdlib.qll | 2 ++ .../ql/lib/semmle/python/frameworks/data/ModelsAsData.qll | 2 ++ .../frameworks/data/internal/ApiGraphModelsSpecific.qll | 2 ++ python/ql/lib/semmle/python/internal/Awaited.qll | 2 ++ python/ql/lib/semmle/python/internal/CachedStages.qll | 1 + .../test/library-tests/dataflow/summaries/TestSummaries.qll | 3 +++ .../dataflow/typetracking-summaries/TestSummaries.qll | 3 +++ 17 files changed, 39 insertions(+) diff --git a/python/ql/lib/semmle/python/ApiGraphs.qll b/python/ql/lib/semmle/python/ApiGraphs.qll index b45c10e1417e..efd8141efc6e 100644 --- a/python/ql/lib/semmle/python/ApiGraphs.qll +++ b/python/ql/lib/semmle/python/ApiGraphs.qll @@ -451,6 +451,7 @@ module API { * allowing this predicate to be used in a negative * context when constructing new nodes. */ + overlay[local] predicate moduleImportExists(string m) { Impl::isImported(m) and // restrict `moduleImport` so it will never give results for a dotted name. Note @@ -695,6 +696,7 @@ module API { * * This is determined syntactically. */ + overlay[local] cached predicate isImported(string name) { // Ignore the following module name for Python 2, as we alias `__builtin__` to `builtins` elsewhere diff --git a/python/ql/lib/semmle/python/dataflow/new/FlowSummary.qll b/python/ql/lib/semmle/python/dataflow/new/FlowSummary.qll index f83870ab050d..f9a951241875 100644 --- a/python/ql/lib/semmle/python/dataflow/new/FlowSummary.qll +++ b/python/ql/lib/semmle/python/dataflow/new/FlowSummary.qll @@ -25,6 +25,7 @@ deprecated module SummaryComponentStack = Impl::Private::SummaryComponentStack; class Provenance = Impl::Public::Provenance; /** Provides the `Range` class used to define the extent of `SummarizedCallable`. */ +overlay[local] module SummarizedCallable { /** A callable with a flow summary, identified by a unique string. */ abstract class Range extends LibraryCallable, Impl::Public::SummarizedCallable { diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/Attributes.qll b/python/ql/lib/semmle/python/dataflow/new/internal/Attributes.qll index e9bcc5e67855..8778ae288667 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/Attributes.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/Attributes.qll @@ -1,4 +1,6 @@ /** This module provides an API for attribute reads and writes. */ +overlay[local] +module; private import python import DataFlowUtil diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/Builtins.qll b/python/ql/lib/semmle/python/dataflow/new/internal/Builtins.qll index 9ed9e7d7a2b0..6a66d241083a 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/Builtins.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/Builtins.qll @@ -1,4 +1,6 @@ /** Provides predicates for reasoning about built-ins in Python. */ +overlay[local] +module; private import python private import semmle.python.dataflow.new.DataFlow diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImpl.qll b/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImpl.qll index 449b51565a85..41cb0368b507 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImpl.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImpl.qll @@ -1,6 +1,8 @@ /** * Provides classes and predicates for defining flow summaries. */ +overlay[local] +module; private import python private import codeql.dataflow.internal.FlowSummaryImpl @@ -99,6 +101,7 @@ module Input implements InputSig private import Make as Impl private module StepsInput implements Impl::Private::StepsInputSig { + overlay[global] DataFlowCall getACall(Public::SummarizedCallable sc) { result = TPotentialLibraryCall([ diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/ImportStar.qll b/python/ql/lib/semmle/python/dataflow/new/internal/ImportStar.qll index 564630c47dbc..83f8ee862c39 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/ImportStar.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/ImportStar.qll @@ -1,4 +1,6 @@ /** Provides predicates for reasoning about uses of `import *` in Python. */ +overlay[local] +module; private import python private import semmle.python.dataflow.new.internal.Builtins @@ -11,6 +13,7 @@ module ImportStar { * Holds if `n` is an access of a variable called `name` (which is _not_ the name of a * built-in, and which is _not_ a global defined in the enclosing module) inside the scope `s`. */ + overlay[local] cached predicate namePossiblyDefinedInImportStar(NameNode n, string name, Scope s) { n.isLoad() and @@ -61,6 +64,7 @@ module ImportStar { * Holds if `n` may refer to a global variable of the same name in the module `m`, accessible * from the scope of `n` by a chain of `import *` imports. */ + overlay[global] cached predicate importStarResolvesTo(NameNode n, Module m) { m = getStarImported+(n.getEnclosingModule()) and @@ -71,6 +75,7 @@ module ImportStar { /** * Gets a module that is imported from `m` via `import *`. */ + overlay[global] cached Module getStarImported(Module m) { exists(ImportStar i, DataFlow::CfgNode imported_module | @@ -92,6 +97,7 @@ module ImportStar { * * this would return the data-flow nodes corresponding to `foo.bar` and `quux`. */ + overlay[local] cached ControlFlowNode potentialImportStarBase(Scope s) { result = any(ImportStarNode n | n.getScope() = s).getModule() diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/IterableUnpacking.qll b/python/ql/lib/semmle/python/dataflow/new/internal/IterableUnpacking.qll index e83e789c2fbc..5def15fa3c8a 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/IterableUnpacking.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/IterableUnpacking.qll @@ -166,6 +166,8 @@ * * `c`: [ListElementContent] */ +overlay[local] +module; private import python private import DataFlowPublic diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/MatchUnpacking.qll b/python/ql/lib/semmle/python/dataflow/new/internal/MatchUnpacking.qll index 8064c34d9218..e72e378da528 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/MatchUnpacking.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/MatchUnpacking.qll @@ -50,6 +50,8 @@ * keyword arguments using the `__match_args__` attribute on the class. We do not * currently model this. */ +overlay[local] +module; private import python private import DataFlowPublic diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/VariableCapture.qll b/python/ql/lib/semmle/python/dataflow/new/internal/VariableCapture.qll index 5ed365a8e56f..fbe05979328c 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/VariableCapture.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/VariableCapture.qll @@ -1,4 +1,6 @@ /** Provides logic related to captured variables. */ +overlay[local] +module; private import python private import DataFlowPublic diff --git a/python/ql/lib/semmle/python/frameworks/Flask.qll b/python/ql/lib/semmle/python/frameworks/Flask.qll index 8ac9f3deab31..4a9dfad68d25 100644 --- a/python/ql/lib/semmle/python/frameworks/Flask.qll +++ b/python/ql/lib/semmle/python/frameworks/Flask.qll @@ -2,6 +2,8 @@ * Provides classes modeling security-relevant aspects of the `flask` PyPI package. * See https://flask.palletsprojects.com/en/1.1.x/. */ +overlay[local?] +module; private import python private import semmle.python.dataflow.new.DataFlow diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index e749ab66f8b2..5d3b994880a1 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -2,6 +2,8 @@ * Provides classes modeling security-relevant aspects of the standard libraries. * Note: some modeling is done internally in the dataflow/taint tracking implementation. */ +overlay[local?] +module; private import python private import semmle.python.dataflow.new.DataFlow diff --git a/python/ql/lib/semmle/python/frameworks/data/ModelsAsData.qll b/python/ql/lib/semmle/python/frameworks/data/ModelsAsData.qll index e66f2c01d70a..542f8c995e98 100644 --- a/python/ql/lib/semmle/python/frameworks/data/ModelsAsData.qll +++ b/python/ql/lib/semmle/python/frameworks/data/ModelsAsData.qll @@ -8,6 +8,8 @@ * The package name refers to the top-level module the import comes from, and not a PyPI package. * So for `from foo.bar import baz`, the package will be `foo`. */ +overlay[local?] +module; private import python private import internal.ApiGraphModels as Shared diff --git a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll index 7adc24bab14f..3136f87569c9 100644 --- a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll +++ b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll @@ -30,6 +30,7 @@ import semmle.python.dataflow.new.DataFlow::DataFlow as DataFlow * Holds if models describing `type` may be relevant for the analysis of this database. */ bindingset[type] +overlay[local] predicate isTypeUsed(string type) { // If `type` is a path, then it is the first component that should be imported. API::moduleImportExists(type.splitAt(".", 0)) @@ -39,6 +40,7 @@ predicate isTypeUsed(string type) { * Holds if `type` can be obtained from an instance of `otherType` due to * language semantics modeled by `getExtraNodeFromType`. */ +overlay[local] predicate hasImplicitTypeModel(string type, string otherType) { none() } /** Gets a Python-specific interpretation of the `(type, path)` tuple after resolving the first `n` access path tokens. */ diff --git a/python/ql/lib/semmle/python/internal/Awaited.qll b/python/ql/lib/semmle/python/internal/Awaited.qll index cd5162e6151e..43affdf95a4f 100644 --- a/python/ql/lib/semmle/python/internal/Awaited.qll +++ b/python/ql/lib/semmle/python/internal/Awaited.qll @@ -3,6 +3,8 @@ * * Provides helper class for defining additional API graph edges. */ +overlay[local] +module; private import python private import semmle.python.dataflow.new.DataFlow diff --git a/python/ql/lib/semmle/python/internal/CachedStages.qll b/python/ql/lib/semmle/python/internal/CachedStages.qll index df96e8498556..7379cc51372f 100644 --- a/python/ql/lib/semmle/python/internal/CachedStages.qll +++ b/python/ql/lib/semmle/python/internal/CachedStages.qll @@ -177,6 +177,7 @@ module Stages { * Always holds. * Ensures that a predicate is evaluated as part of the DataFlow stage. */ + overlay[local] cached predicate ref() { 1 = 1 } diff --git a/python/ql/test/library-tests/dataflow/summaries/TestSummaries.qll b/python/ql/test/library-tests/dataflow/summaries/TestSummaries.qll index 11b9c0ef09e6..14d68455d621 100644 --- a/python/ql/test/library-tests/dataflow/summaries/TestSummaries.qll +++ b/python/ql/test/library-tests/dataflow/summaries/TestSummaries.qll @@ -1,3 +1,6 @@ +overlay[local?] +module; + private import python private import semmle.python.dataflow.new.FlowSummary private import semmle.python.ApiGraphs diff --git a/python/ql/test/library-tests/dataflow/typetracking-summaries/TestSummaries.qll b/python/ql/test/library-tests/dataflow/typetracking-summaries/TestSummaries.qll index c4c4096c686a..57e0013b6e0e 100644 --- a/python/ql/test/library-tests/dataflow/typetracking-summaries/TestSummaries.qll +++ b/python/ql/test/library-tests/dataflow/typetracking-summaries/TestSummaries.qll @@ -1,3 +1,6 @@ +overlay[local?] +module; + private import python private import semmle.python.dataflow.new.FlowSummary private import semmle.python.ApiGraphs From 0c543a5613a8e9fc229520c4771866d01ce9861f Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 30 Jan 2026 13:31:44 +0000 Subject: [PATCH 12/16] Python: `LocalSources.qll` annotations --- .../semmle/python/dataflow/new/internal/LocalSources.qll | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/LocalSources.qll b/python/ql/lib/semmle/python/dataflow/new/internal/LocalSources.qll index 7752846ae1ff..5cbe7b44ab30 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/LocalSources.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/LocalSources.qll @@ -5,6 +5,8 @@ * Note that unlike `TypeTracker.qll`, this library only performs * local tracking within a function. */ +overlay[local] +module; private import python import DataFlowPublic @@ -77,6 +79,7 @@ class LocalSourceNode extends Node { } /** Holds if this `LocalSourceNode` can flow to `nodeTo` in one or more local flow steps. */ + overlay[caller] pragma[inline] predicate flowsTo(Node nodeTo) { Cached::hasLocalSource(nodeTo, this) } @@ -149,6 +152,7 @@ class LocalSourceNode extends Node { * * See `TypeTracker` for more details about how to use this. */ + overlay[global] pragma[inline] LocalSourceNode track(TypeTracker t2, TypeTracker t) { t = t2.step(this, result) } @@ -157,6 +161,7 @@ class LocalSourceNode extends Node { * * See `TypeBackTracker` for more details about how to use this. */ + overlay[global] pragma[inline] LocalSourceNode backtrack(TypeBackTracker t2, TypeBackTracker t) { t = t2.step(result, this) } } @@ -210,6 +215,7 @@ private module FutureWork { * * See `TypeTracker` for more details about how to use this. */ + overlay[global] pragma[inline] TypeTrackingNode track(TypeTracker t2, TypeTracker t) { t = t2.step(this, result) } @@ -218,6 +224,7 @@ private module FutureWork { * * See `TypeBackTracker` for more details about how to use this. */ + overlay[global] pragma[inline] TypeTrackingNode backtrack(TypeBackTracker t2, TypeBackTracker t) { t2 = t.step(result, this) } } From e211f4bb5dbd2bf917cef5d44ad771422e5b18bc Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 30 Jan 2026 13:33:33 +0000 Subject: [PATCH 13/16] Python: `DataFlowPublic.qll` annotations --- .../dataflow/new/internal/DataFlowPublic.qll | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll index 532f7b23e4cf..c59462819b2c 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll @@ -1,6 +1,8 @@ /** * Provides Python-specific definitions for use in the data flow library. */ +overlay[local] +module; private import python private import DataFlowPrivate @@ -22,6 +24,7 @@ private import semmle.python.frameworks.data.ModelsAsData * - Module variable nodes: These represent global variables and act as canonical targets for reads and writes of these. * - Synthetic nodes: These handle flow in various special cases. */ +overlay[local] newtype TNode = /** A node corresponding to a control flow node. */ TCfgNode(ControlFlowNode node) { @@ -157,6 +160,7 @@ private import semmle.python.internal.CachedStages * An element, viewed as a node in a data flow graph. Either an SSA variable * (`EssaNode`) or a control flow node (`CfgNode`). */ +overlay[local] class Node extends TNode { /** Gets a textual representation of this element. */ cached @@ -324,6 +328,7 @@ class ScopeEntryDefinitionNode extends Node, TScopeEntryDefinitionNode { * The value of a parameter at function entry, viewed as a node in a data * flow graph. */ +overlay[local] class ParameterNode extends Node instanceof ParameterNodeImpl { /** Gets the parameter corresponding to this node, if any. */ final Parameter getParameter() { result = super.getParameter() } @@ -345,6 +350,7 @@ class LocalSourceParameterNode extends ExtractedParameterNode, LocalSourceNode { ExtractedParameterNode parameterNode(Parameter p) { result.getParameter() = p } /** A data flow node that represents a call argument. */ +overlay[global] abstract class ArgumentNode extends Node { /** Holds if this argument occurs at the given position in the given call. */ abstract predicate argumentOf(DataFlowCall call, ArgumentPosition pos); @@ -383,6 +389,7 @@ private Node otherArgs() { /** * A data flow node that represents a call argument found in the source code. */ +overlay[global] class ExtractedArgumentNode extends ArgumentNode { ExtractedArgumentNode() { this = getCallArgApproximation() @@ -469,6 +476,7 @@ class ModuleVariableNode extends Node, TModuleVariableNode { GlobalVariable getVariable() { result = var } /** Gets a node that reads this variable. */ + overlay[global] Node getARead() { result = this.getALocalRead() or @@ -500,10 +508,12 @@ class ModuleVariableNode extends Node, TModuleVariableNode { override Location getLocation() { result = mod.getLocation() } } +overlay[global] private ModuleVariableNode import_star_read(Node n) { resolved_import_star_module(result.getModule(), result.getVariable().getId(), n) } +overlay[global] pragma[nomagic] private predicate resolved_import_star_module(Module m, string name, Node n) { exists(NameNode nn | nn = n.asCfgNode() | @@ -625,6 +635,7 @@ signature predicate guardChecksSig(GuardNode g, ControlFlowNode node, boolean br * This is expected to be used in `isBarrier`/`isSanitizer` definitions * in data flow and taint tracking. */ +overlay[global] module BarrierGuard { /** Gets a node that is safely guarded by the given guard check. */ ExprNode getABarrierNode() { @@ -652,6 +663,7 @@ private module WithParam { */ module ParameterizedBarrierGuard::guardChecksSig/4 guardChecks> { /** Gets a node that is safely guarded by the given guard check with parameter `param`. */ + overlay[global] ExprNode getABarrierNode(P param) { exists(GuardNode g, EssaDefinition def, ControlFlowNode node, boolean branch | AdjacentUses::useOfDef(def, node) and @@ -671,6 +683,7 @@ module ParameterizedBarrierGuard::guardChecksSig/4 guar module ExternalBarrierGuard { private import semmle.python.ApiGraphs + overlay[global] private predicate guardCheck(GuardNode g, ControlFlowNode node, boolean branch, string kind) { exists(API::CallNode call, API::Node parameter | parameter = call.getAParameter() and @@ -689,6 +702,7 @@ module ExternalBarrierGuard { * * INTERNAL: Do not use. */ + overlay[global] ExprNode getAnExternalBarrierNode(string kind) { result = ParameterizedBarrierGuard::getABarrierNode(kind) } @@ -698,6 +712,7 @@ module ExternalBarrierGuard { * Algebraic datatype for tracking data content associated with values. * Content can be collection elements or object attributes. */ +overlay[local] newtype TContent = /** An element of a list. */ TListElementContent() or @@ -769,6 +784,7 @@ newtype TContent = * If the value is a collection, it can have elements, * if it is an object, it can have attribute values. */ +overlay[local] class Content extends TContent { /** Gets a textual representation of this element. */ string toString() { result = "Content" } From 65d01e98f339422ef5286ef6f4c4fb3ce88a7160 Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 30 Jan 2026 13:34:05 +0000 Subject: [PATCH 14/16] Python: `DataFlowPrivate.qll` annotations --- .../semmle/python/dataflow/new/internal/DataFlowPrivate.qll | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll index 9866bd009642..fffd0150008e 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll @@ -1,3 +1,6 @@ +overlay[local?] +module; + private import python private import DataFlowPublic private import semmle.python.essa.SsaCompute @@ -39,6 +42,7 @@ predicate isArgumentNode(ArgumentNode arg, DataFlowCall c, ArgumentPosition pos) //-------- // Nodes //-------- +overlay[local] predicate isExpressionNode(ControlFlowNode node) { node.getNode() instanceof Expr } // ============================================================================= @@ -111,6 +115,7 @@ class SyntheticPreUpdateNode extends Node, TSyntheticPreUpdateNode { * func = foo if else bar * func(1, 2, 3) */ +overlay[local] class SynthStarArgsElementParameterNode extends ParameterNodeImpl, TSynthStarArgsElementParameterNode { @@ -241,6 +246,7 @@ private predicate dictSplatParameterNodeClearStep(ParameterNode n, DictionaryEle * (c) since the synthesized nodes are hidden, the reported data-flow paths will be * collapsed anyway. */ +overlay[local] class SynthDictSplatParameterNode extends ParameterNodeImpl, TSynthDictSplatParameterNode { DataFlowCallable callable; From 9e43da92747b46f108f1f7e3ea127191491094d5 Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 30 Jan 2026 13:35:46 +0000 Subject: [PATCH 15/16] Python: `DataFlowDispatch.qll` annotations --- .../new/internal/DataFlowDispatch.qll | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll index b04b83be83ec..d4444c6795bf 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll @@ -31,6 +31,8 @@ * Note: This hasn't been 100% realized yet, so we don't currently expose a predicate to * ask what targets any data-flow node has. But it's still the plan to do this! */ +overlay[local?] +module; private import python private import DataFlowPublic @@ -39,6 +41,7 @@ private import FlowSummaryImpl as FlowSummaryImpl private import semmle.python.internal.CachedStages private import semmle.python.dataflow.new.internal.TypeTrackingImpl::CallGraphConstruction as CallGraphConstruction +overlay[local] newtype TParameterPosition = /** Used for `self` in methods, and `cls` in classmethods. */ TSelfParameterPosition() or @@ -84,6 +87,7 @@ newtype TParameterPosition = TSynthDictSplatParameterPosition() /** A parameter position. */ +overlay[local] class ParameterPosition extends TParameterPosition { /** Holds if this position represents a `self`/`cls` parameter. */ predicate isSelf() { this = TSelfParameterPosition() } @@ -146,6 +150,7 @@ class ParameterPosition extends TParameterPosition { } } +overlay[local] newtype TArgumentPosition = /** Used for `self` in methods, and `cls` in classmethods. */ TSelfArgumentPosition() or @@ -180,6 +185,7 @@ newtype TArgumentPosition = TDictSplatArgumentPosition() /** An argument position. */ +overlay[local] class ArgumentPosition extends TArgumentPosition { /** Holds if this position represents a `self`/`cls` argument. */ predicate isSelf() { this = TSelfArgumentPosition() } @@ -248,6 +254,7 @@ predicate parameterMatch(ParameterPosition ppos, ArgumentPosition apos) { * `@staticmethod` decorator or by convention * (like a `__new__` method on a class is a classmethod even without the decorator). */ +overlay[local] predicate isStaticmethod(Function func) { exists(NameNode id | id.getId() = "staticmethod" and id.isGlobal() | func.getADecorator() = id.getNode() @@ -259,6 +266,7 @@ predicate isStaticmethod(Function func) { * `@classmethod` decorator or by convention * (like a `__new__` method on a class is a classmethod even without the decorator). */ +overlay[local] predicate isClassmethod(Function func) { exists(NameNode id | id.getId() = "classmethod" and id.isGlobal() | func.getADecorator() = id.getNode() @@ -275,6 +283,7 @@ predicate isClassmethod(Function func) { } /** Holds if the function `func` has a `property` decorator. */ +overlay[local] predicate hasPropertyDecorator(Function func) { exists(NameNode id | id.getId() = "property" and id.isGlobal() | func.getADecorator() = id.getNode() @@ -284,6 +293,7 @@ predicate hasPropertyDecorator(Function func) { /** * Holds if the function `func` has a `contextlib.contextmanager`. */ +overlay[local] predicate hasContextmanagerDecorator(Function func) { exists(ControlFlowNode contextmanager | contextmanager.(NameNode).getId() = "contextmanager" and contextmanager.(NameNode).isGlobal() @@ -298,20 +308,25 @@ predicate hasContextmanagerDecorator(Function func) { // Callables // ============================================================================= /** A callable defined in library code, identified by a unique string. */ +overlay[local] abstract class LibraryCallable extends string { bindingset[this] LibraryCallable() { any() } /** Gets a call to this library callable. */ + overlay[global] abstract CallCfgNode getACall(); /** Same as `getACall` but without referring to the call graph or API graph. */ + overlay[global] CallCfgNode getACallSimple() { none() } /** Gets a data-flow node, where this library callable is used as a call-back. */ + overlay[global] abstract ArgumentNode getACallback(); } +overlay[local] newtype TDataFlowCallable = /** * Is used as the target for all calls: plain functions, lambdas, methods on classes, @@ -329,6 +344,7 @@ newtype TDataFlowCallable = TLibraryCallable(LibraryCallable callable) /** A callable. */ +overlay[local] abstract class DataFlowCallable extends TDataFlowCallable { /** Gets a textual representation of this element. */ abstract string toString(); @@ -350,6 +366,7 @@ abstract class DataFlowCallable extends TDataFlowCallable { } /** A callable function. */ +overlay[local] abstract class DataFlowFunction extends DataFlowCallable, TFunction { Function func; @@ -370,6 +387,7 @@ abstract class DataFlowFunction extends DataFlowCallable, TFunction { /** Gets the positional parameter offset, to take into account self/cls parameters. */ int positionalOffset() { result = 0 } + overlay[local] override ParameterNode getParameter(ParameterPosition ppos) { // Do not handle lower bound positions (such as `[1..]`) here // they are handled by parameter matching and would create @@ -408,11 +426,13 @@ abstract class DataFlowFunction extends DataFlowCallable, TFunction { } /** A plain (non-method) function. */ +overlay[local] class DataFlowPlainFunction extends DataFlowFunction { DataFlowPlainFunction() { not this instanceof DataFlowMethod } } /** A method. */ +overlay[local] class DataFlowMethod extends DataFlowFunction { Class cls; @@ -431,11 +451,13 @@ class DataFlowMethod extends DataFlowFunction { } /** A classmethod. */ +overlay[local] class DataFlowClassmethod extends DataFlowMethod { DataFlowClassmethod() { isClassmethod(func) } } /** A staticmethod. */ +overlay[local] class DataFlowStaticmethod extends DataFlowMethod, DataFlowFunction { DataFlowStaticmethod() { isStaticmethod(func) } @@ -450,6 +472,7 @@ class DataFlowStaticmethod extends DataFlowMethod, DataFlowFunction { * A module. This is not actually a callable, but we need this so a * `ModuleVariableNode` have an enclosing callable. */ +overlay[local] class DataFlowModuleScope extends DataFlowCallable, TModule { Module mod; @@ -466,6 +489,7 @@ class DataFlowModuleScope extends DataFlowCallable, TModule { override ParameterNode getParameter(ParameterPosition ppos) { none() } } +overlay[local] class LibraryCallableValue extends DataFlowCallable, TLibraryCallable { LibraryCallable callable; @@ -476,6 +500,7 @@ class LibraryCallableValue extends DataFlowCallable, TLibraryCallable { override string getQualifiedName() { result = callable.toString() } /** Gets a data-flow node, where this library callable is used as a call-back. */ + overlay[global] ArgumentNode getACallback() { result = callable.getACallback() } override Scope getScope() { none() } @@ -1210,6 +1235,7 @@ predicate resolveCall(CallNode call, Function target, CallType type) { * Holds if the argument of `call` at position `apos` is `arg`. This is just a helper * predicate that maps ArgumentPositions to the arguments of the underlying `CallNode`. */ +overlay[local] cached predicate normalCallArg(CallNode call, Node arg, ArgumentPosition apos) { exists(int index | @@ -1589,6 +1615,7 @@ class SummaryCall extends DataFlowCall, TSummaryCall { * The value of a parameter at function entry, viewed as a node in a data * flow graph. */ +overlay[local] abstract class ParameterNodeImpl extends Node { /** Gets the `Parameter` this `ParameterNode` represents. */ abstract Parameter getParameter(); @@ -1610,6 +1637,7 @@ abstract class ParameterNodeImpl extends Node { * * This is used for tracking flow through captured variables. */ +overlay[local] class SynthCapturedVariablesParameterNode extends ParameterNodeImpl, TSynthCapturedVariablesParameterNode { @@ -1634,6 +1662,7 @@ class SynthCapturedVariablesParameterNode extends ParameterNodeImpl, } /** A parameter for a library callable with a flow summary. */ +overlay[local] class SummaryParameterNode extends ParameterNodeImpl, FlowSummaryNode { SummaryParameterNode() { FlowSummaryImpl::Private::summaryParameterNode(this.getSummaryNode(), _) @@ -1684,6 +1713,7 @@ private class SummaryReturnNode extends FlowSummaryNode, ReturnNode { override ReturnKind getKind() { result = rk } } +overlay[global] private class SummaryArgumentNode extends FlowSummaryNode, ArgumentNode { private SummaryCall call_; private ArgumentPosition pos_; @@ -1737,6 +1767,7 @@ class SynthCapturedVariablesArgumentNode extends Node, TSynthCapturedVariablesAr class CapturedVariablesArgumentNodeAsArgumentNode extends ArgumentNode, SynthCapturedVariablesArgumentNode { + overlay[global] override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) { exists(CallNode callNode | callNode = this.getCallNode() | callNode = call.getNode() and @@ -1773,6 +1804,7 @@ class SynthCapturedVariablesArgumentPostUpdateNode extends PostUpdateNodeImpl, } /** A synthetic node representing the values of variables captured by a comprehension. */ +overlay[local] class SynthCompCapturedVariablesArgumentNode extends Node, TSynthCompCapturedVariablesArgumentNode { Comp comp; @@ -1790,6 +1822,7 @@ class SynthCompCapturedVariablesArgumentNode extends Node, TSynthCompCapturedVar class SynthCompCapturedVariablesArgumentNodeAsArgumentNode extends SynthCompCapturedVariablesArgumentNode, ArgumentNode { + overlay[global] override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) { call.(ComprehensionCall).getComprehension() = comp and pos.isLambdaSelf() @@ -1834,12 +1867,14 @@ DataFlowCallable viableCallable(DataFlowCall call) { // ============================================================================= // Remaining required data-flow things // ============================================================================= +overlay[local] private newtype TReturnKind = TNormalReturnKind() /** * A return kind. A return kind describes how a value can be returned * from a callable. For Python, this is simply a method return. */ +overlay[local] class ReturnKind extends TReturnKind { /** Gets a textual representation of this element. */ string toString() { result = "return" } From 9b12c604fc064d5a4e7dae459dbbe504fe2f0560 Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 30 Jan 2026 14:07:33 +0000 Subject: [PATCH 16/16] Python: Fix `frameworks/data/warnings.ql` --- python/ql/test/library-tests/frameworks/data/warnings.ql | 1 + 1 file changed, 1 insertion(+) diff --git a/python/ql/test/library-tests/frameworks/data/warnings.ql b/python/ql/test/library-tests/frameworks/data/warnings.ql index f09684132359..07c746547dd3 100644 --- a/python/ql/test/library-tests/frameworks/data/warnings.ql +++ b/python/ql/test/library-tests/frameworks/data/warnings.ql @@ -2,6 +2,7 @@ import python import semmle.python.frameworks.data.internal.ApiGraphModels as ApiGraphModels import semmle.python.frameworks.data.ModelsAsData +overlay[local] class IsTesting extends ApiGraphModels::TestAllModels { IsTesting() { this = this } }