github · mbaluda · Dec 12, 2025 · Jan 2, 2026 · Jan 2, 2026 · Jan 2, 2026
@@ -0,0 +1,5 @@
+---
+category: minorAnalysis
+---
+* Added experimental query `py/prompt-injection` to detect potential prompt injection vulnerabilities in code using LLMs.
+* Added taint flow model and type model for `agents` and `openai` modules.
diff --git a/python/ql/lib/semmle/python/Concepts.qll b/python/ql/lib/semmle/python/Concepts.qll
@@ -325,6 +325,31 @@ private class EncodingAdditionalTaintStep extends TaintTracking::AdditionalTaint
   }
 }
 
+/**
+ * A data-flow node that prompts an AI model.
+ *
+ * Extend this class to refine existing API models. If you want to model new APIs,
+ * extend `AIPrompt::Range` instead.
+ */
+class AIPrompt extends DataFlow::Node instanceof AIPrompt::Range {
+  /** Gets an input that is used as an AI prompt. */
+  DataFlow::Node getAPrompt() { result = super.getAPrompt() }
+}
+
+/** Provides a class for modeling new AI prompting mechanisms. */
+module AIPrompt {
+  /**
+   * A data-flow node that prompts an AI model.
+   *
+   * Extend this class to model new APIs. If you want to refine existing API models,
+   * extend `AIPrompt` instead.
+   */
+  abstract class Range extends DataFlow::Node {
+    /** Gets an input that is used as an AI prompt. */
+    abstract DataFlow::Node getAPrompt();
+  }
+}
+
 /**
  * A data-flow node that logs data.
  *

diff --git a/python/ql/lib/semmle/python/Frameworks.qll b/python/ql/lib/semmle/python/Frameworks.qll
@@ -54,6 +54,7 @@ private import semmle.python.frameworks.Multidict
 private import semmle.python.frameworks.Mysql
 private import semmle.python.frameworks.MySQLdb
 private import semmle.python.frameworks.Numpy
+private import semmle.python.frameworks.OpenAI
 private import semmle.python.frameworks.Opml
 private import semmle.python.frameworks.Oracledb
 private import semmle.python.frameworks.Pandas

diff --git a/python/ql/lib/semmle/python/frameworks/OpenAI.qll b/python/ql/lib/semmle/python/frameworks/OpenAI.qll
@@ -0,0 +1,85 @@
+/**
+ * Provides models of security-relevant aspects of the OpenAI Agents SDK (the `agents` package).
+ * See https://github.com/openai/openai-agents-python.
+ * Also models the regular `openai` Python client library.
+ * See https://github.com/openai/openai-python.
+ */
+
+private import python
+private import semmle.python.ApiGraphs
+
+/**
+ * Provides models for the OpenAI Agents SDK (the `agents.Runner` class and its `run` members).
+ *
+ * See https://github.com/openai/openai-agents-python.
+ */
+module AgentSDK {
+  /** Gets a reference to the `agents.Runner` class. */
+  API::Node classRef() { result = API::moduleImport("agents").getMember("Runner") }
+
+  /** Gets a reference to the `run`, `run_sync` or `run_streamed` member of `agents.Runner`. */
+  API::Node runMembers() { result = classRef().getMember(["run", "run_sync", "run_streamed"]) }
+
+  /** Gets the `content` entry of an element of the `input` keyword argument, or of any positional argument, of one of the `run` members of `agents.Runner`; depending on the role specified, this can refer to a system prompt. */
+  API::Node getContentNode() {
+    result = runMembers().getKeywordParameter("input").getASubscript().getSubscript("content")
+    or
+    result = runMembers().getParameter(_).getASubscript().getSubscript("content")
+  }
+}
+
+/**
+ * Provides models for the `openai` client (instances of the `openai.OpenAI`, `openai.AsyncOpenAI` and `openai.AzureOpenAI` classes).
+ *
+ * See https://github.com/openai/openai-python.
+ */
+module OpenAI {
+  /** Gets a reference to an instance of the `openai.OpenAI`, `openai.AsyncOpenAI` or `openai.AzureOpenAI` class. */
+  API::Node classRef() {
+    result =
+      API::moduleImport("openai").getMember(["OpenAI", "AsyncOpenAI", "AzureOpenAI"]).getReturn()
+  }
+
+  /** Gets a node for prompt content given to an `openai` client: the `input` or `instructions` keyword argument of `responses.create` (or the `content` entry of one of its elements), the `content` entry of the `item` keyword argument of a realtime `conversation.item.create`, or the `content` entry of an element of the `messages` keyword argument of `chat.completions.create`. When that content has elements of its own, the `text` entry of each element is returned instead. */
+  API::Node getContentNode() {
+    exists(API::Node content |
+      content =
+        classRef()
+            .getMember("responses")
+            .getMember("create")
+            .getKeywordParameter(["input", "instructions"]) or
+      content =
+        classRef()
+            .getMember("responses")
+            .getMember("create")
+            .getKeywordParameter(["input", "instructions"])
+            .getASubscript()
+            .getSubscript("content") or
+      content =
+        classRef()
+            .getMember("realtime")
+            .getMember("connect")
+            .getReturn()
+            .getMember("conversation")
+            .getMember("item")
+            .getMember("create")
+            .getKeywordParameter("item")
+            .getSubscript("content") or
+      content =
+        classRef()
+            .getMember("chat")
+            .getMember("completions")
+            .getMember("create")
+            .getKeywordParameter("messages")
+            .getASubscript()
+            .getSubscript("content")
+    |
+      // `content` has no elements of its own: it is the prompt itself
+      if not exists(content.getASubscript())
+      then result = content
+      else
+        // otherwise the prompt is the `text` entry of each element of `content`
+        result = content.getASubscript().getSubscript("text")
+    )
+  }
+}
@@ -0,0 +1,6 @@
+extensions:
+  - addsTo:
+      pack: codeql/python-all
+      extensible: sinkModel
+    data:
+      - ['agents', 'Member[Agent].Argument[instructions:]', 'prompt-injection']
@@ -0,0 +1,12 @@
+extensions:
+  - addsTo:
+      pack: codeql/python-all
+      extensible: sinkModel
+    data:
+      - ['OpenAI', 'Member[beta].Member[assistants].Member[create].Argument[instructions:]', 'prompt-injection']
+
+  - addsTo:
+      pack: codeql/python-all
+      extensible: typeModel
+    data:
+      - ['OpenAI', 'openai', 'Member[OpenAI,AsyncOpenAI,AzureOpenAI].ReturnValue']
diff --git a/python/ql/lib/semmle/python/security/dataflow/PromptInjectionCustomizations.qll b/python/ql/lib/semmle/python/security/dataflow/PromptInjectionCustomizations.qll
@@ -0,0 +1,71 @@
+/**
+ * Provides default sources, sinks and sanitizers for detecting
+ * "prompt injection"
+ * vulnerabilities, as well as extension points for adding your own.
+ */
+
+import python
+private import semmle.python.dataflow.new.DataFlow
+private import semmle.python.Concepts
+private import semmle.python.dataflow.new.RemoteFlowSources
+private import semmle.python.dataflow.new.BarrierGuards
+private import semmle.python.frameworks.data.ModelsAsData
+private import semmle.python.frameworks.OpenAI
+
+/**
+ * Provides default sources, sinks and sanitizers for detecting
+ * "prompt injection"
+ * vulnerabilities, as well as extension points for adding your own.
+ */
+module PromptInjection {
+  /**
+   * A data flow source for "prompt injection" vulnerabilities.
+   */
+  abstract class Source extends DataFlow::Node { }
+
+  /**
+   * A data flow sink for "prompt injection" vulnerabilities.
+   */
+  abstract class Sink extends DataFlow::Node { }
+
+  /**
+   * A sanitizer for "prompt injection" vulnerabilities.
+   */
+  abstract class Sanitizer extends DataFlow::Node { }
+
+  /**
+   * An active threat-model source, considered as a flow source.
+   */
+  private class ActiveThreatModelSourceAsSource extends Source, ActiveThreatModelSource { }
+
+  /**
+   * A prompt to an AI model, considered as a flow sink.
+   */
+  class AIPromptAsSink extends Sink {
+    AIPromptAsSink() { this = any(AIPrompt p).getAPrompt() }
+  }
+
+  /**
+   * A sink defined through a models-as-data `sinkModel` row of kind `prompt-injection`.
+   */
+  private class SinkFromModel extends Sink {
+    SinkFromModel() { this = ModelOutput::getASinkNode("prompt-injection").asSink() }
+  }
+
+  /**
+   * Prompt content identified by `OpenAI::getContentNode` or
+   * `AgentSDK::getContentNode`, considered as a flow sink.
+   */
+  private class PromptContentSink extends Sink {
+    PromptContentSink() {
+      this = OpenAI::getContentNode().asSink()
+      or
+      this = AgentSDK::getContentNode().asSink()
+    }
+  }
+
+  /**
+   * A comparison with a constant, considered as a sanitizer-guard.
+   */
+  class ConstCompareAsSanitizerGuard extends Sanitizer, ConstCompareBarrier { }
+}
diff --git a/python/ql/lib/semmle/python/security/dataflow/PromptInjectionQuery.qll b/python/ql/lib/semmle/python/security/dataflow/PromptInjectionQuery.qll
@@ -0,0 +1,27 @@
+/**
+ * Provides a taint-tracking configuration for detecting "prompt injection" vulnerabilities.
+ *
+ * Note, for performance reasons: only import this file if
+ * `PromptInjectionFlow` is needed, otherwise
+ * `PromptInjectionCustomizations` should be imported instead.
+ */
+
+private import python
+import semmle.python.dataflow.new.DataFlow
+import semmle.python.dataflow.new.TaintTracking
+import PromptInjectionCustomizations::PromptInjection
+
+/** The configuration for "prompt injection": flow from any `Source` to any `Sink`, blocked at any `Sanitizer`. */
+private module PromptInjectionConfig implements DataFlow::ConfigSig {
+  predicate isSource(DataFlow::Node node) { node instanceof Source }
+
+  predicate isSink(DataFlow::Node node) { node instanceof Sink }
+
+  predicate isBarrier(DataFlow::Node node) { node instanceof Sanitizer }
+
+  // Always holds; presumably this opts the configuration in to diff-informed incremental analysis.
+  predicate observeDiffInformedIncrementalMode() { any() }
+}
+
+/** Global taint-tracking for detecting "prompt injection" vulnerabilities. */
+module PromptInjectionFlow = TaintTracking::Global<PromptInjectionConfig>;
@@ -0,0 +1,24 @@
+<!DOCTYPE qhelp PUBLIC
+  "-//Semmle//qhelp//EN"
+  "qhelp.dtd">
+<qhelp>
+
+<overview>
+<p>Prompts that contain user-controlled input can be crafted to subvert the original purpose of an agent,
+leading to leaks of sensitive data or to operations that were not intended.</p>
+</overview>
+
+<recommendation>
+<p>Sanitize user input, and avoid using user input in developer-level or system-level prompts.</p>
+</recommendation>
+
+<example>
+<p>In the following example, the case marked GOOD shows secure prompt construction, whereas the case marked BAD may be susceptible to prompt injection.</p>
+<sample src="examples/example.py" />
+</example>
+
+<references>
+<li>OpenAI: <a href="https://openai.github.io/openai-guardrails-python">Guardrails</a>.</li>
+</references>
+
+</qhelp>
@@ -0,0 +1,22 @@
+/**
+ * @name Prompt injection
+ * @description Building the prompt of an AI model from user-controlled data may allow
+ *              an attacker to alter the behavior of the model.
+ * @kind path-problem
+ * @problem.severity error
+ * @security-severity 5.0
+ * @precision high
+ * @id py/prompt-injection
+ * @tags security
+ *       experimental
+ *       external/cwe/cwe-1427
+ */
+
+import python
+import semmle.python.security.dataflow.PromptInjectionQuery
+import PromptInjectionFlow::PathGraph
+
+from PromptInjectionFlow::PathNode source, PromptInjectionFlow::PathNode sink
+where PromptInjectionFlow::flowPath(source, sink)
+select sink.getNode(), source, sink, "This prompt construction depends on a $@.", source.getNode(),
+  "user-provided value"
@@ -0,0 +1,17 @@
+from flask import Flask, request
+from agents import Agent
+from guardrails import GuardrailAgent
+
+@app.route("/parameter-route")
+def get_input():
+    input = request.args.get("input")
+
+    goodAgent = GuardrailAgent(  # GOOD: Agent created with guardrails automatically configured.
+        config=Path("guardrails_config.json"),
+        name="Assistant",
+        instructions="This prompt is customized for " + input)
+
+    badAgent = Agent(
+        name="Assistant",
+        instructions="This prompt is customized for " + input  # BAD: user input in agent instruction.
+    )