Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ ql/python/ql/src/Metrics/NumberOfStatements.ql
ql/python/ql/src/Metrics/TransitiveImports.ql
ql/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIsUsedWithUntrustedData.ql
ql/python/ql/src/Security/CWE-020-ExternalAPIs/UntrustedDataToExternalAPI.ql
ql/python/ql/src/Security/CWE-1427/UserPromptInjection.ql
ql/python/ql/src/Security/CWE-798/HardcodedCredentials.ql
ql/python/ql/src/Statements/C_StyleParentheses.ql
ql/python/ql/src/Statements/DocStrings.ql
Expand Down Expand Up @@ -87,7 +88,6 @@ ql/python/ql/src/experimental/Security/CWE-079/EmailXss.ql
ql/python/ql/src/experimental/Security/CWE-091/XsltInjection.ql
ql/python/ql/src/experimental/Security/CWE-094/Js2Py.ql
ql/python/ql/src/experimental/Security/CWE-1236/CsvInjection.ql
ql/python/ql/src/experimental/Security/CWE-1427/PromptInjection.ql
ql/python/ql/src/experimental/Security/CWE-176/UnicodeBypassValidation.ql
ql/python/ql/src/experimental/Security/CWE-208/TimingAttackAgainstHash/PossibleTimingAttackAgainstHash.ql
ql/python/ql/src/experimental/Security/CWE-208/TimingAttackAgainstHash/TimingAttackAgainstHash.ql
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ ql/python/ql/src/Security/CWE-1004/NonHttpOnlyCookie.ql
ql/python/ql/src/Security/CWE-113/HeaderInjection.ql
ql/python/ql/src/Security/CWE-116/BadTagFilter.ql
ql/python/ql/src/Security/CWE-1275/SameSiteNoneCookie.ql
ql/python/ql/src/Security/CWE-1427/SystemPromptInjection.ql
ql/python/ql/src/Security/CWE-209/StackTraceExposure.ql
ql/python/ql/src/Security/CWE-215/FlaskDebug.ql
ql/python/ql/src/Security/CWE-285/PamAuthorization.ql
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ ql/python/ql/src/Security/CWE-113/HeaderInjection.ql
ql/python/ql/src/Security/CWE-116/BadTagFilter.ql
ql/python/ql/src/Security/CWE-117/LogInjection.ql
ql/python/ql/src/Security/CWE-1275/SameSiteNoneCookie.ql
ql/python/ql/src/Security/CWE-1427/SystemPromptInjection.ql
ql/python/ql/src/Security/CWE-209/StackTraceExposure.ql
ql/python/ql/src/Security/CWE-215/FlaskDebug.ql
ql/python/ql/src/Security/CWE-285/PamAuthorization.ql
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ ql/python/ql/src/Security/CWE-113/HeaderInjection.ql
ql/python/ql/src/Security/CWE-116/BadTagFilter.ql
ql/python/ql/src/Security/CWE-117/LogInjection.ql
ql/python/ql/src/Security/CWE-1275/SameSiteNoneCookie.ql
ql/python/ql/src/Security/CWE-1427/SystemPromptInjection.ql
ql/python/ql/src/Security/CWE-209/StackTraceExposure.ql
ql/python/ql/src/Security/CWE-215/FlaskDebug.ql
ql/python/ql/src/Security/CWE-285/PamAuthorization.ql
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* Added prompt-injection sink models (`system-prompt-injection` and `user-prompt-injection` kinds) for the `openai`, `agents`, `anthropic`, `google-genai`, `openrouter` and `langchain` frameworks.
25 changes: 25 additions & 0 deletions python/ql/lib/semmle/python/Concepts.qll
Original file line number Diff line number Diff line change
Expand Up @@ -1794,3 +1794,28 @@ module Cryptography {

import ConceptsShared::Cryptography
}

/**
 * A data-flow node at which an AI model is prompted.
 *
 * To refine the modeling of APIs that are already covered, extend this class.
 * To add support for new APIs, extend `AIPrompt::Range` instead.
 */
class AIPrompt extends DataFlow::Node instanceof AIPrompt::Range {
  /** Gets a data-flow node whose value is used as a prompt for the AI model. */
  DataFlow::Node getAPrompt() { super.getAPrompt() = result }
}

/** Provides the extension point (`Range`) for modeling new AI prompting mechanisms. */
module AIPrompt {
/**
* A data-flow node that prompts an AI model.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `AIPrompt` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets an input that is used as an AI prompt.
*
* The predicate is abstract, so every concrete subclass has to implement it.
*/
abstract DataFlow::Node getAPrompt();
}
}
58 changes: 58 additions & 0 deletions python/ql/lib/semmle/python/frameworks/Anthropic.qll
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/**
* Provides classes modeling security-relevant aspects of the `anthropic` package.
* See https://github.com/anthropics/anthropic-sdk-python.
*
* Structurally typed sinks (the `system` field) are modeled via Models as Data:
* python/ql/lib/semmle/python/frameworks/anthropic.model.yml
*
* This file retains only role-filtered message sinks that require inspecting a
* sibling `role` key, which MaD cannot express.
*/

private import python
private import semmle.python.ApiGraphs

/**
 * Provides predicates modeling the role-filtered prompt-injection sinks of the
 * `anthropic` package.
 */
module Anthropic {
  /** Gets a reference to an `anthropic.Anthropic` or `anthropic.AsyncAnthropic` client instance. */
  private API::Node classRef() {
    result = API::moduleImport("anthropic").getMember(["Anthropic", "AsyncAnthropic"]).getReturn()
  }

  /** Gets the message dictionaries passed to `messages.create`/`messages.stream` (stable and beta). */
  private API::Node messageElement() {
    exists(API::Node create |
      create = classRef().getMember("messages").getMember(["create", "stream"])
      or
      create = classRef().getMember("beta").getMember("messages").getMember(["create", "stream"])
    |
      result = create.getKeywordParameter("messages").getASubscript()
    )
  }

  /**
   * Holds if a string literal `"system"` or `"assistant"` reaches the `role` key of
   * the message dictionary `msg`.
   *
   * Kept in one place so that the two public predicates below partition the
   * messages with exactly the same role check.
   */
  private predicate hasPrivilegedRole(API::Node msg) {
    msg.getSubscript("role").getAValueReachingSink().asExpr().(StringLiteral).getText() =
      ["system", "assistant"]
  }

  /**
   * Gets a content sink of the message dictionary `msg`: either the `content` value
   * itself, or the `text` entry of an element of `content` when the content is given
   * as a list of content blocks (for example `[{"type": "text", "text": ...}]`).
   */
  private API::Node messageContent(API::Node msg) {
    result = msg.getSubscript("content")
    or
    result = msg.getSubscript("content").getASubscript().getSubscript("text")
  }

  /**
   * Gets role-filtered system/assistant message content sinks that MaD cannot express.
   */
  API::Node getSystemOrAssistantPromptNode() {
    exists(API::Node msg | msg = messageElement() and hasPrivilegedRole(msg) |
      result = messageContent(msg)
    )
  }

  /**
   * Gets role-filtered user message content sinks that MaD cannot express.
   *
   * A message counts as a user message whenever no `"system"`/`"assistant"` string
   * literal is found for its `role` key, which includes messages whose role cannot
   * be resolved.
   */
  API::Node getUserPromptNode() {
    exists(API::Node msg | msg = messageElement() and not hasPrivilegedRole(msg) |
      result = messageContent(msg)
    )
  }
}
58 changes: 58 additions & 0 deletions python/ql/lib/semmle/python/frameworks/GoogleGenAI.qll
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/**
* Provides classes modeling security-relevant aspects of the `google-genai` package.
* See https://github.com/googleapis/python-genai.
*
* Structurally typed sinks (`system_instruction`, `contents`, etc.) are modeled via
* Models as Data: python/ql/lib/semmle/python/frameworks/google-genai.model.yml
*
* This file retains only role-filtered content sinks that require inspecting a
* sibling `role` key, which MaD cannot express.
*/

private import python
private import semmle.python.ApiGraphs

/**
 * Provides predicates modeling the role-filtered prompt-injection sinks of the
 * `google-genai` package.
 */
module GoogleGenAI {
  /**
   * Gets a reference to a `google.genai.Client` instance.
   *
   * `API::moduleImport` does not yield a node for a dotted module name, so the
   * `genai` submodule is reached with `getMember` on the top-level `google` package.
   */
  private API::Node clientRef() {
    result = API::moduleImport("google").getMember("genai").getMember("Client").getReturn()
  }

  /** Gets the content dictionaries passed to `models.generate_content`/`generate_content_stream`. */
  private API::Node contentElement() {
    result =
      clientRef()
          .getMember("models")
          .getMember(["generate_content", "generate_content_stream"])
          .getKeywordParameter("contents")
          .getASubscript()
  }

  /**
   * Holds if a string literal `"system"` or `"model"` reaches the `role` key of the
   * content dictionary `msg`. Gemini uses the "model" role instead of "assistant".
   */
  private predicate hasPrivilegedRole(API::Node msg) {
    msg.getSubscript("role").getAValueReachingSink().asExpr().(StringLiteral).getText() =
      ["system", "model"]
  }

  /** Gets the `text` entry of an element of the `parts` list of the content dictionary `msg`. */
  private API::Node partText(API::Node msg) {
    result = msg.getSubscript("parts").getASubscript().getSubscript("text")
  }

  /**
   * Gets role-filtered system/model content sinks that MaD cannot express.
   * Gemini uses the "model" role instead of "assistant".
   */
  API::Node getSystemOrAssistantPromptNode() {
    exists(API::Node msg | msg = contentElement() and hasPrivilegedRole(msg) |
      result = partText(msg)
    )
  }

  /**
   * Gets role-filtered user content sinks that MaD cannot express.
   *
   * A content dictionary counts as user content whenever no `"system"`/`"model"`
   * string literal is found for its `role` key, which includes dictionaries whose
   * role cannot be resolved.
   */
  API::Node getUserPromptNode() {
    exists(API::Node msg | msg = contentElement() and not hasPrivilegedRole(msg) |
      result = partText(msg)
    )
  }
}
161 changes: 161 additions & 0 deletions python/ql/lib/semmle/python/frameworks/OpenAI.qll
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
/**
* Provides predicates modeling security-relevant aspects of the `openai` package
* (see https://github.com/openai/openai-python) and of the OpenAI Agents SDK,
* which is imported as `agents` (see https://github.com/openai/openai-agents-python).
*
* Structurally typed sinks (instructions, prompt, input, etc.) are modeled via
* Models as Data: python/ql/lib/semmle/python/frameworks/openai.model.yml and
* python/ql/lib/semmle/python/frameworks/agent.model.yml
*
* This file retains only role-filtered message sinks that require inspecting a
* sibling `role` key, which MaD cannot express.
*/

private import python
private import semmle.python.ApiGraphs

/**
 * Holds if a string literal `"system"`, `"developer"` or `"assistant"` reaches the
 * `role` key of the message dictionary `msg`, i.e. the message has a privileged role.
 */
private predicate isSystemOrDevMessage(API::Node msg) {
  exists(StringLiteral role |
    role = msg.getSubscript("role").getAValueReachingSink().asExpr() and
    role.getText() = ["system", "developer", "assistant"]
  )
}

/**
 * Provides models for the OpenAI Agents SDK, that is, for the `Runner` class of
 * the `agents` module.
 *
 * See https://github.com/openai/openai-agents-python.
 */
module AgentSdk {
  /** Gets a reference to the `agents.Runner` class. */
  API::Node classRef() { API::moduleImport("agents").getMember("Runner") = result }

  /** Gets a reference to `Runner.run`, `Runner.run_sync` or `Runner.run_streamed`. */
  API::Node runMembers() {
    exists(string runName | runName = ["run", "run_sync", "run_streamed"] |
      result = classRef().getMember(runName)
    )
  }

  /**
   * Gets the `input` argument of one of the run members, passed either as the
   * `input` keyword or as the positional argument at index 1.
   */
  private API::Node runInput() {
    exists(API::Node run | run = runMembers() |
      result = [run.getKeywordParameter("input"), run.getParameter(1)]
    )
  }

  /** Gets an element of the `input` argument, i.e. a single message dictionary. */
  private API::Node inputMessage() { result = runInput().getASubscript() }

  /**
   * Gets the `content` of an input message with a system/developer/assistant role.
   * These role-filtered sinks cannot be expressed with MaD.
   */
  API::Node getSystemOrAssistantPromptNode() {
    exists(API::Node message |
      message = inputMessage() and
      isSystemOrDevMessage(message) and
      result = message.getSubscript("content")
    )
  }

  /**
   * Gets the `content` of an input message that is not recognized as having a
   * system/developer/assistant role. These role-filtered sinks cannot be expressed
   * with MaD; the string-input case is handled via MaD (agent.model.yml).
   */
  API::Node getUserPromptNode() {
    exists(API::Node message |
      message = inputMessage() and
      not isSystemOrDevMessage(message) and
      result = message.getSubscript("content")
    )
  }
}

/**
 * Provides models for the OpenAI client, i.e. instances of the `OpenAI`,
 * `AsyncOpenAI` and `AzureOpenAI` classes of the `openai` module.
 *
 * See https://github.com/openai/openai-python.
 */
module OpenAI {
  /** Gets a reference to an instance of one of the `openai` client classes. */
  API::Node classRef() {
    exists(string clientClass | clientClass = ["OpenAI", "AsyncOpenAI", "AzureOpenAI"] |
      result = API::moduleImport("openai").getMember(clientClass).getReturn()
    )
  }

  /**
   * Gets a message dictionary from the list passed as the `messages` keyword of
   * `chat.completions.create` or as the `input` keyword of `responses.create`.
   */
  private API::Node roleTaggedMessage() {
    exists(API::Node client, API::Node messageList |
      client = classRef() and
      (
        messageList =
          client
              .getMember("chat")
              .getMember("completions")
              .getMember("create")
              .getKeywordParameter("messages")
        or
        messageList = client.getMember("responses").getMember("create").getKeywordParameter("input")
      )
    |
      result = messageList.getASubscript()
    )
  }

  /**
   * Gets a content sink of the message dictionary `msg`: the `content` value itself,
   * or the `text` entry of an element of structured `content`.
   */
  private API::Node contentSink(API::Node msg) {
    exists(API::Node content | content = msg.getSubscript("content") |
      result = content
      or
      result = content.getASubscript().getSubscript("text")
    )
  }

  /** Gets `beta.threads.messages.create` (Assistants API thread messages). */
  private API::Node threadMessageCreate() {
    exists(API::Node threads | threads = classRef().getMember("beta").getMember("threads") |
      result = threads.getMember("messages").getMember("create")
    )
  }

  /** Holds if the string literal `"assistant"` reaches the `role` keyword of thread-message `call`. */
  private predicate hasAssistantRole(API::Node call) {
    exists(StringLiteral role |
      role = call.getKeywordParameter("role").getAValueReachingSink().asExpr() and
      role.getText() = "assistant"
    )
  }

  /**
   * Gets the `text` entry of an element of the `content` of the `item` keyword of
   * `realtime.connect(...).conversation.item.create`.
   */
  private API::Node realtimeItemText() {
    exists(API::Node itemCreate |
      itemCreate =
        classRef()
            .getMember("realtime")
            .getMember("connect")
            .getReturn()
            .getMember("conversation")
            .getMember("item")
            .getMember("create")
    |
      result =
        itemCreate
            .getKeywordParameter("item")
            .getSubscript("content")
            .getASubscript()
            .getSubscript("text")
    )
  }

  /**
   * Gets the content of a message with a system/developer/assistant role, or the
   * `content` keyword of a thread message created with the assistant role.
   * These role-filtered sinks cannot be expressed with MaD.
   */
  API::Node getSystemOrAssistantPromptNode() {
    exists(API::Node message |
      message = roleTaggedMessage() and
      isSystemOrDevMessage(message) and
      result = contentSink(message)
    )
    or
    exists(API::Node create |
      create = threadMessageCreate() and
      hasAssistantRole(create) and
      result = create.getKeywordParameter("content")
    )
  }

  /**
   * Gets the content of a message that is not recognized as having a privileged
   * role. These role-filtered sinks cannot be expressed with MaD; the string-input
   * case is handled via MaD (openai.model.yml).
   */
  API::Node getUserPromptNode() {
    exists(API::Node message |
      message = roleTaggedMessage() and
      not isSystemOrDevMessage(message) and
      result = contentSink(message)
    )
    or
    exists(API::Node create |
      create = threadMessageCreate() and
      not hasAssistantRole(create) and
      result = create.getKeywordParameter("content")
    )
    or
    // realtime conversation items, role cannot be statically resolved in general
    result = realtimeItemText()
  }
}
Loading
Loading