From 384e17a4ef52b2b6ebff9cdc5e96ad7d1a40cfbf Mon Sep 17 00:00:00 2001 From: Joe Farebrother Date: Mon, 1 Dec 2025 16:24:59 +0000 Subject: [PATCH 1/4] Implement websockets models --- python/ql/lib/semmle/python/Frameworks.qll | 1 + .../semmle/python/frameworks/Websockets.qll | 83 +++++++++++++++++++ .../websockets/ConceptsTest.expected | 0 .../frameworks/websockets/ConceptsTest.ql | 2 + .../websockets/InlineTaintTest.expected | 3 + .../frameworks/websockets/InlineTaintTest.ql | 2 + .../frameworks/websockets/response_test.py | 46 ++++++++++ .../websockets/taint_test_asyncio.py | 30 +++++++ .../frameworks/websockets/taint_test_sync.py | 26 ++++++ 9 files changed, 193 insertions(+) create mode 100644 python/ql/lib/semmle/python/frameworks/Websockets.qll create mode 100644 python/ql/test/library-tests/frameworks/websockets/ConceptsTest.expected create mode 100644 python/ql/test/library-tests/frameworks/websockets/ConceptsTest.ql create mode 100644 python/ql/test/library-tests/frameworks/websockets/InlineTaintTest.expected create mode 100644 python/ql/test/library-tests/frameworks/websockets/InlineTaintTest.ql create mode 100644 python/ql/test/library-tests/frameworks/websockets/response_test.py create mode 100644 python/ql/test/library-tests/frameworks/websockets/taint_test_asyncio.py create mode 100644 python/ql/test/library-tests/frameworks/websockets/taint_test_sync.py diff --git a/python/ql/lib/semmle/python/Frameworks.qll b/python/ql/lib/semmle/python/Frameworks.qll index 955385141f7f..4906d15ea007 100644 --- a/python/ql/lib/semmle/python/Frameworks.qll +++ b/python/ql/lib/semmle/python/Frameworks.qll @@ -89,6 +89,7 @@ private import semmle.python.frameworks.TRender private import semmle.python.frameworks.Twisted private import semmle.python.frameworks.Ujson private import semmle.python.frameworks.Urllib3 +private import semmle.python.frameworks.Websockets private import semmle.python.frameworks.Xmltodict private import semmle.python.frameworks.Yaml private import semmle.python.frameworks.Yarl diff --git a/python/ql/lib/semmle/python/frameworks/Websockets.qll b/python/ql/lib/semmle/python/frameworks/Websockets.qll new file mode 100644 index 000000000000..88a3e701eea1 --- /dev/null +++ b/python/ql/lib/semmle/python/frameworks/Websockets.qll @@ -0,0 +1,83 @@ +/** + * Provides definitions and modeling for the `websockets` PyPI package. + * + * See https://websockets.readthedocs.io/en/stable/ + */ + +private import python +private import semmle.python.dataflow.new.RemoteFlowSources +private import semmle.python.Concepts +private import semmle.python.ApiGraphs +private import semmle.python.frameworks.internal.PoorMansFunctionResolution +private import semmle.python.frameworks.internal.InstanceTaintStepsHelper + +/** + * Provides models for the `websockets` PyPI package. + * See https://websockets.readthedocs.io/en/stable/ + */ +module Websockets { + private class HandlerArg extends DataFlow::Node { + HandlerArg() { + exists(DataFlow::CallCfgNode c | + c = + API::moduleImport("websockets") + .getMember(["asyncio", "sync"]) + .getMember("server") + .getMember(["serve", "unix_serve"]) + .getACall() + | + (this = c.getArg(0) or this = c.getArgByName("handler")) + ) + } + } + + /** A websocket handler that is passed to `serve`. */ + // TODO: handlers defined via route maps, e.g. through `websockets.asyncio.router.route`, are more complex to handle. + class WebSocketHandler extends Http::Server::RequestHandler::Range { + WebSocketHandler() { poorMansFunctionTracker(this) = any(HandlerArg a) } + + override Parameter getARoutedParameter() { result = this.getAnArg() } + + override string getFramework() { result = "websockets" } + } + + module ServerConnection { + /** + * A source of instances of `websockets.asyncio.ServerConnection` and `websockets.threading.ServerConnection`, extend this class to model new instances. + * + * This can include instantiations of the class, return values from function + * calls, or a special parameter that will be set when functions are called by an external + * library. + * + * Use the predicate `WebSocket::instance()` to get references to instances of `websockets.asyncio.ServerConnection` and `websockets.threading.ServerConnection`. + */ + abstract class InstanceSource extends DataFlow::LocalSourceNode { } + + /** Gets a reference to an instance of `websockets.asyncio.ServerConnection` or `websockets.threading.ServerConnection`. */ + private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) { + t.start() and + result instanceof InstanceSource + or + exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t)) + } + + /** Gets a reference to an instance of `websockets.asyncio.ServerConnection` or `websockets.threading.ServerConnection`. */ + DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) } + + private class HandlerParam extends DataFlow::Node, InstanceSource { + HandlerParam() { exists(WebSocketHandler h | this = DataFlow::parameterNode(h.getArg(0))) } + } + + private class InstanceTaintSteps extends InstanceTaintStepsHelper { + InstanceTaintSteps() { this = "websockets.asyncio.ServerConnection" } + + override DataFlow::Node getInstance() { result = instance() } + + override string getAttributeName() { none() } + + override string getAsyncMethodName() { result = ["recv", "recv_streaming"] } + + override string getMethodName() { result = ["recv", "recv_streaming"] } + } + } +} diff --git a/python/ql/test/library-tests/frameworks/websockets/ConceptsTest.expected b/python/ql/test/library-tests/frameworks/websockets/ConceptsTest.expected new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/python/ql/test/library-tests/frameworks/websockets/ConceptsTest.ql b/python/ql/test/library-tests/frameworks/websockets/ConceptsTest.ql new file mode 100644 index 000000000000..b557a0bccb69 --- /dev/null +++ b/python/ql/test/library-tests/frameworks/websockets/ConceptsTest.ql @@ -0,0 +1,2 @@ +import python +import experimental.meta.ConceptsTest diff --git a/python/ql/test/library-tests/frameworks/websockets/InlineTaintTest.expected b/python/ql/test/library-tests/frameworks/websockets/InlineTaintTest.expected new file mode 100644 index 000000000000..020c338fd192 --- /dev/null +++ b/python/ql/test/library-tests/frameworks/websockets/InlineTaintTest.expected @@ -0,0 +1,3 @@ +argumentToEnsureNotTaintedNotMarkedAsSpurious +untaintedArgumentToEnsureTaintedNotMarkedAsMissing +testFailures diff --git a/python/ql/test/library-tests/frameworks/websockets/InlineTaintTest.ql b/python/ql/test/library-tests/frameworks/websockets/InlineTaintTest.ql new file mode 100644 index 000000000000..8524da5fe7db --- /dev/null +++ b/python/ql/test/library-tests/frameworks/websockets/InlineTaintTest.ql @@ -0,0 +1,2 @@ +import experimental.meta.InlineTaintTest +import MakeInlineTaintTest diff --git a/python/ql/test/library-tests/frameworks/websockets/response_test.py b/python/ql/test/library-tests/frameworks/websockets/response_test.py new file mode 100644 index 000000000000..a4a0264e2c26 --- /dev/null +++ b/python/ql/test/library-tests/frameworks/websockets/response_test.py @@ -0,0 +1,46 @@ +import websockets.sync.server +import websockets.sync.router +from werkzeug.routing import Map, Rule + +def arg_handler(websocket): # $ requestHandler routedParameter=websocket + websocket.send("arg" + websocket.recv()) + +s1 = websockets.sync.server.serve(arg_handler, "localhost", 8000) + +def kw_handler(websocket): # $ requestHandler routedParameter=websocket + websocket.send("kw" + websocket.recv()) + +s2 = websockets.sync.server.serve(handler=kw_handler, host="localhost", port=8001) + +def route_handler(websocket, x): # $ MISSING: requestHandler routedParameter=websocket routedParameter=x + websocket.send(f"route {x} {websocket.recv()}") + +s3 = websockets.sync.router.route(Map([ + Rule("/", endpoint=route_handler) +]), "localhost", 8002) + +def unix_handler(websocket): # $ requestHandler routedParameter=websocket + websocket.send("unix" + websocket.recv()) + +s4 = websockets.sync.server.unix_serve(unix_handler, path="/tmp/ws.sock") + +def unix_route_handler(websocket, x): # $ MISSING: requestHandler routedParameter=websocket routedParameter=x + websocket.send(f"unix route {x} {websocket.recv()}") + +s5 = websockets.sync.router.unix_route(Map([ + Rule("/", endpoint=unix_route_handler) +]), path="/tmp/ws2.sock") + +if __name__ == "__main__": + import sys + server = s1 + if len(sys.argv) > 1: + if sys.argv[1] == "kw": + server = s2 + elif sys.argv[1] == "route": + server = s3 + elif sys.argv[1] == "unix": + server = s4 + elif sys.argv[1] == "unix_route": + server = s5 + server.serve_forever() \ No newline at end of file diff --git a/python/ql/test/library-tests/frameworks/websockets/taint_test_asyncio.py b/python/ql/test/library-tests/frameworks/websockets/taint_test_asyncio.py new file mode 100644 index 000000000000..5c7904b4193f --- /dev/null +++ b/python/ql/test/library-tests/frameworks/websockets/taint_test_asyncio.py @@ -0,0 +1,30 @@ +import websockets.asyncio.server +import asyncio + +def ensure_tainted(*args): + print("tainted", args) + +def ensure_not_tainted(*args): + print("not tainted", args) + +async def handler(websocket): # $ requestHandler routedParameter=websocket + ensure_tainted( + websocket, # $ tainted + await websocket.recv() # $ tainted + ) + + async for msg in websocket: + ensure_tainted(msg) # $ tainted + await websocket.send(msg) + + async for msg in websocket.recv_streaming(): + ensure_tainted(msg) # $ tainted + await websocket.send(msg) + + +async def main(): + server = await websockets.asyncio.server.serve(handler, "localhost", 8000) + await server.serve_forever() + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/python/ql/test/library-tests/frameworks/websockets/taint_test_sync.py b/python/ql/test/library-tests/frameworks/websockets/taint_test_sync.py new file mode 100644 index 000000000000..d7aedae3f65d --- /dev/null +++ b/python/ql/test/library-tests/frameworks/websockets/taint_test_sync.py @@ -0,0 +1,26 @@ +import websockets.sync.server + +def ensure_tainted(*args): + print("tainted", args) + +def ensure_not_tainted(*args): + print("not tainted", args) + +def handler(websocket): # $ requestHandler routedParameter=websocket + ensure_tainted( + websocket, # $ tainted + websocket.recv() # $ tainted + ) + + for msg in websocket: + ensure_tainted(msg) # $ tainted + websocket.send(msg) + + for msg in websocket.recv_streaming(): + ensure_tainted(msg) # $ tainted + websocket.send(msg) + + +if __name__ == "__main__": + server = websockets.sync.server.serve(handler, "localhost", 8000) + server.serve_forever() From 6a1e26c566b6d6cfb7ad72df219de9459ec83388 Mon Sep 17 00:00:00 2001 From: Joe Farebrother Date: Mon, 1 Dec 2025 20:06:24 +0000 Subject: [PATCH 2/4] Add change note --- python/ql/lib/change-notes/2025-12-01-websockets.md | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 python/ql/lib/change-notes/2025-12-01-websockets.md diff --git a/python/ql/lib/change-notes/2025-12-01-websockets.md b/python/ql/lib/change-notes/2025-12-01-websockets.md new file mode 100644 index 000000000000..6b4db223d7b3 --- /dev/null +++ b/python/ql/lib/change-notes/2025-12-01-websockets.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* Remote flow sources for the `websockets` package have been modeled. \ No newline at end of file From 6fbae45d49dcab6a96aee173d2e97f31e313f309 Mon Sep 17 00:00:00 2001 From: Joe Farebrother Date: Mon, 1 Dec 2025 20:14:36 +0000 Subject: [PATCH 3/4] Update qldoc --- python/ql/lib/semmle/python/frameworks/Websockets.qll | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/Websockets.qll b/python/ql/lib/semmle/python/frameworks/Websockets.qll index 88a3e701eea1..6690a6ec0368 100644 --- a/python/ql/lib/semmle/python/frameworks/Websockets.qll +++ b/python/ql/lib/semmle/python/frameworks/Websockets.qll @@ -41,19 +41,20 @@ module Websockets { override string getFramework() { result = "websockets" } } + /** Provides taint models for instances of `ServerConnection` objects passed to websocket handlers. */ module ServerConnection { /** - * A source of instances of `websockets.asyncio.ServerConnection` and `websockets.threading.ServerConnection`, extend this class to model new instances. + * A source of instances of `websockets.asyncio.ServerConnection` and `websockets.sync.ServerConnection`, extend this class to model new instances. * * This can include instantiations of the class, return values from function * calls, or a special parameter that will be set when functions are called by an external * library. * - * Use the predicate `WebSocket::instance()` to get references to instances of `websockets.asyncio.ServerConnection` and `websockets.threading.ServerConnection`. + * Use the predicate `WebSocket::instance()` to get references to instances of `websockets.asyncio.ServerConnection` and `websockets.sync.ServerConnection`. */ abstract class InstanceSource extends DataFlow::LocalSourceNode { } - /** Gets a reference to an instance of `websockets.asyncio.ServerConnection` or `websockets.threading.ServerConnection`. */ + /** Gets a reference to an instance of `websockets.asyncio.ServerConnection` or `websockets.sync.ServerConnection`. */ private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) { t.start() and result instanceof InstanceSource @@ -61,7 +62,7 @@ module Websockets { exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t)) } - /** Gets a reference to an instance of `websockets.asyncio.ServerConnection` or `websockets.threading.ServerConnection`. */ + /** Gets a reference to an instance of `websockets.asyncio.ServerConnection` or `websockets.sync.ServerConnection`. */ DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) } private class HandlerParam extends DataFlow::Node, InstanceSource { From ac55cf95442fa99cbc52aa02f9e0a08b8e81ef0a Mon Sep 17 00:00:00 2001 From: Joe Farebrother Date: Mon, 1 Dec 2025 20:41:59 +0000 Subject: [PATCH 4/4] Update test and qldoc --- python/ql/lib/semmle/python/frameworks/Websockets.qll | 2 +- .../frameworks/websockets/response_test.py | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/Websockets.qll b/python/ql/lib/semmle/python/frameworks/Websockets.qll index 6690a6ec0368..b4300fc59807 100644 --- a/python/ql/lib/semmle/python/frameworks/Websockets.qll +++ b/python/ql/lib/semmle/python/frameworks/Websockets.qll @@ -50,7 +50,7 @@ module Websockets { * calls, or a special parameter that will be set when functions are called by an external * library. * - * Use the predicate `WebSocket::instance()` to get references to instances of `websockets.asyncio.ServerConnection` and `websockets.sync.ServerConnection`. + * Use the predicate `ServerConnection::instance()` to get references to instances of `websockets.asyncio.ServerConnection` and `websockets.sync.ServerConnection`. */ abstract class InstanceSource extends DataFlow::LocalSourceNode { } diff --git a/python/ql/test/library-tests/frameworks/websockets/response_test.py b/python/ql/test/library-tests/frameworks/websockets/response_test.py index a4a0264e2c26..d7ad630d9cfe 100644 --- a/python/ql/test/library-tests/frameworks/websockets/response_test.py +++ b/python/ql/test/library-tests/frameworks/websockets/response_test.py @@ -34,13 +34,14 @@ def unix_route_handler(websocket, x): # $ MISSING: requestHandler routedParamet if __name__ == "__main__": import sys server = s1 - if len(sys.argv) > 1: - if sys.argv[1] == "kw": + args = sys.argv # $ threatModelSource[commandargs]=sys.argv + if len(args) > 1: + if args[1] == "kw": server = s2 - elif sys.argv[1] == "route": + elif args[1] == "route": server = s3 - elif sys.argv[1] == "unix": + elif args[1] == "unix": server = s4 - elif sys.argv[1] == "unix_route": + elif args[1] == "unix_route": server = s5 server.serve_forever() \ No newline at end of file