Skip to content

Commit 80c6f08

Browse files
committed
Fix TODO in containerStep
1 parent ec13e1b commit 80c6f08

1 file changed

Lines changed: 29 additions & 28 deletions

File tree

python/ql/lib/semmle/python/dataflow/new/internal/TaintTrackingPrivate.qll

Lines changed: 29 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -11,21 +11,35 @@ private import semmle.python.ApiGraphs
1111
*/
1212
predicate defaultTaintSanitizer(DataFlow::Node node) { none() }
1313

14+
/**
15+
* Holds if default taint tracking should read content `contentSet` implicitly and
16+
* propagate taint from a container to reads of that content.
17+
*/
18+
private predicate defaultTaintReadContent(DataFlow::ContentSet contentSet) {
19+
// Tuple and dictionary content is precise, so use wildcard content sets to avoid
20+
// blowing up the size of `Stage1::readSetEx` (otherwise this predicate would
21+
// expand to one row per (node, distinct key or index) and the framework's
22+
// read-set relation grows quadratically). `ContentSet.getAReadContent` expands
23+
// these wildcards back to the specific contents when matching against stores.
24+
contentSet.isAnyTupleElement()
25+
or
26+
contentSet.isAnyDictionaryElement()
27+
or
28+
// List and set element content is already imprecise, so no wildcard expansion is
29+
// needed.
30+
contentSet.getAStoreContent() instanceof DataFlow::ListElementContent
31+
or
32+
contentSet.getAStoreContent() instanceof DataFlow::SetElementContent
33+
}
34+
1435
/**
1536
* Holds if default `TaintTracking::Configuration`s should allow implicit reads
1637
* of `c` at sinks and inputs to additional taint steps.
1738
*/
1839
bindingset[node]
1940
predicate defaultImplicitTaintRead(DataFlow::Node node, DataFlow::ContentSet c) {
20-
// We allow implicit reads of precise content; imprecise content has already
21-
// bubbled up. We use the wildcard content sets here rather than the
22-
// per-key/per-index ones to avoid blowing up the size of `Stage1::readSetEx`
23-
// (otherwise this predicate would expand to one row per (node, distinct key
24-
// or index) and the framework's read-set relation grows quadratically).
25-
// `ContentSet.getAReadContent` expands these wildcards back to the specific
26-
// contents when matching against stores.
2741
exists(node) and
28-
(c.isAnyTupleElement() or c.isAnyDictionaryElement())
42+
defaultTaintReadContent(c)
2943
}
3044

3145
private module Cached {
@@ -171,28 +185,15 @@ predicate stringManipulation(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeT
171185
}
172186

173187
/**
174-
* Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to containers
175-
* (lists/sets/dictionaries): literals, constructor invocation, methods. Note that this
176-
* is currently very imprecise, as an example, since we model `dict.get`, we treat any
177-
* `<tainted object>.get(<arg>)` will be tainted, whether it's true or not.
188+
* Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to reading
189+
* content from containers (lists/sets/dictionaries/tuples): subscripts, iteration,
190+
* constructor invocation, methods.
178191
*/
179192
predicate containerStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
180-
// construction by literal
181-
//
182-
// TODO: once we have proper flow-summary modeling, we might not need this step any
183-
// longer -- but there needs to be a matching read-step for the store-step, and we
184-
// don't provide that right now.
185-
DataFlowPrivate::listStoreStep(nodeFrom, _, nodeTo)
186-
or
187-
DataFlowPrivate::setStoreStep(nodeFrom, _, nodeTo)
188-
or
189-
// comprehension, so there is taint-flow from `x` in `[x for x in xs]` to the
190-
// resulting list of the list-comprehension.
191-
//
192-
// TODO: once we have proper flow-summary modeling, we might not need this step any
193-
// longer -- but there needs to be a matching read-step for the store-step, and we
194-
// don't provide that right now.
195-
DataFlowPrivate::yieldStoreStep(nodeFrom, _, nodeTo)
193+
exists(DataFlow::ContentSet contentSet |
194+
DataFlowPrivate::readStep(nodeFrom, contentSet, nodeTo) and
195+
defaultTaintReadContent(contentSet)
196+
)
196197
}
197198

198199
/**

0 commit comments

Comments
 (0)