@@ -11,21 +11,35 @@ private import semmle.python.ApiGraphs
1111 */
1212predicate defaultTaintSanitizer ( DataFlow:: Node node ) { none ( ) }
1313
14+ /**
15+ * Holds if default taint tracking should read content `contentSet` implicitly and
16+ * propagate taint from a container to reads of that content.
17+ */
18+ private predicate defaultTaintReadContent ( DataFlow:: ContentSet contentSet ) {
19+ // Tuple and dictionary content is precise, so use wildcard content sets to avoid
20+ // blowing up the size of `Stage1::readSetEx` (otherwise this predicate would
21+ // expand to one row per (node, distinct key or index) and the framework's
22+ // read-set relation grows quadratically). `ContentSet.getAReadContent` expands
23+ // these wildcards back to the specific contents when matching against stores.
24+ contentSet .isAnyTupleElement ( )
25+ or
26+ contentSet .isAnyDictionaryElement ( )
27+ or
28+ // List and set element content is already imprecise, so no wildcard expansion is
29+ // needed.
30+ contentSet .getAStoreContent ( ) instanceof DataFlow:: ListElementContent
31+ or
32+ contentSet .getAStoreContent ( ) instanceof DataFlow:: SetElementContent
33+ }
34+
1435/**
1536 * Holds if default `TaintTracking::Configuration`s should allow implicit reads
1637 * of `c` at sinks and inputs to additional taint steps.
1738 */
1839bindingset [ node]
1940predicate defaultImplicitTaintRead ( DataFlow:: Node node , DataFlow:: ContentSet c ) {
20- // We allow implicit reads of precise content; imprecise content has already
21- // bubbled up. We use the wildcard content sets here rather than the
22- // per-key/per-index ones to avoid blowing up the size of `Stage1::readSetEx`
23- // (otherwise this predicate would expand to one row per (node, distinct key
24- // or index) and the framework's read-set relation grows quadratically).
25- // `ContentSet.getAReadContent` expands these wildcards back to the specific
26- // contents when matching against stores.
2741 exists ( node ) and
28- ( c . isAnyTupleElement ( ) or c . isAnyDictionaryElement ( ) )
42+ defaultTaintReadContent ( c )
2943}
3044
3145private module Cached {
@@ -171,28 +185,15 @@ predicate stringManipulation(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeT
171185}
172186
173187/**
174- * Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to containers
175- * (lists/sets/dictionaries): literals, constructor invocation, methods. Note that this
176- * is currently very imprecise, as an example, since we model `dict.get`, we treat any
177- * `<tainted object>.get(<arg>)` will be tainted, whether it's true or not.
188+ * Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to reading
189+ * content from containers (lists/sets/dictionaries/tuples): subscripts, iteration,
190+ * constructor invocation, methods.
178191 */
179192predicate containerStep ( DataFlow:: Node nodeFrom , DataFlow:: Node nodeTo ) {
180- // construction by literal
181- //
182- // TODO: once we have proper flow-summary modeling, we might not need this step any
183- // longer -- but there needs to be a matching read-step for the store-step, and we
184- // don't provide that right now.
185- DataFlowPrivate:: listStoreStep ( nodeFrom , _, nodeTo )
186- or
187- DataFlowPrivate:: setStoreStep ( nodeFrom , _, nodeTo )
188- or
189- // comprehension, so there is taint-flow from `x` in `[x for x in xs]` to the
190- // resulting list of the list-comprehension.
191- //
192- // TODO: once we have proper flow-summary modeling, we might not need this step any
193- // longer -- but there needs to be a matching read-step for the store-step, and we
194- // don't provide that right now.
195- DataFlowPrivate:: yieldStoreStep ( nodeFrom , _, nodeTo )
193+ exists ( DataFlow:: ContentSet contentSet |
194+ DataFlowPrivate:: readStep ( nodeFrom , contentSet , nodeTo ) and
195+ defaultTaintReadContent ( contentSet )
196+ )
196197}
197198
198199/**
0 commit comments