Skip to content

Commit 21573d3

Browse files
authored
Merge pull request #966 from rdmarsh2/rdmarsh/cpp/ir-taint-tracking
C++: IR-based taint tracking
2 parents 7649e87 + 07cbbda commit 21573d3

13 files changed

Lines changed: 395 additions & 78 deletions

File tree

Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
/**
2+
* Provides classes for performing local (intra-procedural) and
3+
* global (inter-procedural) taint-tracking analyses.
4+
*
5+
* We define _taint propagation_ informally to mean that a substantial part of
6+
* the information from the source is preserved at the sink. For example, taint
7+
* propagates from `x` to `x + 100`, but it does not propagate from `x` to `x >
8+
* 100` since we consider a single bit of information to be too little.
9+
*/
10+
11+
import semmle.code.cpp.ir.dataflow.DataFlow
12+
import semmle.code.cpp.ir.dataflow.DataFlow2
13+
private import semmle.code.cpp.ir.IR
14+
15+
module TaintTracking {
16+
/**
17+
* A configuration of interprocedural taint tracking analysis. This defines
18+
* sources, sinks, and any other configurable aspect of the analysis. Each
19+
* use of the taint tracking library must define its own unique extension of
20+
* this abstract class.
21+
*
22+
* A taint-tracking configuration is a special data flow configuration
23+
* (`DataFlow::Configuration`) that allows for flow through nodes that do not
24+
* necessarily preserve values but are still relevant from a taint-tracking
25+
* perspective. (For example, string concatenation, where one of the operands
26+
* is tainted.)
27+
*
28+
* To create a configuration, extend this class with a subclass whose
29+
* characteristic predicate is a unique singleton string. For example, write
30+
*
31+
* ```
32+
* class MyAnalysisConfiguration extends TaintTracking::Configuration {
33+
* MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" }
34+
* // Override `isSource` and `isSink`.
35+
* // Optionally override `isSanitizer`.
36+
* // Optionally override `isAdditionalTaintStep`.
37+
* }
38+
* ```
39+
*
40+
* Then, to query whether there is flow between some `source` and `sink`,
41+
* write
42+
*
43+
* ```
44+
* exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink))
45+
* ```
46+
*
47+
* Multiple configurations can coexist, but it is unsupported to depend on a
48+
* `TaintTracking::Configuration` or a `DataFlow::Configuration` in the
49+
* overridden predicates that define sources, sinks, or additional steps.
50+
* Instead, the dependency should go to a `TaintTracking::Configuration2` or
51+
* a `DataFlow{2,3,4}::Configuration`.
52+
*/
53+
abstract class Configuration extends DataFlow::Configuration {
54+
// The characteristic predicate adds no restriction of its own (`any()`);
// `bindingset[this]` declares that `this` is bound by the context, which is
// what makes an unrestricted characteristic predicate acceptable here.
bindingset[this]
55+
Configuration() { any() }
56+
57+
/** Holds if `source` is a taint source. */
58+
// Overridden only to provide taint-tracking specific QLDoc.
59+
abstract override predicate isSource(DataFlow::Node source);
60+
61+
/** Holds if `sink` is a taint sink. */
62+
// Overridden only to provide taint-tracking specific QLDoc.
63+
abstract override predicate isSink(DataFlow::Node sink);
64+
65+
/**
66+
* Holds if taint should not flow into `node`.
* The default implementation is `none()`, i.e. no node is a sanitizer.
67+
*/
68+
predicate isSanitizer(DataFlow::Node node) { none() }
69+
70+
/**
71+
* Holds if the additional taint propagation step
72+
* from `source` to `target` must be taken into account in the analysis.
73+
* This step will only be followed if `target` is not in the `isSanitizer`
74+
* predicate.
* The default implementation is `none()`, i.e. no extra steps are added.
75+
*/
76+
predicate isAdditionalTaintStep(DataFlow::Node source, DataFlow::Node target) { none() }
77+
78+
// Sanitizers are handed to the underlying data-flow configuration as
// barriers. `final` prevents subclasses from overriding this mapping, so
// they customise it through `isSanitizer` instead.
final override predicate isBarrier(DataFlow::Node node) { isSanitizer(node) }
79+
80+
// The steps added on top of plain data flow are the configuration's own
// `isAdditionalTaintStep` steps together with the `localTaintStep`
// relation defined in this module. `final` for the same reason as
// `isBarrier`: subclasses extend it via `isAdditionalTaintStep`.
final override predicate isAdditionalFlowStep(DataFlow::Node source, DataFlow::Node target) {
81+
this.isAdditionalTaintStep(source, target)
82+
or
83+
localTaintStep(source, target)
84+
}
85+
}
86+
87+
/**
88+
* A taint-tracking configuration that is backed by the `DataFlow2` library
89+
* instead of `DataFlow`. Use this class when taint-tracking configurations
90+
* or data-flow configurations must depend on each other.
91+
*
92+
* See `TaintTracking::Configuration` for the full documentation.
93+
*/
94+
abstract class Configuration2 extends DataFlow2::Configuration {
95+
// The characteristic predicate adds no restriction of its own (`any()`);
// `bindingset[this]` declares that `this` is bound by the context, which is
// what makes an unrestricted characteristic predicate acceptable here.
//
// NOTE(review): the member predicates below type their parameters as
// `DataFlow::Node` although this class extends `DataFlow2::Configuration`;
// presumably both libraries share the same node type - confirm.
bindingset[this]
96+
Configuration2() { any() }
97+
98+
/** Holds if `source` is a taint source. */
99+
// Overridden only to provide taint-tracking specific QLDoc.
100+
abstract override predicate isSource(DataFlow::Node source);
101+
102+
/** Holds if `sink` is a taint sink. */
103+
// Overridden only to provide taint-tracking specific QLDoc.
104+
abstract override predicate isSink(DataFlow::Node sink);
105+
106+
/**
107+
* Holds if taint should not flow into `node`.
* The default implementation is `none()`, i.e. no node is a sanitizer.
108+
*/
109+
predicate isSanitizer(DataFlow::Node node) { none() }
110+
111+
/**
112+
* Holds if the additional taint propagation step
113+
* from `source` to `target` must be taken into account in the analysis.
114+
* This step will only be followed if `target` is not in the `isSanitizer`
115+
* predicate.
* The default implementation is `none()`, i.e. no extra steps are added.
116+
*/
117+
predicate isAdditionalTaintStep(DataFlow::Node source, DataFlow::Node target) { none() }
118+
119+
// Sanitizers are handed to the underlying data-flow configuration as
// barriers. `final` prevents subclasses from overriding this mapping, so
// they customise it through `isSanitizer` instead.
final override predicate isBarrier(DataFlow::Node node) { isSanitizer(node) }
120+
121+
// The steps added on top of plain data flow are the configuration's own
// `isAdditionalTaintStep` steps together with the `localTaintStep`
// relation defined in this module. `final` for the same reason as
// `isBarrier`: subclasses extend it via `isAdditionalTaintStep`.
final override predicate isAdditionalFlowStep(DataFlow::Node source, DataFlow::Node target) {
122+
this.isAdditionalTaintStep(source, target)
123+
or
124+
localTaintStep(source, target)
125+
}
126+
}
127+
128+
/**
129+
* Holds if taint propagates from `nodeFrom` to `nodeTo` in exactly one local
130+
* (intra-procedural) step.
131+
*/
132+
predicate localTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
133+
// Case 1: every ordinary local data-flow step is also a taint step.
134+
DataFlow::localFlowStep(nodeFrom, nodeTo)
135+
or
136+
// Case 2: taint can flow through expressions that alter the value but preserve
137+
// more than one bit of it _or_ expressions that follow data through
138+
// pointer indirections.
// Concretely: `nodeFrom` is the definition of one of `nodeTo`'s operands,
// and `nodeTo` is an arithmetic, bitwise, pointer-arithmetic or
// field-address instruction.
139+
nodeTo.getAnOperand().getDefinitionInstruction() = nodeFrom and
140+
(
141+
nodeTo instanceof ArithmeticInstruction
142+
or
143+
nodeTo instanceof BitwiseInstruction
144+
or
145+
nodeTo instanceof PointerArithmeticInstruction
146+
or
147+
nodeTo instanceof FieldAddressInstruction
148+
)
149+
or
150+
// Case 3: the result of a load is tainted when the address it loads from
// is tainted (`nodeFrom` is the load's source address).
nodeTo.(LoadInstruction).getSourceAddress() = nodeFrom
151+
}
152+
153+
/**
154+
* Holds if taint may propagate from `source` to `sink` in zero or more local
155+
* (intra-procedural) steps.
156+
*/
157+
// `*` is the reflexive transitive closure of `localTaintStep`, so this also
// holds when `source` and `sink` are the same node (zero steps).
predicate localTaint(DataFlow::Node source, DataFlow::Node sink) { localTaintStep*(source, sink) }
158+
}

cpp/ql/src/semmle/code/cpp/ir/implementation/Opcode.qll

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,18 @@ abstract class PointerArithmeticOpcode extends BinaryOpcode {}
8585

8686
abstract class PointerOffsetOpcode extends PointerArithmeticOpcode {}
8787

88+
abstract class ArithmeticOpcode extends Opcode {}
89+
90+
abstract class BinaryArithmeticOpcode extends BinaryOpcode, ArithmeticOpcode {}
91+
92+
abstract class UnaryArithmeticOpcode extends UnaryOpcode, ArithmeticOpcode {}
93+
94+
abstract class BitwiseOpcode extends Opcode {}
95+
96+
abstract class BinaryBitwiseOpcode extends BinaryOpcode, BitwiseOpcode {}
97+
98+
abstract class UnaryBitwiseOpcode extends UnaryOpcode, BitwiseOpcode {}
99+
88100
abstract class CompareOpcode extends BinaryOpcode {}
89101

90102
abstract class RelationalOpcode extends CompareOpcode {}
@@ -143,18 +155,18 @@ module Opcode {
143155
class CopyValue extends UnaryOpcode, CopyOpcode, TCopyValue { override final string toString() { result = "CopyValue" } }
144156
class Load extends CopyOpcode, OpcodeWithLoad, TLoad { override final string toString() { result = "Load" } }
145157
class Store extends CopyOpcode, MemoryAccessOpcode, TStore { override final string toString() { result = "Store" } }
146-
class Add extends BinaryOpcode, TAdd { override final string toString() { result = "Add" } }
147-
class Sub extends BinaryOpcode, TSub { override final string toString() { result = "Sub" } }
148-
class Mul extends BinaryOpcode, TMul { override final string toString() { result = "Mul" } }
149-
class Div extends BinaryOpcode, TDiv { override final string toString() { result = "Div" } }
150-
class Rem extends BinaryOpcode, TRem { override final string toString() { result = "Rem" } }
151-
class Negate extends UnaryOpcode, TNegate { override final string toString() { result = "Negate" } }
152-
class ShiftLeft extends BinaryOpcode, TShiftLeft { override final string toString() { result = "ShiftLeft" } }
153-
class ShiftRight extends BinaryOpcode, TShiftRight { override final string toString() { result = "ShiftRight" } }
154-
class BitAnd extends BinaryOpcode, TBitAnd { override final string toString() { result = "BitAnd" } }
155-
class BitOr extends BinaryOpcode, TBitOr { override final string toString() { result = "BitOr" } }
156-
class BitXor extends BinaryOpcode, TBitXor { override final string toString() { result = "BitXor" } }
157-
class BitComplement extends UnaryOpcode, TBitComplement { override final string toString() { result = "BitComplement" } }
158+
class Add extends BinaryArithmeticOpcode, TAdd { override final string toString() { result = "Add" } }
159+
class Sub extends BinaryArithmeticOpcode, TSub { override final string toString() { result = "Sub" } }
160+
class Mul extends BinaryArithmeticOpcode, TMul { override final string toString() { result = "Mul" } }
161+
class Div extends BinaryArithmeticOpcode, TDiv { override final string toString() { result = "Div" } }
162+
class Rem extends BinaryArithmeticOpcode, TRem { override final string toString() { result = "Rem" } }
163+
class Negate extends UnaryArithmeticOpcode, TNegate { override final string toString() { result = "Negate" } }
164+
class ShiftLeft extends BinaryBitwiseOpcode, TShiftLeft { override final string toString() { result = "ShiftLeft" } }
165+
class ShiftRight extends BinaryBitwiseOpcode, TShiftRight { override final string toString() { result = "ShiftRight" } }
166+
class BitAnd extends BinaryBitwiseOpcode, TBitAnd { override final string toString() { result = "BitAnd" } }
167+
class BitOr extends BinaryBitwiseOpcode, TBitOr { override final string toString() { result = "BitOr" } }
168+
class BitXor extends BinaryBitwiseOpcode, TBitXor { override final string toString() { result = "BitXor" } }
169+
class BitComplement extends UnaryBitwiseOpcode, TBitComplement { override final string toString() { result = "BitComplement" } }
158170
class LogicalNot extends UnaryOpcode, TLogicalNot { override final string toString() { result = "LogicalNot" } }
159171
class CompareEQ extends CompareOpcode, TCompareEQ { override final string toString() { result = "CompareEQ" } }
160172
class CompareNE extends CompareOpcode, TCompareNE { override final string toString() { result = "CompareNE" } }

cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/Instruction.qll

Lines changed: 32 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -898,67 +898,87 @@ class BinaryInstruction extends Instruction {
898898
}
899899
}
900900

901-
class AddInstruction extends BinaryInstruction {
901+
class ArithmeticInstruction extends Instruction {
902+
ArithmeticInstruction() {
903+
getOpcode() instanceof ArithmeticOpcode
904+
}
905+
}
906+
907+
class BinaryArithmeticInstruction extends ArithmeticInstruction, BinaryInstruction {}
908+
909+
class UnaryArithmeticInstruction extends ArithmeticInstruction, UnaryInstruction {}
910+
911+
class AddInstruction extends BinaryArithmeticInstruction {
902912
AddInstruction() {
903913
getOpcode() instanceof Opcode::Add
904914
}
905915
}
906916

907-
class SubInstruction extends BinaryInstruction {
917+
class SubInstruction extends BinaryArithmeticInstruction {
908918
SubInstruction() {
909919
getOpcode() instanceof Opcode::Sub
910920
}
911921
}
912922

913-
class MulInstruction extends BinaryInstruction {
923+
class MulInstruction extends BinaryArithmeticInstruction {
914924
MulInstruction() {
915925
getOpcode() instanceof Opcode::Mul
916926
}
917927
}
918928

919-
class DivInstruction extends BinaryInstruction {
929+
class DivInstruction extends BinaryArithmeticInstruction {
920930
DivInstruction() {
921931
getOpcode() instanceof Opcode::Div
922932
}
923933
}
924934

925-
class RemInstruction extends BinaryInstruction {
935+
class RemInstruction extends BinaryArithmeticInstruction {
926936
RemInstruction() {
927937
getOpcode() instanceof Opcode::Rem
928938
}
929939
}
930940

931-
class NegateInstruction extends UnaryInstruction {
941+
class NegateInstruction extends UnaryArithmeticInstruction {
932942
NegateInstruction() {
933943
getOpcode() instanceof Opcode::Negate
934944
}
935945
}
936946

937-
class BitAndInstruction extends BinaryInstruction {
947+
class BitwiseInstruction extends Instruction {
948+
BitwiseInstruction() {
949+
getOpcode() instanceof BitwiseOpcode
950+
}
951+
}
952+
953+
class BinaryBitwiseInstruction extends BitwiseInstruction, BinaryInstruction {}
954+
955+
class UnaryBitwiseInstruction extends BitwiseInstruction, UnaryInstruction {}
956+
957+
class BitAndInstruction extends BinaryBitwiseInstruction {
938958
BitAndInstruction() {
939959
getOpcode() instanceof Opcode::BitAnd
940960
}
941961
}
942962

943-
class BitOrInstruction extends BinaryInstruction {
963+
class BitOrInstruction extends BinaryBitwiseInstruction {
944964
BitOrInstruction() {
945965
getOpcode() instanceof Opcode::BitOr
946966
}
947967
}
948968

949-
class BitXorInstruction extends BinaryInstruction {
969+
class BitXorInstruction extends BinaryBitwiseInstruction {
950970
BitXorInstruction() {
951971
getOpcode() instanceof Opcode::BitXor
952972
}
953973
}
954974

955-
class ShiftLeftInstruction extends BinaryInstruction {
975+
class ShiftLeftInstruction extends BinaryBitwiseInstruction {
956976
ShiftLeftInstruction() {
957977
getOpcode() instanceof Opcode::ShiftLeft
958978
}
959979
}
960980

961-
class ShiftRightInstruction extends BinaryInstruction {
981+
class ShiftRightInstruction extends BinaryBitwiseInstruction {
962982
ShiftRightInstruction() {
963983
getOpcode() instanceof Opcode::ShiftRight
964984
}
@@ -1097,7 +1117,7 @@ class ConvertToDerivedInstruction extends InheritanceConversionInstruction {
10971117
}
10981118
}
10991119

1100-
class BitComplementInstruction extends UnaryInstruction {
1120+
class BitComplementInstruction extends UnaryBitwiseInstruction {
11011121
BitComplementInstruction() {
11021122
getOpcode() instanceof Opcode::BitComplement
11031123
}

0 commit comments

Comments
 (0)