@@ -93,8 +93,6 @@ class RegExpRoot extends RegExpTerm {
9393 * Holds if this root term is relevant to the ReDoS analysis.
9494 */
9595 predicate isRelevant ( ) {
96- // there is at least one repetition
97- getRoot ( any ( InfiniteRepetitionQuantifier q ) ) = this and
9896 // is actually used as a RegExp
9997 this .isUsedAsRegExp ( ) and
10098 // not excluded for library specific reasons
@@ -877,6 +875,101 @@ predicate isStartState(State state) {
877875 */
878876signature predicate isCandidateSig ( State state , string pump ) ;
879877
878+ /**
879+ * Holds if `state` is a candidate for ReDoS.
880+ */
881+ signature predicate isCandidateSig ( State state ) ;
882+
883+ /**
884+ * Predicates for constructing a prefix string that leads to a given state.
885+ */
886+ module PrefixConstruction< isCandidateSig / 1 isCandidate> {
887+ /**
888+ * Holds if `state` is the textually last start state for the regular expression.
889+ */
890+ private predicate lastStartState ( State state ) {
891+ exists ( RegExpRoot root |
892+ state =
893+ max ( State s , Location l |
894+ s = stateInRelevantRegexp ( ) and
895+ isStartState ( s ) and
896+ getRoot ( s .getRepr ( ) ) = root and
897+ l = s .getRepr ( ) .getLocation ( )
898+ |
899+ s
900+ order by
901+ l .getStartLine ( ) , l .getStartColumn ( ) , s .getRepr ( ) .toString ( ) , l .getEndColumn ( ) ,
902+ l .getEndLine ( )
903+ )
904+ )
905+ }
906+
907+ /**
908+ * Holds if there exists any transition (Epsilon() or other) from `a` to `b`.
909+ */
910+ private predicate existsTransition ( State a , State b ) { delta ( a , _, b ) }
911+
912+ /**
913+ * Gets the minimum number of transitions it takes to reach `state` from the `start` state.
914+ */
915+ int prefixLength ( State start , State state ) =
916+ shortestDistances( lastStartState / 1 , existsTransition / 2 ) ( start , state , result )
917+
918+ /**
919+ * Gets the minimum number of transitions it takes to reach `state` from the start state.
920+ */
921+ private int lengthFromStart ( State state ) { result = prefixLength ( _, state ) }
922+
923+ /**
924+ * Gets a string for which the regular expression will reach `state`.
925+ *
926+ * Has at most one result for any given `state`.
927+ * This predicate will not always have a result even if there is a ReDoS issue in
928+ * the regular expression.
929+ */
930+ string prefix ( State state ) {
931+ lastStartState ( state ) and
932+ result = ""
933+ or
934+ // the search stops past the last redos candidate state.
935+ lengthFromStart ( state ) <= max ( lengthFromStart ( any ( State s | isCandidate ( s ) ) ) ) and
936+ exists ( State prev |
937+ // select a unique predecessor (by an arbitrary measure)
938+ prev =
939+ min ( State s , Location loc |
940+ lengthFromStart ( s ) = lengthFromStart ( state ) - 1 and
941+ loc = s .getRepr ( ) .getLocation ( ) and
942+ delta ( s , _, state )
943+ |
944+ s
945+ order by
946+ loc .getStartLine ( ) , loc .getStartColumn ( ) , loc .getEndLine ( ) , loc .getEndColumn ( ) ,
947+ s .getRepr ( ) .toString ( )
948+ )
949+ |
950+ // greedy search for the shortest prefix
951+ result = prefix ( prev ) and delta ( prev , Epsilon ( ) , state )
952+ or
953+ not delta ( prev , Epsilon ( ) , state ) and
954+ result = prefix ( prev ) + getCanonicalEdgeChar ( prev , state )
955+ )
956+ }
957+
958+ /**
959+ * Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA.
960+ */
961+ private string getCanonicalEdgeChar ( State prev , State next ) {
962+ result =
963+ min ( string c | delta ( prev , any ( InputSymbol symbol | c = intersect ( Any ( ) , symbol ) ) , next ) )
964+ }
965+
966+ /** Gets a state within a regular expression that contains a candidate state. */
967+ pragma [ noinline]
968+ State stateInRelevantRegexp ( ) {
969+ exists ( State s | isCandidate ( s ) | getRoot ( s .getRepr ( ) ) = getRoot ( result .getRepr ( ) ) )
970+ }
971+ }
972+
880973/**
881974 * A module for pruning candidate ReDoS states.
882975 * The candidates are specified by the `isCandidate` signature predicate.
@@ -910,95 +1003,9 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
9101003 /** Gets a state that can reach the `accept-any` state using only epsilon steps. */
9111004 private State acceptsAnySuffix ( ) { epsilonSucc * ( result ) = AcceptAnySuffix ( _) }
9121005
913- /**
914- * Predicates for constructing a prefix string that leads to a given state.
915- */
916- private module PrefixConstruction {
917- /**
918- * Holds if `state` is the textually last start state for the regular expression.
919- */
920- private predicate lastStartState ( State state ) {
921- exists ( RegExpRoot root |
922- state =
923- max ( State s , Location l |
924- s = stateInPumpableRegexp ( ) and
925- isStartState ( s ) and
926- getRoot ( s .getRepr ( ) ) = root and
927- l = s .getRepr ( ) .getLocation ( )
928- |
929- s
930- order by
931- l .getStartLine ( ) , l .getStartColumn ( ) , s .getRepr ( ) .toString ( ) , l .getEndColumn ( ) ,
932- l .getEndLine ( )
933- )
934- )
935- }
936-
937- /**
938- * Holds if there exists any transition (Epsilon() or other) from `a` to `b`.
939- */
940- private predicate existsTransition ( State a , State b ) { delta ( a , _, b ) }
941-
942- /**
943- * Gets the minimum number of transitions it takes to reach `state` from the `start` state.
944- */
945- int prefixLength ( State start , State state ) =
946- shortestDistances( lastStartState / 1 , existsTransition / 2 ) ( start , state , result )
947-
948- /**
949- * Gets the minimum number of transitions it takes to reach `state` from the start state.
950- */
951- private int lengthFromStart ( State state ) { result = prefixLength ( _, state ) }
952-
953- /**
954- * Gets a string for which the regular expression will reach `state`.
955- *
956- * Has at most one result for any given `state`.
957- * This predicate will not always have a result even if there is a ReDoS issue in
958- * the regular expression.
959- */
960- string prefix ( State state ) {
961- lastStartState ( state ) and
962- result = ""
963- or
964- // the search stops past the last redos candidate state.
965- lengthFromStart ( state ) <= max ( lengthFromStart ( any ( State s | isReDoSCandidate ( s , _) ) ) ) and
966- exists ( State prev |
967- // select a unique predecessor (by an arbitrary measure)
968- prev =
969- min ( State s , Location loc |
970- lengthFromStart ( s ) = lengthFromStart ( state ) - 1 and
971- loc = s .getRepr ( ) .getLocation ( ) and
972- delta ( s , _, state )
973- |
974- s
975- order by
976- loc .getStartLine ( ) , loc .getStartColumn ( ) , loc .getEndLine ( ) , loc .getEndColumn ( ) ,
977- s .getRepr ( ) .toString ( )
978- )
979- |
980- // greedy search for the shortest prefix
981- result = prefix ( prev ) and delta ( prev , Epsilon ( ) , state )
982- or
983- not delta ( prev , Epsilon ( ) , state ) and
984- result = prefix ( prev ) + getCanonicalEdgeChar ( prev , state )
985- )
986- }
1006+ predicate isCandidateState ( State s ) { isReDoSCandidate ( s , _) }
9871007
988- /**
989- * Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA.
990- */
991- private string getCanonicalEdgeChar ( State prev , State next ) {
992- result =
993- min ( string c | delta ( prev , any ( InputSymbol symbol | c = intersect ( Any ( ) , symbol ) ) , next ) )
994- }
995-
996- /** Gets a state within a regular expression that has a pumpable state. */
997- pragma [ noinline]
998- State stateInPumpableRegexp ( ) {
999- exists ( State s | isReDoSCandidate ( s , _) | getRoot ( s .getRepr ( ) ) = getRoot ( result .getRepr ( ) ) )
1000- }
1001- }
1008+ import PrefixConstruction< isCandidateState / 1 > as Prefix
10021009
10031010 /**
10041011 * Predicates for testing the presence of a rejecting suffix.
@@ -1018,8 +1025,6 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
10181025 * using epsilon transitions. But any attempt at repeating `w` will end in a state that accepts all suffixes.
10191026 */
10201027 private module SuffixConstruction {
1021- import PrefixConstruction
1022-
10231028 /**
10241029 * Holds if all states reachable from `fork` by repeating `w`
10251030 * are likely rejectable by appending some suffix.
@@ -1036,7 +1041,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
10361041 */
10371042 pragma [ noinline]
10381043 private predicate isLikelyRejectable ( State s ) {
1039- s = stateInPumpableRegexp ( ) and
1044+ s = Prefix :: stateInRelevantRegexp ( ) and
10401045 (
10411046 // exists a reject edge with some char.
10421047 hasRejectEdge ( s )
@@ -1052,15 +1057,15 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
10521057 * Holds if `s` is not an accept state, and there is no epsilon transition to an accept state.
10531058 */
10541059 predicate isRejectState ( State s ) {
1055- s = stateInPumpableRegexp ( ) and not epsilonSucc * ( s ) = Accept ( _)
1060+ s = Prefix :: stateInRelevantRegexp ( ) and not epsilonSucc * ( s ) = Accept ( _)
10561061 }
10571062
10581063 /**
10591064 * Holds if there is likely a non-empty suffix leading to rejection starting in `s`.
10601065 */
10611066 pragma [ noopt]
10621067 predicate hasEdgeToLikelyRejectable ( State s ) {
1063- s = stateInPumpableRegexp ( ) and
1068+ s = Prefix :: stateInRelevantRegexp ( ) and
10641069 // all edges (at least one) with some char leads to another state that is rejectable.
10651070 // the `next` states might not share a common suffix, which can cause FPs.
10661071 exists ( string char | char = hasEdgeToLikelyRejectableHelper ( s ) |
@@ -1076,7 +1081,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
10761081 */
10771082 pragma [ noinline]
10781083 private string hasEdgeToLikelyRejectableHelper ( State s ) {
1079- s = stateInPumpableRegexp ( ) and
1084+ s = Prefix :: stateInRelevantRegexp ( ) and
10801085 not hasRejectEdge ( s ) and
10811086 not isRejectState ( s ) and
10821087 deltaClosedChar ( s , result , _)
@@ -1088,8 +1093,8 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
10881093 * `prev` to `next` that the character symbol `char`.
10891094 */
10901095 predicate deltaClosedChar ( State prev , string char , State next ) {
1091- prev = stateInPumpableRegexp ( ) and
1092- next = stateInPumpableRegexp ( ) and
1096+ prev = Prefix :: stateInRelevantRegexp ( ) and
1097+ next = Prefix :: stateInRelevantRegexp ( ) and
10931098 deltaClosed ( prev , getAnInputSymbolMatchingRelevant ( char ) , next )
10941099 }
10951100
@@ -1099,18 +1104,28 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
10991104 result = getAnInputSymbolMatching ( char )
11001105 }
11011106
1107+ pragma [ noinline]
1108+ RegExpRoot relevantRoot ( ) {
1109+ exists ( RegExpTerm term , State s |
1110+ s .getRepr ( ) = term and isCandidateState ( s ) and result = term .getRootTerm ( )
1111+ )
1112+ }
1113+
11021114 /**
11031115 * Gets a char used for finding possible suffixes inside `root`.
11041116 */
11051117 pragma [ noinline]
11061118 private string relevant ( RegExpRoot root ) {
1107- exists ( ascii ( result ) ) and exists ( root )
1108- or
1109- exists ( InputSymbol s | belongsTo ( s , root ) | result = intersect ( s , _) )
1110- or
1111- // The characters from `hasSimpleRejectEdge`. Only `\n` is really needed (as `\n` is not in the `ascii` relation).
1112- // The three chars must be kept in sync with `hasSimpleRejectEdge`.
1113- result = [ "|" , "\n" , "Z" ] and exists ( root )
1119+ root = relevantRoot ( ) and
1120+ (
1121+ exists ( ascii ( result ) ) and exists ( root )
1122+ or
1123+ exists ( InputSymbol s | belongsTo ( s , root ) | result = intersect ( s , _) )
1124+ or
1125+ // The characters from `hasSimpleRejectEdge`. Only `\n` is really needed (as `\n` is not in the `ascii` relation).
1126+ // The three chars must be kept in sync with `hasSimpleRejectEdge`.
1127+ result = [ "|" , "\n" , "Z" ] and exists ( root )
1128+ )
11141129 }
11151130
11161131 /**
@@ -1208,12 +1223,12 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
12081223 predicate hasReDoSResult ( RegExpTerm t , string pump , State s , string prefixMsg ) {
12091224 isReDoSAttackable ( t , pump , s ) and
12101225 (
1211- prefixMsg = "starting with '" + escape ( PrefixConstruction :: prefix ( s ) ) + "' and " and
1212- not PrefixConstruction :: prefix ( s ) = ""
1226+ prefixMsg = "starting with '" + escape ( Prefix :: prefix ( s ) ) + "' and " and
1227+ not Prefix :: prefix ( s ) = ""
12131228 or
1214- PrefixConstruction :: prefix ( s ) = "" and prefixMsg = ""
1229+ Prefix :: prefix ( s ) = "" and prefixMsg = ""
12151230 or
1216- not exists ( PrefixConstruction :: prefix ( s ) ) and prefixMsg = ""
1231+ not exists ( Prefix :: prefix ( s ) ) and prefixMsg = ""
12171232 )
12181233 }
12191234
0 commit comments