|
8 | 8 | import sys |
9 | 9 | import threading |
10 | 10 | import time |
| 11 | +from collections import namedtuple |
| 12 | + |
| 13 | +try: |
| 14 | + # Python 3 |
| 15 | + from urllib.parse import parse_qs |
| 16 | + from urllib.parse import unquote |
| 17 | + from urllib.parse import urlencode |
| 18 | + from urllib.parse import urlsplit |
| 19 | + from urllib.parse import urlunsplit |
| 20 | + |
| 21 | +except ImportError: |
| 22 | + # Python 2 |
| 23 | + from cgi import parse_qs # type: ignore |
| 24 | + from urllib import unquote # type: ignore |
| 25 | + from urllib import urlencode # type: ignore |
| 26 | + from urlparse import urlsplit # type: ignore |
| 27 | + from urlparse import urlunsplit # type: ignore |
| 28 | + |
| 29 | + |
11 | 30 | from datetime import datetime |
12 | 31 | from functools import partial |
13 | 32 |
|
|
43 | 62 |
|
# Naive datetime (no tzinfo attached) for 1970-01-01 00:00:00.
epoch = datetime(1970, 1, 1)

# The logger is created here but initialized in the debug support module
logger = logging.getLogger("sentry_sdk.errors")

# NOTE(review): presumably the length cap applied when long strings are
# trimmed -- confirm against the callers, which are not visible here.
MAX_STRING_LENGTH = 1024
# Matches strings consisting solely of base64 alphabet characters (ASCII
# letters, digits, "/", "+", "="). The empty string matches as well.
BASE64_ALPHABET = re.compile(r"^[a-zA-Z0-9/+=]*$")

# Placeholder written in place of a value that was removed because it is
# considered sensitive (URL credentials, query parameter values, ...).
SENSITIVE_DATA_SUBSTITUTE = "[Filtered]"
| 72 | + |
53 | 73 |
|
54 | 74 | def json_dumps(data): |
55 | 75 | # type: (Any) -> bytes |
@@ -374,8 +394,6 @@ def removed_because_over_size_limit(cls): |
374 | 394 | def substituted_because_contains_sensitive_data(cls): |
375 | 395 | # type: () -> AnnotatedValue |
376 | 396 | """The actual value was removed because it contained sensitive information.""" |
377 | | - from sentry_sdk.consts import SENSITIVE_DATA_SUBSTITUTE |
378 | | - |
379 | 397 | return AnnotatedValue( |
380 | 398 | value=SENSITIVE_DATA_SUBSTITUTE, |
381 | 399 | metadata={ |
@@ -1163,6 +1181,79 @@ def from_base64(base64_string): |
1163 | 1181 | return utf8_string |
1164 | 1182 |
|
1165 | 1183 |
|
Components = namedtuple("Components", ["scheme", "netloc", "path", "query", "fragment"])


def _sanitize_url_components(url, remove_authority, remove_query_values):
    # type: (str, bool, bool) -> Components
    """
    Splits the URL once and returns its components with the sensitive parts
    replaced by SENSITIVE_DATA_SUBSTITUTE.

    Shared by sanitize_url() and parse_url() so that a sanitized URL never has
    to be fed back into urlsplit(): the placeholder contains square brackets,
    which stricter urlsplit() implementations treat as an IP literal in the
    netloc and reject with a ValueError.
    """
    parsed_url = urlsplit(url)

    # strip username:password (netloc can be usr:pwd@example.com)
    netloc = parsed_url.netloc
    if remove_authority:
        # Everything after the last "@" is host[:port]; whatever precedes it
        # is user info and gets replaced wholesale.
        _userinfo, at_sign, host = netloc.rpartition("@")
        if at_sign:
            netloc = "%s:%s@%s" % (
                SENSITIVE_DATA_SUBSTITUTE,
                SENSITIVE_DATA_SUBSTITUTE,
                host,
            )

    # strip values from query string
    if remove_query_values:
        # Only parse the query when it is actually going to be rewritten.
        query_params = parse_qs(parsed_url.query, keep_blank_values=True)
        # urlencode() percent-encodes the brackets of the placeholder;
        # unquote() turns them back into a readable "[Filtered]".
        query_string = unquote(
            urlencode({key: SENSITIVE_DATA_SUBSTITUTE for key in query_params})
        )
    else:
        query_string = parsed_url.query

    return Components(
        scheme=parsed_url.scheme,
        netloc=netloc,
        path=parsed_url.path,
        query=query_string,
        fragment=parsed_url.fragment,
    )


def sanitize_url(url, remove_authority=True, remove_query_values=True):
    # type: (str, bool, bool) -> str
    """
    Removes the authority and query parameter values from a given URL.

    :param url: The URL to sanitize.
    :param remove_authority: If True, a ``user:password@`` prefix in the
        network location is replaced by placeholders.
    :param remove_query_values: If True, every query parameter value is
        replaced by a placeholder (the keys are kept); otherwise the query
        string is passed through unchanged.
    :returns: The re-assembled, sanitized URL.
    """
    return urlunsplit(
        _sanitize_url_components(
            url,
            remove_authority=remove_authority,
            remove_query_values=remove_query_values,
        )
    )


ParsedUrl = namedtuple("ParsedUrl", ["url", "query", "fragment"])


def parse_url(url, sanitize=True):
    # type: (str, bool) -> ParsedUrl
    """
    Splits a URL into a url (including path), query and fragment. If sanitize is True, the query
    parameters will be sanitized to remove sensitive data. The authority (username and password)
    in the URL will always be removed.

    :param url: The URL to split.
    :param sanitize: If True, query parameter values are replaced by a placeholder.
    :returns: A ParsedUrl whose ``url`` holds scheme, network location and
        path, and whose ``query`` and ``fragment`` hold the remaining parts.
    """
    # Work on the components of a single split instead of re-parsing the
    # sanitized string: its "[Filtered]" placeholders are not valid netloc
    # content for every urlsplit() implementation.
    components = _sanitize_url_components(
        url, remove_authority=True, remove_query_values=sanitize
    )

    base_url = urlunsplit(
        Components(
            scheme=components.scheme,
            netloc=components.netloc,
            query="",
            path=components.path,
            fragment="",
        )
    )

    return ParsedUrl(
        url=base_url, query=components.query, fragment=components.fragment
    )
| 1255 | + |
| 1256 | + |
1166 | 1257 | if PY37: |
1167 | 1258 |
|
1168 | 1259 | def nanosecond_time(): |
|
0 commit comments