-
Notifications
You must be signed in to change notification settings - Fork 8
Expand file tree
/
Copy pathpytor.py
More file actions
192 lines (164 loc) · 6.8 KB
/
pytor.py
File metadata and controls
192 lines (164 loc) · 6.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
import time
import random
import copy
import logging as log
from urllib.parse import urlparse
from stem import Signal
from stem.control import Controller
import requests
from requests.exceptions import ConnectionError
from behaviour.base import BehaviourInterface
from behaviour.noProxy import NoProxyBehaviour
from behaviour.tor import TorBehaviour
"""
REQUIREMENTS:
requests > 2.10.0
pysocks
"""
class Pytor(object):
    """
    Wrapper around ``requests`` that sends HTTP calls through a pluggable
    proxy "behaviour" object and retries responses that fail validation.

    The HTTP verbs (``get``, ``post``, ...) are not defined explicitly: they
    are resolved by ``__getattr__`` and routed through ``checkAndRetry``.
    """

    # Static properties
    # domain -> time.time() of the last polite request; stored on the class so
    # that every instance shares the same bookkeeping.
    lastDomains = {}

    # Attribute names that ``__getattr__`` turns into request handlers.
    _HTTP_METHODS = frozenset(
        ('get', 'post', 'put', 'patch', 'delete', 'head', 'options')
    )

    # (connection timeout, read timeout) in seconds.
    _DEFAULT_TIMEOUT = (60, 60)

    def __init__(self, sessionEnabled=False, behaviour: "BehaviourInterface" = None):
        """
        :param sessionEnabled: when true, requests go through one shared
            ``requests.Session`` instead of the module-level functions.
        :param behaviour: object providing ``getProxy()``, ``countRequest()``
            and ``changeIdentity()``; defaults to ``NoProxyBehaviour()``.
        """
        self.showIp = False
        self.silent = False
        self.sessionEnabled = sessionEnabled
        self.session = requests.Session() if sessionEnabled else None
        self.behaviour = behaviour if behaviour is not None else NoProxyBehaviour()
        self.maxRetry = 10
        self.invalidStringList = []
        self.minSourceLength = 150
        self.encoding = ''  # if is empty, request will guess the encoding
        self.headers = {
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Encoding': 'gzip,deflate',
            'Accept-Language': 'en-US,en;q=0.8,ro;q=0.6',
            'Cache-Control': 'max-age=0',
            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36'
        }
        self.useProxy = True
        # Polite requests
        self.politeRequest = False
        self.politeRequestsBreak = 1  # number of seconds

    def getDomain(self, url):
        """Return the network location (host[:port]) part of ``url``."""
        return urlparse(url).netloc

    def getIp(self):
        """
        Return the raw body of https://httpbin.org/ip fetched through the
        current behaviour's proxy.

        There are multiple options for IP checking:
        "https://api.ipify.org/"
        "https://httpbin.org/ip"
        """
        r = requests.get(
            'https://httpbin.org/ip',
            proxies=self.behaviour.getProxy(),
            headers=self.headers,
            # Without a timeout this call could block forever.
            timeout=self._DEFAULT_TIMEOUT,
        )
        return r.text

    def displayIp(self):
        """Log the public IP when ``showIp`` is set; failures are only logged."""
        if not self.showIp:
            return
        try:
            log.info('Public IP address %s', self.getIp())
        except Exception:
            log.exception("Exception on procuring IP from API")

    def checkValidity(self, response):
        """
        Decide whether ``response`` looks like a usable page.

        :param response: object exposing ``text`` and ``status_code``.
        :return: ``(valid, reasons)``; ``reasons`` is a list of dicts with
            ``type`` (``min_source``, ``internal``, ``status`` or
            ``keywords``) and a human readable ``message``.
        """
        source = response.text
        loweredSource = source.lower()  # computed once for all keyword checks
        reasons = []

        if self.minSourceLength != 0 and len(source) < self.minSourceLength:
            reasons.append({
                'type': 'min_source',
                'message': 'source length is less than {} chars'.format(self.minSourceLength)
            })
        if 'privoxy' in loweredSource:
            reasons.append({
                'type': 'internal',
                'message': '"privoxy" word was found in source. Please check privoxy'
            })
        if response.status_code == 403:
            reasons.append({
                'type': 'status',
                'message': '{} status code response'.format(response.status_code)
            })
        for invalidString in self.invalidStringList:
            if invalidString.lower() in loweredSource:
                reasons.append({
                    'type': 'keywords',
                    'message': 'keyword "{}" was found'.format(invalidString)
                })
        # The response is valid exactly when no check produced a reason.
        return not reasons, reasons

    def waitForPoliteRequest(self, url):
        """
        When ``politeRequest`` is enabled, block until at least
        ``politeRequestsBreak`` seconds have passed since the last recorded
        request to the domain of ``url``, then record the current time.
        """
        if not self.politeRequest:
            return
        domain = self.getDomain(url)
        lastDomains = type(self).lastDomains
        while domain in lastDomains:
            remaining = self.politeRequestsBreak - (time.time() - lastDomains[domain])
            if remaining <= 0:
                break
            log.info("Let's take a break for domain: {}".format(domain))
            # Sleep only for what is left of the break; the cap protects
            # against a system clock that jumped backwards.
            time.sleep(min(remaining, self.politeRequestsBreak))
        lastDomains[domain] = time.time()

    def checkAndRetry(self, requestsMethod, url, **kwargs):
        """
        Call ``requestsMethod(url, **kwargs)`` up to ``maxRetry`` times until
        ``checkValidity`` accepts the response.

        :param requestsMethod: callable such as ``requests.get``.
        :param url: target URL.
        :param kwargs: forwarded to ``requestsMethod``; ``proxies`` is replaced
            by the behaviour's proxy when ``useProxy`` is set and the
            behaviour returns a non-empty proxy.
        :return: the last response received (it may still be invalid), or
            ``None`` if every attempt raised.
        """
        self.waitForPoliteRequest(url)
        response = None
        for retry in range(1, self.maxRetry + 1):
            try:
                if self.useProxy:
                    # Queried on every attempt so that an identity change made
                    # by a previous attempt is picked up.
                    proxy = self.behaviour.getProxy()
                    if proxy:
                        kwargs["proxies"] = proxy
                response = requestsMethod(url, **kwargs)
                self.behaviour.countRequest()
                # set encoding if exists
                if self.encoding:
                    response.encoding = self.encoding
                valid, reasons = self.checkValidity(response)
                if valid:
                    break  # success exit
                self._handleInvalidResponse(retry, reasons)
            except ConnectionError:
                log.exception('Connexion exception')
            except Exception:
                log.exception('Exception at request send')
        return response

    def _handleInvalidResponse(self, retry, reasons):
        """Log the rejection reasons and react to them before the next retry."""
        for reason in reasons:
            log.warning('Retry {}/{} reason: {}'.format(retry, self.maxRetry, reason['message']))
            if reason['type'] == 'internal':
                # Wait, then keep going through the remaining reasons.
                time.sleep(6)
            else:
                # Any other reason: ask the behaviour for a new identity and
                # stop processing further reasons.
                self.behaviour.changeIdentity()
                break

    def __getattr__(self, name):
        """
        Catch the requests methods GET, POST, PUT, PATCH, DELETE, HEAD, OPTIONS.

        Only called when normal attribute lookup fails. For an HTTP verb it
        returns a function ``handler(url, **kwargs)`` that sends the request
        through ``checkAndRetry``; for any other name it raises
        ``AttributeError`` so that ``hasattr`` and protocol probes such as
        ``__deepcopy__`` behave normally.

        The handler accepts an extra ``encoding`` keyword which is stored on
        ``self.encoding`` (and therefore also applies to later requests).
        """
        if name not in self._HTTP_METHODS:
            raise AttributeError(
                "'{}' object has no attribute '{}'".format(type(self).__name__, name)
            )

        def handlerFunction(*args, **kwargs):
            requestsMethod = getattr(self.session, name) if self.sessionEnabled else getattr(requests, name)
            if args:
                url = args[0]
            elif 'url' in kwargs:
                url = kwargs.pop('url')
            else:
                raise TypeError("{}() missing required argument: 'url'".format(name))
            # force encoding from requests
            if 'encoding' in kwargs:
                self.encoding = kwargs.pop('encoding')
            # Proxies are deliberately not set here: checkAndRetry injects
            # them, and only when ``useProxy`` is enabled.
            params = {
                "headers": copy.deepcopy(self.headers),
                "timeout": self._DEFAULT_TIMEOUT  # connection and read timeout
            }
            params.update(kwargs)
            # Send and validate initial request
            return self.checkAndRetry(requestsMethod, url, **params)

        return handlerFunction
# Module-level instance configured with TorBehaviour as its proxy behaviour.
pytor = Pytor(behaviour=TorBehaviour())

if __name__ == '__main__':
    # Manual smoke test against httpbin.
    # NOTE(review): nothing visible in this file reads ``pytor.password`` -
    # confirm whether it should be set on the behaviour object instead.
    pytor.password = 'test'

    # A request may yield None when every retry raised, so guard ``.text``.
    response = pytor.post("http://httpbin.org/post", data={"test": "test"})
    print(response.text if response is not None else None, flush=True)

    # ``Pytor`` has no ``newId`` method; the identity switch lives on the
    # behaviour object (the same call the retry logic uses).
    pytor.behaviour.changeIdentity()

    for _ in range(10):
        response = pytor.get("http://httpbin.org/get")
        print(response.text if response is not None else None, flush=True)