-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgatherFw.py
More file actions
60 lines (54 loc) · 1.54 KB
/
gatherFw.py
File metadata and controls
60 lines (54 loc) · 1.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import sys, re, pymongo, json
import requests
# Holds the most recently decoded API response; rebound for every page that
# the download loop further down in this file fetches.
jsonDict = dict()

# MongoDB client for the storage host, and the 'bitbucket' database on it.
client = pymongo.MongoClient(host="da0.eecs.utk.edu")
db = client['bitbucket']

# Name of the per-user listing to collect (also used as the collection name
# and as the stem of the '.todo' input file). The first command-line argument
# overrides the default; run again for forks, watchers, commits, clone and
# pullrequests.
collName = sys.argv[1] if len(sys.argv) > 1 else 'followers'
def chunks(l, n):
    """Split sequence *l* into consecutive slices of at most *n* items.

    Args:
        l: A sliceable sequence (anything supporting ``len()`` and slicing).
        n: Desired slice length. Values below 1 are treated as 1, and
            non-integer values are truncated toward zero, so a size that was
            computed with true division is still accepted.

    Returns:
        A list of slices of *l*, in order. Every slice holds *n* items except
        possibly the last one. An empty *l* yields an empty list.
    """
    # range() and slicing both require an integer step; clamp to at least 1
    # so a zero or negative size cannot produce an invalid range.
    step = max(1, int(n))
    return [l[start:start + step] for start in range(0, len(l), step)]
# Read user names from '<collName>.todo' (one per line), download every page
# of each user's <collName> listing from the Bitbucket 2.0 API, and store the
# accumulated records in the MongoDB collection named <collName>.
#
# For every page fetched a progress line "<records>;<size>;<url>" is printed.
# A failure while fetching a user prints "Could not get:" plus the URL and the
# error, and whatever was collected for that user so far is still stored.

# MongoDB rejects documents larger than 16 MiB. The size tracked below comes
# from sys.getsizeof(), which is shallow and does not reflect the BSON size,
# so only a third of the limit is budgeted per stored document.
_MAX_BSON_BYTES = 16777216
_DOC_BUDGET = _MAX_BSON_BYTES // 3

# Seconds to wait on the HTTP request; without a timeout requests.get() can
# block indefinitely on a stalled connection.
_HTTP_TIMEOUT = 60

coll1 = db[collName]

with open(collName + '.todo') as f:
    for line in f:
        user = line.rstrip()
        if not user:
            # A blank line would build ".../users//<collName>"; skip it.
            continue

        url = "https://bitbucket.org/api/2.0/users/" + user + "/" + collName
        url1 = url + "/?pagelen=100"
        v = []      # all records collected for this user, across pages
        size = 0    # running (shallow) size estimate of the records in v

        while True:
            try:
                r = requests.get(url1, timeout=_HTTP_TIMEOUT)
                jsonDict = json.loads(r.text)
                if 'values' in jsonDict:
                    for el in jsonDict['values']:
                        v.append(el)
                        size += sys.getsizeof(el)

                # The 'next' check is deliberately outside the 'values' check:
                # a response without 'values' must still end the paging loop.
                last_page = 'next' not in jsonDict
                if not last_page:
                    url1 = jsonDict['next']

                # Progress line: on the last page url1 is the page just read,
                # otherwise it is the page that will be requested next.
                print(str(len(v)) + ';' + str(size) + ';' + url1)
                sys.stdout.flush()
                if last_page:
                    break
            except Exception as e:
                # Best effort: report the problem and stop paging this user
                # rather than aborting the whole run.
                print("Could not get:")
                print(url)
                print(e)
                sys.stdout.flush()
                break

        if not v:
            continue

        # NOTE(review): Collection.insert() is deprecated since PyMongo 3.0 and
        # removed in 4.0; switch to insert_one() when the driver is upgraded.
        if size < _DOC_BUDGET:
            coll1.insert({'url': url, 'values': v})
        else:
            # Work out how many documents are needed for each one to stay
            # under the budget, then how many records go into each document
            # (ceiling division). chunks() takes records-per-chunk, not the
            # number of chunks.
            n_docs = size // _DOC_BUDGET + 1
            per_doc = max(1, -(-len(v) // n_docs))
            for i, ch in enumerate(chunks(v, per_doc)):
                coll1.insert({'chunk': i, 'url': url, 'values': ch})