-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgatherMetaData.py
More file actions
39 lines (32 loc) · 913 Bytes
/
gatherMetaData.py
File metadata and controls
39 lines (32 loc) · 913 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import re, pymongo, json
import requests
# Crawl the paginated Bitbucket repository listing and store every repository
# record found in the 'repos' collection of the 'bitbucket' MongoDB database.
jsonDict = {}
url = 'https://api.bitbucket.org/2.0/repositories/?pagelen=100'
client = pymongo.MongoClient(host="da0.eecs.utk.edu")
# Get a reference to a particular database
db = client['bitbucket']
# Reference a particular collection in the database
coll = db['repos']
while True:
    # A timeout keeps a stalled connection from hanging the crawl forever.
    r = requests.get(url, timeout=60)
    # Fail loudly on an HTTP error instead of trying to parse an error body.
    r.raise_for_status()
    jsonDict = json.loads(r.text)
    projects = jsonDict.get('values', [])
    # Collection.insert() no longer exists in PyMongo 4; insert_many() stores
    # the whole page in one round trip but rejects an empty list, so pages
    # without records are skipped.
    if projects:
        coll.insert_many(projects)
    # Each page links to the following one through 'next'; the crawl ends
    # when a page carries no such link.
    if 'next' not in jsonDict:
        break
    url = jsonDict['next']
#f = open ('divided')
#for l in f:
# ar = l .rstrip () .split(';')
# t = int (ar [0])
# n = ar[2]
# try:
# r = requests.get (url + n)
# if (r.ok):
# t = r.text
# jsonDict = json.loads (t)
# coll.insert (jsonDict)
# else:
# print l + '\n'
# except requests.exceptions.ConnectionError:
# sys.stderr.write('could not get ' + l + '\n')