forked from OpenExoplanetCatalogue/oec_meta
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgenerate.python
More file actions
executable file
·152 lines (124 loc) · 4.87 KB
/
generate.python
File metadata and controls
executable file
·152 lines (124 loc) · 4.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
#!/usr/bin/python
import xml.etree.ElementTree as ET, glob, os, sys, re, csv
import subprocess
# Pre-compiled pattern; judging by its name it is meant to strip ANSI escape
# sequences, but nothing else in this script refers to it.
# NOTE(review): because this is a raw string with doubled backslashes, the
# pattern matches a literal backslash followed by "x1b" and then any run of
# characters other than a backslash or "r" -- not the ESC control character.
# Confirm the intended pattern before relying on it.
ansi_escape = re.compile(r'\\x1b[^\\r]*')
from datetime import date
# Nicely indents the XML output
def indent(elem, level=0):
    """Pretty-print an ElementTree element in place using tab indentation.

    Whitespace-only (or missing) ``text`` and ``tail`` values are replaced so
    that every child element starts on its own line, indented with one tab
    per nesting level. Text and tails that contain non-whitespace characters
    are left untouched.

    Args:
        elem: The element to indent; it and all its descendants are modified.
        level: Nesting depth of ``elem`` (0 for the root element).
    """
    def is_blank(value):
        # True for None, "" and whitespace-only strings.
        return not value or not value.strip()

    pad = "\n" + level * "\t"
    if len(elem):
        # The first child starts one level deeper than this element.
        if is_blank(elem.text):
            elem.text = pad + "\t"
        if is_blank(elem.tail):
            elem.tail = pad
        for child in elem:
            indent(child, level + 1)
        # The recursion gave the last child a tail indented for a following
        # sibling; pull it back so this element's closing tag lines up with
        # its opening tag.
        last_child = elem[-1]
        if is_blank(last_child.tail):
            last_child.tail = pad
    elif level and is_blank(elem.tail):
        # Leaf below the root: whatever follows it starts on a new line.
        elem.tail = pad
# Loop over all files and create new data
# Running totals, filled in while the system files are processed.
totalcommits = totalplanets = totalsystems = totalbinaries = 0
totalconfirmedsystems = totalconfirmedplanets = 0
# One counter per year, from 1992 up to and including the current year.
discoveryyear = dict.fromkeys(range(1992, date.today().year + 1), 0)
# Contributor names and e-mail addresses are kept in two parallel lists;
# aliases collects the rows later written to aliases.csv.
totalcontributors, totalcontributorsemail, aliases = [], [], []
def _shell_quote(arg):
    """Return *arg* single-quoted so a POSIX shell reads it as one literal word."""
    # An apostrophe cannot appear inside single quotes, so close the quote,
    # emit an escaped apostrophe and reopen the quote.
    return "'" + arg.replace("'", "'\\''") + "'"


def _git_lines(command):
    """Run *command* inside the catalogue checkout and return its output lines."""
    pipe = os.popen("cd open_exoplanet_catalogue && " + command)
    try:
        return pipe.readlines()
    finally:
        pipe.close()


def _parse_author_count(line):
    """Split one ``uniq -c`` line of git ``Author:`` headers.

    Returns ``(commits, name, email)`` with the name title-cased, or ``None``
    when the line holds no count/author pair (for example a blank line).
    The count is matched by pattern rather than by fixed columns, so the
    width that ``uniq -c`` pads the count to does not matter.
    """
    counted = re.match(r'\s*(\d+)\s+Author:(.*)', line)
    if counted is None:
        return None
    author = counted.group(2).strip()
    email = re.search('<(.*)>', author)
    if email is None:
        return None
    name = re.search('(.*) <', author)
    return (int(counted.group(1)),
            name.group(1).title() if name else "",
            email.group(1))


def _alias_rows(element, kind, systemname):
    """Return one aliases.csv row per <name> child of *element*.

    Each row is ``[alias, system name, kind, primary name of the element]``,
    with the names encoded as UTF-8.
    """
    names = element.findall("./name")
    return [[n.text.encode('utf-8'), systemname, kind,
             names[0].text.encode('utf-8')] for n in names]


def _discovery_year(planet):
    """Return the planet's <discoveryyear> as an int, or None if absent or invalid."""
    try:
        return int(planet.findtext("./discoveryyear"))
    except (TypeError, ValueError):
        return None


# NOTE(review): this loop was reconstructed from a copy of the script that had
# lost its indentation. The nesting of the "links" block and of the
# discovery-year counting is the most plausible reading; confirm it against
# the canonical file.
for filename in glob.glob("open_exoplanet_catalogue/systems/*.xml"):
    # Path relative to the catalogue checkout; also used as the output path
    # of the generated meta file.
    metafilename = '/'.join(filename.split("/")[1:])
    with open(filename, 'rt') as f:
        root = ET.parse(f).getroot()

    metaroot = ET.Element("system")
    primaryname = root.findtext("./name")
    # UTF-8 bytes for the CSV rows; the XML element gets the text as parsed,
    # because ElementTree cannot serialise non-ASCII byte strings.
    systemname = primaryname.encode('utf-8')
    ET.SubElement(metaroot, "name").text = primaryname

    quotedname = _shell_quote(metafilename)

    # Commits per author for this file ("<count> Author: Name <email>").
    contributors = _git_lines("git log " + quotedname + " | grep '^Author:' | sort | uniq -c -i ")
    sys.stdout.write('.')
    sys.stdout.flush()
    cstag = ET.SubElement(metaroot, "contributors")
    for contributor in contributors:
        parsed = _parse_author_count(contributor)
        if parsed is None:
            continue
        commits, name, email = parsed
        ctag = ET.SubElement(cstag, "contributor")
        ctag.text = unicode(name, 'utf-8')
        ctag.attrib['email'] = email
        ctag.attrib['commits'] = "%d" % commits
        totalcommits += commits
        # A contributor is identified by e-mail address, not by name.
        if email not in totalcontributorsemail:
            totalcontributors.append(name)
            totalcontributorsemail.append(email)

    # Every URL mentioned in the file's commit log becomes a <link>.
    links = _git_lines("git --no-pager log " + quotedname + " | grep -oE '\\b(https?)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]*[-A-Za-z0-9+&@#/%=~_|]'")
    if len(links) > 0:
        astag = ET.SubElement(metaroot, "links")
        for link in set(links):
            ET.SubElement(astag, "link").text = unicode(link.strip(), 'utf-8')

    # Alias rows for the system itself, then for each star and planet.
    aliases.extend(_alias_rows(root, "system", systemname))
    for star in root.findall(".//star"):
        aliases.extend(_alias_rows(star, "star", systemname))

    confirmedsystem = False
    for planet in root.findall(".//planet"):
        aliases.extend(_alias_rows(planet, "planet", systemname))
        totalplanets += 1
        for l in planet.findall(".//list"):
            if l.text and "Confirmed planets" in l.text:
                totalconfirmedplanets += 1
                confirmedsystem = True
                # A confirmed planet without a usable discovery year is still
                # counted as confirmed, just not attributed to any year.
                year = _discovery_year(planet)
                if year in discoveryyear:
                    discoveryyear[year] += 1
    if confirmedsystem:
        totalconfirmedsystems += 1
    totalsystems += 1
    if root.findall(".//binary"):
        totalbinaries += 1

    indent(metaroot)
    ET.ElementTree(metaroot).write(metafilename)
    # Debug aid: uncomment to stop after a few systems.
    #if totalsystems >3: break
def _git_first_line(command):
    """Run *command* in the catalogue checkout and return its first output line.

    The line is returned stripped; an empty string is returned when the
    command printed nothing.
    """
    pipe = os.popen("cd open_exoplanet_catalogue && " + command)
    try:
        lines = pipe.readlines()
    finally:
        pipe.close()
    return lines[0].strip() if lines else ""


# Terminate the line of progress dots. The parenthesised form prints the same
# single newline under Python 2 and also parses under Python 3.
print("")

# NOTE: the root tag is misspelled ("statistiscs"). It is kept byte-for-byte
# so that the generated statistics.xml does not change.
statroot = ET.Element("statistiscs")
ET.SubElement(statroot, "commits").text = "%d" % totalcommits
contributors = ET.SubElement(statroot, "contributors")
for c in totalcontributors:
    ET.SubElement(contributors, "contributor").text = unicode(c, 'utf-8')
contributors.attrib["num"] = "%d" % len(totalcontributors)
ET.SubElement(statroot, "planets").text = "%d" % totalplanets
ET.SubElement(statroot, "systems").text = "%d" % totalsystems
ET.SubElement(statroot, "binaries").text = "%d" % totalbinaries
dytag = ET.SubElement(statroot, "discoveryyear")
# Iterate the years that were actually initialised instead of recomputing the
# range from today's date, which raises KeyError if the year changes while
# the script is running.
for year in sorted(discoveryyear):
    ET.SubElement(dytag, "y%d" % year).text = "%d" % discoveryyear[year]
ET.SubElement(statroot, "confirmedplanets").text = "%d" % totalconfirmedplanets
ET.SubElement(statroot, "confirmedsystems").text = "%d" % totalconfirmedsystems
# Date (YYYY-MM-DD) and Unix timestamp of the newest commit in the catalogue.
lastupdate = _git_first_line("git log -1 --date=short --format=%ad")
ET.SubElement(statroot, "lastupdate").text = lastupdate
ET.SubElement(statroot, "lastcommittimestamp").text = _git_first_line("git log -1 --pretty=format:%ct")
indent(statroot)
ET.ElementTree(statroot).write("statistics.xml")

# Binary mode is what the Python 2 csv module expects for its output file.
with open('aliases.csv', 'wb') as csvfile:
    spamwriter = csv.writer(csvfile, delimiter=',', quoting=csv.QUOTE_MINIMAL)
    spamwriter.writerows(aliases)