-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathmanipulating_data.py
More file actions
122 lines (88 loc) · 3.96 KB
/
manipulating_data.py
File metadata and controls
122 lines (88 loc) · 3.96 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
#!/usr/bin/env python
from __future__ import division
import csv
import dateutil.parser
import cleandata
from pprint import pprint
from collections import defaultdict
def picker(field_name):
    """Build an accessor that looks up ``field_name`` in the row it is given.

    The returned callable takes one subscriptable ``row`` (e.g. a dict) and
    returns ``row[field_name]``, which makes it handy as a ``key=`` function
    for ``sorted``/``max``/``min`` or as a grouping function.
    """
    def pick(row):
        return row[field_name]

    return pick
def pluck(field_name, rows):
    """Turn an iterable of dicts into the list of their ``field_name`` values.

    Args:
        field_name: key to look up in every row.
        rows: iterable of subscriptable rows (e.g. dicts).

    Returns:
        A new list holding ``row[field_name]`` for each row, in input order.

    Raises:
        KeyError: if a dict row does not contain ``field_name``.
    """
    # A list comprehension (rather than ``map``) yields a real list on
    # Python 3 too, where ``map`` returns a lazy, single-use iterator.
    return [row[field_name] for row in rows]
def group_by(grouper, rows, value_transform=None):
    """Bucket ``rows`` by ``grouper(row)``, optionally transforming each bucket.

    Args:
        grouper: callable applied to every row; its (hashable) result is the
            key of the bucket the row is appended to.
        rows: iterable of rows to group.
        value_transform: optional callable applied to each bucket (the list
            of rows sharing a key) to produce the value stored for that key.

    Returns:
        When ``value_transform`` is None, a ``defaultdict(list)`` mapping each
        key to its list of rows (in input order). Otherwise a plain dict
        mapping each key to ``value_transform(bucket)``.
    """
    # key is the output of grouper, value is the list of rows with that key
    grouped = defaultdict(list)
    for row in rows:
        grouped[grouper(row)].append(row)

    if value_transform is None:
        return grouped

    # ``items()`` exists on both Python 2 and 3, unlike ``iteritems()``; the
    # loop variable is named ``bucket`` so it does not shadow ``rows``.
    return {key: value_transform(bucket) for key, bucket in grouped.items()}
def percent_price_change(yesterday, today):
    """Return the fractional closing-price change from ``yesterday`` to ``today``.

    Both arguments are rows exposing a ``'closing_price'`` entry. The result
    is a fraction rather than a percentage figure: 0.1 means +10%, -0.5
    means -50%.
    """
    previous_close = yesterday['closing_price']
    current_close = today['closing_price']
    ratio = current_close / previous_close
    return ratio - 1
def day_over_day_changes(grouped_rows):
    """Compute the price change between each pair of date-adjacent rows.

    ``grouped_rows`` is an iterable of rows carrying ``'symbol'``, ``'date'``
    and ``'closing_price'`` entries. The rows are put in date order and every
    row after the first yields one dict with the later row's ``'symbol'`` and
    ``'date'`` plus a ``'change'`` entry holding the fractional price change
    relative to the row just before it. Fewer than two rows give ``[]``.
    """
    # put the rows in chronological order
    by_date = list(grouped_rows)
    by_date.sort(key=picker('date'))

    changes = []
    # pair each row with its successor: (row 0, row 1), (row 1, row 2), ...
    for previous, current in zip(by_date, by_date[1:]):
        changes.append({
            'symbol': current['symbol'],
            'date': current['date'],
            'change': percent_price_change(previous, current),
        })
    return changes
# to combine percent changes, we add 1 to each, multiply them, and subtract 1
# for instance, if we combine +10% and -20%, the overall change is
# (1 + 10%) * (1 - 20%) - 1 = 1.1 * .8 - 1 = -12%
def combine_pct_changes(pct_change1, pct_change2):
    """Compound two fractional changes into the single equivalent change.

    Each change is turned into a growth factor (1 + change), the factors are
    multiplied, and 1 is subtracted again, e.g. +10% followed by -20% is
    1.1 * 0.8 - 1 = -12%.
    """
    growth1 = 1 + pct_change1
    growth2 = 1 + pct_change2
    return growth1 * growth2 - 1
def overall_change(changes):
    """Compound the ``'change'`` values of ``changes`` into one overall change.

    Args:
        changes: iterable of dicts that each have a ``'change'`` entry holding
            a fractional change (0.1 == +10%).

    Returns:
        The single fractional change equivalent to applying every change in
        turn, or 0.0 when ``changes`` is empty (nothing moved).
    """
    # ``reduce`` is no longer a builtin on Python 3; functools provides it on
    # both Python 2.6+ and Python 3.
    from functools import reduce

    # Materialise the values so emptiness can be tested even when ``pluck``
    # hands back a lazy iterator.
    pct_changes = list(pluck("change", changes))
    if not pct_changes:
        # reduce() without an initial value raises TypeError on empty input;
        # compounding zero changes is simply "no change".
        return 0.0
    return reduce(combine_pct_changes, pct_changes)
if __name__ == '__main__':
    # Demo driver: load tab-separated stock rows from stocks.csv, then report
    # max closing prices, the extreme one-day changes, and the compounded
    # change per calendar month.
    import sys

    # field name -> converter handed to cleandata.parse_dict
    # NOTE(review): presumably parse_dict applies each converter to the
    # matching field of the row -- confirm against cleandata.
    parser_dict = {'date': dateutil.parser.parse,
                   'closing_price': float}

    # The csv module wants a binary-mode file on Python 2 but a text-mode
    # file opened with newline='' on Python 3.
    if sys.version_info[0] >= 3:
        csv_file = open('stocks.csv', 'r', newline='')
    else:
        csv_file = open('stocks.csv', 'rb')

    with csv_file as f:
        reader = csv.DictReader(f, delimiter="\t")
        data = [cleandata.parse_dict(row, parser_dict) for row in reader]

    max_aapl_price = max(row['closing_price']
                         for row in data
                         if row['symbol'] == 'AAPL')
    print('AAPL Max Closing Price: %f' % max_aapl_price)

    # group rows by symbol
    by_symbol = defaultdict(list)
    for row in data:
        by_symbol[row['symbol']].append(row)

    # use a dict comprehension to find the max for each symbol
    # (items() works on Python 2 and 3; iteritems() is Python 2 only)
    max_price_by_symbol = {symbol: max(row['closing_price']
                                       for row in grouped_rows)
                           for symbol, grouped_rows in by_symbol.items()}
    pprint(max_price_by_symbol)

    # now find the max using group_by
    max_price_by_symbol = group_by(picker('symbol'),
                                   data,
                                   lambda rows: max(pluck('closing_price', rows)))
    pprint(max_price_by_symbol)

    # find the largest and smallest one-day percent change
    # key is symbol, value is list of "change" dicts
    changes_by_symbol = group_by(picker('symbol'), data, day_over_day_changes)

    # collect all "change" dicts into one big list
    all_changes = [change
                   for changes in changes_by_symbol.values()
                   for change in changes]

    print('max:')
    pprint(max(all_changes, key=picker("change")))
    # {'change': 0.3283582089552237,
    #  'date': datetime.datetime(1997, 8, 6, 0, 0),
    #  'symbol': 'AAPL'}
    # see, e.g. http://news.cnet.com/2100-1001-202143.html

    print('min:')
    pprint(min(all_changes, key=picker("change")))
    # {'change': -0.5193370165745856,
    #  'date': datetime.datetime(2000, 9, 29, 0, 0),
    #  'symbol': 'AAPL'}
    # see, e.g. http://money.cnn.com/2000/09/29/markets/techwrap/

    # compound every change that falls in the same calendar month
    # (month number only, so the same month of different years is pooled)
    overall_change_by_month = group_by(lambda row: row['date'].month,
                                       all_changes,
                                       overall_change)
    # wrap in a 1-tuple so the dict is unambiguously the single %s argument
    print('overall_change_by_month: %s' % (overall_change_by_month,))