-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathiaCollectionCSVer.py
More file actions
54 lines (48 loc) · 2.08 KB
/
iaCollectionCSVer.py
File metadata and controls
54 lines (48 loc) · 2.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import internetarchive
import json
import sys
import csv
argv = sys.argv
outfile = argv[4]
csv_out = csv.writer(open(outfile, 'w'), delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
def main(argv, csv_out):
search_string = argv[1]
collection = argv[2]
new_collection = argv[3]
search_collection = internetarchive.search_items('collection:' + collection)
getMetadata(search_string, collection, new_collection, outfile, csv_out, search_collection)
def searchAndWrite(csv_out, search_string, item_identifier, collection, new_collection, title, description,subjects):
if any([search_string in title]):
addLineForNewCollection(item_identifier, collection, new_collection)
elif any([search_string in description]):
addLineForNewCollection(item_identifier, collection, new_collection)
else:
if type(subjects) is list:
for subject in subjects:
if any([search_string in subject]):
addLineForNewCollection(item_identifier, collection, new_collection)
else:
if any([search_string in subjects]):
addLineForNewCollection(item_identifier,collection,new_collection)
def getMetadata(search_string, collection, new_collection, outfile, csv_out, search_collection):
print str(search_collection.num_found) + " items in collection"
for result in search_collection:
item_identifier = result['identifier']
item = internetarchive.get_item(item_identifier)
metadata = item.item_metadata['metadata']
print "Downloading " + item_identifier + " ..."
if 'title' in metadata:
title = metadata['title']
if 'subject' in metadata:
subjects = metadata['subject']
if 'description' in metadata:
description = metadata['description']
searchAndWrite(csv_out, search_string, item_identifier, collection, new_collection, title, description, subjects)
def addLineForNewCollection(item_identifier, collection, new_collection):
rowdata = ''
print('Added ' + item_identifier + ' to CSV')
rowdata = [item_identifier]
rowdata.append(collection)
rowdata.append(new_collection)
csv_out.writerow(rowdata)
main(argv, csv_out)