diff --git a/tap_github.py b/tap_github.py
index 8bff0a3c..b2339e56 100644
--- a/tap_github.py
+++ b/tap_github.py
@@ -24,7 +24,8 @@
     'stargazers': ['user_id'],
     'releases': ['id'],
     'reviews': ['id'],
-    'review_comments': ['id']
+    'review_comments': ['id'],
+    'tags': ['node_id']
 }
 
 class AuthException(Exception):
@@ -185,6 +186,41 @@ def do_discover():
     # dump catalog
     print(json.dumps(catalog, indent=2))
 
+def get_all_tags(schemas, repo_path, state, mdata):
+    """Write every tag of ``repo_path`` to the 'tags' stream and return ``state``.
+
+    Each page yielded by ``authed_get_all_pages`` is decoded as JSON; every tag
+    is stamped with ``_sdc_repository``, transformed against ``schemas`` and
+    ``mdata``, and written as a 'tags' record.
+    """
+    # The volume of tags can safely be considered low, so the full list is
+    # requested on every call; the bookmark written below is not used to
+    # narrow the request.
+    # The "List repository tags" endpoint documents only `per_page` and `page`
+    # as query parameters, so no `sort`/`direction` is sent.
+
+    # The metadata map does not depend on the page or the tag: build it once.
+    mdata_map = metadata.to_map(mdata)
+
+    with metrics.record_counter('tags') as counter:
+        for response in authed_get_all_pages(
+                'tags',
+                'https://api.github.com/repos/{}/tags'.format(repo_path)
+        ):
+            tags = response.json()
+            extraction_time = singer.utils.now()
+            for t in tags:
+                t['_sdc_repository'] = repo_path
+
+                # transform and write tag record
+                with singer.Transformer() as transformer:
+                    rec = transformer.transform(t, schemas, metadata=mdata_map)
+                singer.write_record('tags', rec, time_extracted=extraction_time)
+                singer.write_bookmark(state, repo_path, 'tags', {'since': singer.utils.strftime(extraction_time)})
+                counter.increment()
+
+    return state
+
 def get_all_releases(schemas, repo_path, state, mdata):
     # Releases doesn't seem to have an `updated_at` property, yet can be edited.
     # For this reason and since the volume of release can safely be considered low,
@@ -485,7 +521,8 @@ def get_stream_from_catalog(stream_id, catalog):
     'collaborators': get_all_collaborators,
     'pull_requests': get_all_pull_requests,
     'releases': get_all_releases,
-    'stargazers': get_all_stargazers
+    'stargazers': get_all_stargazers,
+    'tags': get_all_tags,
 }
 
 SUB_STREAMS = {
diff --git a/tap_github/tags.json b/tap_github/tags.json
new file mode 100644
index 00000000..157d3518
--- /dev/null
+++ b/tap_github/tags.json
@@ -0,0 +1,33 @@
+{
+  "type": ["null", "object"],
+  "additionalProperties": false,
+  "properties": {
+    "name": {
+      "type": ["null", "string"]
+    },
+    "zipball_url": {
+      "type": ["null", "string"]
+    },
+    "tarball_url": {
+      "type": ["null", "string"]
+    },
+    "commit": {
+      "type": ["null", "object"],
+      "additionalProperties": false,
+      "properties": {
+        "sha": {
+          "type": ["null", "string"]
+        },
+        "url": {
+          "type": ["null", "string"]
+        }
+      }
+    },
+    "node_id": {
+      "type": ["null", "string"]
+    },
+    "_sdc_repository": {
+      "type": ["string"]
+    }
+  }
+}