Skip to content

Commit 47308f4

Browse files
authored
Merge pull request #359 from CoinStatsHQ/pr/aws-kinesis-support
Added support for AWS Kinesis
2 parents 956695b + f4403a7 commit 47308f4

File tree

4 files changed

+95
-0
lines changed

4 files changed

+95
-0
lines changed
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
# MIT License
2+
#
3+
# Copyright (c) 2022 CoinStats LLC
4+
#
5+
# Permission is hereby granted, free of charge, to any person obtaining a copy
6+
# of this software and associated documentation files (the "Software"), to deal
7+
# in the Software without restriction, including without limitation the rights
8+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
# copies of the Software, and to permit persons to whom the Software is
10+
# furnished to do so, subject to the following conditions:
11+
#
12+
# The above copyright notice and this permission notice shall be included in all
13+
# copies or substantial portions of the Software.
14+
#
15+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
# SOFTWARE.
22+
23+
import json
24+
import typing as t
25+
import uuid
26+
from itertools import zip_longest
27+
28+
import boto3
29+
30+
_KINESIS_BATCH_LIMIT = 500
31+
32+
33+
def _uuid_partition_key(_: dict) -> str:
    """Return a fresh random partition key, ignoring the item itself.

    The key is the 32-character hexadecimal form of a newly generated
    UUID4, so every call yields a different value regardless of the
    item passed in.
    """
    random_id = uuid.uuid4()
    return random_id.hex
35+
36+
37+
class KinesisItemExporter:
    """Item exporter that publishes items to an AWS Kinesis data stream.

    Every item is serialized with ``_serialize_item`` and sent with a
    partition key produced by ``partition_key_callable``.

    Usage: construct the exporter, call ``open()`` to create the boto3
    client, export with ``export_item`` / ``export_items``, then call
    ``close()``.
    """

    # How many times a batch (or its rejected remainder) is sent before
    # export_items gives up and raises.
    _MAX_PUT_ATTEMPTS = 3
    # Delay in seconds before the first retry; doubled for each further retry.
    _RETRY_BACKOFF_SECONDS = 0.5

    def __init__(
        self,
        stream_name: str,
        partition_key_callable: t.Callable[[dict], str] = _uuid_partition_key,
    ):
        """Store the export configuration; no AWS call is made here.

        Args:
            stream_name: Name of the Kinesis data stream to write to.
            partition_key_callable: Called with each item to obtain its
                partition key. Defaults to a random UUID-based key.
        """
        self._stream_name = stream_name
        self._partition_key_callable = partition_key_callable
        self._kinesis_client = None  # initialized in .open

    def open(self) -> None:
        """Create the Kinesis client using boto3's default configuration."""
        self._kinesis_client = boto3.client('kinesis')

    def export_items(self, items: t.Iterable[dict]) -> None:
        """Export ``items`` in batches of at most ``_KINESIS_BATCH_LIMIT``.

        The iterable is consumed lazily, one batch at a time.

        Raises:
            RuntimeError: If some records are still rejected by Kinesis
                after ``_MAX_PUT_ATTEMPTS`` attempts.
        """
        batch = []
        for item in items:
            batch.append(self._build_record(item))
            if len(batch) == _KINESIS_BATCH_LIMIT:
                self._put_records(batch)
                batch = []
        # Flush the final, possibly partial batch. An empty request is never
        # sent, matching the behaviour for an empty ``items`` iterable.
        if batch:
            self._put_records(batch)

    def export_item(self, item: dict) -> None:
        """Export a single item with one ``put_record`` call."""
        self._kinesis_client.put_record(
            StreamName=self._stream_name,
            Data=_serialize_item(item),
            PartitionKey=self._partition_key_callable(item),
        )

    def close(self):
        """No-op: nothing is released when the exporter is closed."""
        pass

    def _build_record(self, item: dict) -> dict:
        """Build the ``Records`` entry for ``item`` in a ``put_records`` call."""
        return {
            'Data': _serialize_item(item),
            'PartitionKey': self._partition_key_callable(item),
        }

    def _put_records(self, records: t.List[dict]) -> None:
        """Send one batch, resending only the records Kinesis rejected.

        ``put_records`` is not atomic: it can succeed as a call while
        individual records fail (for example when a shard is throttled).
        Such failures are reported in the response rather than raised, so
        the response must be inspected or the records are silently lost.
        """
        import time  # local import: only needed to back off between retries

        pending = records
        failed_count = 0
        error_codes = []
        for attempt in range(1, self._MAX_PUT_ATTEMPTS + 1):
            response = self._kinesis_client.put_records(
                StreamName=self._stream_name,
                Records=pending,
            )
            failed_count = response.get('FailedRecordCount', 0)
            if not failed_count:
                return

            # Result entries are positional: entry i describes pending[i],
            # and rejected entries carry an 'ErrorCode'.
            results = response.get('Records', [])
            error_codes = sorted(
                {result['ErrorCode'] for result in results if 'ErrorCode' in result}
            )
            pending = [
                record
                for record, result in zip(pending, results)
                if 'ErrorCode' in result
            ]
            if not pending:
                # Failures were reported but cannot be matched to records;
                # there is nothing that can be resent.
                break
            if attempt < self._MAX_PUT_ATTEMPTS:
                time.sleep(self._RETRY_BACKOFF_SECONDS * 2 ** (attempt - 1))

        raise RuntimeError(
            f'Failed to put {failed_count} record(s) to Kinesis stream '
            f'{self._stream_name!r} after {self._MAX_PUT_ATTEMPTS} attempt(s); '
            f'error codes: {error_codes}'
        )
79+
80+
81+
def _serialize_item(item: dict) -> bytes:
    """Encode ``item`` as JSON text and return it as bytes.

    Uses the defaults of ``json.dumps`` and ``str.encode``.
    """
    json_text = json.dumps(item)
    return json_text.encode()

ethereumetl/cli/stream.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
'or Postgres connection url e.g. postgresql+pg8000://postgres:admin@127.0.0.1:5432/ethereum; '
4343
'or GCS bucket e.g. gs://your-bucket-name; '
4444
'or kafka, output name and connection host:port e.g. kafka/127.0.0.1:9092 '
45+
'or Kinesis, e.g. kinesis://your-data-stream-name'
4546
'If not specified will print to console')
4647
@click.option('-s', '--start-block', default=None, show_default=True, type=int, help='Start block')
4748
@click.option('-e', '--entity-types', default=','.join(EntityType.ALL_FOR_INFURA), show_default=True, type=str,

ethereumetl/streaming/item_exporter_creator.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,11 @@ def create_item_exporter(output):
5151
batch_max_latency=2,
5252
batch_max_messages=1000,
5353
enable_message_ordering=enable_message_ordering)
54+
elif item_exporter_type == ItemExporterType.KINESIS:
55+
from blockchainetl.jobs.exporters.kinesis_item_exporter import KinesisItemExporter
56+
item_exporter = KinesisItemExporter(
57+
stream_name=output[len('kinesis://'):],
58+
)
5459
elif item_exporter_type == ItemExporterType.POSTGRES:
5560
from blockchainetl.jobs.exporters.postgres_item_exporter import PostgresItemExporter
5661
from blockchainetl.streaming.postgres_utils import create_insert_statement_for_table
@@ -109,6 +114,8 @@ def get_bucket_and_path_from_gcs_output(output):
109114
def determine_item_exporter_type(output):
110115
if output is not None and output.startswith('projects'):
111116
return ItemExporterType.PUBSUB
117+
if output is not None and output.startswith('kinesis://'):
118+
return ItemExporterType.KINESIS
112119
if output is not None and output.startswith('kafka'):
113120
return ItemExporterType.KAFKA
114121
elif output is not None and output.startswith('postgresql'):
@@ -123,6 +130,7 @@ def determine_item_exporter_type(output):
123130

124131
class ItemExporterType:
125132
PUBSUB = 'pubsub'
133+
KINESIS = 'kinesis'
126134
POSTGRES = 'postgres'
127135
GCS = 'gcs'
128136
CONSOLE = 'console'

setup.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,10 @@ def read(fname):
5555
# Later versions break the build in Travis CI for Python 3.7.2
5656
'grpcio==1.46.3'
5757
],
58+
'streaming-kinesis': [
59+
'boto3==1.24.11',
60+
'botocore==1.27.11',
61+
],
5862
'dev': [
5963
'pytest~=4.3.0'
6064
]

0 commit comments

Comments
 (0)