Skip to content

Commit b6a23b5

Browse files
author
liyan.90210
committed
feat auto update sdk
1 parent 90d9d65 commit b6a23b5

21 files changed

+557
-77
lines changed

Changelog

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
### Change log
22

3+
2026-03-26 Bumped to version v1.0.219
4+
- Updated apis for tls/vod
5+
36
2026-03-19 Bumped to version v1.0.218
47
- Updated apis for content_security
58

volcengine/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
# coding:utf-8
2-
VERSION='v1.0.218'
2+
VERSION='v1.0.219'

volcengine/tls/TLSService.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -448,14 +448,15 @@ def put_logs_v2(self, request: PutLogsV2Request) -> PutLogsResponse:
448448
new_log = log_group.logs.add()
449449
new_log.time = v.time
450450
# 设置纳秒级时间戳(如果提供)
451-
if v.time_ns is not None:
451+
if v.time_ns is not None and hasattr(new_log, "TimeNs"):
452452
new_log.TimeNs = v.time_ns
453453
for key in v.log_dict.keys():
454454
log_content = new_log.contents.add()
455455
log_content.key = str(key)
456456
log_content.value = str(v.log_dict[key])
457457
put_logs_request = PutLogsRequest(request.topic_id, log_group_list,
458-
request.hash_key, request.compression, request.content_md5)
458+
request.hash_key, request.compression, request.content_md5,
459+
enable_nanosecond=request.enable_nanosecond)
459460
return self.put_logs(put_logs_request)
460461

461462
def describe_cursor(self, describe_cursor_request: DescribeCursorRequest) -> DescribeCursorResponse:

volcengine/tls/producer/batch_manager.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ def add_now(self, config: ProducerConfig, executor_service, client, memory_lock,
2020
if self.batch_log is not None:
2121
task = SendBatchTask(
2222
self.batch_log, config, memory_lock, client, retry_queue)
23-
executor_service.submit(task.run())
23+
executor_service.submit(task.run)
2424
self.batch_log = None
2525

2626
def remove_batch(self, batch_logs: ['BatchLog']) -> None:

volcengine/tls/producer/log_dispatcher.py

Lines changed: 49 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
import threading
2+
import time
23
from concurrent.futures import ThreadPoolExecutor
34
from typing import Dict
45

56
from volcengine.tls.TLSService import TLSService
6-
from volcengine.tls.log_pb2 import LogGroup
7+
from volcengine.tls.log_pb2 import LogGroup, LogGroupList
78
from volcengine.tls.producer.batch_manager import BatchManager
89
from volcengine.tls.producer.batch_semaphore import BatchSemaphore
910
from volcengine.tls.producer.producer_model import ProducerConfig, BatchLog, CallBack
@@ -104,8 +105,48 @@ def do_add(self, hash_key: str, topic_id: str, source: str, filename: str,
104105
raise TLSException(error_code="AddBatch Error",
105106
error_message="closed LogDispatcher cannot receive logs anymore")
106107

108+
log_count = 0
109+
earliest_log_time = None
110+
latest_log_time = None
111+
for log in log_group.logs:
112+
if log.time <= 0:
113+
now_ns = time.time_ns()
114+
log.time = int(now_ns // 1_000_000)
115+
if self.producer_config.enable_nanosecond and hasattr(log, "TimeNs"):
116+
try:
117+
if not log.HasField("TimeNs"):
118+
log.TimeNs = int(now_ns % 1_000_000)
119+
except ValueError:
120+
log.TimeNs = int(now_ns % 1_000_000)
121+
log_count += 1
122+
normalized_time = log.time
123+
if log.time < 1e10:
124+
normalized_time = log.time * 1000
125+
elif log.time < 1e15:
126+
normalized_time = log.time
127+
else:
128+
normalized_time = log.time // int(1e6)
129+
if earliest_log_time is None or normalized_time < earliest_log_time:
130+
earliest_log_time = normalized_time
131+
if latest_log_time is None or normalized_time > latest_log_time:
132+
latest_log_time = normalized_time
133+
107134
# 计算批次大小
108-
batch_size = len(log_group.SerializeToString())
135+
def _varint_len(x: int) -> int:
136+
if x < 0:
137+
return 10
138+
n = 1
139+
while x >= (1 << 7):
140+
n += 1
141+
x >>= 7
142+
return n
143+
144+
def _tag_len(field_number: int) -> int:
145+
return _varint_len((field_number << 3) | 2)
146+
147+
group_size = log_group.ByteSize()
148+
log_group_list_groups_field_number = LogGroupList.DESCRIPTOR.fields_by_name["log_groups"].number
149+
batch_size = _tag_len(log_group_list_groups_field_number) + _varint_len(group_size) + group_size
109150
self.producer_config.check_batch_size(batch_size)
110151

111152
# 获取内存锁
@@ -138,19 +179,21 @@ def do_add(self, hash_key: str, topic_id: str, source: str, filename: str,
138179

139180
# 同步操作
140181
with batch_manager.lock:
141-
self.add_to_batch_manager(batch_key, log_group, callback, batch_size, batch_manager)
182+
self.add_to_batch_manager(batch_key, log_group, callback, batch_size, batch_manager,
183+
log_count, earliest_log_time, latest_log_time)
142184
except Exception as e:
143185
# 发生异常时释放内存锁
144186
self.memory_lock.release(batch_size)
145187
raise TLSException(error_code="Add Batch Error", error_message="dispatcher add batch concurrent error")
146188

147189
def add_to_batch_manager(self, batch_key: BatchLog.BatchKey, log_group: LogGroup,
148-
callback: CallBack, batch_size: int, batch_manager: BatchManager) -> None:
190+
callback: CallBack, batch_size: int, batch_manager: BatchManager,
191+
log_count: int, earliest_log_time: int, latest_log_time: int) -> None:
149192
"""将日志添加到批次管理器"""
150193
# 尝试添加到现有批次
151194
batch_log = batch_manager.batch_log
152195
if batch_log is not None:
153-
success = batch_log.try_add(log_group, batch_size, callback)
196+
success = batch_log.try_add(log_group, batch_size, callback, log_count, earliest_log_time, latest_log_time)
154197
if success:
155198
# 检查是否已满需要发送
156199
if batch_manager.full_and_send_batch_request():
@@ -176,7 +219,7 @@ def add_to_batch_manager(self, batch_key: BatchLog.BatchKey, log_group: LogGroup
176219
batch_log = BatchLog(batch_key, self.producer_config)
177220
batch_manager.batch_log = batch_log
178221

179-
success = batch_log.try_add(log_group, batch_size, callback)
222+
success = batch_log.try_add(log_group, batch_size, callback, log_count, earliest_log_time, latest_log_time)
180223
if not success:
181224
self.LOG.error(
182225
f"tryAdd batchLog failed, batchKey = {str(batch_key)}, batchSize = {batch_size}, "

volcengine/tls/producer/producer.py

Lines changed: 46 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import time
33
from typing import Optional
44

5-
from volcengine.tls.log_pb2 import LogGroup
5+
from volcengine.tls.log_pb2 import LogGroup, LogGroupList
66
from volcengine.tls.producer.batch_semaphore import BatchSemaphore
77
from volcengine.tls.producer.log_dispatcher import LogDispatcher
88
from volcengine.tls.producer.mover import Mover
@@ -57,49 +57,83 @@ def send_logs_v2(self, hash_key: Optional[str], topic_id: str, source: Optional[
5757
max_log_group_count = ProducerConfig.MAX_LOG_GROUP_COUNT
5858
max_log_group_size = ProducerConfig.MAX_BATCH_SIZE
5959

60+
def _varint_len(x: int) -> int:
61+
if x < 0:
62+
return 10
63+
n = 1
64+
while x >= (1 << 7):
65+
n += 1
66+
x >>= 7
67+
return n
68+
69+
def _tag_len(field_number: int) -> int:
70+
return _varint_len((field_number << 3) | 2)
71+
72+
log_groups_field_number = LogGroup.DESCRIPTOR.fields_by_name["logs"].number
73+
log_group_list_groups_field_number = LogGroupList.DESCRIPTOR.fields_by_name["log_groups"].number
74+
6075
log_group = LogGroup()
6176
if source is not None:
6277
log_group.source = source
6378
if filename is not None:
6479
log_group.filename = filename
6580

81+
base_group_size = log_group.ByteSize()
82+
current_estimated_group_size = base_group_size
6683
current_count = 0
6784
for v in logs:
68-
new_log = log_group.logs.add() # pylint: disable=no-member
85+
new_log = getattr(log_group, "logs").add()
6986
new_log.time = v.time
87+
if v.time_ns is not None and hasattr(new_log, "TimeNs"):
88+
new_log.TimeNs = v.time_ns
7089
for key in v.log_dict.keys():
7190
log_content = new_log.contents.add()
7291
log_content.key = str(key)
7392
log_content.value = str(v.log_dict[key])
7493

7594
current_count += 1
76-
log_group_size = len(log_group.SerializeToString())
77-
if log_group_size > max_log_group_size:
95+
log_size = new_log.ByteSize()
96+
current_estimated_group_size += _tag_len(log_groups_field_number) + _varint_len(log_size) + log_size
97+
group_size = current_estimated_group_size
98+
entry_size = _tag_len(log_group_list_groups_field_number) + _varint_len(group_size) + group_size
99+
if entry_size > max_log_group_size:
78100
if current_count == 1:
101+
actual_group_size = log_group.ByteSize()
102+
actual_entry_size = _tag_len(log_group_list_groups_field_number) + _varint_len(actual_group_size) + actual_group_size
79103
raise TLSException(
80104
error_code="InvalidArgument",
81-
error_message=f"log size {log_group_size} is larger than MAX_LOG_SIZE {max_log_group_size}"
105+
error_message=f"log size {actual_entry_size} is larger than MAX_LOG_SIZE {max_log_group_size}"
82106
)
83-
log_group.logs.pop()
107+
getattr(log_group, "logs").pop()
84108
current_count -= 1
109+
current_estimated_group_size -= _tag_len(log_groups_field_number) + _varint_len(log_size) + log_size
85110
self.dispatcher.add_batch(hash_key, topic_id, source, filename, log_group, callback)
86111
log_group = LogGroup()
87112
if source is not None:
88113
log_group.source = source
89114
if filename is not None:
90115
log_group.filename = filename
91-
new_log = log_group.logs.add() # pylint: disable=no-member
116+
base_group_size = log_group.ByteSize()
117+
new_log = getattr(log_group, "logs").add()
92118
new_log.time = v.time
119+
if v.time_ns is not None and hasattr(new_log, "TimeNs"):
120+
new_log.TimeNs = v.time_ns
93121
for key in v.log_dict.keys():
94122
log_content = new_log.contents.add()
95123
log_content.key = str(key)
96124
log_content.value = str(v.log_dict[key])
97125
current_count = 1
98-
log_group_size = len(log_group.SerializeToString())
99-
if log_group_size > max_log_group_size:
126+
current_estimated_group_size = base_group_size
127+
log_size = new_log.ByteSize()
128+
current_estimated_group_size += _tag_len(log_groups_field_number) + _varint_len(log_size) + log_size
129+
group_size = current_estimated_group_size
130+
entry_size = _tag_len(log_group_list_groups_field_number) + _varint_len(group_size) + group_size
131+
if entry_size > max_log_group_size:
132+
actual_group_size = log_group.ByteSize()
133+
actual_entry_size = _tag_len(log_group_list_groups_field_number) + _varint_len(actual_group_size) + actual_group_size
100134
raise TLSException(
101135
error_code="InvalidArgument",
102-
error_message=f"log size {log_group_size} is larger than MAX_LOG_SIZE {max_log_group_size}"
136+
error_message=f"log size {actual_entry_size} is larger than MAX_LOG_SIZE {max_log_group_size}"
103137
)
104138

105139
if current_count >= max_log_group_count:
@@ -110,6 +144,8 @@ def send_logs_v2(self, hash_key: Optional[str], topic_id: str, source: Optional[
110144
if filename is not None:
111145
log_group.filename = filename
112146
current_count = 0
147+
base_group_size = log_group.ByteSize()
148+
current_estimated_group_size = base_group_size
113149

114150
if current_count > 0:
115151
self.dispatcher.add_batch(hash_key, topic_id, source, filename, log_group, callback)

volcengine/tls/producer/producer_model.py

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ def __init__(self, endpoint: str, region: str, access_key: str, access_secret: s
9696
self.max_block_ms = self.DEFAULT_BLOCK_MS
9797
self.retry_count = self.DEFAULT_RETRY_COUNT
9898
self.max_reserved_attempts = self.DEFAULT_RESERVED_ATTEMPTS
99+
self.enable_nanosecond = False
99100
self.client_config = ClientConfig(endpoint, region, access_key, access_secret, token)
100101

101102
def __str__(self) -> str:
@@ -107,6 +108,7 @@ def __str__(self) -> str:
107108
f"max_block_ms={self.max_block_ms}, "
108109
f"retry_count={self.retry_count}, "
109110
f"max_reserved_attempts={self.max_reserved_attempts}, "
111+
f"enable_nanosecond={self.enable_nanosecond}, "
110112
f"client_config={self.client_config})")
111113

112114
def valid_config(self) -> None:
@@ -241,6 +243,8 @@ def __init__(self, batch_key: 'BatchLog.BatchKey', producer_config: ProducerConf
241243
self.batch_key = batch_key
242244
self.current_batch_size = 0
243245
self.current_batch_count = 0
246+
self.earliest_log_time = None
247+
self.latest_log_time = None
244248
self.call_back_list = []
245249
self.log_group_list = LogGroupList()
246250
self.producer_config = producer_config
@@ -254,13 +258,29 @@ def __init__(self, batch_key: 'BatchLog.BatchKey', producer_config: ProducerConf
254258
self.base_increase_backoff_ms = 1000 # 1秒
255259
self.LOG = get_logger(__name__)
256260

257-
def try_add(self, log_group: LogGroup, batch_size: int, call_back: Optional[CallBack]) -> bool:
261+
def try_add(self, log_group: LogGroup, batch_size: int, call_back: Optional[CallBack],
262+
log_count: Optional[int] = None, earliest_log_time: Optional[int] = None,
263+
latest_log_time: Optional[int] = None) -> bool:
258264
"""尝试添加日志组到批次中"""
259265
current_batch_count = self.current_batch_count
260266
current_batch_size = self.current_batch_size
267+
if log_count is None:
268+
log_count = len(log_group.logs)
269+
for log in log_group.logs:
270+
normalized_time = log.time
271+
if log.time < 1e10:
272+
normalized_time = log.time * 1000
273+
elif log.time < 1e15:
274+
normalized_time = log.time
275+
else:
276+
normalized_time = log.time // int(1e6)
277+
if earliest_log_time is None or normalized_time < earliest_log_time:
278+
earliest_log_time = normalized_time
279+
if latest_log_time is None or normalized_time > latest_log_time:
280+
latest_log_time = normalized_time
261281

262282
# 检查是否超过阈值
263-
if (len(log_group.logs) + current_batch_count > ProducerConfig.MAX_BATCH_COUNT or
283+
if (log_count + current_batch_count > ProducerConfig.MAX_BATCH_COUNT or
264284
batch_size + current_batch_size > ProducerConfig.MAX_BATCH_SIZE):
265285
return False
266286

@@ -271,8 +291,13 @@ def try_add(self, log_group: LogGroup, batch_size: int, call_back: Optional[Call
271291
self.call_back_list.append(call_back)
272292

273293
# 更新当前计数
274-
self.current_batch_count = current_batch_count + len(log_group.logs)
294+
self.current_batch_count = current_batch_count + log_count
275295
self.current_batch_size = current_batch_size + batch_size
296+
if log_count > 0 and earliest_log_time is not None and latest_log_time is not None:
297+
if self.earliest_log_time is None or earliest_log_time < self.earliest_log_time:
298+
self.earliest_log_time = earliest_log_time
299+
if self.latest_log_time is None or latest_log_time > self.latest_log_time:
300+
self.latest_log_time = latest_log_time
276301

277302
return True
278303

volcengine/tls/producer/send_batch_task.py

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,18 @@ def run(self) -> None:
3232
def send_request(self) -> None:
3333
"""构建并发送日志请求"""
3434
batch_key = self.batch_log.batch_key
35-
put_logs_request = PutLogsRequest(batch_key.topic_id, self.batch_log.log_group_list, batch_key.shard_hash)
35+
put_logs_request = PutLogsRequest(
36+
batch_key.topic_id,
37+
self.batch_log.log_group_list,
38+
batch_key.shard_hash,
39+
log_count=self.batch_log.current_batch_count,
40+
earliest_log_time=self.batch_log.earliest_log_time,
41+
latest_log_time=self.batch_log.latest_log_time,
42+
enable_nanosecond=self.producer_config.enable_nanosecond,
43+
)
44+
45+
if not self.calibrate_batch_size():
46+
return
3647

3748
try:
3849
put_logs_response = self.client.put_logs(put_logs_request)
@@ -45,6 +56,24 @@ def send_request(self) -> None:
4556

4657
self.handle_success(put_logs_response)
4758

59+
def calibrate_batch_size(self) -> bool:
60+
estimated = int(self.batch_log.current_batch_size)
61+
actual = int(self.batch_log.log_group_list.ByteSize())
62+
delta = actual - estimated
63+
if delta > 0:
64+
max_block_ms = self.producer_config.max_block_ms
65+
if max_block_ms == 0:
66+
self.memory_lock.acquire(delta)
67+
else:
68+
acquired = self.memory_lock.acquire(delta, timeout=max_block_ms / 1000)
69+
if not acquired:
70+
self.handle_exception(Exception("buffer full when calibrating batch size"))
71+
return False
72+
elif delta < 0:
73+
self.memory_lock.release(-delta)
74+
self.batch_log.current_batch_size = actual
75+
return True
76+
4877
def handle_failure(self) -> None:
4978
"""处理失败日志"""
5079
self.LOG.info(f"send batch failed, batch: {self.batch_log}")

volcengine/tls/test/producer_split_unit_test.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,11 @@ def test_split_by_count(self):
3333

3434
groups = producer.dispatcher.groups
3535
self.assertEqual(3, len(groups))
36-
self.assertEqual(10000, len(groups[0].logs))
37-
self.assertEqual(10000, len(groups[1].logs))
38-
self.assertEqual(5000, len(groups[2].logs))
36+
self.assertEqual(10000, len(getattr(groups[0], "logs")))
37+
self.assertEqual(10000, len(getattr(groups[1], "logs")))
38+
self.assertEqual(5000, len(getattr(groups[2], "logs")))
3939
for g in groups:
40-
self.assertLessEqual(len(g.logs), ProducerConfig.MAX_LOG_GROUP_COUNT)
40+
self.assertLessEqual(len(getattr(g, "logs")), ProducerConfig.MAX_LOG_GROUP_COUNT)
4141
self.assertLessEqual(len(g.SerializeToString()), ProducerConfig.MAX_BATCH_SIZE)
4242

4343
def test_split_by_size(self):
@@ -84,7 +84,7 @@ def test_batch_log_group_list_count_never_exceeds_32768(self):
8484
for _ in range(6):
8585
g = LogGroup()
8686
for _ in range(10000):
87-
g.logs.append(Log())
87+
getattr(g, "logs").append(Log())
8888
groups.append(g)
8989

9090
batches = []

0 commit comments

Comments
 (0)