Skip to content

Commit 50ae0aa

Browse files
authored
fix(testing-sdk): fix race condition where concurrent updates can cause duplicate invocations (#362)
*Issue #, if available:* *Description of changes:* In some cases, if two updates come at the exact same time, the testing library creates a duplicate invocation. This is due to the invocation tracker only registering the invocation AFTER the `startInvocation` API completes, so it doesn't know there's an active invocation until after the API call completes. If two updates that start an invocation arrive in quick succession, the second one may arrive before the first one registers the invocation, so it will re-invoke concurrently. The fix is to register the invocation BEFORE the `startInvocation` API is called to ensure it happens synchronously. To help find these bugs, we could also have the testing library stop allowing the same execution to use different checkpoint tokens and be more properly idempotent. This can come later, as things like this can only really happen if the testing library or language SDK has bugs. By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice.
1 parent 1fd8959 commit 50ae0aa

28 files changed

+563
-286
lines changed

packages/aws-durable-execution-sdk-js-examples/src/examples/parallel/wait/parallel-wait.history.json

Lines changed: 108 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
{
33
"EventType": "ExecutionStarted",
44
"EventId": 1,
5-
"Id": "a5d643ee-d1a2-4ee7-aed2-5cdc482963b3",
6-
"EventTimestamp": "2025-12-03T22:59:01.757Z",
5+
"Id": "5bf93d96-3d76-4b4a-ab50-6030f2e8519a",
6+
"EventTimestamp": "2025-12-05T00:16:50.853Z",
77
"ExecutionStartedDetails": {
88
"Input": {
99
"Payload": "{}"
@@ -16,7 +16,7 @@
1616
"EventId": 2,
1717
"Id": "c4ca4238a0b92382",
1818
"Name": "parent-block",
19-
"EventTimestamp": "2025-12-03T22:59:01.759Z",
19+
"EventTimestamp": "2025-12-05T00:16:50.869Z",
2020
"ContextStartedDetails": {}
2121
},
2222
{
@@ -25,7 +25,7 @@
2525
"EventId": 3,
2626
"Id": "ea66c06c1e1c05fa",
2727
"Name": "parallel-branch-0",
28-
"EventTimestamp": "2025-12-03T22:59:01.759Z",
28+
"EventTimestamp": "2025-12-05T00:16:50.869Z",
2929
"ParentId": "c4ca4238a0b92382",
3030
"ContextStartedDetails": {}
3131
},
@@ -35,11 +35,11 @@
3535
"EventId": 4,
3636
"Id": "2f221a18eb863803",
3737
"Name": "wait-1-second",
38-
"EventTimestamp": "2025-12-03T22:59:01.759Z",
38+
"EventTimestamp": "2025-12-05T00:16:50.872Z",
3939
"ParentId": "ea66c06c1e1c05fa",
4040
"WaitStartedDetails": {
4141
"Duration": 1,
42-
"ScheduledEndTimestamp": "2025-12-03T22:59:02.759Z"
42+
"ScheduledEndTimestamp": "2025-12-05T00:16:51.872Z"
4343
}
4444
},
4545
{
@@ -48,7 +48,7 @@
4848
"EventId": 5,
4949
"Id": "98c6f2c2287f4c73",
5050
"Name": "parallel-branch-1",
51-
"EventTimestamp": "2025-12-03T22:59:01.759Z",
51+
"EventTimestamp": "2025-12-05T00:16:50.872Z",
5252
"ParentId": "c4ca4238a0b92382",
5353
"ContextStartedDetails": {}
5454
},
@@ -57,12 +57,12 @@
5757
"SubType": "Wait",
5858
"EventId": 6,
5959
"Id": "6151f5ab282d90e4",
60-
"Name": "wait-2-seconds",
61-
"EventTimestamp": "2025-12-03T22:59:01.759Z",
60+
"Name": "wait-1-second-again",
61+
"EventTimestamp": "2025-12-05T00:16:50.872Z",
6262
"ParentId": "98c6f2c2287f4c73",
6363
"WaitStartedDetails": {
64-
"Duration": 2,
65-
"ScheduledEndTimestamp": "2025-12-03T22:59:03.759Z"
64+
"Duration": 1,
65+
"ScheduledEndTimestamp": "2025-12-05T00:16:51.872Z"
6666
}
6767
},
6868
{
@@ -71,7 +71,7 @@
7171
"EventId": 7,
7272
"Id": "13cee27a2bd93915",
7373
"Name": "parallel-branch-2",
74-
"EventTimestamp": "2025-12-03T22:59:01.759Z",
74+
"EventTimestamp": "2025-12-05T00:16:50.872Z",
7575
"ParentId": "c4ca4238a0b92382",
7676
"ContextStartedDetails": {}
7777
},
@@ -80,114 +80,161 @@
8080
"SubType": "Wait",
8181
"EventId": 8,
8282
"Id": "b425e0c75591aa8f",
83-
"Name": "wait-5-seconds",
84-
"EventTimestamp": "2025-12-03T22:59:01.759Z",
83+
"Name": "wait-2-seconds",
84+
"EventTimestamp": "2025-12-05T00:16:50.872Z",
8585
"ParentId": "13cee27a2bd93915",
86+
"WaitStartedDetails": {
87+
"Duration": 2,
88+
"ScheduledEndTimestamp": "2025-12-05T00:16:52.872Z"
89+
}
90+
},
91+
{
92+
"EventType": "ContextStarted",
93+
"SubType": "ParallelBranch",
94+
"EventId": 9,
95+
"Id": "3a170a9fe4f47efa",
96+
"Name": "parallel-branch-3",
97+
"EventTimestamp": "2025-12-05T00:16:50.872Z",
98+
"ParentId": "c4ca4238a0b92382",
99+
"ContextStartedDetails": {}
100+
},
101+
{
102+
"EventType": "WaitStarted",
103+
"SubType": "Wait",
104+
"EventId": 10,
105+
"Id": "a4e1cd317d54f087",
106+
"Name": "wait-5-seconds",
107+
"EventTimestamp": "2025-12-05T00:16:50.872Z",
108+
"ParentId": "3a170a9fe4f47efa",
86109
"WaitStartedDetails": {
87110
"Duration": 5,
88-
"ScheduledEndTimestamp": "2025-12-03T22:59:06.759Z"
111+
"ScheduledEndTimestamp": "2025-12-05T00:16:55.872Z"
89112
}
90113
},
91114
{
92115
"EventType": "InvocationCompleted",
93-
"EventId": 9,
94-
"EventTimestamp": "2025-12-03T22:59:01.810Z",
116+
"EventId": 11,
117+
"EventTimestamp": "2025-12-05T00:16:50.924Z",
95118
"InvocationCompletedDetails": {
96-
"StartTimestamp": "2025-12-03T22:59:01.757Z",
97-
"EndTimestamp": "2025-12-03T22:59:01.810Z",
119+
"StartTimestamp": "2025-12-05T00:16:50.853Z",
120+
"EndTimestamp": "2025-12-05T00:16:50.924Z",
98121
"Error": {},
99-
"RequestId": "50ccf531-dbcd-4392-a08b-2c4c60bb4c80"
122+
"RequestId": "24b3c358-2e22-4f84-9423-577754ffe3c7"
100123
}
101124
},
102125
{
103126
"EventType": "WaitSucceeded",
104127
"SubType": "Wait",
105-
"EventId": 10,
128+
"EventId": 12,
106129
"Id": "2f221a18eb863803",
107130
"Name": "wait-1-second",
108-
"EventTimestamp": "2025-12-03T22:59:02.761Z",
131+
"EventTimestamp": "2025-12-05T00:16:51.872Z",
109132
"ParentId": "ea66c06c1e1c05fa",
110133
"WaitSucceededDetails": {
111134
"Duration": 1
112135
}
113136
},
137+
{
138+
"EventType": "WaitSucceeded",
139+
"SubType": "Wait",
140+
"EventId": 13,
141+
"Id": "6151f5ab282d90e4",
142+
"Name": "wait-1-second-again",
143+
"EventTimestamp": "2025-12-05T00:16:51.872Z",
144+
"ParentId": "98c6f2c2287f4c73",
145+
"WaitSucceededDetails": {
146+
"Duration": 1
147+
}
148+
},
114149
{
115150
"EventType": "ContextSucceeded",
116151
"SubType": "ParallelBranch",
117-
"EventId": 11,
152+
"EventId": 14,
118153
"Id": "ea66c06c1e1c05fa",
119154
"Name": "parallel-branch-0",
120-
"EventTimestamp": "2025-12-03T22:59:02.770Z",
155+
"EventTimestamp": "2025-12-05T00:16:51.875Z",
156+
"ParentId": "c4ca4238a0b92382",
157+
"ContextSucceededDetails": {
158+
"Result": {}
159+
}
160+
},
161+
{
162+
"EventType": "ContextSucceeded",
163+
"SubType": "ParallelBranch",
164+
"EventId": 15,
165+
"Id": "98c6f2c2287f4c73",
166+
"Name": "parallel-branch-1",
167+
"EventTimestamp": "2025-12-05T00:16:51.875Z",
121168
"ParentId": "c4ca4238a0b92382",
122169
"ContextSucceededDetails": {
123170
"Result": {}
124171
}
125172
},
126173
{
127174
"EventType": "InvocationCompleted",
128-
"EventId": 12,
129-
"EventTimestamp": "2025-12-03T22:59:02.822Z",
175+
"EventId": 16,
176+
"EventTimestamp": "2025-12-05T00:16:51.925Z",
130177
"InvocationCompletedDetails": {
131-
"StartTimestamp": "2025-12-03T22:59:02.762Z",
132-
"EndTimestamp": "2025-12-03T22:59:02.822Z",
178+
"StartTimestamp": "2025-12-05T00:16:51.872Z",
179+
"EndTimestamp": "2025-12-05T00:16:51.925Z",
133180
"Error": {},
134-
"RequestId": "52e8da81-1d04-4aae-8a67-c051fe2f9043"
181+
"RequestId": "2119315f-aff7-4503-9556-8288b7cfaa5a"
135182
}
136183
},
137184
{
138185
"EventType": "WaitSucceeded",
139186
"SubType": "Wait",
140-
"EventId": 13,
141-
"Id": "6151f5ab282d90e4",
187+
"EventId": 17,
188+
"Id": "b425e0c75591aa8f",
142189
"Name": "wait-2-seconds",
143-
"EventTimestamp": "2025-12-03T22:59:03.760Z",
144-
"ParentId": "98c6f2c2287f4c73",
190+
"EventTimestamp": "2025-12-05T00:16:52.873Z",
191+
"ParentId": "13cee27a2bd93915",
145192
"WaitSucceededDetails": {
146193
"Duration": 2
147194
}
148195
},
149196
{
150197
"EventType": "ContextSucceeded",
151198
"SubType": "ParallelBranch",
152-
"EventId": 14,
153-
"Id": "98c6f2c2287f4c73",
154-
"Name": "parallel-branch-1",
155-
"EventTimestamp": "2025-12-03T22:59:03.761Z",
199+
"EventId": 18,
200+
"Id": "13cee27a2bd93915",
201+
"Name": "parallel-branch-2",
202+
"EventTimestamp": "2025-12-05T00:16:52.877Z",
156203
"ParentId": "c4ca4238a0b92382",
157204
"ContextSucceededDetails": {
158205
"Result": {}
159206
}
160207
},
161208
{
162209
"EventType": "InvocationCompleted",
163-
"EventId": 15,
164-
"EventTimestamp": "2025-12-03T22:59:03.813Z",
210+
"EventId": 19,
211+
"EventTimestamp": "2025-12-05T00:16:52.929Z",
165212
"InvocationCompletedDetails": {
166-
"StartTimestamp": "2025-12-03T22:59:03.760Z",
167-
"EndTimestamp": "2025-12-03T22:59:03.813Z",
213+
"StartTimestamp": "2025-12-05T00:16:52.873Z",
214+
"EndTimestamp": "2025-12-05T00:16:52.929Z",
168215
"Error": {},
169-
"RequestId": "1abb1bfd-2628-4f0e-80c7-0fe60bd3f416"
216+
"RequestId": "5907d455-ef8a-4021-b822-bcffa5dd2d07"
170217
}
171218
},
172219
{
173220
"EventType": "WaitSucceeded",
174221
"SubType": "Wait",
175-
"EventId": 16,
176-
"Id": "b425e0c75591aa8f",
222+
"EventId": 20,
223+
"Id": "a4e1cd317d54f087",
177224
"Name": "wait-5-seconds",
178-
"EventTimestamp": "2025-12-03T22:59:06.760Z",
179-
"ParentId": "13cee27a2bd93915",
225+
"EventTimestamp": "2025-12-05T00:16:55.873Z",
226+
"ParentId": "3a170a9fe4f47efa",
180227
"WaitSucceededDetails": {
181228
"Duration": 5
182229
}
183230
},
184231
{
185232
"EventType": "ContextSucceeded",
186233
"SubType": "ParallelBranch",
187-
"EventId": 17,
188-
"Id": "13cee27a2bd93915",
189-
"Name": "parallel-branch-2",
190-
"EventTimestamp": "2025-12-03T22:59:06.762Z",
234+
"EventId": 21,
235+
"Id": "3a170a9fe4f47efa",
236+
"Name": "parallel-branch-3",
237+
"EventTimestamp": "2025-12-05T00:16:55.878Z",
191238
"ParentId": "c4ca4238a0b92382",
192239
"ContextSucceededDetails": {
193240
"Result": {}
@@ -196,32 +243,32 @@
196243
{
197244
"EventType": "ContextSucceeded",
198245
"SubType": "Parallel",
199-
"EventId": 18,
246+
"EventId": 22,
200247
"Id": "c4ca4238a0b92382",
201248
"Name": "parent-block",
202-
"EventTimestamp": "2025-12-03T22:59:06.762Z",
249+
"EventTimestamp": "2025-12-05T00:16:55.880Z",
203250
"ContextSucceededDetails": {
204251
"Result": {
205-
"Payload": "{\"all\":[{\"index\":0,\"status\":\"SUCCEEDED\"},{\"index\":1,\"status\":\"SUCCEEDED\"},{\"index\":2,\"status\":\"SUCCEEDED\"}],\"completionReason\":\"ALL_COMPLETED\"}"
252+
"Payload": "{\"all\":[{\"index\":0,\"status\":\"SUCCEEDED\"},{\"index\":1,\"status\":\"SUCCEEDED\"},{\"index\":2,\"status\":\"SUCCEEDED\"},{\"index\":3,\"status\":\"SUCCEEDED\"}],\"completionReason\":\"ALL_COMPLETED\"}"
206253
}
207254
}
208255
},
209256
{
210257
"EventType": "InvocationCompleted",
211-
"EventId": 19,
212-
"EventTimestamp": "2025-12-03T22:59:06.762Z",
258+
"EventId": 23,
259+
"EventTimestamp": "2025-12-05T00:16:55.881Z",
213260
"InvocationCompletedDetails": {
214-
"StartTimestamp": "2025-12-03T22:59:06.760Z",
215-
"EndTimestamp": "2025-12-03T22:59:06.762Z",
261+
"StartTimestamp": "2025-12-05T00:16:55.874Z",
262+
"EndTimestamp": "2025-12-05T00:16:55.881Z",
216263
"Error": {},
217-
"RequestId": "9c2ae0a0-53bf-497f-880f-cd211084c897"
264+
"RequestId": "6fa7d6b2-831c-4e48-a0c6-9cb81bdfe3c6"
218265
}
219266
},
220267
{
221268
"EventType": "ExecutionSucceeded",
222-
"EventId": 20,
223-
"Id": "a5d643ee-d1a2-4ee7-aed2-5cdc482963b3",
224-
"EventTimestamp": "2025-12-03T22:59:06.762Z",
269+
"EventId": 24,
270+
"Id": "5bf93d96-3d76-4b4a-ab50-6030f2e8519a",
271+
"EventTimestamp": "2025-12-05T00:16:55.881Z",
225272
"ExecutionSucceededDetails": {
226273
"Result": {
227274
"Payload": "\"Completed waits\""

packages/aws-durable-execution-sdk-js-examples/src/examples/parallel/wait/parallel-wait.test.ts

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,16 @@ createTests({
1515

1616
const parentBlockOp = runner.getOperation("parent-block");
1717
const wait1SecondOp = runner.getOperation("wait-1-second");
18+
const wait1SecondAgainOp = runner.getOperation("wait-1-second-again");
1819
const wait2SecondsOp = runner.getOperation("wait-2-seconds");
1920
const wait5SecondsOp = runner.getOperation("wait-5-seconds");
2021

2122
expect(execution.getResult()).toBe("Completed waits");
22-
expect(execution.getOperations()).toHaveLength(7);
23+
expect(execution.getOperations()).toHaveLength(9);
2324

24-
expect(parentBlockOp.getChildOperations()).toHaveLength(3);
25+
expect(parentBlockOp.getChildOperations()).toHaveLength(4);
2526

27+
expect(wait1SecondAgainOp.getWaitDetails()!.waitSeconds).toBe(1);
2628
expect(wait1SecondOp.getWaitDetails()!.waitSeconds!).toBe(1);
2729
expect(wait2SecondsOp.getWaitDetails()!.waitSeconds!).toBe(2);
2830
expect(wait5SecondsOp.getWaitDetails()!.waitSeconds!).toBe(5);

packages/aws-durable-execution-sdk-js-examples/src/examples/parallel/wait/parallel-wait.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ export const handler = withDurableExecution(
1616
await context.parallel("parent-block", [
1717
async (childContext: DurableContext) =>
1818
await childContext.wait("wait-1-second", { seconds: 1 }),
19+
async (childContext: DurableContext) =>
20+
await childContext.wait("wait-1-second-again", { seconds: 1 }),
1921
async (childContext: DurableContext) =>
2022
await childContext.wait("wait-2-seconds", { seconds: 2 }),
2123
async (childContext: DurableContext) =>

packages/aws-durable-execution-sdk-js-testing/src/checkpoint-server/handlers/__tests__/callbacks.test.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import { CheckpointManager } from "../../storage/checkpoint-manager";
1414
import {
1515
createExecutionId,
1616
createCallbackId,
17+
createInvocationId,
1718
} from "../../utils/tagged-strings";
1819

1920
describe("callbacks handlers", () => {
@@ -28,6 +29,7 @@ describe("callbacks handlers", () => {
2829
executionManager.startExecution({
2930
executionId: mockExecutionId,
3031
payload: '{"test": "data"}',
32+
invocationId: createInvocationId(),
3133
});
3234

3335
const storage = executionManager.getCheckpointsByExecution(mockExecutionId);

packages/aws-durable-execution-sdk-js-testing/src/checkpoint-server/handlers/__tests__/checkpoint-handlers.test.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,10 @@ import {
1212
processCheckpointDurableExecution,
1313
} from "../checkpoint-handlers";
1414
import { ExecutionManager } from "../../storage/execution-manager";
15-
import { createExecutionId } from "../../utils/tagged-strings";
15+
import {
16+
createExecutionId,
17+
createInvocationId,
18+
} from "../../utils/tagged-strings";
1619
import { encodeCheckpointToken } from "../../utils/checkpoint-token";
1720

1821
// Mock only external dependencies we can't control
@@ -28,6 +31,7 @@ describe("checkpoint handlers", () => {
2831
const invocationResult = executionManager.startExecution({
2932
executionId,
3033
payload: '{"test": "data"}',
34+
invocationId: createInvocationId(),
3135
});
3236

3337
const storage = executionManager.getCheckpointsByExecution(executionId);

0 commit comments

Comments
 (0)