Skip to content

Commit 5794b1a

Browse files
Merge branch 'main' into fix/flagd-infinite-connection-retries
Signed-off-by: lea konvalinka <lea.konvalinka@dynatrace.com>
2 parents a636257 + ba277bf commit 5794b1a

File tree

12 files changed

+307
-44
lines changed

12 files changed

+307
-44
lines changed

providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/Config.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ public final class Config {
2323
static final int DEFAULT_MAX_CACHE_SIZE = 1000;
2424
static final int DEFAULT_OFFLINE_POLL_MS = 5000;
2525
static final long DEFAULT_KEEP_ALIVE = 0;
26+
static final String DEFAULT_REINITIALIZE_ON_ERROR = "false";
2627

2728
static final String RESOLVER_ENV_VAR = "FLAGD_RESOLVER";
2829
static final String HOST_ENV_VAR_NAME = "FLAGD_HOST";
@@ -55,6 +56,7 @@ public final class Config {
5556
static final String KEEP_ALIVE_MS_ENV_VAR_NAME = "FLAGD_KEEP_ALIVE_TIME_MS";
5657
static final String TARGET_URI_ENV_VAR_NAME = "FLAGD_TARGET_URI";
5758
static final String STREAM_RETRY_GRACE_PERIOD = "FLAGD_RETRY_GRACE_PERIOD";
59+
static final String REINITIALIZE_ON_ERROR_ENV_VAR_NAME = "FLAGD_REINITIALIZE_ON_ERROR";
5860

5961
static final String RESOLVER_RPC = "rpc";
6062
static final String RESOLVER_IN_PROCESS = "in-process";

providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/FlagdOptions.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,16 @@ public class FlagdOptions {
213213
@Builder.Default
214214
private String defaultAuthority = fallBackToEnvOrDefault(Config.DEFAULT_AUTHORITY_ENV_VAR_NAME, null);
215215

216+
/**
217+
* !EXPERIMENTAL!
218+
* Whether to reinitialize the channel (TCP connection) after the grace period is exceeded.
219+
* This can help recover from connection issues by creating fresh connections.
220+
* Particularly useful for troubleshooting network issues related to proxies or service meshes.
221+
*/
222+
@Builder.Default
223+
private boolean reinitializeOnError = Boolean.parseBoolean(
224+
fallBackToEnvOrDefault(Config.REINITIALIZE_ON_ERROR_ENV_VAR_NAME, Config.DEFAULT_REINITIALIZE_ON_ERROR));
225+
216226
/**
217227
* Builder overwrite in order to customize the "build" method.
218228
*

providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/FlagdProvider.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ public FlagdProvider(final FlagdOptions options) {
9292
}
9393
hooks.add(new SyncMetadataHook(this::getEnrichedContext));
9494
contextEnricher = options.getContextEnricher();
95-
errorExecutor = Executors.newSingleThreadScheduledExecutor();
95+
errorExecutor = Executors.newSingleThreadScheduledExecutor(new FlagdThreadFactory("flagd-provider-thread"));
9696
gracePeriod = options.getRetryGracePeriod();
9797
deadline = options.getDeadline();
9898
}
@@ -106,7 +106,7 @@ public FlagdProvider(final FlagdOptions options) {
106106
deadline = Config.DEFAULT_DEADLINE;
107107
gracePeriod = Config.DEFAULT_STREAM_RETRY_GRACE_PERIOD;
108108
hooks.add(new SyncMetadataHook(this::getEnrichedContext));
109-
errorExecutor = Executors.newSingleThreadScheduledExecutor();
109+
errorExecutor = Executors.newSingleThreadScheduledExecutor(new FlagdThreadFactory("flagd-provider-thread"));
110110
if (initialized) {
111111
this.syncResources.initialize();
112112
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
package dev.openfeature.contrib.providers.flagd;
2+
3+
import java.util.Objects;
4+
import java.util.concurrent.ThreadFactory;
5+
import java.util.concurrent.atomic.AtomicInteger;
6+
7+
/**
8+
* Thread factory for the Flagd provider to allow named daemon threads to be created.
9+
*/
10+
class FlagdThreadFactory implements ThreadFactory {
11+
12+
private final AtomicInteger counter = new AtomicInteger();
13+
private final String namePrefix;
14+
15+
/**
16+
* {@link FlagdThreadFactory}'s constructor.
17+
*
18+
* @param namePrefix Prefix used for setting the new thread's name.
19+
*/
20+
FlagdThreadFactory(String namePrefix) {
21+
this.namePrefix = Objects.requireNonNull(namePrefix, "namePrefix must not be null");
22+
}
23+
24+
@Override
25+
public Thread newThread(Runnable runnable) {
26+
final Thread thread = new Thread(runnable);
27+
thread.setDaemon(true);
28+
thread.setName(namePrefix + "-" + counter.incrementAndGet());
29+
return thread;
30+
}
31+
}

providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/process/InProcessResolver.java

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ public class InProcessResolver implements Resolver {
4141
private final Consumer<FlagdProviderEvent> onConnectionEvent;
4242
private final Operator operator;
4343
private final String scope;
44+
private final QueueSource queueSource;
4445

4546
/**
4647
* Resolves flag values using
@@ -52,7 +53,8 @@ public class InProcessResolver implements Resolver {
5253
* connection/stream
5354
*/
5455
public InProcessResolver(FlagdOptions options, Consumer<FlagdProviderEvent> onConnectionEvent) {
55-
this.flagStore = new FlagStore(getConnector(options));
56+
this.queueSource = getQueueSource(options);
57+
this.flagStore = new FlagStore(queueSource);
5658
this.onConnectionEvent = onConnectionEvent;
5759
this.operator = new Operator();
5860
this.scope = options.getSelector();
@@ -97,6 +99,19 @@ public void init() throws Exception {
9799
stateWatcher.start();
98100
}
99101

102+
/**
103+
* Called when the provider enters error state after grace period.
104+
* Attempts to reinitialize the sync connector if enabled.
105+
*/
106+
@Override
107+
public void onError() {
108+
if (queueSource instanceof SyncStreamQueueSource) {
109+
SyncStreamQueueSource syncConnector = (SyncStreamQueueSource) queueSource;
110+
// only reinitialize if option is enabled
111+
syncConnector.reinitializeChannelComponents();
112+
}
113+
}
114+
100115
/**
101116
* Shutdown in-process resolver.
102117
*
@@ -150,7 +165,7 @@ public ProviderEvaluation<Value> objectEvaluation(String key, Value defaultValue
150165
.build();
151166
}
152167

153-
static QueueSource getConnector(final FlagdOptions options) {
168+
static QueueSource getQueueSource(final FlagdOptions options) {
154169
if (options.getCustomConnector() != null) {
155170
return options.getCustomConnector();
156171
}

providers/flagd/src/main/java/dev/openfeature/contrib/providers/flagd/resolver/process/storage/connector/sync/SyncStreamQueueSource.java

Lines changed: 77 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,13 @@
2727
import lombok.extern.slf4j.Slf4j;
2828

2929
/**
30-
* Implements the {@link QueueSource} contract and emit flags obtained from flagd sync gRPC contract.
30+
* Implements the {@link QueueSource} contract and emit flags obtained from
31+
* flagd sync gRPC contract.
3132
*/
3233
@Slf4j
3334
@SuppressFBWarnings(
3435
value = {"EI_EXPOSE_REP"},
35-
justification = "Random is used to generate a variation & flag configurations require exposing")
36+
justification = "We need to expose the BlockingQueue to allow consumers to read from it")
3637
public class SyncStreamQueueSource implements QueueSource {
3738
private static final int QUEUE_SIZE = 5;
3839

@@ -45,14 +46,32 @@ public class SyncStreamQueueSource implements QueueSource {
4546
private final String selector;
4647
private final String providerId;
4748
private final boolean syncMetadataDisabled;
48-
private final ChannelConnector channelConnector;
49+
private final boolean reinitializeOnError;
50+
private final FlagdOptions options;
4951
private final BlockingQueue<QueuePayload> outgoingQueue = new LinkedBlockingQueue<>(QUEUE_SIZE);
50-
private final FlagSyncServiceStub flagSyncStub;
51-
private final FlagSyncServiceBlockingStub metadataStub;
5252
private final List<String> fatalStatusCodes;
53+
private volatile GrpcComponents grpcComponents;
5354

5455
/**
55-
* Creates a new SyncStreamQueueSource responsible for observing the event stream.
56+
* Container for gRPC components to ensure atomicity during reinitialization.
57+
* All three components are updated together to prevent consumers from seeing
58+
* an inconsistent state where components are from different channel instances.
59+
*/
60+
private static class GrpcComponents {
61+
final ChannelConnector channelConnector;
62+
final FlagSyncServiceStub flagSyncStub;
63+
final FlagSyncServiceBlockingStub metadataStub;
64+
65+
GrpcComponents(ChannelConnector connector, FlagSyncServiceStub stub, FlagSyncServiceBlockingStub blockingStub) {
66+
this.channelConnector = connector;
67+
this.flagSyncStub = stub;
68+
this.metadataStub = blockingStub;
69+
}
70+
}
71+
72+
/**
73+
* Creates a new SyncStreamQueueSource responsible for observing the event
74+
* stream.
5675
*/
5776
public SyncStreamQueueSource(final FlagdOptions options) {
5877
streamDeadline = options.getStreamDeadlineMs();
@@ -61,12 +80,10 @@ public SyncStreamQueueSource(final FlagdOptions options) {
6180
providerId = options.getProviderId();
6281
maxBackoffMs = options.getRetryBackoffMaxMs();
6382
syncMetadataDisabled = options.isSyncMetadataDisabled();
64-
channelConnector = new ChannelConnector(options, ChannelBuilder.nettyChannel(options));
65-
flagSyncStub =
66-
FlagSyncServiceGrpc.newStub(channelConnector.getChannel()).withWaitForReady();
67-
metadataStub = FlagSyncServiceGrpc.newBlockingStub(channelConnector.getChannel())
68-
.withWaitForReady();
6983
fatalStatusCodes = options.getFatalStatusCodes();
84+
reinitializeOnError = options.isReinitializeOnError();
85+
this.options = options;
86+
initializeChannelComponents();
7087
}
7188

7289
// internal use only
@@ -79,12 +96,51 @@ protected SyncStreamQueueSource(
7996
deadline = options.getDeadline();
8097
selector = options.getSelector();
8198
providerId = options.getProviderId();
82-
channelConnector = connectorMock;
8399
maxBackoffMs = options.getRetryBackoffMaxMs();
84-
flagSyncStub = stubMock;
85100
syncMetadataDisabled = options.isSyncMetadataDisabled();
86-
metadataStub = blockingStubMock;
87101
fatalStatusCodes = options.getFatalStatusCodes();
102+
reinitializeOnError = options.isReinitializeOnError();
103+
this.options = options;
104+
this.grpcComponents = new GrpcComponents(connectorMock, stubMock, blockingStubMock);
105+
}
106+
107+
/** Initialize channel connector and stubs. */
108+
private synchronized void initializeChannelComponents() {
109+
ChannelConnector newConnector = new ChannelConnector(options, ChannelBuilder.nettyChannel(options));
110+
FlagSyncServiceStub newFlagSyncStub =
111+
FlagSyncServiceGrpc.newStub(newConnector.getChannel()).withWaitForReady();
112+
FlagSyncServiceBlockingStub newMetadataStub =
113+
FlagSyncServiceGrpc.newBlockingStub(newConnector.getChannel()).withWaitForReady();
114+
115+
// atomic assignment of all components as a single unit
116+
grpcComponents = new GrpcComponents(newConnector, newFlagSyncStub, newMetadataStub);
117+
}
118+
119+
/** Reinitialize channel connector and stubs on error. */
120+
public synchronized void reinitializeChannelComponents() {
121+
if (!reinitializeOnError || shutdown.get()) {
122+
return;
123+
}
124+
125+
log.info("Reinitializing channel gRPC components in attempt to restore stream.");
126+
GrpcComponents oldComponents = grpcComponents;
127+
128+
try {
129+
// create new channel components first
130+
initializeChannelComponents();
131+
} catch (Exception e) {
132+
log.error("Failed to reinitialize channel components", e);
133+
return;
134+
}
135+
136+
// shutdown old connector after successful reinitialization
137+
if (oldComponents != null && oldComponents.channelConnector != null) {
138+
try {
139+
oldComponents.channelConnector.shutdown();
140+
} catch (Exception e) {
141+
log.debug("Error shutting down old channel connector during reinitialization", e);
142+
}
143+
}
88144
}
89145

90146
/** Initialize sync stream connector. */
@@ -111,7 +167,7 @@ public void shutdown() throws InterruptedException {
111167
log.debug("Shutdown already in progress or completed");
112168
return;
113169
}
114-
this.channelConnector.shutdown();
170+
grpcComponents.channelConnector.shutdown();
115171
}
116172

117173
/** Contains blocking calls, to be used concurrently. */
@@ -175,13 +231,14 @@ private void observeSyncStream() {
175231
log.info("Shutdown invoked, exiting event stream listener");
176232
}
177233

178-
// TODO: remove the metadata call entirely after https://github.com/open-feature/flagd/issues/1584
234+
// TODO: remove the metadata call entirely after
235+
// https://github.com/open-feature/flagd/issues/1584
179236
private Struct getMetadata() {
180237
if (syncMetadataDisabled) {
181238
return null;
182239
}
183240

184-
FlagSyncServiceBlockingStub localStub = metadataStub;
241+
FlagSyncServiceBlockingStub localStub = grpcComponents.metadataStub;
185242

186243
if (deadline > 0) {
187244
localStub = localStub.withDeadlineAfter(deadline, TimeUnit.MILLISECONDS);
@@ -196,7 +253,8 @@ private Struct getMetadata() {
196253

197254
return null;
198255
} catch (StatusRuntimeException e) {
199-
// In newer versions of flagd, metadata is part of the sync stream. If the method is unimplemented, we
256+
// In newer versions of flagd, metadata is part of the sync stream. If the
257+
// method is unimplemented, we
200258
// can ignore the error
201259
if (e.getStatus() != null
202260
&& Status.Code.UNIMPLEMENTED.equals(e.getStatus().getCode())) {
@@ -208,7 +266,7 @@ private Struct getMetadata() {
208266
}
209267

210268
private void syncFlags(SyncStreamObserver streamObserver) {
211-
FlagSyncServiceStub localStub = flagSyncStub; // don't mutate the stub
269+
FlagSyncServiceStub localStub = grpcComponents.flagSyncStub; // don't mutate the stub
212270
if (streamDeadline > 0) {
213271
localStub = localStub.withDeadlineAfter(streamDeadline, TimeUnit.MILLISECONDS);
214272
}

providers/flagd/src/test/java/dev/openfeature/contrib/providers/flagd/FlagdProviderSyncResourcesTest.java

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -79,14 +79,14 @@ void interruptingWaitingThread_isIgnored() throws InterruptedException {
7979
@Test
8080
void callingInitialize_wakesUpWaitingThread() throws InterruptedException {
8181
final AtomicBoolean isWaiting = new AtomicBoolean();
82-
final AtomicBoolean successfulTest = new AtomicBoolean();
82+
final AtomicLong waitTime = new AtomicLong(Long.MAX_VALUE);
8383
Thread waitingThread = new Thread(() -> {
8484
long start = System.currentTimeMillis();
8585
isWaiting.set(true);
8686
flagdProviderSyncResources.waitForInitialization(10000);
8787
long end = System.currentTimeMillis();
8888
long duration = end - start;
89-
successfulTest.set(duration < MAX_TIME_TOLERANCE * 2);
89+
waitTime.set(duration);
9090
});
9191
waitingThread.start();
9292

@@ -100,7 +100,11 @@ void callingInitialize_wakesUpWaitingThread() throws InterruptedException {
100100

101101
waitingThread.join();
102102

103-
Assertions.assertTrue(successfulTest.get());
103+
Assertions.assertTrue(
104+
waitTime.get() < MAX_TIME_TOLERANCE * 2,
105+
() -> "Wakeup should be almost instant, but took " + waitTime.get()
106+
+ " ms, which is more than the max of"
107+
+ (MAX_TIME_TOLERANCE * 2) + " ms");
104108
}
105109

106110
@Timeout(2)
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
package dev.openfeature.contrib.providers.flagd;
2+
3+
import static org.assertj.core.api.Assertions.assertThat;
4+
import static org.junit.jupiter.api.Assertions.assertThrows;
5+
6+
import org.junit.jupiter.api.Test;
7+
8+
class FlagdThreadFactoryTest {
9+
10+
private static final String THREAD_NAME = "testthread";
11+
private final Runnable runnable = () -> {};
12+
13+
@Test
14+
void verifyThreadFactoryThrowsNullPointerExceptionWhenNamePrefixIsNull() {
15+
16+
// Then
17+
var exception = assertThrows(NullPointerException.class, () -> {
18+
// When
19+
new FlagdThreadFactory(null);
20+
});
21+
assertThat(exception.toString()).contains("namePrefix must not be null");
22+
}
23+
24+
@Test
25+
void verifyNewThreadHasNamePrefix() {
26+
27+
var flagdThreadFactory = new FlagdThreadFactory(THREAD_NAME);
28+
var thread = flagdThreadFactory.newThread(runnable);
29+
30+
assertThat(thread.getName()).isEqualTo(THREAD_NAME + "-1");
31+
assertThat(thread.isDaemon()).isTrue();
32+
}
33+
34+
@Test
35+
void verifyNewThreadHasNamePrefixWithIncrement() {
36+
37+
var flagdThreadFactory = new FlagdThreadFactory(THREAD_NAME);
38+
var threadOne = flagdThreadFactory.newThread(runnable);
39+
var threadTwo = flagdThreadFactory.newThread(runnable);
40+
41+
assertThat(threadOne.getName()).isEqualTo(THREAD_NAME + "-1");
42+
assertThat(threadOne.isDaemon()).isTrue();
43+
assertThat(threadTwo.getName()).isEqualTo(THREAD_NAME + "-2");
44+
assertThat(threadTwo.isDaemon()).isTrue();
45+
}
46+
}

providers/flagd/src/test/java/dev/openfeature/contrib/providers/flagd/e2e/State.java

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,14 @@
99
import dev.openfeature.sdk.FeatureProvider;
1010
import dev.openfeature.sdk.FlagEvaluationDetails;
1111
import dev.openfeature.sdk.MutableContext;
12-
import java.util.LinkedList;
13-
import java.util.List;
1412
import java.util.Optional;
13+
import java.util.concurrent.ConcurrentLinkedQueue;
1514

1615
public class State {
1716
public ProviderType providerType;
1817
public Client client;
1918
public FeatureProvider provider;
20-
public List<Event> events = new LinkedList<>();
19+
public ConcurrentLinkedQueue<Event> events = new ConcurrentLinkedQueue<>();
2120
public Optional<Event> lastEvent;
2221
public FlagSteps.Flag flag;
2322
public MutableContext context = new MutableContext();

0 commit comments

Comments
 (0)