Skip to content

Commit 0571f37

Browse files
committed
feat: runtime info for health probes (#1594)
1 parent e182cd9 commit 0571f37

File tree

25 files changed

+485
-34
lines changed

25 files changed

+485
-34
lines changed

docs/documentation/features.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -699,6 +699,18 @@ leader left off should one of them become elected leader.
699699
See sample configuration in the [E2E test](https://github.com/java-operator-sdk/java-operator-sdk/blob/8865302ac0346ee31f2d7b348997ec2913d5922b/sample-operators/leader-election/src/main/java/io/javaoperatorsdk/operator/sample/LeaderElectionTestOperator.java#L21-L23)
700700
.
701701

702+
## Runtime Info
703+
704+
[RuntimeInfo](https://github.com/java-operator-sdk/java-operator-sdk/blob/main/operator-framework-core/src/main/java/io/javaoperatorsdk/operator/RuntimeInfo.java#L16-L16)
705+
is used mainly to check the actual health of event sources. Based on this information it is easy to implement custom
706+
liveness probes.
707+
708+
See also the [stopOnInformerErrorDuringStartup](https://github.com/java-operator-sdk/java-operator-sdk/blob/main/operator-framework-core/src/main/java/io/javaoperatorsdk/operator/api/config/ConfigurationService.java#L168-L168)
709+
setting; this flag usually needs to be set to false in order to control the exact liveness properties.
710+
711+
See also an example implementation in the
712+
[WebPage sample](https://github.com/java-operator-sdk/java-operator-sdk/blob/3e2e7c4c834ef1c409d636156b988125744ca911/sample-operators/webpage/src/main/java/io/javaoperatorsdk/operator/sample/WebPageOperator.java#L38-L43)
713+
702714
## Monitoring with Micrometer
703715

704716
## Automatic Generation of CRDs

operator-framework-core/src/main/java/io/javaoperatorsdk/operator/Operator.java

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,12 +90,11 @@ public KubernetesClient getKubernetesClient() {
9090
* where there is no obvious entrypoint to the application which can trigger the injection process
9191
* and start the cluster monitoring processes.
9292
*/
93-
public void start() {
93+
public synchronized void start() {
9494
try {
9595
if (started) {
9696
return;
9797
}
98-
started = true;
9998
controllerManager.shouldStart();
10099
final var version = ConfigurationServiceProvider.instance().getVersion();
101100
log.info(
@@ -111,6 +110,7 @@ public void start() {
111110
// the leader election would start subsequently the processor if on
112111
controllerManager.start(!leaderElectionManager.isLeaderElectionEnabled());
113112
leaderElectionManager.start();
113+
started = true;
114114
} catch (Exception e) {
115115
log.error("Error starting operator", e);
116116
stop();
@@ -217,4 +217,11 @@ public int getRegisteredControllersNumber() {
217217
return controllerManager.size();
218218
}
219219

220+
public RuntimeInfo getRuntimeInfo() {
221+
return new RuntimeInfo(this);
222+
}
223+
224+
boolean isStarted() {
225+
return started;
226+
}
220227
}

operator-framework-core/src/main/java/io/javaoperatorsdk/operator/RegisteredController.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,12 @@
33
import io.fabric8.kubernetes.api.model.HasMetadata;
44
import io.javaoperatorsdk.operator.api.config.ControllerConfiguration;
55
import io.javaoperatorsdk.operator.api.config.NamespaceChangeable;
6+
import io.javaoperatorsdk.operator.health.ControllerHealthInfo;
67

78
public interface RegisteredController<P extends HasMetadata> extends NamespaceChangeable {
9+
810
ControllerConfiguration<P> getConfiguration();
11+
12+
ControllerHealthInfo getControllerHealthInfo();
13+
914
}
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
package io.javaoperatorsdk.operator;
2+
3+
import java.util.*;
4+
5+
import org.slf4j.Logger;
6+
import org.slf4j.LoggerFactory;
7+
8+
import io.javaoperatorsdk.operator.health.EventSourceHealthIndicator;
9+
import io.javaoperatorsdk.operator.health.InformerWrappingEventSourceHealthIndicator;
10+
11+
/**
12+
* RuntimeInfo in general is available when operator is fully started. You can use "isStarted" to
13+
* check that.
14+
*/
15+
@SuppressWarnings("rawtypes")
16+
public class RuntimeInfo {
17+
18+
private static final Logger log = LoggerFactory.getLogger(RuntimeInfo.class);
19+
20+
private final Set<RegisteredController> registeredControllers;
21+
private final Operator operator;
22+
23+
public RuntimeInfo(Operator operator) {
24+
this.registeredControllers = operator.getRegisteredControllers();
25+
this.operator = operator;
26+
}
27+
28+
public boolean isStarted() {
29+
return operator.isStarted();
30+
}
31+
32+
public Set<RegisteredController> getRegisteredControllers() {
33+
checkIfStarted();
34+
return registeredControllers;
35+
}
36+
37+
private void checkIfStarted() {
38+
if (!isStarted()) {
39+
log.warn(
40+
"Operator not started yet while accessing runtime info, this might lead to an unreliable behavior");
41+
}
42+
}
43+
44+
public boolean allEventSourcesAreHealthy() {
45+
checkIfStarted();
46+
return registeredControllers.stream()
47+
.filter(rc -> !rc.getControllerHealthInfo().unhealthyEventSources().isEmpty())
48+
.findFirst().isEmpty();
49+
}
50+
51+
/**
52+
* @return Aggregated Map with controller related event sources.
53+
*/
54+
55+
public Map<String, Map<String, EventSourceHealthIndicator>> unhealthyEventSources() {
56+
checkIfStarted();
57+
Map<String, Map<String, EventSourceHealthIndicator>> res = new HashMap<>();
58+
for (var rc : registeredControllers) {
59+
res.put(rc.getConfiguration().getName(),
60+
rc.getControllerHealthInfo().unhealthyEventSources());
61+
}
62+
return res;
63+
}
64+
65+
/**
66+
* @return Aggregated Map with controller related event sources that wraps an informer. Thus,
67+
* either a
68+
* {@link io.javaoperatorsdk.operator.processing.event.source.controller.ControllerResourceEventSource}
69+
* or an
70+
* {@link io.javaoperatorsdk.operator.processing.event.source.informer.InformerEventSource}.
71+
*/
72+
public Map<String, Map<String, InformerWrappingEventSourceHealthIndicator>> unhealthyInformerWrappingEventSourceHealthIndicator() {
73+
checkIfStarted();
74+
Map<String, Map<String, InformerWrappingEventSourceHealthIndicator>> res = new HashMap<>();
75+
for (var rc : registeredControllers) {
76+
res.put(rc.getConfiguration().getName(), rc.getControllerHealthInfo()
77+
.unhealthyInformerEventSourceHealthIndicators());
78+
}
79+
return res;
80+
}
81+
}
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
package io.javaoperatorsdk.operator.health;
2+
3+
import java.util.Map;
4+
import java.util.stream.Collectors;
5+
6+
import io.javaoperatorsdk.operator.processing.event.EventSourceManager;
7+
8+
@SuppressWarnings("rawtypes")
9+
public class ControllerHealthInfo {
10+
11+
private EventSourceManager<?> eventSourceManager;
12+
13+
public ControllerHealthInfo(EventSourceManager eventSourceManager) {
14+
this.eventSourceManager = eventSourceManager;
15+
}
16+
17+
public Map<String, EventSourceHealthIndicator> eventSourceHealthIndicators() {
18+
return eventSourceManager.allEventSources().entrySet().stream()
19+
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
20+
}
21+
22+
public Map<String, EventSourceHealthIndicator> unhealthyEventSources() {
23+
return eventSourceManager.allEventSources().entrySet().stream()
24+
.filter(e -> e.getValue().getStatus() == Status.UNHEALTHY)
25+
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
26+
}
27+
28+
public Map<String, InformerWrappingEventSourceHealthIndicator> informerEventSourceHealthIndicators() {
29+
return eventSourceManager.allEventSources().entrySet().stream()
30+
.filter(e -> e.getValue() instanceof InformerWrappingEventSourceHealthIndicator)
31+
.collect(Collectors.toMap(Map.Entry::getKey,
32+
e -> (InformerWrappingEventSourceHealthIndicator) e.getValue()));
33+
34+
}
35+
36+
/**
37+
* @return Map with event sources that wraps an informer. Thus, either a
38+
* {@link io.javaoperatorsdk.operator.processing.event.source.controller.ControllerResourceEventSource}
39+
* or an
40+
* {@link io.javaoperatorsdk.operator.processing.event.source.informer.InformerEventSource}.
41+
*/
42+
public Map<String, InformerWrappingEventSourceHealthIndicator> unhealthyInformerEventSourceHealthIndicators() {
43+
return eventSourceManager.allEventSources().entrySet().stream()
44+
.filter(e -> e.getValue().getStatus() == Status.UNHEALTHY)
45+
.filter(e -> e.getValue() instanceof InformerWrappingEventSourceHealthIndicator)
46+
.collect(Collectors.toMap(Map.Entry::getKey,
47+
e -> (InformerWrappingEventSourceHealthIndicator) e.getValue()));
48+
}
49+
50+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
package io.javaoperatorsdk.operator.health;
2+
3+
public interface EventSourceHealthIndicator {
4+
5+
Status getStatus();
6+
}
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
package io.javaoperatorsdk.operator.health;
2+
3+
public interface InformerHealthIndicator extends EventSourceHealthIndicator {
4+
5+
boolean hasSynced();
6+
7+
boolean isWatching();
8+
9+
boolean isRunning();
10+
11+
@Override
12+
default Status getStatus() {
13+
return isRunning() && hasSynced() && isWatching() ? Status.HEALTHY : Status.UNHEALTHY;
14+
}
15+
16+
String getTargetNamespace();
17+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
package io.javaoperatorsdk.operator.health;
2+
3+
import java.util.Map;
4+
5+
import io.fabric8.kubernetes.api.model.HasMetadata;
6+
import io.javaoperatorsdk.operator.api.config.ResourceConfiguration;
7+
8+
public interface InformerWrappingEventSourceHealthIndicator<R extends HasMetadata>
9+
extends EventSourceHealthIndicator {
10+
11+
Map<String, InformerHealthIndicator> informerHealthIndicators();
12+
13+
@Override
14+
default Status getStatus() {
15+
var nonUp = informerHealthIndicators().values().stream()
16+
.filter(i -> i.getStatus() != Status.HEALTHY).findAny();
17+
18+
return nonUp.isPresent() ? Status.UNHEALTHY : Status.HEALTHY;
19+
}
20+
21+
ResourceConfiguration<R> getInformerConfiguration();
22+
}
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
package io.javaoperatorsdk.operator.health;
2+
3+
/** Health status reported by an {@link EventSourceHealthIndicator}. */
public enum Status {

  HEALTHY,

  UNHEALTHY,

  /**
   * For event sources where it cannot be determined whether they are healthy or not.
   */
  UNKNOWN

}

operator-framework-core/src/main/java/io/javaoperatorsdk/operator/processing/Controller.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
import io.javaoperatorsdk.operator.api.reconciler.dependent.EventSourceProvider;
4040
import io.javaoperatorsdk.operator.api.reconciler.dependent.EventSourceReferencer;
4141
import io.javaoperatorsdk.operator.api.reconciler.dependent.managed.DefaultManagedDependentResourceContext;
42+
import io.javaoperatorsdk.operator.health.ControllerHealthInfo;
4243
import io.javaoperatorsdk.operator.processing.dependent.workflow.ManagedWorkflow;
4344
import io.javaoperatorsdk.operator.processing.dependent.workflow.WorkflowCleanupResult;
4445
import io.javaoperatorsdk.operator.processing.event.EventProcessor;
@@ -67,6 +68,7 @@ public class Controller<P extends HasMetadata>
6768

6869
private final GroupVersionKind associatedGVK;
6970
private final EventProcessor<P> eventProcessor;
71+
private final ControllerHealthInfo controllerHealthInfo;
7072

7173
public Controller(Reconciler<P> reconciler,
7274
ControllerConfiguration<P> configuration,
@@ -87,6 +89,7 @@ public Controller(Reconciler<P> reconciler,
8789
eventSourceManager = new EventSourceManager<>(this);
8890
eventProcessor = new EventProcessor<>(eventSourceManager);
8991
eventSourceManager.postProcessDefaultEventSourcesAfterProcessorInitializer();
92+
controllerHealthInfo = new ControllerHealthInfo(eventSourceManager);
9093

9194
final var context = new EventSourceContext<>(
9295
eventSourceManager.getControllerResourceEventSource(), configuration, kubernetesClient);
@@ -289,6 +292,11 @@ public ControllerConfiguration<P> getConfiguration() {
289292
return configuration;
290293
}
291294

295+
@Override
296+
public ControllerHealthInfo getControllerHealthInfo() {
297+
return controllerHealthInfo;
298+
}
299+
292300
public KubernetesClient getClient() {
293301
return kubernetesClient;
294302
}

operator-framework-core/src/main/java/io/javaoperatorsdk/operator/processing/event/EventSourceManager.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package io.javaoperatorsdk.operator.processing.event;
22

3+
import java.util.*;
34
import java.util.LinkedHashSet;
45
import java.util.List;
56
import java.util.Objects;
@@ -198,6 +199,11 @@ public Set<EventSource> getRegisteredEventSources() {
198199
.collect(Collectors.toCollection(LinkedHashSet::new));
199200
}
200201

202+
public Map<String, EventSource> allEventSources() {
203+
return eventSources.allNamedEventSources().collect(Collectors.toMap(NamedEventSource::name,
204+
NamedEventSource::original));
205+
}
206+
201207
@SuppressWarnings("unused")
202208
public Stream<? extends EventSourceMetadata> getNamedEventSourcesStream() {
203209
return eventSources.flatMappedSources();

operator-framework-core/src/main/java/io/javaoperatorsdk/operator/processing/event/EventSources.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,13 @@ public Stream<NamedEventSource> additionalNamedEventSources() {
4949
flatMappedSources());
5050
}
5151

52+
public Stream<NamedEventSource> allNamedEventSources() {
53+
return Stream.concat(Stream.of(namedControllerResourceEventSource(),
54+
new NamedEventSource(retryAndRescheduleTimerEventSource,
55+
RETRY_RESCHEDULE_TIMER_EVENT_SOURCE_NAME)),
56+
flatMappedSources());
57+
}
58+
5259
Stream<EventSource> additionalEventSources() {
5360
return Stream.concat(
5461
Stream.of(retryEventSource()).filter(Objects::nonNull),

operator-framework-core/src/main/java/io/javaoperatorsdk/operator/processing/event/source/EventSource.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
package io.javaoperatorsdk.operator.processing.event.source;
22

3+
import io.javaoperatorsdk.operator.health.EventSourceHealthIndicator;
4+
import io.javaoperatorsdk.operator.health.Status;
35
import io.javaoperatorsdk.operator.processing.LifecycleAware;
46
import io.javaoperatorsdk.operator.processing.event.EventHandler;
57

@@ -10,7 +12,7 @@
1012
* your reconciler implement
1113
* {@link io.javaoperatorsdk.operator.api.reconciler.EventSourceInitializer}.
1214
*/
13-
public interface EventSource extends LifecycleAware {
15+
public interface EventSource extends LifecycleAware, EventSourceHealthIndicator {
1416

1517
/**
1618
* Sets the {@link EventHandler} that is linked to your reconciler when this EventSource is
@@ -23,4 +25,9 @@ public interface EventSource extends LifecycleAware {
2325
default EventSourceStartPriority priority() {
2426
return EventSourceStartPriority.DEFAULT;
2527
}
28+
29+
@Override
30+
default Status getStatus() {
31+
return Status.UNKNOWN;
32+
}
2633
}

0 commit comments

Comments
 (0)