Skip to content

Commit 486d057

Browse files
committed
feat: runtime info for health probes (#1594)
1 parent cc4c8cc commit 486d057

File tree

25 files changed

+486
-39
lines changed

25 files changed

+486
-39
lines changed

docs/documentation/features.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -699,6 +699,18 @@ leader left off should one of them become elected leader.
699699
See sample configuration in the [E2E test](https://github.com/java-operator-sdk/java-operator-sdk/blob/8865302ac0346ee31f2d7b348997ec2913d5922b/sample-operators/leader-election/src/main/java/io/javaoperatorsdk/operator/sample/LeaderElectionTestOperator.java#L21-L23)
700700
.
701701

702+
## Runtime Info
703+
704+
[RuntimeInfo](https://github.com/java-operator-sdk/java-operator-sdk/blob/main/operator-framework-core/src/main/java/io/javaoperatorsdk/operator/RuntimeInfo.java#L16-L16)
705+
is used mainly to check the actual health of event sources. Based on this information it is easy to implement custom
706+
liveness probes.
707+
708+
[stopOnInformerErrorDuringStartup](https://github.com/java-operator-sdk/java-operator-sdk/blob/main/operator-framework-core/src/main/java/io/javaoperatorsdk/operator/api/config/ConfigurationService.java#L168-L168)
709+
is a related setting: this flag usually needs to be set to false in order to control the exact liveness properties.
710+
711+
See also an example implementation in the
712+
[WebPage sample](https://github.com/java-operator-sdk/java-operator-sdk/blob/3e2e7c4c834ef1c409d636156b988125744ca911/sample-operators/webpage/src/main/java/io/javaoperatorsdk/operator/sample/WebPageOperator.java#L38-L43)
713+
702714
## Monitoring with Micrometer
703715

704716
## Automatic Generation of CRDs

operator-framework-core/src/main/java/io/javaoperatorsdk/operator/Operator.java

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,12 +80,11 @@ public KubernetesClient getKubernetesClient() {
8080
* where there is no obvious entrypoint to the application which can trigger the injection process
8181
* and start the cluster monitoring processes.
8282
*/
83-
public void start() {
83+
public synchronized void start() {
8484
try {
8585
if (started) {
8686
return;
8787
}
88-
started = true;
8988
controllerManager.shouldStart();
9089
final var version = ConfigurationServiceProvider.instance().getVersion();
9190
log.info(
@@ -101,6 +100,7 @@ public void start() {
101100
// the leader election would start subsequently the processor if on
102101
controllerManager.start(!leaderElectionManager.isLeaderElectionEnabled());
103102
leaderElectionManager.start();
103+
started = true;
104104
} catch (Exception e) {
105105
log.error("Error starting operator", e);
106106
stop();
@@ -208,4 +208,11 @@ public int getRegisteredControllersNumber() {
208208
return controllerManager.size();
209209
}
210210

211+
public RuntimeInfo getRuntimeInfo() {
212+
return new RuntimeInfo(this);
213+
}
214+
215+
boolean isStarted() {
216+
return started;
217+
}
211218
}

operator-framework-core/src/main/java/io/javaoperatorsdk/operator/RegisteredController.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,12 @@
33
import io.fabric8.kubernetes.api.model.HasMetadata;
44
import io.javaoperatorsdk.operator.api.config.ControllerConfiguration;
55
import io.javaoperatorsdk.operator.api.config.NamespaceChangeable;
6+
import io.javaoperatorsdk.operator.health.ControllerHealthInfo;
67

78
public interface RegisteredController<P extends HasMetadata> extends NamespaceChangeable {
9+
810
ControllerConfiguration<P> getConfiguration();
11+
12+
ControllerHealthInfo getControllerHealthInfo();
13+
914
}
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
package io.javaoperatorsdk.operator;
2+
3+
import java.util.*;
4+
5+
import org.slf4j.Logger;
6+
import org.slf4j.LoggerFactory;
7+
8+
import io.javaoperatorsdk.operator.health.EventSourceHealthIndicator;
9+
import io.javaoperatorsdk.operator.health.InformerWrappingEventSourceHealthIndicator;
10+
11+
/**
12+
* RuntimeInfo in general is available when the operator is fully started. You can use "isStarted" to
13+
* check that.
14+
*/
15+
@SuppressWarnings("rawtypes")
16+
public class RuntimeInfo {
17+
18+
private static final Logger log = LoggerFactory.getLogger(RuntimeInfo.class);
19+
20+
private final Set<RegisteredController> registeredControllers;
21+
private final Operator operator;
22+
23+
public RuntimeInfo(Operator operator) {
24+
this.registeredControllers = operator.getRegisteredControllers();
25+
this.operator = operator;
26+
}
27+
28+
public boolean isStarted() {
29+
return operator.isStarted();
30+
}
31+
32+
public Set<RegisteredController> getRegisteredControllers() {
33+
checkIfStarted();
34+
return registeredControllers;
35+
}
36+
37+
private void checkIfStarted() {
38+
if (!isStarted()) {
39+
log.warn(
40+
"Operator not started yet while accessing runtime info, this might lead to an unreliable behavior");
41+
}
42+
}
43+
44+
public boolean allEventSourcesAreHealthy() {
45+
checkIfStarted();
46+
return registeredControllers.stream()
47+
.filter(rc -> !rc.getControllerHealthInfo().unhealthyEventSources().isEmpty())
48+
.findFirst().isEmpty();
49+
}
50+
51+
/**
52+
* @return Aggregated Map with controller related event sources.
53+
*/
54+
55+
public Map<String, Map<String, EventSourceHealthIndicator>> unhealthyEventSources() {
56+
checkIfStarted();
57+
Map<String, Map<String, EventSourceHealthIndicator>> res = new HashMap<>();
58+
for (var rc : registeredControllers) {
59+
res.put(rc.getConfiguration().getName(),
60+
rc.getControllerHealthInfo().unhealthyEventSources());
61+
}
62+
return res;
63+
}
64+
65+
/**
66+
* @return Aggregated Map with controller related event sources that wrap an informer. Thus,
67+
* either a
68+
* {@link io.javaoperatorsdk.operator.processing.event.source.controller.ControllerResourceEventSource}
69+
* or an
70+
* {@link io.javaoperatorsdk.operator.processing.event.source.informer.InformerEventSource}.
71+
*/
72+
public Map<String, Map<String, InformerWrappingEventSourceHealthIndicator>> unhealthyInformerWrappingEventSourceHealthIndicator() {
73+
checkIfStarted();
74+
Map<String, Map<String, InformerWrappingEventSourceHealthIndicator>> res = new HashMap<>();
75+
for (var rc : registeredControllers) {
76+
res.put(rc.getConfiguration().getName(), rc.getControllerHealthInfo()
77+
.unhealthyInformerEventSourceHealthIndicators());
78+
}
79+
return res;
80+
}
81+
}
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
package io.javaoperatorsdk.operator.health;
2+
3+
import java.util.Map;
4+
import java.util.stream.Collectors;
5+
6+
import io.javaoperatorsdk.operator.processing.event.EventSourceManager;
7+
8+
@SuppressWarnings("rawtypes")
9+
public class ControllerHealthInfo {
10+
11+
private EventSourceManager<?> eventSourceManager;
12+
13+
public ControllerHealthInfo(EventSourceManager eventSourceManager) {
14+
this.eventSourceManager = eventSourceManager;
15+
}
16+
17+
public Map<String, EventSourceHealthIndicator> eventSourceHealthIndicators() {
18+
return eventSourceManager.allEventSources().entrySet().stream()
19+
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
20+
}
21+
22+
public Map<String, EventSourceHealthIndicator> unhealthyEventSources() {
23+
return eventSourceManager.allEventSources().entrySet().stream()
24+
.filter(e -> e.getValue().getStatus() == Status.UNHEALTHY)
25+
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
26+
}
27+
28+
public Map<String, InformerWrappingEventSourceHealthIndicator> informerEventSourceHealthIndicators() {
29+
return eventSourceManager.allEventSources().entrySet().stream()
30+
.filter(e -> e.getValue() instanceof InformerWrappingEventSourceHealthIndicator)
31+
.collect(Collectors.toMap(Map.Entry::getKey,
32+
e -> (InformerWrappingEventSourceHealthIndicator) e.getValue()));
33+
34+
}
35+
36+
/**
37+
* @return Map with event sources that wrap an informer. Thus, either a
38+
* {@link io.javaoperatorsdk.operator.processing.event.source.controller.ControllerResourceEventSource}
39+
* or an
40+
* {@link io.javaoperatorsdk.operator.processing.event.source.informer.InformerEventSource}.
41+
*/
42+
public Map<String, InformerWrappingEventSourceHealthIndicator> unhealthyInformerEventSourceHealthIndicators() {
43+
return eventSourceManager.allEventSources().entrySet().stream()
44+
.filter(e -> e.getValue().getStatus() == Status.UNHEALTHY)
45+
.filter(e -> e.getValue() instanceof InformerWrappingEventSourceHealthIndicator)
46+
.collect(Collectors.toMap(Map.Entry::getKey,
47+
e -> (InformerWrappingEventSourceHealthIndicator) e.getValue()));
48+
}
49+
50+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
package io.javaoperatorsdk.operator.health;
2+
3+
public interface EventSourceHealthIndicator {
4+
5+
Status getStatus();
6+
}
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
package io.javaoperatorsdk.operator.health;
2+
3+
public interface InformerHealthIndicator extends EventSourceHealthIndicator {
4+
5+
boolean hasSynced();
6+
7+
boolean isWatching();
8+
9+
boolean isRunning();
10+
11+
@Override
12+
default Status getStatus() {
13+
return isRunning() && hasSynced() && isWatching() ? Status.HEALTHY : Status.UNHEALTHY;
14+
}
15+
16+
String getTargetNamespace();
17+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
package io.javaoperatorsdk.operator.health;
2+
3+
import java.util.Map;
4+
5+
import io.fabric8.kubernetes.api.model.HasMetadata;
6+
import io.javaoperatorsdk.operator.api.config.ResourceConfiguration;
7+
8+
public interface InformerWrappingEventSourceHealthIndicator<R extends HasMetadata>
9+
extends EventSourceHealthIndicator {
10+
11+
Map<String, InformerHealthIndicator> informerHealthIndicators();
12+
13+
@Override
14+
default Status getStatus() {
15+
var nonUp = informerHealthIndicators().values().stream()
16+
.filter(i -> i.getStatus() != Status.HEALTHY).findAny();
17+
18+
return nonUp.isPresent() ? Status.UNHEALTHY : Status.HEALTHY;
19+
}
20+
21+
ResourceConfiguration<R> getInformerConfiguration();
22+
}
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
package io.javaoperatorsdk.operator.health;
2+
3+
public enum Status {
4+
5+
HEALTHY, UNHEALTHY,
6+
/**
7+
* For event sources where it cannot be determined if it is healthy or not.
8+
*/
9+
UNKNOWN
10+
11+
}

operator-framework-core/src/main/java/io/javaoperatorsdk/operator/processing/Controller.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
import io.javaoperatorsdk.operator.api.reconciler.dependent.EventSourceProvider;
4040
import io.javaoperatorsdk.operator.api.reconciler.dependent.EventSourceReferencer;
4141
import io.javaoperatorsdk.operator.api.reconciler.dependent.managed.DefaultManagedDependentResourceContext;
42+
import io.javaoperatorsdk.operator.health.ControllerHealthInfo;
4243
import io.javaoperatorsdk.operator.processing.dependent.workflow.ManagedWorkflow;
4344
import io.javaoperatorsdk.operator.processing.dependent.workflow.WorkflowCleanupResult;
4445
import io.javaoperatorsdk.operator.processing.event.EventProcessor;
@@ -67,6 +68,7 @@ public class Controller<P extends HasMetadata>
6768

6869
private final GroupVersionKind associatedGVK;
6970
private final EventProcessor<P> eventProcessor;
71+
private final ControllerHealthInfo controllerHealthInfo;
7072

7173
public Controller(Reconciler<P> reconciler,
7274
ControllerConfiguration<P> configuration,
@@ -86,6 +88,7 @@ public Controller(Reconciler<P> reconciler,
8688
eventSourceManager = new EventSourceManager<>(this);
8789
eventProcessor = new EventProcessor<>(eventSourceManager);
8890
eventSourceManager.postProcessDefaultEventSourcesAfterProcessorInitializer();
91+
controllerHealthInfo = new ControllerHealthInfo(eventSourceManager);
8992
}
9093

9194
@Override
@@ -285,6 +288,11 @@ public ControllerConfiguration<P> getConfiguration() {
285288
return configuration;
286289
}
287290

291+
@Override
292+
public ControllerHealthInfo getControllerHealthInfo() {
293+
return controllerHealthInfo;
294+
}
295+
288296
public KubernetesClient getClient() {
289297
return kubernetesClient;
290298
}

operator-framework-core/src/main/java/io/javaoperatorsdk/operator/processing/event/EventSourceManager.java

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,6 @@
11
package io.javaoperatorsdk.operator.processing.event;
22

3-
import java.util.LinkedHashSet;
4-
import java.util.List;
5-
import java.util.Objects;
6-
import java.util.Set;
3+
import java.util.*;
74
import java.util.stream.Collectors;
85

96
import org.slf4j.Logger;
@@ -174,6 +171,11 @@ public Set<EventSource> getRegisteredEventSources() {
174171
.collect(Collectors.toCollection(LinkedHashSet::new));
175172
}
176173

174+
public Map<String, EventSource> allEventSources() {
175+
return eventSources.allNamedEventSources().collect(Collectors.toMap(NamedEventSource::name,
176+
NamedEventSource::original));
177+
}
178+
177179
public ControllerResourceEventSource<P> getControllerResourceEventSource() {
178180
return eventSources.controllerResourceEventSource();
179181
}

operator-framework-core/src/main/java/io/javaoperatorsdk/operator/processing/event/EventSources.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,13 @@ public Stream<NamedEventSource> additionalNamedEventSources() {
4646
flatMappedSources());
4747
}
4848

49+
public Stream<NamedEventSource> allNamedEventSources() {
50+
return Stream.concat(Stream.of(namedControllerResourceEventSource(),
51+
new NamedEventSource(retryAndRescheduleTimerEventSource,
52+
RETRY_RESCHEDULE_TIMER_EVENT_SOURCE_NAME)),
53+
flatMappedSources());
54+
}
55+
4956
Stream<EventSource> additionalEventSources() {
5057
return Stream.concat(
5158
Stream.of(retryEventSource()).filter(Objects::nonNull),

operator-framework-core/src/main/java/io/javaoperatorsdk/operator/processing/event/source/EventSource.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
package io.javaoperatorsdk.operator.processing.event.source;
22

3+
import io.javaoperatorsdk.operator.health.EventSourceHealthIndicator;
4+
import io.javaoperatorsdk.operator.health.Status;
35
import io.javaoperatorsdk.operator.processing.LifecycleAware;
46
import io.javaoperatorsdk.operator.processing.event.EventHandler;
57

@@ -10,7 +12,7 @@
1012
* your reconciler implement
1113
* {@link io.javaoperatorsdk.operator.api.reconciler.EventSourceInitializer}.
1214
*/
13-
public interface EventSource extends LifecycleAware {
15+
public interface EventSource extends LifecycleAware, EventSourceHealthIndicator {
1416

1517
/**
1618
* Sets the {@link EventHandler} that is linked to your reconciler when this EventSource is
@@ -23,4 +25,9 @@ public interface EventSource extends LifecycleAware {
2325
default EventSourceStartPriority priority() {
2426
return EventSourceStartPriority.DEFAULT;
2527
}
28+
29+
@Override
30+
default Status getStatus() {
31+
return Status.UNKNOWN;
32+
}
2633
}

0 commit comments

Comments
 (0)